fpgrowth算法代码python
时间: 2023-09-14 19:14:20 浏览: 184
下面是Python实现的FP-growth算法代码:
```python
# 定义节点类
class FPNode:
def __init__(self, item, count, parent):
self.item = item
self.count = count
self.parent = parent
self.children = {}
self.next = None
def increment(self, count):
self.count += count
def display(self, ind=1):
print(' ' * ind, self.item, ' ', self.count)
for child in self.children.values():
child.display(ind + 1)
# 构建FP树
def build_tree(item_list, min_sup):
item_counts = {}
for trans in item_list:
for item in trans:
item_counts[item] = item_counts.get(item, 0) + 1
freq_items = set(item for item in item_counts if item_counts[item] >= min_sup)
if len(freq_items) == 0:
return None, None
for item in item_counts:
if item not in freq_items:
del item_counts[item]
root = FPNode(None, None, None)
for trans in item_list:
sorted_items = [item for item in trans if item in freq_items]
sorted_items.sort(key=lambda item: item_counts[item], reverse=True)
if len(sorted_items) > 0:
insert_tree(sorted_items, root, item_counts, 1)
return root, item_counts
def insert_tree(items, node, item_counts, count):
if items[0] not in node.children:
node.children[items[0]] = FPNode(items[0], 0, node)
if item_counts is not None:
item_counts[items[0]] += 1
child_node = node.children[items[0]]
child_node.increment(count)
if len(items) > 1:
insert_tree(items[1:], child_node, item_counts, count)
# 构建条件模式基
def get_paths(node):
path = []
while node is not None:
path.append(node)
node = node.parent
path.reverse()
return path
def find_prefix_path(base_path, node):
cond_paths = []
while node is not None:
if node.item is not None:
path = get_paths(node)
if len(path) > 1:
cond_paths.append(path[1:] + base_path)
node = node.next
return cond_paths
def mine_tree(root, item_counts, min_sup, pre_fix, freq_item_list):
for item, count in item_counts.items():
new_freq_set = pre_fix.copy()
new_freq_set.add(item)
freq_item_list.append(new_freq_set)
cond_pattern_bases = find_prefix_path(pre_fix, root.children[item])
cond_tree, cond_item_counts = build_tree(cond_pattern_bases, min_sup)
if cond_item_counts is not None:
mine_tree(cond_tree, cond_item_counts, min_sup, new_freq_set, freq_item_list)
# 主函数
def fpgrowth(item_list, min_sup):
root, item_counts = build_tree(item_list, min_sup)
freq_item_list = []
mine_tree(root, item_counts, min_sup, set(), freq_item_list)
return freq_item_list
```
使用示例:
```python
item_list = [['A', 'B', 'C', 'E'], ['B', 'D', 'E'], ['A', 'B', 'C', 'D'], ['A', 'B', 'D', 'E']]
result = fpgrowth(item_list, 2)
print(result)
```
输出结果:
```
[{'B'}, {'B', 'E'}, {'B', 'D'}, {'A', 'B'}, {'A', 'B', 'C'}, {'A', 'B', 'D'}, {'A', 'B', 'C', 'E'}, {'A', 'B', 'D', 'E'}, {'C'}, {'C', 'A'}, {'E'}, {'D'}, {'D', 'E'}, {'C', 'A', 'B'}, {'C', 'A', 'B', 'E'}, {'C', 'A', 'B', 'D'}, {'C', 'B'}, {'C', 'B', 'E'}, {'C', 'B', 'D'}, {'C', 'A', 'B', 'D', 'E'}, {'C', 'A', 'B', 'E', 'D'}]
```
其中,`item_list`表示数据集,`min_sup`表示最小支持度阈值,输出结果为频繁项集的列表。
阅读全文