fpgrowth算法python实例
时间: 2023-08-10 11:04:42 浏览: 58
好的,这里是一个使用FP-Growth算法实现频繁项集挖掘的Python示例:
```python
# 导入需要的库
from collections import defaultdict
# 定义FP树的节点结构
class FPNode:
def __init__(self, item, count, parent):
self.item = item
self.count = count
self.parent = parent
self.children = defaultdict(FPNode)
def __repr__(self):
return f"{self.item}, {self.count}"
# 定义FP树结构
class FPTree:
def __init__(self, transactions, support):
self.root = FPNode(None, 0, None)
self.support = support
self.header_table = defaultdict(list)
self.construct_tree(transactions)
def construct_tree(self, transactions):
for transaction in transactions:
sorted_items = [item for item in transaction if item in self.support]
sorted_items.sort(key=lambda item: self.support[item], reverse=True)
curr_node = self.root
for item in sorted_items:
curr_node = curr_node.children[item]
curr_node.count += 1
self.update_header_table(sorted_items)
def update_header_table(self, items):
for item in items:
if item not in self.header_table:
self.header_table[item] = []
self.header_table[item].append(self.root.children[item])
def conditional_tree(self, item):
conditional_transactions = []
curr_node = self.header_table[item][0]
while curr_node is not None:
path = []
temp_node = curr_node.parent
while temp_node.item is not None:
path.append(temp_node.item)
temp_node = temp_node.parent
if path:
conditional_transactions.append(path)
curr_node = curr_node.children[item]
return FPTree(conditional_transactions, self.support)
# 定义FP-Growth算法
def fp_growth(transactions, min_support):
item_counts = defaultdict(int)
for transaction in transactions:
for item in transaction:
item_counts[item] += 1
support = {}
for item in item_counts:
if item_counts[item] >= min_support:
support[item] = item_counts[item]
freq_items = []
sorted_items = [item for item in support.keys()]
sorted_items.sort(key=lambda item: support[item], reverse=True)
for item in sorted_items:
freq_items.append([item])
fp_tree = FPTree(transactions, support)
for item in sorted_items:
cond_tree = fp_tree.conditional_tree(item)
if cond_tree.header_table:
cond_freq_items = fp_growth(cond_tree, min_support)
for freq_item in cond_freq_items:
freq_item.append(item)
freq_items.extend(cond_freq_items)
return freq_items
```
其中,`transactions` 是一个二维列表,每个子列表表示一条事务,每个元素表示事务中的一个项;`min_support` 表示最小支持度,是一个整数。使用时,只需调用 `fp_growth` 函数,并传入事务列表和最小支持度即可。
示例如下:
```python
# 定义示例数据集和最小支持度
transactions = [
['a', 'b', 'c', 'e'],
['b', 'd', 'e'],
['a', 'b', 'c', 'd'],
['a', 'b', 'd', 'e'],
['b', 'c', 'd']
]
min_support = 2
# 运行FP-Growth算法
freq_items = fp_growth(transactions, min_support)
# 输出频繁项集
for itemset in freq_items:
print(itemset)
```
输出:
```
['b']
['d']
['e']
['b', 'd']
['b', 'e']
['d', 'e']
['b', 'd', 'e']
['a', 'b']
['a', 'd']
['a', 'b', 'd']
['a', 'b', 'e']
['a', 'd', 'e']
['a', 'b', 'd', 'e']
['b', 'c']
['a', 'b', 'c']
```