用 Python 实现挖掘所有后缀为“啤酒”的频繁模式：a. 寻找所有后缀为“啤酒”的路径；b. 挖掘后缀为“啤酒”的条件模式库；c. 去掉“啤酒”条件模式库中的非频繁项；d. 构造“啤酒”条件 FP 树。
时间: 2023-05-31 10:03:54 浏览: 161
以下是Python实现挖掘所有后缀为“啤酒”的频繁模式的示例代码:
# 导入相应的库
from collections import Counter

from fp_growth import find_frequent_itemsets
# 定义函数寻找所有后缀为“啤酒”的路径
def find_beer_suffix_paths(tree, suffix, paths):
    """Collect the prefix path of every node whose item equals ``suffix``.

    The subtree rooted at ``tree`` is walked depth-first (node before its
    children) and ``tree.prefix_path()`` is appended to ``paths`` for each
    matching node.

    Args:
        tree: Node to start from. It must expose ``item``, ``children`` and
            ``prefix_path()``.
        suffix: Item to look for.
        paths: List extended in place with one entry per matching node.

    Returns:
        None. Results are delivered through ``paths``.
    """
    if tree.item == suffix:
        # NOTE(review): prefix_path() belongs to the Node class, which is not
        # shown in this file; callers treat its result as an iterable of
        # items -- confirm against the Node implementation.
        paths.append(tree.prefix_path())

    children = tree.children
    # The tree builder in this file stores children in a dict keyed by item;
    # iterating that dict directly would yield item keys rather than nodes.
    if isinstance(children, dict):
        children = children.values()
    for child in children:
        find_beer_suffix_paths(child, suffix, paths)
# 定义函数挖掘后缀为“啤酒”的条件模式库
def find_beer_suffix_conditional_patterns(tree, suffix, patterns):
    """Append the conditional pattern base of ``suffix`` to ``patterns``.

    Every path reported by ``find_beer_suffix_paths`` is copied with the
    ``suffix`` item removed; paths that become empty are skipped.

    Args:
        tree: Root node of the tree to search.
        suffix: Item whose conditional patterns are wanted.
        patterns: List extended in place with one item list per non-empty
            path.

    Returns:
        None. Results are delivered through ``patterns``.
    """
    suffix_paths = []
    find_beer_suffix_paths(tree, suffix, suffix_paths)

    for route in suffix_paths:
        prefix = [entry for entry in route if entry != suffix]
        if not prefix:
            # Nothing precedes the suffix on this path.
            continue
        patterns.append(prefix)
# 定义函数去掉“啤酒”条件模式库中的非频繁项
def remove_infrequent_items(patterns, min_support):
    """Remove items whose support is below ``min_support`` from each pattern.

    An item's support is the number of patterns that contain it. A pattern
    that lists the same item more than once still contributes only one to
    that item's support.

    Args:
        patterns: Iterable of item sequences. Items must be hashable.
        min_support: Minimum number of patterns an item must appear in to be
            kept.

    Returns:
        A new list of lists holding the surviving items of each pattern in
        their original order. Patterns left with no items are omitted.
    """
    # Materialise once so a one-shot iterable survives the two passes below.
    patterns = list(patterns)

    # set(pattern) makes each pattern vote at most once per item.
    support = Counter(item for pattern in patterns for item in set(pattern))
    frequent_items = {
        item for item, count in support.items() if count >= min_support
    }

    filtered_patterns = []
    for pattern in patterns:
        kept = [item for item in pattern if item in frequent_items]
        if kept:
            filtered_patterns.append(kept)
    return filtered_patterns
# 定义函数构造“啤酒”条件FP树
def construct_beer_suffix_fp_tree(transactions, suffix):
    """Build an FP-tree from ``transactions`` with ``suffix`` left out.

    Each transaction is stripped of ``suffix`` and of repeated items, ordered
    from the most to the least frequent item, and inserted as a path below
    the root. Items with equal frequency keep the order in which they were
    first seen, so the resulting tree is deterministic.

    Args:
        transactions: Iterable of item sequences. Items must be hashable.
        suffix: Item to leave out of every transaction. Pass a value that
            never occurs as an item to keep all items.

    Returns:
        A ``(root, headers)`` tuple. ``root`` is the item-less root node and
        ``headers`` maps each item to the list of nodes created for it, in
        creation order.
    """
    # First pass: strip the suffix and repeated items, then count in how many
    # transactions each remaining item occurs. dict.fromkeys de-duplicates
    # while preserving order.
    stripped = [
        list(dict.fromkeys(item for item in transaction if item != suffix))
        for transaction in transactions
    ]
    frequency = Counter(item for items in stripped for item in items)
    # most_common() is a stable sort on the count, so ties keep first-seen
    # order; ranking by position avoids comparing the items themselves.
    rank = {
        item: position
        for position, (item, _) in enumerate(frequency.most_common())
    }

    headers = {}
    # NOTE(review): Node is defined outside this block. It is assumed to
    # accept (item, parent), to expose a dict-like ``children`` and to start
    # ``count`` at 0 -- confirm against the Node class.
    root = Node(None, None)

    # Second pass: insert every ordered transaction as a path from the root.
    for items in stripped:
        items.sort(key=rank.__getitem__)
        node = root
        for item in items:
            child = node.children.get(item)
            if child is None:
                child = Node(item, node)
                node.children[item] = child
                headers.setdefault(item, []).append(child)
            node = child
            node.count += 1
    return root, headers
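# The Node class definition is omitted here. The functions above require it to
# be constructible as Node(item, parent) and to provide `item`, `children`
# (a dict keyed by item), `count` (starting at 0) and `prefix_path()`.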
# 示例数据
transactions = [
    ['啤酒', '薯片'],
    ['啤酒', '尿布'],
    ['啤酒', '尿布', '可乐'],
    ['薯片', '可乐'],
    ['啤酒', '薯片', '可乐'],
]
min_support = 2
suffix_item = '啤酒'

# Build the FP-tree over the complete transactions first. None never occurs
# as an item in the sample data, so passing it as the item to leave out keeps
# every item, including the suffix item that the next step searches for.
full_root, _ = construct_beer_suffix_fp_tree(transactions, None)

# a/b. Find every path ending in the suffix item and turn those paths into
# its conditional pattern base.
# NOTE(review): the base can be empty when the suffix item only occurs
# directly below the root, because nothing precedes it on those paths.
patterns = []
find_beer_suffix_conditional_patterns(full_root, suffix_item, patterns)

# c. Drop the items of the conditional pattern base that are not frequent.
patterns = remove_infrequent_items(patterns, min_support)

# d. Build the conditional FP-tree of the suffix item from the filtered base.
root, headers = construct_beer_suffix_fp_tree(patterns, suffix_item)

# Mine the frequent itemsets of the conditional pattern base.
# NOTE(review): assumes fp_growth is the python-fp-growth package, where
# include_support=True makes the generator yield (itemset, support) pairs;
# confirm against the installed library.
frequent_itemsets = find_frequent_itemsets(
    patterns, min_support, include_support=True
)

# Print each frequent itemset with its support.
for itemset, support in frequent_itemsets:
    print(itemset, support)
阅读全文