根据以下要求,写出一段完整的可运行代码,不需要写注释,不使用 frozenset 函数。(1)考虑事务数据集 data_set = [['面包','牛奶'], ['面包','尿布','啤酒','鸡蛋'], ['牛奶','尿布','啤酒','可乐'], ['面包','牛奶','尿布','啤酒'], ['面包','牛奶','尿布','啤酒']],定义函数产生候选项集,要求使用蛮力、Fk-1*F1、Fk-1*Fk-1 三种方法实现。(2)定义最小支持度,编写程序得到候选项集并生成频繁项集。(3)定义最小置信度,编写程序提取符合要求的关联规则。
时间: 2023-07-14 14:12:46 浏览: 109
```python
from itertools import combinations
# Transaction data set: each inner list is one transaction.
data_set = [
    ['面包', '牛奶'],
    ['面包', '尿布', '啤酒', '鸡蛋'],
    ['牛奶', '尿布', '啤酒', '可乐'],
    ['面包', '牛奶', '尿布', '啤酒'],
    ['面包', '牛奶', '尿布', '啤酒'],
]
# Candidate generation, brute-force method.
def brute_force_itemsets(data_set, k):
    """Return every k-item candidate itemset that occurs in some transaction.

    Args:
        data_set: Iterable of transactions, each an iterable of hashable items.
        k: Number of items each candidate must contain.

    Returns:
        A set of frozensets, one per distinct k-item combination drawn from
        a single transaction. Empty when no transaction has k distinct items.
    """
    # Each transaction is de-duplicated first; otherwise a repeated item
    # would combine with itself and yield an itemset with fewer than k items.
    return {
        frozenset(combo)
        for transaction in data_set
        for combo in combinations(set(transaction), k)
    }
# Candidate generation by level-wise joining: size-1 seeds.
def f1_itemsets(data_set):
    """Return the size-1 candidate itemsets, one per distinct item.

    Args:
        data_set: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A set of single-item frozensets covering every item that appears in
        at least one transaction.
    """
    return {frozenset([item]) for transaction in data_set for item in transaction}
def fk_itemsets(fk_1, k):
    """Join itemsets pairwise and keep the unions that contain exactly k items.

    Every ordered pair of itemsets in ``fk_1`` is merged (an itemset is also
    paired with itself). No prefix condition or subset pruning is applied:
    any union of size k is accepted as a candidate.

    Args:
        fk_1: Collection of frozensets from the previous level.
        k: Required size of each generated candidate.

    Returns:
        A set of frozensets, each with exactly k items.
    """
    candidates = set()
    for left in fk_1:
        for right in fk_1:
            merged = left.union(right)
            if len(merged) == k:
                candidates.add(merged)
    return candidates
def f_k_1_itemsets(data_set, k):
    """Build the size-k candidate itemsets by repeated level-wise joining.

    Starts from the size-1 itemsets of ``data_set`` and joins each level
    with itself until size k is reached. No support filtering is done
    between levels.

    Args:
        data_set: Iterable of transactions, each an iterable of hashable items.
        k: Target itemset size; must be at least 1.

    Returns:
        A set of frozensets, each with exactly k items.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # Reject k < 1 up front: there is no level below the size-1 seeds.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")
    itemsets = f1_itemsets(data_set)
    for size in range(2, k + 1):
        itemsets = fk_itemsets(itemsets, size)
    return itemsets
# Generate candidates level by level and keep the frequent itemsets.
def frequent_itemsets(data_set, min_support):
    """Print and return the frequent itemsets of every size.

    Size-1 candidates are the distinct items. Candidates of each later size
    are the unions of two frequent itemsets of the previous size that
    contain exactly one more item. Levels continue until a level has no
    frequent itemset, so the search is not limited by the length of any
    particular transaction.

    Args:
        data_set: Sequence of transactions, each an iterable of hashable items.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to count as frequent.

    Returns:
        A dict mapping itemset size to the set of frequent frozensets of
        that size. Sizes with no frequent itemset are absent; an empty
        ``data_set`` gives an empty dict.
    """
    transactions = [frozenset(transaction) for transaction in data_set]
    frequent_by_size = {}
    if not transactions:
        return frequent_by_size

    total = len(transactions)
    candidates = {frozenset([item]) for transaction in transactions for item in transaction}
    size = 1
    while candidates:
        frequent = {
            candidate
            for candidate in candidates
            if sum(candidate <= transaction for transaction in transactions) / total
            >= min_support
        }
        if not frequent:
            break
        print(f'Frequent Itemsets of size {size}:')
        print(frequent)
        print()
        frequent_by_size[size] = frequent
        size += 1
        # Join only frequent itemsets: a superset of an infrequent itemset
        # cannot itself be frequent.
        candidates = {
            left | right
            for left in frequent
            for right in frequent
            if len(left | right) == size
        }
    return frequent_by_size
# Extract the association rules that meet the support and confidence thresholds.
def association_rules(data_set, min_support, min_confidence):
    """Print and return rules ``antecedent -> consequent`` with one-item consequents.

    Frequent itemsets are mined level by level. For every frequent itemset
    with at least two items, each way of moving a single item to the
    consequent is tried; the rule is kept when
    ``support(itemset) / support(antecedent)`` reaches ``min_confidence``.

    Args:
        data_set: Sequence of transactions, each an iterable of hashable items.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to count as frequent.
        min_confidence: Minimum confidence a rule needs to be reported.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples, where the
        first two are frozensets. Empty when ``data_set`` is empty or no
        rule qualifies.
    """
    transactions = [frozenset(transaction) for transaction in data_set]
    rules = []
    if not transactions:
        return rules

    total = len(transactions)
    # Support counts of frequent itemsets of every size seen so far. The
    # antecedent of a size-k rule has size k-1, so its count comes from an
    # earlier level.
    support_counts = {}
    candidates = {frozenset([item]) for transaction in transactions for item in transaction}
    size = 1
    while candidates:
        level_counts = {
            candidate: sum(candidate <= transaction for transaction in transactions)
            for candidate in candidates
        }
        # Itemsets that never occur are dropped even when min_support <= 0,
        # so the confidence division below never has a zero denominator.
        frequent = {
            itemset: count
            for itemset, count in level_counts.items()
            if count and count / total >= min_support
        }
        if not frequent:
            break
        support_counts.update(frequent)
        # Size-1 itemsets would have an empty antecedent, so rules start at 2.
        if size > 1:
            for itemset, count in frequent.items():
                for kept in combinations(itemset, size - 1):
                    antecedent = frozenset(kept)
                    consequent = itemset - antecedent
                    confidence = count / support_counts[antecedent]
                    if confidence >= min_confidence:
                        print(f'{antecedent} -> {consequent}, confidence: {confidence:.2f}')
                        rules.append((antecedent, consequent, confidence))
            print()
        size += 1
        candidates = {
            left | right
            for left in frequent
            for right in frequent
            if len(left | right) == size
        }
    return rules
# Demo run: report the frequent itemsets first, then the association rules.
min_support, min_confidence = 0.4, 0.8
frequent_itemsets(data_set, min_support=min_support)
print('Association Rules:')
association_rules(
    data_set,
    min_support=min_support,
    min_confidence=min_confidence,
)
```
阅读全文