关联规则的python代码
时间: 2023-11-12 07:56:25 浏览: 113
数据挖掘大作业-基于python实现关联规则挖掘可视化系统+源代码+文档说明
关联规则(Association Rule)描述大规模数据集中项与项之间的关联关系,关联规则挖掘通常用于市场分析、商品推荐等领域。Apriori算法是一种常用的关联规则挖掘算法,其核心步骤是找出满足最小支持度的频繁项集。下面是使用Python实现Apriori算法(频繁项集挖掘)的示例代码:
```python
# 导入所需模块
from itertools import combinations
from collections import defaultdict
# 定义Apriori算法函数
def apriori(transactions, support):
    """Find all frequent itemsets in ``transactions`` with the Apriori algorithm.

    Args:
        transactions: Iterable of transactions; each transaction is an
            iterable of hashable items. Duplicate items inside one
            transaction are counted once.
        support: Minimum support, as a fraction of the number of
            transactions. An itemset is kept when its support is greater
            than or equal to this value.

    Returns:
        A list of ``(itemset, support)`` tuples, where ``itemset`` is a
        ``frozenset`` of items and ``support`` is the fraction of
        transactions that contain every item of the itemset. Itemsets are
        listed level by level (all 1-itemsets, then 2-itemsets, ...).
        An empty ``transactions`` input yields an empty list.
    """
    transactions = [frozenset(transaction) for transaction in transactions]
    n_transactions = len(transactions)
    if n_transactions == 0:
        # No data: nothing can be frequent, and this avoids dividing by zero.
        return []

    # Vertical layout: 1-itemset -> set of indexes of transactions holding it.
    tidsets = defaultdict(set)
    for tran_idx, transaction in enumerate(transactions):
        for item in transaction:
            tidsets[frozenset([item])].add(tran_idx)

    # Frequent itemsets of the current size k, with their transaction indexes.
    current = {
        itemset: tids
        for itemset, tids in tidsets.items()
        if len(tids) / n_transactions >= support
    }

    frequent = []
    k = 1
    while current:
        frequent.extend(
            (itemset, len(tids) / n_transactions)
            for itemset, tids in current.items()
        )
        k += 1
        next_level = {}
        seen = set()
        # Join step: merge pairs of frequent (k-1)-itemsets into k-itemsets.
        for (left, left_tids), (right, right_tids) in combinations(current.items(), 2):
            candidate = left | right
            if len(candidate) != k or candidate in seen:
                continue
            seen.add(candidate)
            # Prune step: every (k-1)-subset of a frequent itemset must
            # itself be frequent.
            if any(candidate - {item} not in current for item in candidate):
                continue
            # Transactions containing the union are exactly those that
            # contain both halves.
            tids = left_tids & right_tids
            if len(tids) / n_transactions >= support:
                next_level[candidate] = tids
        current = next_level
    return frequent
# Example: mine frequent itemsets from the mushroom dataset.
# Each line of the file is split on whitespace into one transaction.
with open('mushroom.dat') as f:
    # Skip blank lines so they are not counted as empty transactions,
    # which would inflate the denominator used when computing support.
    transactions = [set(line.split()) for line in f if line.strip()]
support = 0.3
freqitems = apriori(transactions, support)
# Print each frequent itemset on its own line; the support value is ignored.
for item, _ in freqitems:
    print(', '.join(item))
```
阅读全文