给出Apriori算法Python实现的完整代码
时间: 2023-09-16 10:15:00 浏览: 92
以下是Apriori算法的Python实现代码:
```python
def generate_candidates(itemset, length):
    """Join k-itemsets into candidate (k+1)-itemsets (the Apriori join step).

    Two itemsets are joined when their first ``length - 1`` items are equal;
    the candidate consists of every item of the first itemset followed by the
    last item of the second one.

    Args:
        itemset: Collection of itemsets, each a tuple of ``length`` items in
            ascending order.
        length: Size ``k`` of the itemsets in ``itemset``.

    Returns:
        A list of tuples of size ``length + 1`` in lexicographic order,
        without duplicates. Empty when fewer than two itemsets can be joined.
    """
    # Sorting the de-duplicated input guarantees that, for any joined pair,
    # the last item of `right` is greater than the last item of `left`, so
    # every produced candidate is itself in ascending order.
    ordered = sorted(set(itemset))
    candidates = []
    for position, left in enumerate(ordered):
        for right in ordered[position + 1:]:
            # Join only itemsets that share their first k-1 items.
            if left[:length - 1] == right[:length - 1]:
                # Keep ALL of `left`: using only its k-1 prefix here would
                # yield another k-itemset instead of a (k+1)-itemset.
                candidates.append(left + (right[-1],))
    return candidates
def prune(itemset, candidates, length):
    """Drop the candidates whose support count is below the threshold.

    Args:
        itemset: Transactions to scan; each one is an iterable of items.
        candidates: Candidate itemsets to count; each must be hashable
            because it is used as a dictionary key.
        length: Minimum number of transactions that must contain a
            candidate for it to be kept. Despite the name, this value is
            compared against the support count, not against an itemset size.

    Returns:
        A list of the candidates contained in at least ``length``
        transactions, in the order in which they were first matched.
        Candidates that match no transaction at all are never returned.
    """
    support = {}
    for transaction in itemset:
        for candidate in candidates:
            # A candidate is supported when all of its items are present.
            if set(candidate) <= set(transaction):
                support[candidate] = support.get(candidate, 0) + 1
    return [candidate for candidate, seen in support.items() if seen >= length]
def apriori(itemset, min_support):
    """Find all frequent itemsets in a list of transactions (Apriori).

    The search is level-wise: frequent 1-itemsets are found first, then
    joined into 2-item candidates, and so on until a level yields no
    candidates.

    Args:
        itemset: Iterable of transactions; each transaction is an iterable
            of hashable, mutually orderable items (for example strings).
        min_support: Minimum absolute support, i.e. the number of
            transactions that must contain an itemset. Values below 1 are
            treated as 1, so an itemset that occurs in no transaction is
            never reported.

    Returns:
        A list of frequent itemsets as ascending tuples, ordered by size
        and lexicographically within each size.
    """
    transactions = [frozenset(transaction) for transaction in itemset]
    threshold = max(min_support, 1)

    # Level 1 candidates are the individual items, not whole transactions.
    candidates = sorted(
        {(item,) for transaction in transactions for item in transaction}
    )
    result = []
    while candidates:
        frequent = _frequent_among(transactions, candidates, threshold)
        result += frequent
        candidates = _join_frequent(frequent)
    return result


def _frequent_among(transactions, candidates, threshold):
    """Return, in order, the candidates found in at least `threshold` transactions."""
    frequent = []
    for candidate in candidates:
        needed = frozenset(candidate)
        support = sum(1 for transaction in transactions if needed <= transaction)
        if support >= threshold:
            frequent.append(candidate)
    return frequent


def _join_frequent(frequent):
    """Build sorted (k+1)-item candidates from a sorted list of frequent k-itemsets."""
    known = set(frequent)
    candidates = []
    for position, left in enumerate(frequent):
        for right in frequent[position + 1:]:
            if left[:-1] != right[:-1]:
                # The list is sorted, so itemsets sharing a prefix are
                # contiguous; no later itemset can match this prefix.
                break
            merged = left + (right[-1],)
            # Apriori property: a candidate can only be frequent if every
            # one of its k-item subsets is frequent.
            if all(
                merged[:skip] + merged[skip + 1:] in known
                for skip in range(len(merged))
            ):
                candidates.append(merged)
    return candidates
```
使用示例:
```python
# Sample data: every inner list is one transaction.
transactions = [
    list(basket) for basket in ("abcde", "bcd", "ace", "ade", "bce")
]

# Compute the itemset supports with the minimum support set to 3.
min_support = 3
frequent_itemsets = apriori(transactions, min_support)

# Print the result, one itemset per line.
for itemset in frequent_itemsets:
    print(itemset)
```
输出结果:
```
('a',)
('b',)
('c',)
('d',)
('e',)
('a', 'e')
('b', 'c')
('c', 'e')
```
阅读全文