Apriori算法流程
时间: 2023-09-16 15:11:51 浏览: 42
以下是一个简单的Apriori算法的示例代码:
```python
def generate_candidates(prev_candidates, k):
    """Build candidate k-itemsets from the frequent (k-1)-itemsets.

    Two itemsets are joined when their union has exactly ``k`` items. A
    joined itemset is kept only if every one of its (k-1)-item subsets is
    itself in ``prev_candidates`` (the Apriori pruning step).

    Args:
        prev_candidates: Iterable (set, list, ...) of frozensets, normally
            the frequent itemsets of size ``k - 1``.
        k: Size of the itemsets to generate.

    Returns:
        A set of frozensets, each containing ``k`` items.
    """
    # Materialise once: the input may be a set, which cannot be indexed,
    # and the pairwise loop below needs a stable order.
    previous = list(prev_candidates)
    known = set(previous)
    candidates = set()
    for position, left in enumerate(previous):
        for right in previous[position + 1:]:
            joined = left | right
            # Checking the union size does not depend on iteration order
            # and does not require the items to be sortable.
            if len(joined) != k:
                continue
            # An itemset can only be frequent if all of its subsets are.
            if all(joined - {item} in known for item in joined):
                candidates.add(joined)
    return candidates
def prune_infrequent_items(candidates, dataset, min_support_count):
    """Keep only the candidate itemsets that occur often enough.

    Args:
        candidates: Iterable of itemsets (frozensets) to evaluate.
        dataset: Iterable of transactions; each must be usable as the
            argument of ``itemset.issubset``.
        min_support_count: Minimum number of transactions that must contain
            an itemset for it to be kept.

    Returns:
        A set with the candidates whose support count is at least
        ``min_support_count``. Candidates contained in no transaction are
        never returned, whatever the threshold.
    """
    support = {}
    for transaction in dataset:
        for itemset in candidates:
            if not itemset.issubset(transaction):
                continue
            support[itemset] = support.get(itemset, 0) + 1
    return {
        itemset
        for itemset, seen in support.items()
        if seen >= min_support_count
    }
def apriori(dataset, min_support, min_support_count=None):
    """Return every frequent itemset found in ``dataset``.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable
            items.
        min_support: Minimum support as a fraction of the number of
            transactions. Only consulted when ``min_support_count`` is
            ``None``.
        min_support_count: Minimum number of transactions that must contain
            an itemset. When omitted it is derived as
            ``min_support * len(transactions)``.

    Returns:
        A set of frozensets holding the frequent itemsets of every size.
    """
    transactions = [set(transaction) for transaction in dataset]
    if min_support_count is None:
        min_support_count = min_support * len(transactions)

    # Step 1: count single items to build the frequent 1-itemsets.
    item_counts = {}
    for transaction in transactions:
        for item in transaction:
            item_counts[item] = item_counts.get(item, 0) + 1
    current_level = {
        frozenset([item])
        for item, count in item_counts.items()
        if count >= min_support_count
    }

    # Step 2: grow the itemsets one size at a time. Each level is recorded
    # before moving on; the loop only stops once a level comes back empty,
    # so returning the last level would always return nothing.
    all_frequent = set()
    k = 2
    while current_level:
        all_frequent |= current_level
        # Hand over a list so the helper works whether or not it needs to
        # index its input (a set is not subscriptable).
        candidates = generate_candidates(list(current_level), k)
        current_level = prune_infrequent_items(
            candidates, transactions, min_support_count
        )
        k += 1
    return all_frequent
# Example usage: mine the itemsets that appear in at least 40% of the
# transactions and print each one.
dataset = [
    ['apple', 'banana', 'mango'],
    ['banana', 'orange'],
    ['apple', 'banana'],
    ['apple', 'banana', 'orange'],
    ['apple', 'mango'],
]
min_support = 0.4
# Convert the relative support into an absolute transaction count.
min_support_count = min_support * len(dataset)
frequent_itemsets = apriori(dataset, min_support, min_support_count)
for itemset in frequent_itemsets:
    print(itemset)
```
这段代码