请给出 Apriori 算法的实例代码
时间: 2023-09-14 11:11:48 浏览: 63
下面是 Apriori 算法的 Python 实现:
```python
def load_data():
    """Return the sample transaction dataset.

    Returns:
        A list of ten transactions, each a ``set`` of integer item ids
        drawn from 1-4.
    """
    transactions = [
        {1, 2, 3, 4},
        {1, 2, 4},
        {2, 3, 4},
        {1, 3, 4},
        {1, 3},
        {2, 4},
        {3, 4},
        {2, 3},
        {1, 2},
        {1, 4},
    ]
    return transactions
def create_candidates(L, k):
    """Generate candidate (k+1)-itemsets from the frequent k-itemsets in L.

    Two k-itemsets are joined when their sorted forms share the same first
    k-1 items (the classic Apriori join step). A joined candidate is kept
    only if every one of its k-item subsets is itself in ``L`` (the Apriori
    prune step), since a superset of an infrequent itemset cannot be
    frequent.

    Args:
        L: Iterable (set, list, ...) of itemsets, each containing exactly
            ``k`` mutually sortable, hashable items.
        k: Size of the itemsets in ``L``.

    Returns:
        A ``set`` of ``frozenset`` candidates, each of size ``k + 1``.
    """
    # Frozensets are hashable, so they can live inside a set and be looked up
    # quickly during pruning.
    frequent = {frozenset(itemset) for itemset in L}
    # Sorting each itemset, and then the list of itemsets, places itemsets
    # that share a (k-1)-prefix next to each other.
    ordered = sorted(sorted(itemset) for itemset in frequent)

    candidates = set()
    for i, first in enumerate(ordered):
        for second in ordered[i + 1:]:
            if first[:k - 1] != second[:k - 1]:
                # Lexicographic order: once the prefix differs, no later
                # itemset can share it either.
                break
            candidate = frozenset(first) | frozenset(second)
            # Prune: every k-subset of the candidate must be frequent.
            if all(candidate - {item} in frequent for item in candidate):
                candidates.add(candidate)
    return candidates


def get_frequent_itemsets(dataset, min_support):
    """Find all frequent itemsets in ``dataset`` with the Apriori algorithm.

    The support of an itemset is the number of transactions that contain it
    as a subset (an absolute count, not a fraction).

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually sortable items.
        min_support: Minimum number of containing transactions required for
            an itemset to be considered frequent.

    Returns:
        A list whose element at index ``i`` is the ``set`` of frequent
        itemsets (as ``frozenset`` objects) of size ``i + 1``. The list stops
        at the largest size that has at least one frequent itemset, and is
        empty when nothing is frequent.
    """
    transactions = [frozenset(transaction) for transaction in dataset]

    def support(itemset):
        # Count transactions containing the itemset, not ones equal to it.
        return sum(1 for transaction in transactions if itemset <= transaction)

    itemsets = []
    # Level 1 candidates: every distinct item as a singleton itemset.
    candidates = {
        frozenset([item]) for transaction in transactions for item in transaction
    }
    k = 1
    while candidates:
        frequent = {c for c in candidates if support(c) >= min_support}
        if not frequent:
            break
        itemsets.append(frequent)
        # `frequent` holds k-itemsets; build the (k+1)-candidates from them.
        candidates = create_candidates(frequent, k)
        k += 1
    return itemsets
if __name__ == '__main__':
    # Demo: mine the sample dataset with an absolute support threshold of 3
    # and print the frequent itemsets grouped by size.
    dataset = load_data()
    itemsets = get_frequent_itemsets(dataset, min_support=3)
    for size, level in enumerate(itemsets, start=1):
        print(f'Frequent itemsets of length {size}:')
        # Each level is an unordered set; sort the itemsets (and the items
        # inside each one) so the output is reproducible, and print them in
        # plain "{1, 2}" form rather than the frozenset repr.
        for items in sorted(sorted(itemset) for itemset in level):
            print('{' + ', '.join(map(str, items)) + '}')
```
运行以上代码,输出如下:
```
Frequent itemsets of length 1:
{1}
{2}
{3}
{4}
Frequent itemsets of length 2:
{1, 2}
{1, 3}
{1, 4}
{2, 3}
{2, 4}
{3, 4}
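```
注意:在该数据集中,任何三项集最多只出现在 2 条事务中(例如 {2, 3, 4} 只被 {1, 2, 3, 4} 和 {2, 3, 4} 包含),因此当 min_support=3 时不存在长度为 3 的频繁项集;若将 min_support 降为 2,则会得到 {1, 2, 4}、{1, 3, 4} 和 {2, 3, 4} 三个频繁三项集。
可以看到,得到的频繁项集与前面的例子结果一致。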
阅读全文