用 Python 实现 Apriori 算法
时间: 2023-09-19 14:07:25 浏览: 62
Apriori算法的实现的python代码.doc
5星 · 资源好评率100%
好的,以下是用 Python 实现 Apriori 算法的示例代码:
```python
def load_dataset():
    """Return the built-in sample dataset.

    The result is a list of transactions; each transaction is a list of
    item names. A fresh list is built on every call.
    """
    return [
        ['milk', 'bread', 'eggs'],
        ['milk', 'bread', 'butter'],
        ['milk', 'butter'],
        ['bread', 'butter'],
        ['bread', 'eggs'],
    ]
def create_c1(dataset):
    """Build C1, the set of candidate 1-itemsets.

    Returns a set holding one single-element frozenset for every distinct
    item that appears in any transaction of ``dataset``.
    """
    distinct_items = set()
    for transaction in dataset:
        distinct_items.update(transaction)
    # frozenset is hashable, so each itemset can later serve as a dict key.
    return {frozenset([item]) for item in distinct_items}
def scan_dataset(dataset, candidate_set, min_support):
    """Count candidate itemsets over the dataset and keep the frequent ones.

    Args:
        dataset: Sized collection of transactions; each transaction is an
            iterable of hashable items.
        candidate_set: Iterable of hashable itemsets (frozensets) to count.
            Duplicate entries are ignored, so every distinct itemset is
            counted at most once per transaction.
        min_support: Minimum support, expressed as a fraction of the number
            of transactions.

    Returns:
        A ``(frequent_set, support_data)`` tuple. ``frequent_set`` is the
        list of itemsets whose support is ``>= min_support``, in the order
        they were first matched. ``support_data`` maps itemsets to their
        support value.
    """
    # dict.fromkeys drops duplicate candidates while keeping first-seen
    # order; a repeated candidate would otherwise be counted several times
    # per transaction and could report a support above 1.0.
    candidates = list(dict.fromkeys(candidate_set))

    support_count = {}
    for transaction in dataset:
        # Build the item set once per transaction instead of once per
        # candidate (issubset would otherwise convert the list every time).
        items = frozenset(transaction)
        for candidate in candidates:
            if candidate.issubset(items):
                support_count[candidate] = support_count.get(candidate, 0) + 1

    # Only reached for division when at least one transaction matched, so an
    # empty dataset never divides by zero.
    num_transactions = float(len(dataset))
    frequent_set = []
    support_data = {}
    for itemset, count in support_count.items():
        support = count / num_transactions
        if support >= min_support:
            frequent_set.append(itemset)
        # NOTE(review): the source's indentation was lost, so it is unclear
        # whether support was meant to be recorded only for frequent
        # itemsets. It is recorded here for every candidate that occurs in
        # at least one transaction (a superset of the other reading).
        support_data[itemset] = support
    return frequent_set, support_data
def apriori_gen(frequent_set, k):
    """Generate candidate k-itemsets from the frequent (k-1)-itemsets.

    Two itemsets are joined when their first ``k - 2`` items, taken in
    sorted order, are identical; the candidate is their union.

    Args:
        frequent_set: List of frozensets, each expected to hold ``k - 1``
            items. Items within one itemset must be mutually orderable,
            because each itemset is sorted to obtain its prefix.
        k: Size of the candidate itemsets to generate.

    Returns:
        List of candidate frozensets, in the order the joined pairs are
        encountered (``i < j``).
    """
    # Sort BEFORE slicing: a frozenset iterates in arbitrary order, so
    # slicing first would compare arbitrary items instead of the true sorted
    # prefix, which can miss valid candidates or emit the same one twice.
    # Prefixes are computed once per itemset rather than once per pair.
    prefixes = [sorted(itemset)[:k - 2] for itemset in frequent_set]

    candidate_set = []
    for i, left in enumerate(frequent_set):
        for j in range(i + 1, len(frequent_set)):
            if prefixes[i] == prefixes[j]:
                candidate_set.append(left | frequent_set[j])
    return candidate_set
def apriori(dataset, min_support=0.5):
    """Run the level-wise Apriori search over ``dataset``.

    Args:
        dataset: Collection of transactions, passed through to
            ``create_c1`` and ``scan_dataset``.
        min_support: Minimum support threshold handed to ``scan_dataset``
            (defaults to 0.5).

    Returns:
        A ``(frequent_sets, support_data)`` tuple. ``frequent_sets[i]`` is
        the list of frequent itemsets found at level ``i + 1``; the loop
        stops once a level comes back empty, so the last entry is always an
        empty list. ``support_data`` merges the support dicts returned by
        every scan.
    """
    level, support_data = scan_dataset(dataset, create_c1(dataset), min_support)
    levels = [level]
    size = 2  # size of the itemsets generated in the next round
    while levels[-1]:
        candidates = apriori_gen(levels[-1], size)
        level, level_support = scan_dataset(dataset, candidates, min_support)
        support_data.update(level_support)
        levels.append(level)
        size += 1
    return levels, support_data
if __name__ == '__main__':
    # Demo run: mine the sample dataset with the default threshold and print
    # each result under its heading.
    frequent_sets, support_data = apriori(load_dataset())
    for heading, result in (('频繁项集:', frequent_sets), ('支持度:', support_data)):
        print(heading)
        print(result)
```
这里的数据集只是一个简单的示例,你可以根据实际需求替换 load_dataset 中的数据,并调整 apriori 的 min_support 参数(最小支持度阈值,默认为 0.5)。
阅读全文