用Python实现apriori算法输入数据集
时间: 2024-05-16 19:17:41 浏览: 155
以下是使用Python实现Apriori算法的示例代码。输入数据集是一个列表,其中每个元素是一个由若干项(即商品)组成的列表,代表一条交易记录;算法内部会先把每条交易记录转换为集合再进行计数:
```python
def load_dataset():
    """Return a small sample transaction database for the Apriori demo.

    Returns:
        A freshly built list of seven transactions; each transaction is a
        list of item-name strings.
    """
    return [
        ['bread', 'milk', 'cheese'],
        ['bread', 'milk'],
        ['milk', 'cheese'],
        ['bread', 'juice'],
        ['milk', 'juice'],
        ['bread', 'milk', 'juice', 'cheese'],
        ['bread', 'juice', 'cheese'],
    ]
def create_C1(dataset):
    """Build the candidate 1-itemsets (C1) from a transaction database.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-element frozensets, one per distinct item, ordered
        by the natural sort order of the items.
    """
    # A set deduplicates in O(1) per item instead of scanning a list.
    unique_items = {item for transaction in dataset for item in transaction}
    # frozenset is used so the itemsets can serve as dictionary keys later.
    return [frozenset([item]) for item in sorted(unique_items)]
def scan_D(dataset, candidates, min_support):
    """Count candidate itemsets in the database and keep the frequent ones.

    Args:
        dataset: Sequence of transactions, each a set (or any container
            accepted by ``frozenset.issubset``).
        candidates: Iterable of frozensets to be counted.
        min_support: Minimum support, as a fraction of the number of
            transactions, for a candidate to be considered frequent.

    Returns:
        A ``(frequent_items, support_data)`` tuple. ``frequent_items`` lists
        the candidates whose support is at least ``min_support``.
        ``support_data`` maps every candidate found in at least one
        transaction to its support, including those below the threshold;
        candidates that never occur are absent from it.
    """
    support_counts = {}
    for transaction in dataset:
        for candidate in candidates:
            if candidate.issubset(transaction):
                support_counts[candidate] = support_counts.get(candidate, 0) + 1

    # Support is relative to the number of transactions, not of items.
    num_transactions = float(len(dataset))
    frequent_items = []
    support_data = {}
    for candidate, count in support_counts.items():
        support = count / num_transactions
        if support >= min_support:
            frequent_items.append(candidate)
        support_data[candidate] = support
    return frequent_items, support_data
def apriori_gen(frequent_items, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two (k-1)-itemsets are merged when their first k-2 items, taken in
    sorted order, are identical. This yields each k-item union at most once.

    Args:
        frequent_items: Sequence of frozensets, each of size k-1, whose
            items are mutually comparable.
        k: Size of the candidate itemsets to produce.

    Returns:
        A list of frozensets of size k. No subset-based pruning is applied.
    """
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw list would compare arbitrary items rather than the
    # canonical (k-2)-prefix.
    prefixes = [sorted(itemset)[:k - 2] for itemset in frequent_items]

    candidates = []
    num_frequent_items = len(frequent_items)
    for i in range(num_frequent_items):
        for j in range(i + 1, num_frequent_items):
            if prefixes[i] == prefixes[j]:
                candidates.append(frequent_items[i] | frequent_items[j])
    return candidates
def apriori(dataset, min_support=0.5):
    """Find frequent itemsets in a transaction database, level by level.

    Args:
        dataset: Sequence of transactions, each an iterable of hashable,
            mutually comparable items.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to be considered frequent.

    Returns:
        A ``(L, support_data)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the last element of ``L`` is always an
        empty list marking the level at which the search stopped.
        ``support_data`` maps every counted candidate that occurs in at least
        one transaction to its support, including candidates that fell below
        ``min_support``.
    """
    C1 = create_C1(dataset)
    # Transactions become sets so that subset tests are cheap.
    D = list(map(set, dataset))
    L1, support_data = scan_D(D, C1, min_support)
    L = [L1]
    k = 2
    # Keep joining while the most recent level still has frequent itemsets.
    while L[-1]:
        Ck = apriori_gen(L[-1], k)
        Lk, support_k = scan_D(D, Ck, min_support)
        support_data.update(support_k)
        L.append(Lk)
        k += 1
    return L, support_data
# Demo: mine the sample transactions with the default minimum support and
# print the frequent itemsets followed by the support table.
dataset = load_dataset()
L, support_data = apriori(dataset)
print(L, support_data, sep="\n")
```
输出结果为(最小支持度为默认值0.5;frozenset内部元素的打印顺序可能因运行环境而异):
```
[[frozenset({'bread'}), frozenset({'cheese'}), frozenset({'milk'}), frozenset({'juice'})], []]
{frozenset({'bread'}): 0.7142857142857143, frozenset({'cheese'}): 0.5714285714285714, frozenset({'milk'}): 0.7142857142857143, frozenset({'juice'}): 0.5714285714285714, frozenset({'bread', 'cheese'}): 0.42857142857142855, frozenset({'bread', 'milk'}): 0.42857142857142855, frozenset({'cheese', 'milk'}): 0.42857142857142855, frozenset({'bread', 'juice'}): 0.42857142857142855, frozenset({'milk', 'juice'}): 0.2857142857142857, frozenset({'cheese', 'juice'}): 0.2857142857142857}
```
阅读全文