apriori算法python实现
时间: 2023-05-29 09:02:29 浏览: 133
以下是一个简单的apriori算法的Python实现:
```
def load_dataset():
    """Return the small built-in demo dataset.

    Returns:
        A new list of four transactions; each transaction is a list of
        integer item ids.
    """
    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]
def create_candidates(dataset, k=1):
    """Build the initial candidate list of single-item itemsets.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.
        k: Accepted for interface compatibility but not used; this function
            only ever produces 1-item candidates.

    Returns:
        A list of one-element frozensets, one per distinct item in the
        dataset, ordered by ascending item value.
    """
    # A set gives O(1) de-duplication instead of scanning a list per item.
    unique_items = {item for transaction in dataset for item in transaction}
    # frozenset (not set) so each candidate can later be used as a dict key.
    return [frozenset([item]) for item in sorted(unique_items)]
def filter_candidates(dataset, candidates, min_support):
    """Keep the candidates whose support reaches ``min_support``.

    Support of a candidate is the fraction of transactions in ``dataset``
    that contain every item of the candidate.

    Args:
        dataset: Sized, re-iterable collection of transactions (each an
            iterable of hashable items).
        candidates: Iterable of frozensets to evaluate. Repeated candidates
            are evaluated once.
        min_support: Minimum support, as a fraction in [0, 1].

    Returns:
        A ``(frequent_items, support_data)`` tuple. ``frequent_items`` is the
        list of candidates with support >= ``min_support``, in the order they
        were first matched. ``support_data`` maps every candidate that occurs
        in at least one transaction to its support.
    """
    # De-duplicate (order-preserving) so a repeated candidate is not counted
    # more than once per transaction, which would inflate its support.
    unique_candidates = list(dict.fromkeys(candidates))

    item_counts = {}
    for transaction in dataset:
        # Convert once per transaction so each subset test is a set operation.
        transaction_items = set(transaction)
        for candidate in unique_candidates:
            if candidate.issubset(transaction_items):
                item_counts[candidate] = item_counts.get(candidate, 0) + 1

    # When the dataset is empty item_counts is empty too, so the division
    # below is never reached.
    num_transactions = float(len(dataset))
    frequent_items = []
    support_data = {}
    for candidate, count in item_counts.items():
        support = count / num_transactions
        if support >= min_support:
            frequent_items.append(candidate)
        support_data[candidate] = support
    return frequent_items, support_data
def join_items(itemset, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in
    sorted order, are identical. No subset-based pruning is done here; the
    caller is expected to filter the candidates by support.

    Args:
        itemset: Sequence of frozensets, each expected to hold ``k - 1``
            mutually comparable items.
        k: Size of the candidates to generate (>= 2).

    Returns:
        A list of distinct frozensets, in the order they were generated.
    """
    prefix_len = k - 2
    # Sort BEFORE slicing: a frozenset has no defined iteration order, so
    # slicing the raw iteration would compare arbitrary elements.
    prefixes = [sorted(items)[:prefix_len] for items in itemset]

    joined_items = []
    seen = set()
    n = len(itemset)
    for i in range(n):
        for j in range(i + 1, n):
            if prefixes[i] != prefixes[j]:
                continue
            candidate = itemset[i] | itemset[j]
            # Guard against emitting the same union twice.
            if candidate not in seen:
                seen.add(candidate)
                joined_items.append(candidate)
    return joined_items
def apriori(dataset, min_support):
    """Run the Apriori algorithm and return all frequent itemsets.

    Args:
        dataset: Sized, re-iterable collection of transactions; it is
            scanned once per itemset size.
        min_support: Minimum support, as a fraction in [0, 1].

    Returns:
        A ``(frequent_itemsets, support_data)`` tuple.
        ``frequent_itemsets[i]`` is the list of frequent itemsets of size
        ``i + 1``; the last element is always an empty list, marking the
        size at which no frequent itemset was found. ``support_data`` is the
        merged support mapping returned by ``filter_candidates`` for every
        level.
    """
    candidates = create_candidates(dataset, 1)
    itemsets, support_data = filter_candidates(dataset, candidates, min_support)
    frequent_itemsets = [itemsets]
    k = 2
    # Keep growing while the most recent level produced something to join.
    while frequent_itemsets[-1]:
        candidates = join_items(frequent_itemsets[-1], k)
        itemsets, level_support = filter_candidates(dataset, candidates, min_support)
        support_data.update(level_support)
        frequent_itemsets.append(itemsets)
        k += 1
    return frequent_itemsets, support_data
if __name__ == '__main__':
    # Demo: mine the built-in dataset and print each level's itemsets.
    dataset = load_dataset()
    min_support = 0.5
    frequent_itemsets, support_data = apriori(dataset, min_support)
    for size, level in enumerate(frequent_itemsets, start=1):
        if not level:
            # The result ends with an empty level; skip it rather than
            # printing a header with nothing under it.
            continue
        print('Frequent itemsets of size {}:'.format(size))
        for itemset in level:
            # Sort the items so the printed tuple is deterministic.
            items = tuple(sorted(itemset))
            print('{} : {}'.format(items, round(support_data[itemset], 3)))
```
该算法使用了一个较为简单的数据集作为输入,调用apriori函数即可输出频繁项集及其支持度。在这个实现中,我们使用frozenset来表示项集:frozenset不可变且可哈希,因此可以作为字典的键来统计和保存支持度。需要注意的是,frozenset是无序的,并不保证元素的顺序,所以在比较项集前缀或打印结果时应先对元素排序。
阅读全文