python实现apriori算法完整代码
时间: 2023-11-15 18:07:02 浏览: 92
抱歉,作为AI语言模型,我并不能提供完整的代码实现。但我可以给你提供一个参考:
```
import itertools
def load_data():
# 加载数据集
data = [['bread', 'milk'],
['bread', 'diaper', 'beer', 'egg'],
['milk', 'diaper', 'beer', 'cola'],
['bread', 'milk', 'diaper', 'beer'],
['bread', 'milk', 'diaper', 'cola']]
return data
def create_C1(data):
# 创建单元素项集C1
C1 = set()
for transaction in data:
for item in transaction:
item_set = frozenset([item])
C1.add(item_set)
return C1
def is_apriori(Ck_item, Lk_sub1):
# 判断Ck是否满足Apriori性质
for item in Ck_item:
sub_Ck = Ck_item - frozenset([item])
if sub_Ck not in Lk_sub1:
return False
return True
def generate_Ck(Lk_sub1, k):
# 根据Lk-1创建Ck
Ck = set()
len_Lk_sub1 = len(Lk_sub1)
list_Lk_sub1 = list(Lk_sub1)
for i in range(len_Lk_sub1):
for j in range(1, len_Lk_sub1):
l1 = list(list_Lk_sub1[i])
l2 = list(list_Lk_sub1[j])
l1.sort()
l2.sort()
if l1[0:k-2] == l2[0:k-2]:
Ck_item = list_Lk_sub1[i] | list_Lk_sub1[j]
if is_apriori(Ck_item, Lk_sub1):
Ck.add(Ck_item)
return Ck
def generate_Lk_by_Ck(data, Ck, min_support, support_data):
# 根据Ck生成Lk
Lk = set()
item_count = {}
for transaction in data:
for item in Ck:
if item.issubset(transaction):
if item not in item_count:
item_count[item] = 1
else:
item_count[item] += 1
t_num = float(len(data))
for item in item_count:
if (item_count[item] / t_num) >= min_support:
Lk.add(item)
support_data[item] = item_count[item] / t_num
return Lk
def generate_L(data, k, min_support):
# 生成所有频繁项集
support_data = {}
C1 = create_C1(data)
L1 = generate_Lk_by_Ck(data, C1, min_support, support_data)
Lk_sub1 = L1.copy()
L = []
L.append(Lk_sub1)
for i in range(2, k+1):
Ci = generate_Ck(Lk_sub1, i)
Li = generate_Lk_by_Ck(data, Ci, min_support, support_data)
Lk_sub1 = Li.copy()
L.append(Lk_sub1)
return L, support_data
def generate_rules(L, support_data, min_confidence):
# 根据频繁项集和支持度生成关联规则
rules = []
for i in range(1, len(L)):
for freq_set in L[i]:
H1 = [frozenset([item]) for item in freq_set]
if i > 1:
rules_from_conseq(freq_set, H1, support_data, rules, min_confidence)
else:
calc_confidence(freq_set, H1, support_data, rules, min_confidence)
return rules
def calc_confidence(freq_set, H, support_data, rules, min_confidence):
# 计算关联规则置信度
prunedH = []
for conseq in H:
conf = support_data[freq_set] / support_data[freq_set - conseq]
if conf >= min_confidence:
print(freq_set-conseq, "-->", conseq, "conf:", conf)
rules.append((freq_set - conseq, conseq, conf))
prunedH.append(conseq)
return prunedH
def rules_from_conseq(freq_set, H, support_data, rules, min_confidence):
# 根据后件递归生成关联规则
m = len(H[0])
if len(freq_set) > (m + 1):
Hmp1 = generate_Ck(H, m+1)
Hmp1 = calc_confidence(freq_set, Hmp1, support_data, rules, min_confidence)
if len(Hmp1) > 1:
rules_from_conseq(freq_set, Hmp1, support_data, rules, min_confidence)
if __name__ == '__main__':
data = load_data()
L, support_data = generate_L(data, k=3, min_support=0.5)
rules = generate_rules(L, support_data, min_confidence=0.7)
```
这是一个简单的Python实现,可以在给定数据集上运行Apriori算法并生成关联规则。
阅读全文