def create_C1(dataset):
    """Build the candidate 1-itemsets from a transaction dataset.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable,
            mutually comparable items.

    Returns:
        A list of single-item frozensets, one per distinct item, ordered by
        item value.
    """
    unique_items = {item for transaction in dataset for item in transaction}
    return [frozenset([item]) for item in sorted(unique_items)]


def scan_D(D, Ck, min_support):
    """Count candidate itemsets in the transactions and keep the frequent ones.

    Args:
        D: Sized collection of transactions (sets of items).
        Ck: Iterable of candidate itemsets (frozensets).
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to be kept.

    Returns:
        A ``(frequent, support_data)`` tuple. ``frequent`` lists the
        candidates whose support is at least ``min_support``;
        ``support_data`` maps every candidate that occurred in at least one
        transaction (frequent or not) to its support.
    """
    counts = {}
    for tid in D:
        for can in Ck:
            if can.issubset(tid):
                counts[can] = counts.get(can, 0) + 1

    num_items = float(len(D))
    frequent = []
    support_data = {}
    for key, count in counts.items():
        support = count / num_items
        support_data[key] = support
        if support >= min_support:
            frequent.append(key)
    # Most recently counted itemset first: same order as inserting each hit
    # at index 0, without the quadratic cost.
    frequent.reverse()
    return frequent, support_data


def apriori_gen(Lk, k):
    """Join frequent (k-1)-itemsets into candidate k-itemsets.

    Two itemsets are merged when their first ``k - 2`` items, taken in sorted
    order, are identical.

    Args:
        Lk: Sequence of frozensets, each of size ``k - 1``.
        k: Size of the candidates to generate.

    Returns:
        A list of candidate frozensets of size ``k``.
    """
    # Sort BEFORE slicing: iteration order of a frozenset is arbitrary, so
    # slicing the unsorted items can compare the wrong prefixes and produce
    # missing or duplicated candidates.
    prefixes = [sorted(itemset)[:k - 2] for itemset in Lk]
    candidates = []
    for i, prefix in enumerate(prefixes):
        for j in range(i + 1, len(Lk)):
            if prefix == prefixes[j]:
                candidates.append(Lk[i] | Lk[j])
    return candidates


def apriori(dataset, min_support=0.5):
    """Mine all frequent itemsets from a dataset with the Apriori algorithm.

    Args:
        dataset: Re-iterable collection of transactions (it is traversed more
            than once).
        min_support: Minimum support an itemset needs to count as frequent.

    Returns:
        A ``(L, support_data)`` tuple. ``L[i]`` is the list of frequent
        itemsets of size ``i + 1``; the last entry of ``L`` is always an
        empty list because the search stops at the first empty level.
        ``support_data`` maps each counted itemset to its support.
    """
    C1 = create_C1(dataset)
    D = list(map(set, dataset))
    L1, support_data = scan_D(D, C1, min_support)
    L = [L1]
    k = 2
    # L[k - 2] holds the frequent (k-1)-itemsets; stop once a level is empty.
    while L[k - 2]:
        Ck = apriori_gen(L[k - 2], k)
        Lk, support_k = scan_D(D, Ck, min_support)
        support_data.update(support_k)
        L.append(Lk)
        k += 1
    return L, support_data

# Request from the original post: add confidence computation on top of this code.
时间: 2024-03-14 21:47:24 浏览: 160
在Apriori算法中,可以通过计算关联规则的置信度来评估规则的可靠性。置信度表示当一个规则的前提项出现时,结论项也会出现的概率。可以使用以下代码来计算关联规则的置信度:
```python
def generate_rules(L, support_data, min_confidence=0.7):
    """Generate association rules from the frequent itemsets.

    Args:
        L: List of frequent-itemset levels; ``L[i]`` holds itemsets of size
            ``i + 1``. ``L[0]`` is skipped because a single item cannot be
            split into an antecedent and a consequent.
        support_data: Mapping from itemset (frozenset) to its support.
        min_confidence: Minimum confidence a rule needs to be kept.

    Returns:
        The list of rules collected by ``calc_confidence`` and
        ``rules_from_conseq``.
    """
    big_rules_list = []
    for i in range(1, len(L)):
        for freq_set in L[i]:
            H1 = [frozenset([item]) for item in freq_set]
            # Always test the single-item consequents first. Handing H1
            # straight to rules_from_conseq would skip them, because that
            # function only evaluates the merged (larger) consequents.
            H1 = calc_confidence(freq_set, H1, support_data,
                                 big_rules_list, min_confidence)
            # Larger consequents are only possible for itemsets with more
            # than two items, and merging needs at least two survivors.
            if i > 1 and len(H1) > 1:
                rules_from_conseq(freq_set, H1, support_data,
                                  big_rules_list, min_confidence)
    return big_rules_list
def calc_confidence(freq_set, H, support_data, brl, min_confidence=0.7):
    """Evaluate candidate consequents of one frequent itemset.

    For each consequent in ``H`` the rule ``(freq_set - conseq) --> conseq``
    is scored as ``support(freq_set) / support(freq_set - conseq)``. Rules
    reaching ``min_confidence`` are printed and appended to ``brl``.

    Args:
        freq_set: The frequent itemset (frozenset) the rules are built from.
        H: Iterable of candidate consequents (frozensets).
        support_data: Mapping from itemset to support; must contain
            ``freq_set`` and every antecedent ``freq_set - conseq``.
        brl: List that receives ``(antecedent, consequent, confidence)``
            tuples; it is mutated in place.
        min_confidence: Minimum confidence a rule needs to be kept.

    Returns:
        The consequents from ``H`` whose rule met ``min_confidence``.

    Raises:
        KeyError: If ``freq_set`` or an antecedent is missing from
            ``support_data``.
    """
    pruned_H = []
    for conseq in H:
        antecedent = freq_set - conseq
        conf = support_data[freq_set] / support_data[antecedent]
        if conf >= min_confidence:
            print(antecedent, "-->", conseq, "conf:", conf)
            brl.append((antecedent, conseq, conf))
            pruned_H.append(conseq)
    return pruned_H
def rules_from_conseq(freq_set, H, support_data, brl, min_confidence=0.7):
    """Grow the consequents in ``H`` by one item and collect the new rules.

    The consequents in ``H`` (all of size ``m``) are merged into candidates
    of size ``m + 1`` with ``apriori_gen``, scored with ``calc_confidence``,
    and the survivors are extended recursively. The consequents in ``H``
    themselves are not scored by this function.

    Args:
        freq_set: The frequent itemset (frozenset) the rules are built from.
        H: List of equally sized consequents (frozensets).
        support_data: Mapping from itemset to support.
        brl: List that receives the accepted rules; it is mutated in place.
        min_confidence: Minimum confidence a rule needs to be kept.

    Returns:
        None. Accepted rules are appended to ``brl``.
    """
    # Nothing to merge; also avoids an IndexError on H[0] below.
    if not H:
        return
    m = len(H[0])
    # A consequent of size m + 1 must leave at least one antecedent item.
    if len(freq_set) <= m + 1:
        return
    Hmp1 = apriori_gen(H, m + 1)
    Hmp1 = calc_confidence(freq_set, Hmp1, support_data, brl, min_confidence)
    # Merging again needs at least two surviving consequents.
    if len(Hmp1) > 1:
        rules_from_conseq(freq_set, Hmp1, support_data, brl, min_confidence)
```
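其中,`generate_rules`函数遍历包含两个及以上元素的频繁项集并生成关联规则,`calc_confidence`函数计算候选规则的置信度,`rules_from_conseq`函数通过合并H中的结论项生成更长的结论项并继续筛选规则。对于规则 A → B,置信度的计算公式为 $\mathrm{conf}(A \Rightarrow B) = \mathrm{support}(A \cup B) / \mathrm{support}(A)$,即只需要频繁项集本身的支持度和前提项的支持度(二者均取自`support_data`),不需要单独使用结论项的支持度。如果置信度大于或等于最小置信度,则打印该规则,并将其以`(前提项, 结论项, 置信度)`的形式加入规则列表。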
阅读全文