帮我修改以下代码,使其能够计算支持度和置信度:

```python
import time


def load_data():
    f = open(r'伤寒论处理.txt', encoding='utf-8')
    data = []  # 所有的内容
    for line in f:
        line = line.strip()
        records = line.split(' ')
        every_line = []  # 每行的内容
        for item in records:
            every_line.append(item)
        data.append(every_line)
    return data


def create_l1(data, min_sup):
    sscnt = {}
    for i in data:
        for j in i:
            if frozenset({j}) not in sscnt:
                sscnt[frozenset({j})] = 1
            else:
                sscnt[frozenset({j})] += 1
    num_items = float(len(data))
    l1 = []
    sup_data = {}
    for k in sscnt:
        support = sscnt[k] / num_items
        if support >= min_sup:
            l1.append(k)
            sup_data[k] = support
    return l1, sup_data


# 从候选K项集到频繁K项集(支持度计算)
def apriori(data, min_sup=0.05):
    l1, sup_data = create_l1(data, min_sup)
    l = [l1]
    k = 2
    while len(l[k - 2]) > 0:
        ck = lk_to_ckk(l[k - 2], k)
        lk, supk = ck_to_lk(data, ck, min_sup)
        sup_data.update(supk)
        l.append(lk)
        k += 1
    print(sup_data)
    print(len(sup_data))
    return l, sup_data


if __name__ == '__main__':
    start = time.perf_counter()
    dataSet = load_data()
    L, sup_data = apriori(dataSet)
    end = time.perf_counter()
    print('Running time: %s Seconds' % (end - start))
```
时间: 2024-02-05 14:03:07 浏览: 144
下面是修改后的代码,增加了计算频繁项集的支持度和关联规则的置信度的功能:
```
import time
def load_data():
    """Read the transaction file and return one list of items per line.

    Opens ``伤寒论处理.txt`` (UTF-8) relative to the current working
    directory. Every non-blank line is one transaction whose items are
    separated by single spaces.

    Returns:
        list[list[str]]: the transactions, in file order.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    data = []  # every transaction read so far
    # The context manager closes the file even if reading fails part-way.
    with open(r'伤寒论处理.txt', encoding='utf-8') as f:
        for line in f:
            # Consecutive spaces make split(' ') emit empty strings; drop
            # them so '' is never counted as an item.
            every_line = [item for item in line.strip().split(' ') if item]
            # A blank line carries no items and is not a transaction.
            if every_line:
                data.append(every_line)
    return data
def create_l1(data, min_sup):
    """Build the frequent 1-itemsets and their supports.

    Support of an item is the fraction of transactions that contain it,
    so an item repeated inside one transaction is counted once.

    Args:
        data: sequence of transactions, each an iterable of hashable items.
        min_sup: minimum support (a fraction of the transaction count) an
            item must reach to be kept.

    Returns:
        tuple: ``(l1, sup_data)`` where ``l1`` is a list of single-item
        ``frozenset`` objects in order of first appearance, and
        ``sup_data`` maps each of them to its support.
    """
    num_items = float(len(data))
    if not num_items:
        # No transactions: nothing can be frequent, and dividing by the
        # transaction count would raise ZeroDivisionError.
        return [], {}

    sscnt = {}
    for transaction in data:
        # dict.fromkeys de-duplicates while keeping first-seen order, so
        # the order of the result stays deterministic.
        for item in dict.fromkeys(transaction):
            key = frozenset({item})
            sscnt[key] = sscnt.get(key, 0) + 1

    l1 = []
    sup_data = {}
    for itemset, count in sscnt.items():
        support = count / num_items
        if support >= min_sup:
            l1.append(itemset)
            sup_data[itemset] = support
    return l1, sup_data
# Level-wise search for frequent itemsets, followed by rule generation.
def apriori(data, min_sup=0.05, min_conf=0.7):
    """Mine frequent itemsets and the association rules derived from them.

    Args:
        data: sequence of transactions, each an iterable of items.
        min_sup: minimum support passed to ``create_l1`` and ``ck_to_lk``.
        min_conf: minimum confidence a rule must reach to be reported.

    Returns:
        tuple: ``(levels, sup_data, rules)``. ``levels[i]`` holds the
        itemsets of size ``i + 1`` returned by the level-wise search (the
        last entry is the empty level that stopped the loop), ``sup_data``
        maps itemsets to their support, and ``rules`` is a list of
        ``(antecedent, consequent, confidence)`` tuples.
    """
    l1, sup_data = create_l1(data, min_sup)
    levels = [l1]
    k = 2
    # Grow itemsets one item at a time until a level comes back empty.
    while len(levels[k - 2]) > 0:
        ck = lk_to_ckk(levels[k - 2], k)
        lk, supk = ck_to_lk(data, ck, min_sup)
        sup_data.update(supk)
        levels.append(lk)
        k += 1

    # Derive association rules from every itemset with at least two items.
    rules = []
    for level in levels[1:]:
        for freqset in level:
            H1 = [frozenset([item]) for item in freqset]
            # Always score the single-item consequents first; otherwise
            # itemsets of three or more items would never yield them.
            kept = calc_conf(freqset, H1, sup_data, rules, min_conf)
            # Larger consequents are only worth building from consequents
            # that already met the confidence threshold.
            if len(freqset) > 2 and len(kept) > 1:
                rules_from_conseq(freqset, kept, sup_data, rules, min_conf)

    print(sup_data)
    print(rules)
    return levels, sup_data, rules
# Grow the consequents in H by one item, keep those whose rule reaches
# min_conf (appended to rules by calc_conf), and repeat on the survivors.
def rules_from_conseq(freqset, H, sup_data, rules, min_conf):
    """Generate rules for ``freqset`` with consequents larger than those in H.

    The consequents in ``H`` themselves are not scored here; only their
    one-item-larger merges are.

    Args:
        freqset: the frequent itemset (``frozenset``) rules are drawn from.
        H: list of equally sized ``frozenset`` consequents to extend.
        sup_data: mapping from itemset to support, read by ``calc_conf``.
        rules: list that accepted rules are appended to (mutated in place).
        min_conf: minimum confidence a rule must reach.

    Returns:
        None. Results are delivered through ``rules``.
    """
    from itertools import combinations

    if not H:
        # Nothing to extend; H[0] below would raise IndexError.
        return
    m = len(H[0])
    if len(freqset) <= m + 1:
        # A larger consequent would leave no item for the antecedent.
        return

    # Build the (m + 1)-item consequents locally, as unions of pairs from H,
    # so this function needs no external candidate generator.
    candidates = []
    seen = set()
    for left, right in combinations(H, 2):
        merged = left | right
        if len(merged) == m + 1 and merged not in seen:
            seen.add(merged)
            candidates.append(merged)
    if not candidates:
        return

    Hmp1 = calc_conf(freqset, candidates, sup_data, rules, min_conf)
    # At least two survivors are needed to merge into a larger consequent.
    if len(Hmp1) > 1:
        rules_from_conseq(freqset, Hmp1, sup_data, rules, min_conf)
# Score each candidate rule and keep those reaching the confidence threshold.
def calc_conf(freqset, H, sup_data, rules, min_conf):
    """Compute rule confidence for each consequent in H and filter by it.

    For a consequent ``c`` the rule is ``(freqset - c) --> c`` and its
    confidence is ``sup_data[freqset] / sup_data[freqset - c]``.

    Args:
        freqset: the frequent itemset (``frozenset``) the rules come from.
        H: iterable of candidate consequents (``frozenset`` objects).
        sup_data: mapping from itemset to support.
        rules: list that accepted ``(antecedent, consequent, confidence)``
            tuples are appended to (mutated in place).
        min_conf: minimum confidence a rule must reach.

    Returns:
        list: the consequents whose rule was accepted.

    Raises:
        KeyError: if ``freqset`` or a non-empty antecedent is missing from
            ``sup_data``.
    """
    prunedH = []
    for conseq in H:
        antecedent = freqset - conseq
        if not antecedent:
            # A consequent covering the whole itemset leaves no antecedent,
            # so there is no rule to score.
            continue
        conf = sup_data[freqset] / sup_data[antecedent]
        if conf >= min_conf:
            print(antecedent, '-->', conseq, 'conf:', conf)
            rules.append((antecedent, conseq, conf))
            prunedH.append(conseq)
    return prunedH
if __name__ == '__main__':
    # Time the whole pipeline: loading the file, mining itemsets and rules.
    start = time.perf_counter()
    dataSet = load_data()
    L, sup_data, rules = apriori(dataSet, min_sup=0.05, min_conf=0.3)
    end = time.perf_counter()
    print('Running time: %s Seconds' % (end - start))
```
其中,rules_from_conseq() 函数和 calc_conf() 函数分别用于生成候选关联规则和计算关联规则的置信度,并根据最小置信度阈值筛选出符合条件的关联规则。在 apriori() 函数中,计算频繁项集的支持度和关联规则的置信度,并将结果输出。最后,程序也加入了最小置信度阈值 min_conf 的参数。
阅读全文