从外部文件读取数据集、不使用apriori库实现的关联规则Apriori算法代码
时间: 2023-06-13 12:05:19 浏览: 92
关联规则apriori算法
以下是一个简单的关联规则Apriori算法的Python实现,可以从外部读取数据集文件。这个实现仅用于学习和演示目的,对于大规模数据集,建议使用更高效的实现。
```python
import itertools
def load_dataset(filename):
    """Read a transaction dataset from a text file.

    Each non-blank line of the file is one transaction made of
    whitespace-separated integer item IDs. Blank (or whitespace-only) lines
    are skipped so they do not turn into empty transactions.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of transactions in file order; each transaction is a list of
        ints in the order they appear on the line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a token on a line is not a valid integer.
    """
    dataset = []
    with open(filename, 'r', encoding='utf-8') as f:
        # Iterate the file object directly; there is no need to hold every
        # raw line in memory via readlines().
        for line in f:
            tokens = line.split()
            if tokens:
                dataset.append([int(token) for token in tokens])
    return dataset
def support_count(dataset, itemset):
    """Return the absolute support of *itemset* in *dataset*.

    The support is the number of transactions that contain every item of
    the itemset.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable items.
        itemset: Iterable of hashable items to look for.

    Returns:
        The number of transactions that are supersets of *itemset*. An empty
        itemset is contained in every transaction.
    """
    # Build the lookup set once instead of once per transaction.
    required = set(itemset)
    return sum(1 for transaction in dataset if required.issubset(transaction))
def generate_candidate_itemsets(dataset, k):
    """Enumerate the distinct k-item candidate itemsets occurring in *dataset*.

    Candidates are the k-combinations of each transaction's items. Itemsets
    are treated as sets: two combinations holding the same items in a
    different order count as one candidate, and an item repeated inside a
    transaction is only considered once.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable items.
        k: Size of the itemsets to generate.

    Returns:
        A list of candidates (each a list of k items) in first-seen order.
        Item order inside a candidate follows the transaction in which the
        candidate was first encountered.
    """
    seen = set()
    candidate_itemsets = []
    for transaction in dataset:
        # dict.fromkeys drops repeated items while keeping first-seen order,
        # so a transaction such as [1, 1, 2] cannot produce the bogus
        # candidate [1, 1].
        unique_items = list(dict.fromkeys(transaction))
        for combination in itertools.combinations(unique_items, k):
            # A frozenset key gives O(1), order-insensitive deduplication.
            key = frozenset(combination)
            if key not in seen:
                seen.add(key)
                candidate_itemsets.append(list(combination))
    return candidate_itemsets
def generate_frequent_itemsets(dataset, min_support):
    """Collect every itemset whose support count reaches *min_support*.

    Itemset sizes are explored in increasing order starting from 1. For each
    size the candidates come from generate_candidate_itemsets() and are kept
    when support_count() is at least *min_support*. The search stops at the
    first size that yields no frequent itemset.

    Args:
        dataset: List of transactions.
        min_support: Minimum absolute support count (compared with ``>=``).

    Returns:
        A flat list of frequent itemsets, ordered by increasing size.
    """
    all_frequent = []
    size = 1
    while True:
        level = [
            candidate
            for candidate in generate_candidate_itemsets(dataset, size)
            if support_count(dataset, candidate) >= min_support
        ]
        if not level:
            # No frequent itemset of this size: larger sizes are not explored.
            return all_frequent
        all_frequent.extend(level)
        size += 1
def generate_association_rules(frequent_itemsets, min_confidence, transactions=None):
    """Derive association rules from frequent itemsets.

    For every itemset with at least two items, each non-empty proper subset
    is tried as an antecedent; the remaining items form the consequent. A
    rule is kept when

        confidence = support(itemset) / support(antecedent)

    is at least *min_confidence*.

    Args:
        frequent_itemsets: Iterable of itemsets (sequences of hashable items).
        min_confidence: Minimum confidence for a rule to be reported.
        transactions: Dataset used to compute supports. When omitted, the
            module-level name ``dataset`` is used, which keeps the historical
            two-argument call working.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples, where
        antecedent and consequent are lists that keep the item order of the
        originating itemset.

    Raises:
        NameError: If *transactions* is omitted, no module-level ``dataset``
            exists, and at least one itemset has two or more items.
    """
    association_rules = []
    for itemset in frequent_itemsets:
        if len(itemset) < 2:
            # A rule needs a non-empty antecedent and a non-empty consequent.
            continue
        if transactions is None:
            # Legacy fallback, resolved lazily so that inputs producing no
            # rules never require the global to exist.
            transactions = dataset
        # The itemset's own support does not depend on the antecedent.
        support_itemset = support_count(transactions, itemset)
        for size in range(1, len(itemset)):
            for antecedent in itertools.combinations(itemset, size):
                antecedent = list(antecedent)
                support_antecedent = support_count(transactions, antecedent)
                if support_antecedent == 0:
                    # Antecedent never occurs: confidence is undefined.
                    continue
                # Keep the itemset's order instead of relying on set ordering.
                consequent = [item for item in itemset if item not in antecedent]
                confidence = support_itemset / support_antecedent
                if confidence >= min_confidence:
                    association_rules.append((antecedent, consequent, confidence))
    return association_rules
if __name__ == '__main__':
    # Thresholds: minimum absolute support count and minimum rule confidence.
    min_support, min_confidence = 2, 0.5

    # Load the transactions. The name `dataset` must stay at module level
    # because generate_association_rules() looks it up as a global.
    dataset = load_dataset('dataset.txt')

    # Mine and display the frequent itemsets.
    frequent_itemsets = generate_frequent_itemsets(dataset, min_support)
    print('Frequent Itemsets:')
    print(frequent_itemsets)

    # Derive and display the association rules, one per line.
    association_rules = generate_association_rules(frequent_itemsets, min_confidence)
    print('Association Rules:')
    rule_template = '{} => {}: {:.2f}'
    for rule in association_rules:
        print(rule_template.format(*rule))
```
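在这个实现中,`load_dataset()`函数从外部文件中读取数据集(文件的每一行表示一条事务,各项之间用空白字符分隔,且必须是整数),`support_count()`函数计算项集的支持度计数,`generate_candidate_itemsets()`函数生成候选项集,`generate_frequent_itemsets()`函数生成频繁项集,`generate_association_rules()`函数生成关联规则。需要注意的是,`min_support`是绝对支持度计数(即包含该项集的事务条数),而不是比例;`min_confidence`是置信度阈值,取值在0到1之间。您可以根据自己的需要调整代码。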
阅读全文