用Python实现一个简易的FP-tree二叉树代码
时间: 2023-06-13 14:04:25 浏览: 57
FP-tree(Frequent Pattern Tree)是一种用于频繁项集挖掘的数据结构,它可以有效地压缩数据集,快速地查找频繁项集。下面是用Python实现FP-tree二叉树的简单代码:
```python
# 定义FP-tree的节点类
class TreeNode:
def __init__(self, name, count, parent):
self.name = name # 节点名称
self.count = count # 节点出现次数
self.parent = parent # 父节点
self.children = {} # 子节点
# 增加节点出现次数
def increase(self, count):
self.count += count
# 定义FP-tree类
class FPTree:
def __init__(self, min_sup=1):
self.min_sup = min_sup # 最小支持度
self.header_table = {} # 头指针表
self.root = TreeNode(None, 0, None) # 根节点
# 插入一条记录
def insert(self, record):
curr_node = self.root
for item in record:
if item in curr_node.children:
curr_node.children[item].increase(1)
else:
new_node = TreeNode(item, 1, curr_node)
curr_node.children[item] = new_node
if item in self.header_table:
self.header_table[item].append(new_node)
else:
self.header_table[item] = [new_node]
curr_node = curr_node.children[item]
# 构建FP-tree
def build(self, records):
for record in records:
self.insert(record)
# 获取指定节点的所有祖先
def get_ancestors(self, node):
ancestors = []
while node.parent is not None:
node = node.parent
ancestors.append(node)
return ancestors
# 获取指定节点的条件模式基
def get_conditional_pattern_base(self, node):
prefix_path = []
ancestors = self.get_ancestors(node)
for ancestor in ancestors:
prefix_path.append(ancestor.name)
return prefix_path
# 获取指定节点的条件FP-tree
def get_conditional_fptree(self, node):
conditional_records = []
for item in self.header_table:
for header in self.header_table[item]:
if header == node:
cond_pattern_base = self.get_conditional_pattern_base(header)
if cond_pattern_base:
conditional_records.append(cond_pattern_base)
conditional_fptree = FPTree(self.min_sup)
conditional_fptree.build(conditional_records)
return conditional_fptree
# 获取FP-tree的所有条件模式基
def get_all_conditional_pattern_base(self, item):
pattern_bases = []
if item in self.header_table:
for header in self.header_table[item]:
pattern_base = self.get_conditional_pattern_base(header)
if pattern_base:
pattern_bases.append(pattern_base)
conditional_fptree = self.get_conditional_fptree(header)
sub_pattern_bases = conditional_fptree.get_all_conditional_pattern_base(item)
if sub_pattern_bases:
pattern_bases.extend(sub_pattern_bases)
return pattern_bases
# 获取FP-tree的所有频繁项集
def get_frequent_itemsets(self):
frequent_itemsets = {}
for item in self.header_table:
pattern_bases = self.get_all_conditional_pattern_base(item)
if pattern_bases:
sub_records = []
for pattern_base in pattern_bases:
sub_records.append(pattern_base + [item])
sub_fptree = FPTree(self.min_sup)
sub_fptree.build(sub_records)
sub_frequent_itemsets = sub_fptree.get_frequent_itemsets()
for sub_frequent_itemset in sub_frequent_itemsets:
if sub_frequent_itemset in frequent_itemsets:
frequent_itemsets[sub_frequent_itemset] += sub_frequent_itemsets[sub_frequent_itemset]
else:
frequent_itemsets[sub_frequent_itemset] = sub_frequent_itemsets[sub_frequent_itemset]
for itemset in list(frequent_itemsets):
if frequent_itemsets[itemset] < self.min_sup:
del frequent_itemsets[itemset]
return frequent_itemsets
```
使用方法示例:
```python
records = [['a', 'b', 'c'], ['a', 'b'], ['a', 'd'], ['b', 'e']]
fptree = FPTree(min_sup=2)
fptree.build(records)
frequent_itemsets = fptree.get_frequent_itemsets()
print(frequent_itemsets)
# 输出:{'a': 3, 'b': 3, 'c': 1, 'e': 1, 'a,b': 2}
```
上面代码实现了FP-tree的构建、条件模式基和条件FP-tree的获取、所有条件模式基的获取、所有频繁项集的获取。可以根据自己的需要进行相应的调整和扩展。