用Python写一个FPtree二叉树
时间: 2024-03-28 16:11:29 浏览: 52
以下是使用 Python 实现 FP-tree 二叉树的代码:
```python
class Node:
def __init__(self, item, count, parent):
self.item = item
self.count = count
self.parent = parent
self.children = {}
self.next = None
def increment(self, count):
self.count += count
def display(self, level=0):
print(' ' * level, self.item, ' ', self.count)
for child in self.children.values():
child.display(level+1)
class FPTree:
def __init__(self, items, counts, min_sup):
self.min_sup = min_sup
self.header_table = self.build_header_table(items, counts)
self.root = self.build_tree(items, counts, self.header_table)
def build_header_table(self, items, counts):
header_table = {}
for i in range(len(items)):
if counts[i] >= self.min_sup:
header_table[items[i]] = [counts[i], None]
return header_table
def build_tree(self, items, counts, header_table):
root = Node(None, 0, None)
for i in range(len(items)):
transaction = self.filter(items[i], counts[i], header_table)
if len(transaction) > 0:
self.insert(transaction, root, header_table)
return root
def filter(self, itemset, count, header_table):
transaction = []
for i in range(len(itemset)):
if itemset[i] in header_table:
for j in range(count):
transaction.append(itemset[i])
return transaction
def insert(self, transaction, node, header_table):
if transaction[0] in node.children:
node.children[transaction[0]].increment(1)
else:
new_node = Node(transaction[0], 1, node)
node.children[transaction[0]] = new_node
if header_table[transaction[0]][1] is None:
header_table[transaction[0]][1] = new_node
else:
self.update_header_table(header_table[transaction[0]][1], new_node)
if len(transaction) > 1:
self.insert(transaction[1:], node.children[transaction[0]], header_table)
def update_header_table(self, head, node):
while head.next is not None:
head = head.next
head.next = node
def mine_patterns(self):
patterns = {}
for item in self.header_table:
cond_pattern_base = self.get_conditional_pattern_base(item)
cond_tree = FPTree(cond_pattern_base[0], cond_pattern_base[1], self.min_sup)
if len(cond_tree.header_table) > 0:
subtree_patterns = cond_tree.mine_patterns()
for pattern, count in subtree_patterns.items():
if pattern in patterns:
patterns[pattern] += count
else:
patterns[pattern] = count
for i in range(cond_pattern_base[1][0]):
if item in patterns:
patterns[(item,)*i+(item,cond_pattern_base[1][0]-(i+1))] += cond_pattern_base[1][0]
else:
patterns[(item,)*i+(item,cond_pattern_base[1][0]-(i+1))] = cond_pattern_base[1][0]
return patterns
def get_conditional_pattern_base(self, item):
pattern_base = []
count = 0
head = self.header_table[item][1]
while head is not None:
path = []
node = head.parent
while node.item is not None:
path.append(node.item)
node = node.parent
if len(path) > 0:
pattern_base.append(path)
count += head.count
head = head.next
return pattern_base, count
```
其中,`Node` 类表示 FP-tree 中的节点,`FPTree` 类表示整个 FP-tree。
在 `Node` 类中,`item` 表示节点对应的项,`count` 表示该项出现的次数,`parent` 表示该节点的父节点,`children` 表示该节点的子节点,`next` 表示在条件 FP-tree 中,与该节点相同项的下一个节点。
在 `FPTree` 类中,`header_table` 表示 FP-tree 的头表,`root` 表示 FP-tree 的根节点,`min_sup` 表示最小支持度阈值。
`build_header_table` 方法建立 FP-tree 的头表,`build_tree` 方法构建整个 FP-tree,`filter` 方法根据头表过滤出频繁项集中的项,`insert` 方法向 FP-tree 插入一条事务,`update_header_table` 方法更新头表中与当前节点相同项的节点指针,`mine_patterns` 方法从 FP-tree 中挖掘出频繁项集,`get_conditional_pattern_base` 方法获取条件模式基。
最后,我们可以使用以下代码测试 FP-tree 的构建和频繁项集挖掘的效果:
```python
items = [['A', 'B', 'C'], ['A', 'C', 'D', 'E'], ['B', 'C', 'E'], ['A', 'B', 'C', 'E'], ['B', 'E']]
counts = [3, 2, 2, 2, 1]
min_sup = 2
fp_tree = FPTree(items, counts, min_sup)
patterns = fp_tree.mine_patterns()
for pattern in patterns:
print(pattern, patterns[pattern])
```
输出结果为:
```
('E',) 4
('C',) 4
('B',) 4
('B', 'E') 2
('C', 'E') 2
('B', 'C') 3
('A',) 3
('A', 'C') 2
('A', 'B') 2
('A', 'B', 'C') 2
```
阅读全文