fpgrowth算法python代码
时间: 2023-09-10 21:01:14 浏览: 103
FP-growth算法是一种用于频繁模式挖掘的快速和高效的算法。下面是FP-growth算法的Python代码:
```python
# 定义FP树的节点类
class TreeNode:
def __init__(self, name, count, parent):
self.name = name # 节点名称
self.count = count # 节点计数
self.parent = parent # 父节点
self.children = {} # 子节点
# 增加节点计数
def increment(self, count):
self.count += count
# 构建FP树
def createFPtree(dataset, minSupport):
headerTable = {}
# 第一次遍历数据集,创建头指针表
for trans in dataset:
for item in trans:
headerTable[item] = headerTable.get(item, 0) + dataset[trans]
# 移除不满足最小支持度的项
headerTable = {k: v for k, v in headerTable.items() if v >= minSupport}
freqItemSet = set(headerTable.keys())
if len(freqItemSet) == 0: # 如果没有项满足最小支持度,则退出
return None, None
# 更新头指针表
for k in headerTable:
headerTable[k] = [headerTable[k], None]
# 创建根节点
root = TreeNode('NULL', 1, None)
# 第二次遍历数据集,构建FP树
for trans, count in dataset.items():
localD = {}
for item in trans:
if item in freqItemSet:
localD[item] = headerTable[item][0]
if len(localD) > 0:
orderedItems = [v[0] for v in sorted(localD.items(), key=lambda p: p[1], reverse=True)]
updateFPtree(orderedItems, root, headerTable, count)
return root, headerTable
# 更新FP树
def updateFPtree(items, root, headerTable, count):
if items[0] in root.children:
root.children[items[0]].increment(count)
else:
root.children[items[0]] = TreeNode(items[0], count, root)
if headerTable[items[0]][1] is None:
headerTable[items[0]][1] = root.children[items[0]]
else:
updateHeader(headerTable[items[0]][1], root.children[items[0]])
if len(items) > 1:
updateFPtree(items[1:], root.children[items[0]], headerTable, count)
# 更新头指针表
def updateHeader(nodeToTest, targetNode):
while nodeToTest.nodeLink is not None:
nodeToTest = nodeToTest.nodeLink
nodeToTest.nodeLink = targetNode
# 抽取条件模式基
def findPrefixPath(treeNode):
conditionPatterns = {}
while treeNode is not None:
prefixPath = []
ascendTree(treeNode, prefixPath)
if len(prefixPath) > 1:
conditionPatterns[frozenset(prefixPath[1:])] = treeNode.count
treeNode = treeNode.nodeLink
return conditionPatterns
# 递归上溯FP树
def ascendTree(treeNode, prefixPath):
if treeNode.parent is not None:
prefixPath.append(treeNode.name)
ascendTree(treeNode.parent, prefixPath)
# 递归查找频繁项集
def mineFPtree(headerTable, minSupport, preFix, freqItemList):
sortedItemList = [v[0] for v in sorted(headerTable.items(), key=lambda p: p[1][0])]
for basePat in sortedItemList:
newFreqSet = preFix.copy()
newFreqSet.add(basePat)
freqItemList.append(newFreqSet)
conditionPatterns = findPrefixPath(headerTable[basePat][1])
myCondTree, myHead = createFPtree(conditionPatterns, minSupport)
if myHead is not None:
mineFPtree(myHead, minSupport, newFreqSet, freqItemList)
# FP-growth算法
def fpGrowth(dataset, minSupport):
headerTable = {}
# 第一次遍历数据集,创建头指针表
for trans in dataset:
for item in trans:
headerTable[item] = headerTable.get(item, 0) + dataset[trans]
# 移除不满足最小支持度的项
headerTable = {k: v for k, v in headerTable.items() if v >= minSupport}
freqItemSet = set(headerTable.keys())
if len(freqItemSet) == 0: # 如果没有项满足最小支持度,则退出
return []
for k in headerTable:
headerTable[k] = [headerTable[k], None]
freqItemList = []
mineFPtree(headerTable, minSupport, set(), freqItemList)
return freqItemList
# 使用示例
dataset = [['A', 'B', 'C', 'D'],
['B', 'D'],
['A', 'B', 'E'],
['A', 'B', 'C', 'D', 'E'],
['A', 'D', 'E'],
['A', 'B', 'C'],
['A', 'C', 'D', 'E'],
['B', 'C', 'E']]
minSupport = 2
freqItemSet = fpGrowth(dataset, minSupport)
print(freqItemSet)
```
以上是FP-growth算法的Python代码,通过构建FP树和递归查找频繁项集的方式进行频繁模式挖掘。
阅读全文