基于关联规则的FP-Growth算法的matlab代码
时间: 2024-03-05 18:50:49 浏览: 104
以下是基于关联规则的FP-Growth算法的Matlab代码:
```
% 输入数据为包含事务数据的矩阵或表格,minsup为支持度阈值,minconf为置信度阈值
% 输出频繁项集及其支持度,以及关联规则及其置信度
function [freqItemsets, assocRules] = fpGrowthAssoc(data, minsup, minconf)
% 将数据转换为二进制矩阵,其中1表示该项出现,0表示该项未出现
binData = double(data > 0);
% 计算事务数和项数
[numTrans, numItems] = size(binData);
% 初始化FP树
tree = struct('item', [], 'count', 0, 'parent', [], 'children', []);
headerTable = repmat(struct('item', [], 'count', 0, 'nodeLink', []), numItems, 1);
for i = 1:numTrans
trans = find(binData(i, :));
tree = insertTree(tree, headerTable, trans, 1);
end
% 构建频繁项集
freqItemsets = struct('itemset', [], 'support', []);
freqItemsets = findFreqItemsets(tree, headerTable, minsup, [], freqItemsets);
% 构建关联规则
assocRules = struct('antecedent', [], 'consequent', [], 'confidence', []);
for i = 1:length(freqItemsets)
if length(freqItemsets(i).itemset) > 1
rules = findAssocRules(tree, headerTable, freqItemsets(i).itemset, minconf);
assocRules = [assocRules; rules];
end
end
end
% 向FP树中插入一条事务
function tree = insertTree(tree, headerTable, trans, count)
if isempty(trans)
return
end
item = trans(1);
childIdx = findItemIdx(tree.children, item);
if isempty(childIdx)
% 如果该项在当前节点的子节点中不存在,则创建一个新的子节点并插入
newChild = struct('item', item, 'count', count, 'parent', tree, 'children', []);
tree.children = [tree.children, newChild];
% 更新头指针表
headerIdx = findItemIdx(headerTable, item);
if isempty(headerTable(headerIdx).nodeLink)
headerTable(headerIdx).nodeLink = newChild;
else
curNode = headerTable(headerIdx).nodeLink;
while ~isempty(curNode.nodeLink)
curNode = curNode.nodeLink;
end
curNode.nodeLink = newChild;
end
% 递归插入剩余项
tree = insertTree(newChild, headerTable, trans(2:end), count);
else
% 如果该项在当前节点的子节点中存在,则更新计数并递归插入剩余项
childNode = tree.children(childIdx);
childNode.count = childNode.count + count;
tree.children(childIdx) = childNode;
tree = insertTree(childNode, headerTable, trans(2:end), count);
end
end
% 查找频繁项集
function freqItemsets = findFreqItemsets(tree, headerTable, minsup, prefix, freqItemsets)
% 如果当前节点的计数大于等于支持度阈值,则将其加入频繁项集
if tree.count >= minsup
freqItemset = struct('itemset', [prefix, tree.item], 'support', tree.count);
freqItemsets = [freqItemsets, freqItemset];
end
% 对于每个项头表中的项,构建条件模式基并递归查找频繁项集
for i = 1:length(headerTable)
headerNode = headerTable(i).nodeLink;
if ~isempty(headerNode)
prefixPath = prefix;
freqItem = headerTable(i).item;
freqItemset = struct('itemset', [prefix, freqItem], 'support', headerTable(i).count);
freqItemsets = [freqItemsets, freqItemset];
while ~isempty(headerNode)
prefixPath = [prefixPath, freqItem];
condBase = [];
curNode = headerNode;
while ~isempty(curNode.parent)
condBase = [curNode.item, condBase];
curNode = curNode.parent;
end
for j = 1:curNode.count
condBase = [freqItem, condBase];
end
condTree = struct('item', [], 'count', 0, 'parent', [], 'children', []);
condHeaderTable = repmat(struct('item', [], 'count', 0, 'nodeLink', []), length(headerTable), 1);
for j = 1:size(condBase, 2)
condTree = insertTree(condTree, condHeaderTable, condBase(j), 1);
end
freqItemsets = findFreqItemsets(condTree, condHeaderTable, minsup, prefixPath, freqItemsets);
headerNode = headerNode.nodeLink;
end
end
end
end
% 查找关联规则
function rules = findAssocRules(tree, headerTable, itemset, minconf)
rules = struct('antecedent', [], 'consequent', [], 'confidence', []);
if length(itemset) == 1
return
end
% 构建项子集
subsets = generateSubsets(itemset);
% 对于每个项子集,计算置信度并加入关联规则
for i = 1:size(subsets, 1)
antecedent = subsets(i, :);
consequent = setdiff(itemset, antecedent);
antecedentIdx = findItemIdx(headerTable, antecedent(end));
consequentIdx = findItemIdx(headerTable, consequent(end));
antecedentSupport = headerTable(antecedentIdx).count;
consequentSupport = headerTable(consequentIdx).count;
itemsetSupport = tree.count;
confidence = itemsetSupport / antecedentSupport;
if confidence >= minconf
rule = struct('antecedent', antecedent, 'consequent', consequent, 'confidence', confidence);
rules = [rules, rule];
end
end
end
% 查找项在数组中的索引
function idx = findItemIdx(array, item)
idx = find(array == item, 1);
end
% 生成项子集
function subsets = generateSubsets(itemset)
subsets = [];
for i = 1:length(itemset)
subsets = [subsets; nchoosek(itemset, i)];
end
end
```
该代码实现了基于关联规则的FP-Growth算法,可以通过输入数据、支持度阈值和置信度阈值来输出频繁项集及其支持度,以及关联规则及其置信度。其中,insertTree函数用于向FP树中插入一条事务;findFreqItemsets函数用于查找频繁项集;findAssocRules函数用于查找关联规则;其他函数则用于辅助实现算法。
阅读全文