matlab实现K均值聚类的代码
时间: 2023-10-14 18:05:16 浏览: 106
我可以为你提供 MATLAB 实现 K 均值聚类的代码,以下是示例代码:
% Demo script: K-means clustering on the Fisher iris measurements.
% Load the data set; `meas` holds one sample per row.
load fisheriris
X = meas;
% Number of clusters
K = 3;
% Maximum number of K-means iterations. This was previously undefined,
% which made the calls to runkMeans and plotElbowMethod below fail.
max_iters = 10;
% Initial centroids are K points picked at random from the data set
initial_centroids = kMeansInitCentroids(X, K);
% Run the clustering
[centroids, idx] = runkMeans(X, initial_centroids, max_iters);
% Wait for the user before showing the final result
fprintf('Program paused. Press enter to continue.\n');
pause;
% Visualize the clustering result (plotDataPoints uses the first two columns of X)
figure;
plotDataPoints(X, idx, K);
title(sprintf('K-Means Clustering (K = %d)', K));
% Help choose the number of clusters with the elbow method
figure;
plotElbowMethod(X, initial_centroids, max_iters);
% computeCentroids  Recompute each centroid as the mean of its assigned samples.
%   X   - data matrix, one sample per row
%   idx - column vector; idx(r) is the cluster index assigned to row r of X
%   K   - number of clusters
% Returns a K-by-size(X,2) matrix whose i-th row is the mean of the rows of X
% with idx == i. A cluster with no members yields a row of NaN.
function centroids = computeCentroids(X, idx, K)
    numFeatures = size(X, 2);
    centroids = zeros(K, numFeatures);
    for i = 1:K
        % Average explicitly along dimension 1. Without the dimension
        % argument, a cluster with a single member is a row vector and
        % mean() would collapse it to one scalar across the features.
        centroids(i, :) = mean(X(idx == i, :), 1);
    end
end
% findClosestCentroids  Assign every sample to its nearest centroid.
%   X         - data matrix, one sample per row
%   centroids - one centroid per row, same number of columns as X
% Returns a size(X,1)-by-1 vector of centroid row indices, chosen by
% smallest squared Euclidean distance (the first minimum wins on ties).
function idx = findClosestCentroids(X, centroids)
    numSamples = size(X, 1);
    idx = zeros(numSamples, 1);
    for row = 1:numSamples
        % Row-vector minus matrix: subtracts the sample from every centroid row.
        offsets = X(row, :) - centroids;
        sqDist = sum(offsets .^ 2, 2);
        [~, nearest] = min(sqDist);
        idx(row) = nearest;
    end
end
% kMeansInitCentroids  Pick K distinct rows of X at random as starting centroids.
%   X - data matrix, one sample per row
%   K - number of centroids to draw
% Returns a K-by-size(X,2) matrix made of K different rows of X.
function centroids = kMeansInitCentroids(X, K)
    % Shuffle all row indices, then keep the first K of them.
    shuffledRows = randperm(size(X, 1));
    chosen = shuffledRows(1:K);
    centroids = X(chosen, :);
end
% runkMeans  Run K-means starting from the given centroids.
%   X                 - data matrix, one sample per row
%   initial_centroids - K-by-size(X,2) matrix of starting centroids
%   max_iters         - upper bound on the number of iterations
% Returns the final centroids and idx, the cluster index of every row of X.
% Iteration stops early once the centroids no longer change. Progress is
% printed and plotted each iteration via plotProgresskMeans.
function [centroids, idx] = runkMeans(X, initial_centroids, max_iters)
    m = size(X, 1);
    K = size(initial_centroids, 1);
    centroids = initial_centroids;
    previous_centroids = centroids;
    % Cluster assignment of every sample
    idx = zeros(m, 1);
    for i = 1:max_iters
        fprintf('K-Means iteration %d/%d...\n', i, max_iters);
        % Assignment step: nearest centroid for every sample
        idx = findClosestCentroids(X, centroids);
        % Update step: centroid = mean of its assigned samples
        centroids = computeCentroids(X, idx, K);
        % A cluster that lost all its members has no mean (NaN); keep its
        % previous position so it is not lost for the rest of the run.
        emptyClusters = ~ismember((1:K)', idx);
        centroids(emptyClusters, :) = previous_centroids(emptyClusters, :);
        % Visualize this iteration
        plotProgresskMeans(X, centroids, previous_centroids, idx, K, i);
        % Converged: another iteration would reproduce the same result
        if isequal(centroids, previous_centroids)
            break;
        end
        previous_centroids = centroids;
    end
end
% plotDataPoints  Scatter-plot the samples, colored by cluster assignment.
%   X   - data matrix; only its first two columns are plotted
%   idx - cluster index of every row of X (used to index the color table)
%   K   - number of clusters; the color table has K + 1 hsv entries
function plotDataPoints(X, idx, K)
    colorTable = hsv(K + 1);
    % One RGB row per sample, looked up by its cluster index
    scatter(X(:, 1), X(:, 2), 15, colorTable(idx, :));
end
% plotElbowMethod  Plot the K-means cost against the number of clusters.
%   X                 - data matrix, one sample per row
%   initial_centroids - starting centroids; used only for the K that equals
%                       size(initial_centroids, 1). Every other K gets its
%                       own K random starting points drawn from X.
%   max_iters         - upper bound on iterations for each K
% For K = 1..min(10, number of samples) this runs K-means and records the
% cost: the sum over all samples of the squared distance to the nearest
% centroid. Each K uses a single random start, so the curve can vary
% between runs.
function plotElbowMethod(X, initial_centroids, max_iters)
    numSamples = size(X, 1);
    % Cannot have more clusters than samples
    Ks = 1:min(10, numSamples);
    costs = zeros(length(Ks), 1);
    for i = 1:length(Ks)
        K = Ks(i);
        % The number of starting centroids must equal K. Reusing one
        % fixed-size set for every K made the cost curve meaningless.
        if K == size(initial_centroids, 1)
            centroids = initial_centroids;
        else
            centroids = kMeansInitCentroids(X, K);
        end
        % Iterate until the centroids stop changing or max_iters is reached
        for j = 1:max_iters
            previous_centroids = centroids;
            idx = findClosestCentroids(X, centroids);
            centroids = computeCentroids(X, idx, K);
            % Keep the old position of any cluster that has no members
            emptyClusters = ~ismember((1:K)', idx);
            centroids(emptyClusters, :) = previous_centroids(emptyClusters, :);
            if isequal(centroids, previous_centroids)
                break;
            end
        end
        % Squared distance from every sample to every centroid
        distances = zeros(numSamples, K);
        for j = 1:K
            distances(:, j) = sum((X - repmat(centroids(j, :), numSamples, 1)) .^ 2, 2);
        end
        % Total cost: each sample contributes its distance to the nearest centroid
        costs(i) = sum(min(distances, [], 2));
    end
    % Draw the elbow curve
    plot(Ks, costs, '-o');
    xlabel('聚类数量');
    ylabel('代价函数');
    title('肘部法');
end
% plotProgresskMeans  Draw the current clustering and how the centroids moved.
%   X         - data matrix; the first two columns are plotted
%   centroids - current centroids, one per row
%   previous  - centroids from the previous iteration, same size
%   idx       - cluster index of every row of X
%   K         - number of clusters
%   i         - current iteration number (shown in the title)
%   max_iters - (optional) total number of iterations; when given, the
%               title reads "i/max_iters", otherwise only "i" is shown
function plotProgresskMeans(X, centroids, previous, idx, K, i, max_iters)
    wasHeld = ishold;
    plotDataPoints(X, idx, K);
    % Without hold, the plot() call below would erase the scatter just drawn.
    hold on;
    plot(centroids(:,1), centroids(:,2), 'x', 'MarkerEdgeColor', 'k', 'LineWidth', 3);
    % Connect every centroid to its previous position
    for j = 1:size(centroids, 1)
        drawLine(centroids(j, :), previous(j, :));
    end
    % The total used to be hard-coded as 10 regardless of the real limit.
    if nargin < 7 || isempty(max_iters)
        title(sprintf('K-Means iteration %d', i));
    else
        title(sprintf('K-Means iteration %d/%d', i, max_iters));
    end
    % Restore the caller's hold state so the next iteration starts clean
    if ~wasHeld
        hold off;
    end
    % Flush graphics so progress is visible while the caller is still looping
    drawnow;
end
% drawLine  Draw a red line segment between two points on the current axes.
%   p1, p2 - points; only their first two elements (x, y) are used
function drawLine(p1, p2)
    xs = [p1(1), p2(1)];
    ys = [p1(2), p2(2)];
    line(xs, ys, 'Color', 'r', 'LineWidth', 2);
end
注意:该代码仅供参考,并且可能需要针对具体的数据进行微调。
阅读全文