【问题描述】编程实现K-means算法,对平面坐标上的点进行聚类 【输入形式】输入有多行,第1行3个整数,依次是N,M,K,N为点的个数,M为坐标数,K为聚类数,下面N行输入N个点的坐标,最后1行输入K个点的编号,这K个点作为初始的聚类中心 【输出形式】输出有K行,每行输出该类中的点的编号 【样例输入】 11 2 3 2 10 2 5 8 4 5 8 7 5 6 4 1 2 4 9 7 3 1 3 3 9 1 4 7 【样例输出】 cluster1:1,4,8,11 cluster2:3,5,6,9 cluster3:2,7,10用c++编写
时间: 2024-01-04 19:02:06 浏览: 212
K-means聚类算法的C++实现
5星 · 资源好评率100%
以下是使用 C++ 实现 K-means 算法的代码,可以参考一下:
```c++
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
// Calculate Euclidean distance between two points
double distance(const vector<double>& x, const vector<double>& y) {
double d = 0.0;
for (int i = 0; i < x.size(); i++) {
d += pow(x[i] - y[i], 2);
}
return sqrt(d);
}
// Assign each data point to nearest centroid
void assign_cluster(const vector<vector<double>>& data, const vector<vector<double>>& centroids, vector<int>& labels) {
for (int i = 0; i < data.size(); i++) {
double min_dist = INFINITY;
int min_index = 0;
for (int j = 0; j < centroids.size(); j++) {
double d = distance(data[i], centroids[j]);
if (d < min_dist) {
min_dist = d;
min_index = j;
}
}
labels[i] = min_index;
}
}
// Update centroids based on current cluster labels
void update_centroids(const vector<vector<double>>& data, const vector<int>& labels, vector<vector<double>>& centroids) {
vector<vector<double>> new_centroids(centroids.size(), vector<double>(data[0].size(), 0.0));
vector<int> counts(centroids.size(), 0);
for (int i = 0; i < data.size(); i++) {
int c = labels[i];
for (int j = 0; j < data[i].size(); j++) {
new_centroids[c][j] += data[i][j];
}
counts[c]++;
}
for (int i = 0; i < centroids.size(); i++) {
if (counts[i] > 0) {
for (int j = 0; j < centroids[i].size(); j++) {
centroids[i][j] = new_centroids[i][j] / counts[i];
}
}
}
}
// K-means clustering algorithm
void kmeans(const vector<vector<double>>& data, int k, vector<int>& labels, vector<vector<double>>& centroids) {
// Initialize centroids randomly
centroids = vector<vector<double>>(k, vector<double>(data[0].size(), 0.0));
for (int i = 0; i < k; i++) {
centroids[i] = data[rand() % data.size()];
}
// Initialize cluster labels
labels = vector<int>(data.size(), 0);
while (true) {
// Assign each data point to nearest centroid
assign_cluster(data, centroids, labels);
// Update centroids
vector<vector<double>> old_centroids = centroids;
update_centroids(data, labels, centroids);
// Check for convergence
bool converged = true;
for (int i = 0; i < centroids.size(); i++) {
if (distance(old_centroids[i], centroids[i]) > 1e-6) {
converged = false;
break;
}
}
if (converged) {
break;
}
}
}
// Print cluster labels
void print_clusters(const vector<int>& labels) {
vector<vector<int>> clusters(labels.size());
for (int i = 0; i < labels.size(); i++) {
clusters[labels[i]].push_back(i + 1);
}
for (int i = 0; i < clusters.size(); i++) {
cout << "cluster" << i + 1 << ":";
for (int j = 0; j < clusters[i].size(); j++) {
cout << clusters[i][j];
if (j < clusters[i].size() - 1) {
cout << ",";
}
}
cout << endl;
}
}
// Main function
int main() {
// Read input data
int n, m, k;
cin >> n >> m >> k;
vector<vector<double>> data(n, vector<double>(m));
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
cin >> data[i][j];
}
}
vector<int> initial_centroids(k);
for (int i = 0; i < k; i++) {
cin >> initial_centroids[i];
}
// Run K-means algorithm
vector<int> labels;
vector<vector<double>> centroids;
kmeans(data, k, labels, centroids);
// Print cluster labels
print_clusters(labels);
return 0;
}
```
该程序也读入输入数据,包括点的个数、坐标数、聚类数、所有点的坐标和初始聚类中心的编号,然后使用 K-means 算法进行聚类,输出每个聚类中的点的编号。注意,这里使用 vector 容器进行数据存储和操作,需要包含头文件 `<vector>`。
阅读全文