K-medoids聚类 C++代码
时间: 2023-10-26 07:07:42 浏览: 89
以下是 K-medoids 聚类的 C++ 实现:先从数据集中随机选择 k 个互不相同的点作为初始中心点(medoid),然后交替进行簇分配和中心点更新,直到中心点不再变化(收敛)或达到最大迭代次数:
```c++
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <numeric>
#include <utility>
#include <vector>
using namespace std;
// Computes the Euclidean (L2) distance between two points.
//
// p1, p2: coordinate vectors. They are expected to have the same number of
//         dimensions; if the lengths differ, only the leading dimensions
//         present in both vectors are compared, so the shorter vector is
//         never read past its end.
// Returns the square root of the sum of squared per-dimension differences
// (0 when either vector is empty).
double euclidean_distance(const std::vector<double>& p1, const std::vector<double>& p2) {
    const std::size_t dims = std::min(p1.size(), p2.size());
    double sum = 0.0;
    for (std::size_t i = 0; i < dims; ++i) {
        const double diff = p1[i] - p2[i];
        sum += diff * diff;  // plain multiply instead of the general-purpose pow()
    }
    return std::sqrt(sum);
}
// Finds a medoid: the candidate point whose summed Euclidean distance to the
// other members of its own cluster is smallest.
//
// data:       all points, one coordinate vector per point.
// cluster:    cluster label of each point, indexed like `data`. If it is
//             shorter than `data`, only the labelled prefix is considered.
// cluster_id: label of the cluster to search. When negative (the default),
//             every point is a candidate and is scored against the other
//             members of its own cluster.
// Returns the index into `data` of the best candidate (the lowest index wins
// ties), or -1 when there is no candidate, e.g. the requested cluster has no
// members.
int find_medoid(const std::vector<std::vector<double>>& data,
                const std::vector<int>& cluster,
                int cluster_id = -1) {
    const std::size_t n = std::min(data.size(), cluster.size());
    int medoid = -1;
    double min_cost = std::numeric_limits<double>::max();
    for (std::size_t i = 0; i < n; ++i) {
        // Only members of the requested cluster may become its medoid.
        if (cluster_id >= 0 && cluster[i] != cluster_id) {
            continue;
        }
        double cost = 0.0;
        for (std::size_t j = 0; j < n; ++j) {
            if (i != j && cluster[j] == cluster[i]) {
                cost += euclidean_distance(data[i], data[j]);
            }
        }
        if (cost < min_cost) {
            min_cost = cost;
            medoid = static_cast<int>(i);
        }
    }
    return medoid;
}

// Clusters `data` into k groups with the alternating k-medoids scheme:
// assign every point to its nearest medoid, then move each medoid to the
// member of its cluster with the smallest summed distance to the other
// members, and repeat until no medoid moves or `max_iter` rounds have run.
//
// data:     points to cluster, one coordinate vector per point.
// k:        requested number of clusters. Values larger than data.size() are
//           reduced to data.size(); k <= 0 yields no clusters.
// max_iter: upper bound on the number of assign/update rounds.
// Returns one label per point, in [0, k). Every label is -1 when k <= 0, and
// the result is empty when `data` is empty.
//
// Initial medoids are drawn with std::rand(), so results depend on the
// std::srand() seed chosen by the caller.
std::vector<int> k_medoids(const std::vector<std::vector<double>>& data, int k, int max_iter) {
    const std::size_t n = data.size();
    std::vector<int> clusters(n, -1);
    if (n == 0 || k <= 0) {
        return clusters;
    }
    if (static_cast<std::size_t>(k) > n) {
        k = static_cast<int>(n);  // cannot have more medoids than points
    }

    // Initialize the medoids with k distinct random indices via a partial
    // Fisher-Yates shuffle; unlike retry-on-duplicate sampling this always
    // terminates after exactly k draws.
    std::vector<int> order(n);
    std::iota(order.begin(), order.end(), 0);
    std::vector<int> medoids(k, -1);
    for (int i = 0; i < k; ++i) {
        const std::size_t first = static_cast<std::size_t>(i);
        const std::size_t pick = first + static_cast<std::size_t>(std::rand()) % (n - first);
        std::swap(order[first], order[pick]);
        medoids[i] = order[first];
    }

    // Assignment step: label every point with its nearest medoid. All points
    // are relabelled on each call so that they follow medoids that moved.
    const auto assign_points = [&]() {
        for (std::size_t i = 0; i < n; ++i) {
            int nearest = 0;
            double min_dist = euclidean_distance(data[i], data[medoids[0]]);
            for (int j = 1; j < k; ++j) {
                const double dist = euclidean_distance(data[i], data[medoids[j]]);
                if (dist < min_dist) {
                    min_dist = dist;
                    nearest = j;
                }
            }
            clusters[i] = nearest;
        }
    };

    bool medoids_moved = true;
    for (int iter = 0; iter < max_iter && medoids_moved; ++iter) {
        assign_points();

        // Update step: re-centre each cluster on its best member.
        medoids_moved = false;
        for (int c = 0; c < k; ++c) {
            const int best = find_medoid(data, clusters, c);
            // best == -1 means the cluster is empty (possible when points
            // coincide); keep its previous medoid in that case.
            if (best != -1 && best != medoids[c]) {
                medoids[c] = best;
                medoids_moved = true;
            }
        }
    }

    // If the loop ended right after a medoid moved (or never ran because
    // max_iter <= 0), bring the labels in line with the final medoids.
    if (medoids_moved) {
        assign_points();
    }
    return clusters;
}
// Demo entry point: clusters a fixed set of ten 2-D points into two groups
// (at most 100 iterations) and prints one "index: label" line per point.
int main() {
    const std::vector<std::vector<double>> data = {
        {2, 6}, {3, 4}, {3, 8}, {4, 7}, {6, 2},
        {6, 4}, {7, 3}, {7, 4}, {8, 5}, {7, 6}};
    const int k = 2;
    const int max_iter = 100;

    const std::vector<int> clusters = k_medoids(data, k, max_iter);

    int index = 0;
    for (const int label : clusters) {
        std::cout << index << ": " << label << std::endl;
        ++index;
    }
    return 0;
}
```
阅读全文