KM算法 C语言实现
时间: 2024-09-19 08:04:38 浏览: 40
K-means(K均值聚类)算法是一种常用的无监督机器学习方法,用于数据分群。下面是K-means算法的一个基本的C语言实现示例:
```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
typedef struct Point {
double x, y;
} Point;
double distance(Point a, Point b) {
return sqrt(pow(a.x - b.x, 2) + pow(a.y - b.y, 2));
}
int assign_cluster(Point* data, int k, Point* centroids) {
int cluster_size[k];
for (int i = 0; i < k; i++) {
cluster_size[i] = 0;
}
for (int i = 0; i < sizeof(data)/sizeof(data[0]); i++) {
int min_distance = INT_MAX;
int closest_center = -1;
for (int j = 0; j < k; j++) {
double temp_dist = distance(data[i], centroids[j]);
if (temp_dist < min_distance) {
min_distance = temp_dist;
closest_center = j;
}
}
cluster_size[closest_center]++;
}
return closest_center;
}
void update_centroids(Point* data, int k, int* cluster_ids, Point* centroids) {
for (int i = 0; i < k; i++) {
Point sum = {0, 0};
for (int j = 0; j < sizeof(data)/sizeof(data[0]); j++) {
if (cluster_ids[j] == i) {
sum.x += data[j].x;
sum.y += data[j].y;
}
}
centroids[i].x = sum.x / cluster_size[i];
centroids[i].y = sum.y / cluster_size[i];
}
}
void kmeans_algorithm(Point* data, int k, int max_iterations, Point* initial_centroids) {
Point* centroids = malloc(k * sizeof(Point)); // 初始化质心
int* cluster_ids = malloc(sizeof(int)*sizeof(data)/sizeof(data[0]));
// 首次随机选择k个点作为初始质心
for (int i = 0; i < k; i++) {
centroids[i] = data[rand() % sizeof(data)/sizeof(data[0])];
}
for (int iter = 0; iter < max_iterations; iter++) {
// 分配每个点到最近的质心
for (int i = 0; i < sizeof(data)/sizeof(data[0]); i++) {
cluster_ids[i] = assign_cluster(data, k, centroids);
}
// 更新质心
update_centroids(data, k, cluster_ids, centroids);
// 如果质心不再变化,算法停止
bool convergence = true;
for (int i = 0; i < k; i++) {
if (distance(initial_centroids[i], centroids[i]) > 0.001) {
convergence = false;
break;
}
}
if (convergence) {
break;
}
}
free(cluster_ids);
free(centroids);
printf("Cluster assignments:\n");
for (int i = 0; i < sizeof(data)/sizeof(data[0]); i++) {
printf("(%.2lf, %.2lf) -> Cluster %d\n", data[i].x, data[i].y, cluster_ids[i]);
}
}
int main() {
// 创建数据点...
Point data[] = {{1.5, 2.5}, {2.8, 4.8}, {0.9, 2.3}, {4.5, 6.1}};
int k = 2;
int max_iterations = 10;
Point initial_centroids[] = {{0, 0}, {10, 10}}; // 初始化两个质心
kmeans_algorithm(data, k, max_iterations, initial_centroids);
return 0;
}
```
这段代码首先定义了距离计算函数和K-means的基本步骤,包括数据点分配到簇、更新质心以及判断算法是否收敛。在主函数中,我们创建了一些示例数据并执行了K-means算法。
请注意,这是一个非常基础的实现,实际项目中可能需要处理更多的边界条件和优化细节。
阅读全文