用C语言写一个k_means聚类的代码
时间: 2024-03-16 07:47:57 浏览: 79
k-means 聚类代码
以下是一个简单的K-Means聚类的C语言实现示例:
```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
// Upper bound on the number of k-means iterations; main passes it to k_means().
#define MAX_ITERATIONS 1000
// Number of 2-D points that main generates and clusters.
#define DATA_SIZE 100
// Number of clusters (and cluster centers) that main asks for.
#define K 3
// Fill the first n rows of data with pseudo-random 2-D points.
//
// Every coordinate is rand() % 100 divided by 100, i.e. one of
// 0.00, 0.01, ..., 0.99. The x coordinate of a point is drawn before its
// y coordinate.
//
// Note: this function reseeds the generator from the current time on every
// call, so two calls within the same second produce the same points.
void generate_data(float data[][2], int n) {
    srand((unsigned int)time(NULL));

    for (int row = 0; row < n; row++) {
        for (int axis = 0; axis < 2; axis++) {
            int hundredths = rand() % 100;
            data[row][axis] = (float)hundredths / 100;
        }
    }
}
// 计算欧氏距离
float euclidean_distance(float x1, float y1, float x2, float y2) {
return sqrt(pow((x1 - x2), 2) + pow((y1 - y2), 2));
}
// Choose k initial cluster centers from the n rows of data with a
// farthest-point heuristic.
//
// The first center is the data point at index rand() % n. Every further
// center is the data point whose distance to its nearest already-chosen
// center is largest. If no point lies at a positive distance from the
// chosen centers (e.g. all points coincide), the previously selected point
// is reused, so duplicate centers are possible.
//
// Parameters:
//   data    - input points; data[i][0] is x, data[i][1] is y
//   n       - number of points in data
//   centers - output array with room for k centers
//   k       - number of centers to choose
//
// Does nothing when n <= 0 or k <= 0. If the scratch buffer cannot be
// allocated, a message is written to stderr and the remaining centers are
// taken from randomly indexed data points instead.
// This function calls rand() but never srand().
void init_centers(float data[][2], int n, float centers[][2], int k) {
    // rand() % n is undefined for n == 0, and centers[0] must not be
    // written when the caller asked for no centers.
    if (n <= 0 || k <= 0) {
        return;
    }

    // Pick a random point as the first center.
    int index = rand() % n;
    centers[0][0] = data[index][0];
    centers[0][1] = data[index][1];
    if (k == 1) {
        return;
    }

    // min_dist[j] tracks the distance from point j to its nearest chosen center.
    float *min_dist = malloc((size_t)n * sizeof *min_dist);
    if (min_dist == NULL) {
        fprintf(stderr, "init_centers: out of memory, falling back to random centers\n");
        for (int i = 1; i < k; i++) {
            index = rand() % n;
            centers[i][0] = data[index][0];
            centers[i][1] = data[index][1];
        }
        return;
    }

    // Select the remaining k-1 centers.
    for (int i = 1; i < k; i++) {
        float max_dist = 0;

        // Only the most recently added center can shrink a point's minimum
        // distance, so one pass against centers[i-1] keeps min_dist current.
        for (int j = 0; j < n; j++) {
            float dist = euclidean_distance(data[j][0], data[j][1],
                                            centers[i - 1][0], centers[i - 1][1]);
            min_dist[j] = (i == 1) ? dist : fminf(dist, min_dist[j]);
            if (min_dist[j] > max_dist) {
                max_dist = min_dist[j];
                index = j;
            }
        }

        centers[i][0] = data[index][0];
        centers[i][1] = data[index][1];
    }

    free(min_dist);
}
// Run k-means clustering on n 2-D points.
//
// Parameters:
//   data           - input points; data[i][0] is x, data[i][1] is y
//   n              - number of points
//   centers        - in: initial centers; out: final centers (k rows)
//   k              - number of clusters
//   labels         - out: labels[i] is the index of the center assigned to
//                    point i (n entries)
//   max_iterations - upper bound on the number of assign/update rounds
//
// All labels are first reset to -1. Each round assigns every point to its
// nearest center (ties go to the lowest center index), stops if no label
// changed, and otherwise moves each center to the mean of its points.
// A center with no assigned points keeps its previous position.
//
// When k <= 0 or max_iterations <= 0, every label is left at -1 and
// centers is not modified.
void k_means(float data[][2], int n, float centers[][2], int k, int *labels, int max_iterations) {
    // Mark every point as unassigned.
    for (int i = 0; i < n; i++) {
        labels[i] = -1;
    }

    // With no centers there is nothing to assign to.
    if (k <= 0) {
        return;
    }

    for (int iter = 0; iter < max_iterations; iter++) {
        int changed = 0;

        // Assignment step: attach each point to its nearest center.
        for (int i = 0; i < n; i++) {
            // Start from center 0 so the label is well defined even if no
            // distance compares below INFINITY (e.g. NaN coordinates).
            int nearest = 0;
            float min_dist = INFINITY;

            for (int j = 0; j < k; j++) {
                float dist = euclidean_distance(data[i][0], data[i][1],
                                                centers[j][0], centers[j][1]);
                if (dist < min_dist) {
                    min_dist = dist;
                    nearest = j;
                }
            }

            if (labels[i] != nearest) {
                labels[i] = nearest;
                changed = 1;
            }
        }

        // Converged: the assignment is stable, so the centers would not move.
        if (!changed) {
            break;
        }

        // Update step: move each center to the mean of its assigned points.
        for (int j = 0; j < k; j++) {
            float sum_x = 0, sum_y = 0;
            int count = 0;

            for (int i = 0; i < n; i++) {
                if (labels[i] == j) {
                    sum_x += data[i][0];
                    sum_y += data[i][1];
                    count++;
                }
            }

            // An empty cluster keeps its old center (also avoids dividing by zero).
            if (count > 0) {
                centers[j][0] = sum_x / count;
                centers[j][1] = sum_y / count;
            }
        }
    }
}
// Print the clustering result to stdout.
//
// For each cluster index 0..k-1 this writes one "Cluster <i>: " line
// listing every point whose label equals that index, followed by a
// "Center: (x, y)" line with the cluster's center.
void print_result(float data[][2], int n, float centers[][2], int k, int *labels) {
    for (int cluster = 0; cluster < k; cluster++) {
        printf("Cluster %d: ", cluster);

        for (int point = 0; point < n; point++) {
            if (labels[point] != cluster) {
                continue;
            }
            printf("(%f, %f) ", data[point][0], data[point][1]);
        }

        printf("\nCenter: (%f, %f)\n", centers[cluster][0], centers[cluster][1]);
    }
}
// Program entry point: generate DATA_SIZE random points, choose K initial
// centers, run k-means for at most MAX_ITERATIONS rounds, then print every
// cluster together with its center.
int main() {
    float points[DATA_SIZE][2];
    int assignment[DATA_SIZE];
    float centroids[K][2];

    generate_data(points, DATA_SIZE);
    init_centers(points, DATA_SIZE, centroids, K);
    k_means(points, DATA_SIZE, centroids, K, assignment, MAX_ITERATIONS);
    print_result(points, DATA_SIZE, centroids, K, assignment);

    return EXIT_SUCCESS;
}
```
这个例子首先生成100个二维随机数据点(每个坐标取值于0.00到0.99之间,步长为0.01),然后用K-Means算法将其分成3类。初始化时,先随机选择一个数据点作为第一个聚类中心,其余k-1个聚类中心则依次选取"到已选中心的最小距离最大"的数据点(即最远点优先策略)。接下来,迭代更新每个点的标签和每个聚类中心的位置,直到所有点的标签不再变化(收敛)或达到最大迭代次数。最后输出每个聚类的数据点和聚类中心的位置。
阅读全文