K-means聚类算法Python代码
时间: 2023-08-29 12:05:42 浏览: 98
下面是使用Python实现K-means聚类算法的示例代码:
``` python
import numpy as np
import matplotlib.pyplot as plt
# Generate random data
def create_data(n_per_cluster=50, offset=2.0, seed=None):
    """Return a synthetic 2-D data set made of four Gaussian blobs.

    Standard-normal points are drawn and then shifted so that each
    consecutive group of ``n_per_cluster`` rows is centred on one of the
    four corners ``(-offset, offset)``, ``(offset, offset)``,
    ``(-offset, -offset)`` and ``(offset, -offset)``, in that order.

    Args:
        n_per_cluster: Number of points in each of the four blobs.
        offset: Distance of every blob centre from the origin along each axis.
        seed: Optional seed for reproducible output. When ``None`` the global
            ``np.random`` state is used, exactly as the default call always did.

    Returns:
        A float array of shape ``(4 * n_per_cluster, 2)``.
    """
    # Fall back to the module-level generator so callers that seed
    # ``np.random`` themselves keep getting the same stream of numbers.
    rng = np.random if seed is None else np.random.RandomState(seed)
    X = rng.randn(4 * n_per_cluster, 2)
    corners = np.array(
        [
            [-offset, offset],
            [offset, offset],
            [-offset, -offset],
            [offset, -offset],
        ],
        dtype=float,
    )
    # Repeat each corner once per point of its blob, then shift all rows at once.
    X += np.repeat(corners, n_per_cluster, axis=0)
    return X
# Compute the distance between two points
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: First point, as a NumPy array or any array-like (list, tuple).
        x2: Second point, with a shape compatible with ``x1``.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    # Convert to float arrays first: plain lists do not support ``-``, and
    # unsigned-integer arrays would wrap around on negative differences.
    delta = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(delta ** 2))
class KMeans:
    """Minimal K-means clusterer built on NumPy.

    Training alternates between assigning every sample to its nearest
    center and moving each center to the mean of its assigned samples,
    until the centers stop moving or ``max_iters`` is reached.

    Attributes:
        K: Number of clusters.
        max_iters: Upper bound on the number of assign/update rounds in ``fit``.
        centers: Float array with one row per cluster center. Only available
            after ``initialize_centers`` or ``fit`` has been called.
    """

    def __init__(self, K=2, max_iters=100):
        """Store the number of clusters and the iteration limit."""
        self.K = K
        self.max_iters = max_iters

    # Initialize the cluster centers
    def initialize_centers(self, X):
        """Pick ``K`` distinct samples of ``X`` at random as the initial centers.

        Raises:
            ValueError: Raised by ``np.random.choice`` when ``K`` exceeds the
                number of samples.
        """
        X = np.asarray(X)
        chosen = np.random.choice(len(X), self.K, replace=False)
        # Store a float copy: with an integer ``X`` the centers would otherwise
        # be integer too, and every later mean update would be truncated.
        self.centers = np.array(X[chosen], dtype=float)

    def _nearest_center_indices(self, X):
        """Return, for every sample, the index of its closest center."""
        X = np.asarray(X, dtype=float)
        n_samples = len(X)
        if n_samples == 0:
            return np.empty(0, dtype=int)
        # Flatten trailing dimensions so 1-D and N-D samples are handled alike.
        points = X.reshape(n_samples, -1)
        centers = np.asarray(self.centers, dtype=float)
        centers = centers.reshape(len(centers), -1)
        # Broadcast to a (n_samples, n_centers, n_features) difference tensor.
        diffs = points[:, np.newaxis, :] - centers[np.newaxis, :, :]
        distances = np.sqrt(np.sum(diffs ** 2, axis=2))
        # argmin keeps the first minimum, i.e. ties go to the lowest center index.
        return np.argmin(distances, axis=1)

    # Assign every data point to its nearest cluster center
    def assign_clusters(self, X):
        """Group sample indices by nearest center.

        Returns:
            A list of ``K`` lists; entry ``k`` holds the (ascending) indices
            of the samples whose closest center is center ``k``.
        """
        nearest = self._nearest_center_indices(X)
        return [np.flatnonzero(nearest == k).tolist() for k in range(self.K)]

    # Move each cluster center to the mean of its current members
    def update_centers(self, X, clusters):
        """Replace each center by the mean of the samples assigned to it.

        A center whose cluster is empty, or whose mean contains NaN, is left
        where it is.
        """
        X = np.asarray(X, dtype=float)
        for k in range(self.K):
            members = clusters[k]
            # Skip empty clusters explicitly instead of averaging an empty
            # slice, which would emit a RuntimeWarning and produce NaN.
            if len(members) == 0:
                continue
            new_center = np.mean(X[members], axis=0)
            if not np.isnan(new_center).any():
                self.centers[k] = new_center

    # Predict the cluster of each data point
    def predict(self, X):
        """Return the index of the nearest center for every sample in ``X``.

        Returns:
            A float array of length ``len(X)`` whose values are cluster
            indices in ``0 .. K-1`` (float dtype is kept for compatibility
            with existing callers).
        """
        return self._nearest_center_indices(X).astype(float)

    # Train the model
    def fit(self, X):
        """Run K-means on ``X`` and return the fitted model.

        Returns:
            ``self``, so that calls can be chained as ``model.fit(X).predict(X)``.
        """
        X = np.asarray(X, dtype=float)
        self.initialize_centers(X)
        for _ in range(self.max_iters):
            previous = np.copy(self.centers)
            clusters = self.assign_clusters(X)
            self.update_centers(X, clusters)
            # Converged: no center moved beyond floating-point tolerance.
            if np.allclose(self.centers, previous):
                break
        return self
# Try out the KMeans implementation on the synthetic data set
X = create_data()
kmeans = KMeans(K=4, max_iters=100)
# Call fit() and predict() as separate statements so the demo does not rely
# on fit() returning the model.
kmeans.fit(X)
y_pred = kmeans.predict(X)
# Visualize the clustering result: samples colored by cluster, centers as red crosses
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis')
plt.scatter(kmeans.centers[:, 0], kmeans.centers[:, 1], marker='x', s=200, linewidths=3, color='r')
plt.title('K-Means Clustering')
plt.show()
```
该代码生成了一个包含200个点、围绕4个中心分布的随机二维数据集,并使用K-means算法将其聚类成4个簇。最后,用散点图将聚类结果和各聚类中心可视化出来。
阅读全文