kmeans聚类算法python实现
时间: 2023-08-28 08:06:23 浏览: 112
python实现kmeans聚类算法
5星 · 资源好评率100%
好的,下面给出 kmeans 聚类算法的 Python 实现代码:
```python
import numpy as np
import matplotlib.pyplot as plt
class KMeans:
    """Minimal k-means clustering (Lloyd's algorithm) on NumPy arrays.

    After ``fit`` the instance exposes:

    * ``centroids`` -- float array with one row per cluster centre.
    * ``clusters``  -- list of ``k`` lists holding the sample indices that
      were assigned to each centroid in the last assignment step.
    """

    def __init__(self, k=2, max_iter=100):
        """Store the hyper-parameters.

        Args:
            k: Number of clusters.
            max_iter: Maximum number of assign/update iterations in ``fit``.
        """
        self.k = k  # number of clusters
        self.max_iter = max_iter  # maximum number of iterations

    def fit(self, X):
        """Cluster the samples in ``X`` (one sample per row).

        Iterates assignment and centroid update until the centroids stop
        moving or ``max_iter`` iterations have run.

        Raises:
            ValueError: From ``np.random.choice`` when ``k`` exceeds the
                number of samples.
        """
        self.X = np.asarray(X)
        self.centroids = self.initialize_centroids()  # initial cluster centres
        for _ in range(self.max_iter):
            self.clusters = self.create_clusters()  # assignment step
            # update_centroids() writes into self.centroids in place, so a
            # real copy is required; an alias would always compare equal and
            # stop the loop after the very first iteration.
            prev_centroids = self.centroids.copy()
            self.update_centroids()  # update step
            if self.is_converged(prev_centroids):  # stop once nothing moved
                break

    def initialize_centroids(self):
        """Return ``k`` distinct samples of ``self.X`` as initial centroids.

        The result is always a float array so that the means written by
        ``update_centroids`` are not truncated when ``X`` has an integer dtype.
        """
        indices = np.random.choice(len(self.X), self.k, replace=False)
        return self.X[indices].astype(float)

    def _nearest_centroid(self, points):
        """Return, for each row of ``points``, the index of the closest centroid.

        Samples and centroids are flattened per row, so the distance is the
        Euclidean norm of the element-wise difference. Ties go to the
        lowest centroid index (``np.argmin`` semantics).
        """
        points = np.asarray(points)
        flat_points = points.reshape(len(points), -1)
        flat_centroids = self.centroids.reshape(len(self.centroids), -1)
        diffs = flat_points[:, np.newaxis, :] - flat_centroids[np.newaxis, :, :]
        return np.argmin(np.linalg.norm(diffs, axis=2), axis=1)

    def create_clusters(self):
        """Assign every sample of ``self.X`` to its nearest centroid.

        Returns:
            A list of ``k`` lists; entry ``j`` holds the indices of the
            samples whose nearest centroid is ``j``.
        """
        clusters = [[] for _ in range(self.k)]
        for sample_idx, label in enumerate(self._nearest_centroid(self.X)):
            clusters[label].append(sample_idx)
        return clusters

    def update_centroids(self):
        """Move each centroid to the mean of the samples assigned to it.

        A centroid whose cluster is empty is left where it is; taking the
        mean of an empty selection would turn it into NaN.
        """
        for i, cluster in enumerate(self.clusters):
            if cluster:
                self.centroids[i] = np.mean(self.X[cluster], axis=0)

    def is_converged(self, prev_centroids):
        """Return True when no centroid differs from ``prev_centroids``."""
        return bool(np.array_equal(self.centroids, prev_centroids))

    def predict(self, X):
        """Return the index of the nearest centroid.

        Args:
            X: Either a single sample (one dimension fewer than
                ``self.centroids``) or an array with one sample per row.

        Returns:
            A scalar cluster index for a single sample, or an array of
            indices (one per row) for a batch.
        """
        X = np.asarray(X)
        if X.ndim < self.centroids.ndim:
            # Single sample: wrap it as a one-row batch, then unwrap.
            return self._nearest_centroid(X[np.newaxis, ...])[0]
        return self._nearest_centroid(X)
if __name__ == '__main__':
    # Demo: two Gaussian blobs, shifted to (-2, -2) and (+2, +2).
    np.random.seed(0)  # fixed seed so the demo is reproducible
    X = np.random.randn(100, 2)
    X[:50] -= 2.0  # first half of the samples -> lower-left blob
    X[50:] += 2.0  # second half of the samples -> upper-right blob

    # Fit a two-cluster model on the generated samples.
    model = KMeans(k=2)
    model.fit(X)

    # Plot the samples coloured by predicted cluster, then overlay the
    # centroids as red crosses.
    labels = [model.predict(sample) for sample in X]
    centers = model.centroids
    plt.scatter(X[:, 0], X[:, 1], c=labels)
    plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, linewidths=3, color='r')
    plt.show()
```
这是一个简单的 kmeans 聚类算法实现,你可以根据自己的需求进行修改。
阅读全文