Python 实现 k-means 算法的简单案例
时间: 2023-09-26 15:07:29 浏览: 141
好的,下面是一个简单的Python实现k-means算法的案例:
```python
import numpy as np
import matplotlib.pyplot as plt
def kMeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with Lloyd's k-means.

    Parameters
    ----------
    dataSet : array-like of shape (m, n)
        ``m`` samples with ``n`` features each.
    k : int
        Number of clusters; must satisfy ``1 <= k <= m``.

    Returns
    -------
    centroids : numpy.ndarray of shape (k, n)
        Final cluster centres.
    clusterAssment : numpy.ndarray of shape (m, 2)
        Column 0 holds the index of the cluster each sample belongs to,
        column 1 the squared Euclidean distance to that cluster's centre.

    Raises
    ------
    ValueError
        If ``dataSet`` is not two-dimensional or ``k`` is outside ``[1, m]``.
    """
    dataSet = np.asarray(dataSet, dtype=float)
    if dataSet.ndim != 2:
        raise ValueError("dataSet must be a 2-D array of shape (m, n)")
    m, n = dataSet.shape
    if not 1 <= k <= m:
        raise ValueError("k must be between 1 and the number of samples")

    # Seed the centroids with k *distinct* rows; sampling with replacement
    # could pick the same row twice and start with an empty cluster.
    centroids = dataSet[np.random.choice(m, size=k, replace=False)].copy()

    # The assignment table must persist across iterations so convergence can
    # be detected. Labels start at -1 so the first pass always counts as a
    # change.
    clusterAssment = np.zeros((m, 2))
    clusterAssment[:, 0] = -1

    rows = np.arange(m)
    clusterChanged = True
    while clusterChanged:
        # Squared distance from every sample to every centroid, shape (m, k).
        diffs = dataSet[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        sqDists = np.sum(diffs ** 2, axis=2)

        # argmin returns the first minimum, so ties go to the lowest index.
        labels = np.argmin(sqDists, axis=1)
        clusterChanged = bool(np.any(clusterAssment[:, 0] != labels))
        clusterAssment[:, 0] = labels
        clusterAssment[:, 1] = sqDists[rows, labels]

        for j in range(k):
            pointsInCluster = dataSet[labels == j]
            # An empty cluster keeps its previous centre; the mean of zero
            # points would be NaN and the centroid could never recover.
            if len(pointsInCluster) > 0:
                centroids[j, :] = pointsInCluster.mean(axis=0)

    return centroids, clusterAssment
def show(dataSet, k, centroids, clusterAssment):
    """Draw a scatter plot of 2-D samples coloured by cluster.

    Each cluster's samples are plotted as semi-transparent circles in a
    colour taken cyclically from a fixed palette, and the centroids are
    overlaid as black crosses. If ``dataSet`` does not have exactly two
    columns, a message is printed and nothing is drawn.

    Parameters
    ----------
    dataSet : numpy.ndarray of shape (m, 2)
        Samples to plot.
    k : int
        Number of clusters to draw.
    centroids : numpy.ndarray
        Cluster centres; the first two columns are used as coordinates.
    clusterAssment : numpy.ndarray
        Column 0 holds the cluster index of each sample.
    """
    _, numFeatures = dataSet.shape
    if numFeatures != 2:
        print("Dimension of dataSet should be 2!")
        return

    palette = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    labels = clusterAssment[:, 0]
    for clusterId in range(k):
        members = dataSet[labels == clusterId]
        colour = palette[clusterId % len(palette)]
        plt.scatter(members[:, 0], members[:, 1], marker='o', c=colour, alpha=0.5)

    # Centroids go on last so they sit on top of the sample markers.
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='k', s=100, linewidths=3)
    plt.title('k-means')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.show()
if __name__ == '__main__':
    # Demo: cluster 200 uniformly random points in the unit square into
    # three groups and plot the result.
    k = 3
    data = np.random.rand(200, 2)
    centroids, clusterAssment = kMeans(data, k)
    show(data, k, centroids, clusterAssment)
```
该案例中，我们首先随机生成 200 个二维数据点，然后将数据和 k 值（此处为 3）传入 `kMeans()`，即可得到聚类结果。其中，函数 `kMeans()` 实现了 k-means 算法，返回聚类中心以及每个样本的簇分配（所属簇的索引和到该簇中心的平方距离）；函数 `show()` 用于绘制聚类结果的散点图，仅支持二维数据。
阅读全文