不调用工具包,实现Kmeans聚类,改变不同的K值和距离度量方式,可视化聚类结果
时间: 2024-04-30 16:22:46 浏览: 101
采用K-means聚类,实现多维矩阵的聚类,并进行可视化展示(matlab).rar
5星 · 资源好评率100%
以下是Python实现Kmeans聚类的代码,并可视化聚类结果:
```python
import numpy as np
import matplotlib.pyplot as plt
class KMeans:
    """K-means clustering implemented with NumPy only.

    Parameters
    ----------
    k : int
        Number of clusters.
    max_iter : int
        Upper bound on the number of assignment/update rounds in ``fit``.
    distance_measure : str
        ``'euclidean'`` (L2 distance) or ``'manhattan'`` (L1 distance).

    Attributes
    ----------
    centroids : numpy.ndarray
        Array with one row per cluster centre; set by ``fit``.
    """

    def __init__(self, k=2, max_iter=100, distance_measure='euclidean'):
        self.k = k
        self.max_iter = max_iter
        self.distance_measure = distance_measure

    def _pairwise_distances(self, X, centroids):
        """Return the ``(n_samples, n_centroids)`` matrix of distances.

        Raises
        ------
        ValueError
            If ``distance_measure`` is not a supported name.
        """
        if self.distance_measure not in ('euclidean', 'manhattan'):
            raise ValueError(
                "Unsupported distance_measure %r; expected 'euclidean' or "
                "'manhattan'" % (self.distance_measure,)
            )
        # Broadcast every sample against every centroid, then flatten the
        # feature axes so that samples of any dimensionality are handled.
        diff = X[:, None, ...] - centroids[None, ...]
        diff = diff.reshape(X.shape[0], centroids.shape[0], -1)
        if self.distance_measure == 'euclidean':
            return np.linalg.norm(diff, axis=2)
        return np.abs(diff).sum(axis=2)

    def fit(self, X):
        """Estimate the cluster centres from the samples in ``X``.

        The initial centres are ``k`` distinct samples drawn with
        ``np.random.choice``; seed NumPy's global RNG for reproducibility.
        Iteration stops early once the centres no longer move (within
        ``np.allclose`` tolerance).

        Parameters
        ----------
        X : array-like
            Samples, one per row.

        Raises
        ------
        ValueError
            If ``k`` exceeds the number of samples or the distance measure
            is unsupported.
        """
        X = np.asarray(X, dtype=float)
        seed_rows = np.random.choice(X.shape[0], size=self.k, replace=False)
        self.centroids = X[seed_rows]
        for _ in range(self.max_iter):
            labels = self._pairwise_distances(X, self.centroids).argmin(axis=1)
            # Start from the current centres so that a cluster which received
            # no samples keeps its position instead of turning into NaN.
            new_centroids = self.centroids.copy()
            for cluster in range(self.k):
                members = X[labels == cluster]
                if members.shape[0] > 0:
                    new_centroids[cluster] = members.mean(axis=0)
            if np.allclose(self.centroids, new_centroids):
                break
            # Keep the centres as an ndarray so callers can slice columns.
            self.centroids = new_centroids

    def predict(self, X):
        """Return the index of the nearest centre for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Samples, one per row.

        Returns
        -------
        numpy.ndarray
            Integer cluster index per sample.
        """
        X = np.asarray(X, dtype=float)
        if X.shape[0] == 0:
            return np.empty(0, dtype=int)
        # Tolerate centroids that were assigned externally as a plain list.
        centroids = np.asarray(self.centroids, dtype=float)
        return self._pairwise_distances(X, centroids).argmin(axis=1)
# Generate the data: three 2-D Gaussian blobs of 100 points each, centred at
# (2, 2), (-2, -2) and (2, -2), with ground-truth labels 0, 1 and 2.
np.random.seed(0)
X = np.vstack((
    np.random.randn(100, 2) + np.array([2, 2]),
    np.random.randn(100, 2) + np.array([-2, -2]),
    np.random.randn(100, 2) + np.array([2, -2]),
))
y = np.hstack((np.zeros(100), np.ones(100), np.ones(100) * 2))

# Visualise the original data, coloured by ground-truth label.
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.title('Original Data')
plt.show()

# Run K-means clustering.
kmeans = KMeans(k=3, max_iter=100, distance_measure='euclidean')
kmeans.fit(X)
y_pred = kmeans.predict(X)

# Visualise the clustering result, coloured by predicted cluster.
plt.scatter(X[:, 0], X[:, 1], c=y_pred)
# The fitted centroids may be held as a list of 1-D arrays; coerce them to a
# 2-D array so that column slicing works.
centroids = np.asarray(kmeans.centroids)
plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=200, linewidths=3, color='r')
plt.title('Kmeans Clustering Result')
plt.show()
```
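上述代码中生成了一个包含三个簇的数据集,并且使用Kmeans聚类将其分为三个簇。可以通过改变K值和距离度量方式来尝试不同的聚类结果,例如将 `KMeans(k=3, max_iter=100, distance_measure='euclidean')` 改为 `KMeans(k=4, max_iter=100, distance_measure='manhattan')`,即可观察4个簇、曼哈顿距离下的聚类效果。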
阅读全文