Implement K-means clustering without calling any toolkit, vary the value of K and the distance metric, and visualize the clustering results.
Below is a Python implementation of K-means clustering. It lets you adjust the value of K and the distance metric, and visualizes the clustering result:
```python
import numpy as np
import matplotlib.pyplot as plt


# Euclidean (L2) distance between two points
def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))


# Manhattan (L1) distance between two points
def manhattan_distance(x1, x2):
    return np.sum(np.abs(x1 - x2))


class KMeans:
    def __init__(self, k=3, max_iters=100, distance='euclidean'):
        self.k = k
        self.max_iters = max_iters
        self.distance = distance
        self.clusters = [[] for _ in range(self.k)]
        self.centroids = []

    def fit(self, X):
        # Initialize centroids with k distinct random samples
        idx = np.random.choice(len(X), self.k, replace=False)
        self.centroids = [X[i] for i in idx]
        # Iteratively update assignments and centroids
        for _ in range(self.max_iters):
            # Reset the clusters
            self.clusters = [[] for _ in range(self.k)]
            # Assign each sample to its nearest centroid
            for x in X:
                distances = [euclidean_distance(x, c) if self.distance == 'euclidean'
                             else manhattan_distance(x, c) for c in self.centroids]
                closest_cluster = np.argmin(distances)
                self.clusters[closest_cluster].append(x)
            # Recompute the centroid of each non-empty cluster
            for i, cluster in enumerate(self.clusters):
                if cluster:
                    self.centroids[i] = np.mean(cluster, axis=0)

    def predict(self, X):
        # Return the index of the nearest centroid for each sample
        distances = np.zeros((len(X), self.k))
        for i, x in enumerate(X):
            for j, c in enumerate(self.centroids):
                distances[i, j] = (euclidean_distance(x, c) if self.distance == 'euclidean'
                                   else manhattan_distance(x, c))
        return np.argmin(distances, axis=1)


# Test code
X = np.array([[1, 2], [1, 4], [1, 0],
              [4, 2], [4, 4], [4, 0]])
kmeans = KMeans(k=2, max_iters=100, distance='euclidean')
kmeans.fit(X)
clusters = kmeans.predict(X)
colors = ['r', 'g', 'b', 'y', 'c', 'm']
# Plot each cluster in its own color
for i, cluster in enumerate(kmeans.clusters):
    for x in cluster:
        plt.scatter(x[0], x[1], color=colors[i])
# Mark the centroids with black crosses
for c in kmeans.centroids:
    plt.scatter(c[0], c[1], marker='x', s=100, color='k')
plt.show()
```
You can adjust the value of K and the distance metric, for example:
```python
kmeans = KMeans(k=3, max_iters=100, distance='manhattan')
```
As you can see, the clustering result changes with the choice of K and the distance metric.
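To see this more directly, here is a minimal sketch, not part of the original answer, that reuses the `KMeans` class above on an assumed synthetic dataset of three Gaussian blobs and draws one subplot per combination of K value and distance metric. The data, the chosen K values, and the subplot layout are all illustrative assumptions:

```python
import numpy as np
import matplotlib.pyplot as plt

# Assumed synthetic data: three loose blobs in 2D (for illustration only)
rng = np.random.default_rng(0)
X = np.vstack([
    rng.normal(loc=(0, 0), scale=0.5, size=(30, 2)),
    rng.normal(loc=(4, 0), scale=0.5, size=(30, 2)),
    rng.normal(loc=(2, 3), scale=0.5, size=(30, 2)),
])

ks = [2, 3, 4]                        # K values to compare (arbitrary choice)
metrics = ['euclidean', 'manhattan']  # the two metrics supported by KMeans above
colors = ['r', 'g', 'b', 'y', 'c', 'm']

fig, axes = plt.subplots(len(metrics), len(ks), figsize=(12, 7))
for row, metric in enumerate(metrics):
    for col, k in enumerate(ks):
        ax = axes[row, col]
        km = KMeans(k=k, max_iters=100, distance=metric)
        km.fit(X)
        labels = km.predict(X)
        # Color each sample by its predicted cluster
        for i in range(k):
            pts = X[labels == i]
            ax.scatter(pts[:, 0], pts[:, 1], s=15, color=colors[i])
        # Mark the centroids with black crosses
        for c in km.centroids:
            ax.scatter(c[0], c[1], marker='x', s=100, color='k')
        ax.set_title(f'{metric}, k={k}')
plt.tight_layout()
plt.show()
```

Comparing the subplots row by row and column by column makes it easy to observe how the assignments and centroid positions shift as K and the distance metric vary.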