Lloyd算法的代码实现
时间: 2023-07-26 17:05:50 浏览: 463
Lloyd算法是求解K-means聚类问题的经典迭代算法,因此通常也被直接称为K-means算法:它交替执行"把每个样本分配给最近的质心"和"把每个质心更新为所属簇内样本的均值"两个步骤,直到质心不再变化或达到最大迭代次数。以下是Lloyd算法的Python实现:
```python
import numpy as np
def distance(x1, x2):
    """Return the Euclidean (L2) distance between two vectors.

    Args:
        x1: First vector; any array-like (list, tuple, ``numpy.ndarray``).
        x2: Second vector; must be broadcastable against ``x1``.

    Returns:
        The square root of the sum of squared element-wise differences,
        as a NumPy float scalar.
    """
    # Coerce to float arrays so plain Python sequences are accepted and so
    # unsigned-integer inputs cannot wrap around during the subtraction.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
class KMeans:
    """K-means clustering fitted with Lloyd's algorithm.

    The model alternates between assigning every sample to its nearest
    centroid and moving every centroid to the mean of the samples assigned
    to it, until the centroids stop moving or ``max_iters`` is reached.

    Attributes:
        k: Number of clusters.
        max_iters: Upper bound on the number of assign/update iterations.
        plot_steps: When true, ``fit`` draws the clustering after every
            assignment step.
        clusters: List of ``k`` lists holding the samples of each cluster
            from the most recent assignment step.
        centroids: List of the current cluster centres (empty before
            ``fit`` has been called).
    """

    def __init__(self, k=5, max_iters=100, plot_steps=False):
        self.k = k  # number of clusters
        self.max_iters = max_iters  # maximum number of iterations
        self.plot_steps = plot_steps  # whether to draw each iteration
        self.clusters = [[] for _ in range(self.k)]  # samples per cluster
        self.centroids = []  # cluster centres

    def _initialize_centroids(self, X):
        """Choose ``k`` distinct rows of ``X`` at random as initial centroids.

        Raises:
            ValueError: If ``X`` has fewer rows than ``k``.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        if self.k > n_samples:
            raise ValueError(
                f"k={self.k} clusters requested but only {n_samples} samples given"
            )
        idx = np.random.permutation(n_samples)[:self.k]
        # Copy each row so the centroids never alias the caller's data.
        self.centroids = [np.array(X[i], dtype=float) for i in idx]

    def _nearest_centroids(self, X):
        """Return, for every row of ``X``, the index of the closest centroid."""
        points = np.atleast_2d(np.asarray(X, dtype=float))
        centres = np.asarray(self.centroids, dtype=float)
        # Broadcast (n, 1, d) against (1, k, d) to get an (n, k) distance table.
        dists = np.linalg.norm(points[:, None, :] - centres[None, :, :], axis=2)
        return np.argmin(dists, axis=1)

    def _assign_clusters(self, X):
        """Append every sample of ``X`` to the cluster of its nearest centroid.

        The cluster lists are not cleared here; ``fit`` resets them before
        each call.
        """
        labels = self._nearest_centroids(X)
        for x, label in zip(X, labels):
            self.clusters[label].append(x)

    def _update_centroids(self):
        """Move each centroid to the mean of the samples assigned to it."""
        for i in range(self.k):
            members = self.clusters[i]
            # An empty cluster has no mean (np.mean would produce NaN), so
            # its centroid is left where it was.
            if members:
                self.centroids[i] = np.mean(members, axis=0)

    def fit(self, X):
        """Run Lloyd's algorithm on ``X``.

        Args:
            X: Array-like of shape (n_samples, n_features).

        Raises:
            ValueError: If ``X`` has fewer samples than ``k``.
        """
        X = np.asarray(X)
        self._initialize_centroids(X)
        for _iteration in range(self.max_iters):
            self.clusters = [[] for _ in range(self.k)]
            self._assign_clusters(X)
            if self.plot_steps:
                self.plot()
            old_centroids = list(self.centroids)
            self._update_centroids()
            # Converged once no centroid moved during the update step.
            if np.allclose(old_centroids, self.centroids):
                break

    def predict(self, X):
        """Return the index of the nearest centroid for the given sample(s).

        Args:
            X: A single sample (1-D array-like) or several samples
                (2-D array-like, one per row).

        Returns:
            A scalar cluster index when exactly one sample is given,
            otherwise a 1-D integer array with one index per sample.

        Raises:
            ValueError: If the model has no centroids yet.
        """
        if len(self.centroids) == 0:
            raise ValueError("no centroids available; call fit() before predict()")
        labels = self._nearest_centroids(X)
        # A lone sample yields a scalar, matching the scalar index this
        # method has always returned.
        return labels[0] if labels.size == 1 else labels

    def plot(self):
        """Scatter-plot the current clusters and mark the centroids."""
        # Imported here so the class works without relying on a module-level
        # ``plt`` and matplotlib is only needed when plotting is requested.
        import matplotlib.pyplot as plt

        _, ax = plt.subplots(figsize=(12, 8))
        for members in self.clusters:
            # scatter() needs at least one point; skip empty clusters.
            if members:
                ax.scatter(*zip(*members), marker='o', s=30)
        ax.scatter(*zip(*self.centroids), marker='x', color='black', s=100)
        plt.show()
```
使用示例:
```python
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
# Generate a data set of 1000 samples spread over five blobs.
X, y = make_blobs(centers=5, n_samples=1000)

# Cluster it with KMeans, drawing the state after every assignment step.
kmeans = KMeans(k=5, max_iters=100, plot_steps=True)
kmeans.fit(X)

# Ask the fitted model which cluster a few new points fall into.
query_points = [[-6, -6], [0, 0], [6, 6]]
for x in query_points:
    cluster = kmeans.predict([x])
    print(f"New point {x} belongs to cluster {cluster}")

# Show the final clusters (circles) and centroids (black crosses).
kmeans.plot()
```
阅读全文