请用 Python 实现 K-Means 算法,不要使用 scikit-learn
时间: 2023-03-13 07:38:01 浏览: 102
可以参考下面的代码:
# 导入基本库
import numpy as np
# 定义K-Means类
class KMeans:
    """Minimal K-Means clustering (Lloyd's algorithm) using only NumPy.

    Attributes:
        k: Number of clusters.
        tol: Convergence threshold, expressed in percent (see ``fit``).
        max_iter: Maximum number of assign/update iterations.
        centroids: Dict mapping cluster index -> centroid coordinates.
            Empty until ``fit`` has been called.
        classifications: Dict mapping cluster index -> list of the training
            points assigned to that cluster during the last iteration.
    """

    def __init__(self, k=2, tol=0.001, max_iter=300):
        """Store the hyper-parameters; no computation happens here."""
        self.k = k
        self.tol = tol
        self.max_iter = max_iter
        self.centroids = {}
        self.classifications = {}

    def fit(self, data):
        """Compute the cluster centroids for ``data``.

        The first ``k`` points of ``data`` are used as the initial centroids
        (deterministic, not random). Iteration stops after ``max_iter``
        rounds, or earlier once every centroid's summed absolute
        per-coordinate change, in percent of its previous value, is at most
        ``tol``.

        Args:
            data: Sequence of points (anything ``np.asarray`` can turn into
                a float array with one point per row).

        Raises:
            ValueError: If ``data`` holds fewer than ``k`` points.
        """
        points = np.asarray(data, dtype=float)
        if len(points) < self.k:
            raise ValueError(
                f"need at least k={self.k} points, got {len(points)}"
            )

        # Copy the seeds so later updates never alias the caller's array.
        self.centroids = {
            label: np.array(points[label]) for label in range(self.k)
        }

        for _ in range(self.max_iter):
            # Assignment step: attach every point to its nearest centroid.
            self.classifications = {label: [] for label in range(self.k)}
            for point in points:
                self.classifications[self._nearest(point)].append(point)

            previous = dict(self.centroids)

            # Update step: move each centroid to the mean of its members.
            for label, members in self.classifications.items():
                # An empty cluster has no mean (it would become NaN), so
                # its centroid simply stays where it was.
                if members:
                    self.centroids[label] = np.average(members, axis=0)

            if self._converged(previous):
                break

    def predict(self, data):
        """Return the index of the centroid closest to the point ``data``.

        Args:
            data: A single point with the same dimensionality as the
                training data.

        Raises:
            ValueError: If called before ``fit``.
        """
        if not self.centroids:
            raise ValueError("KMeans.predict() called before fit()")
        return self._nearest(np.asarray(data, dtype=float))

    def _nearest(self, point):
        """Return the label of the centroid with the smallest Euclidean distance."""
        # min() keeps the first minimum, so ties go to the lowest label.
        return min(
            self.centroids,
            key=lambda label: np.linalg.norm(point - self.centroids[label]),
        )

    def _converged(self, previous):
        """Return True when no centroid moved by more than ``tol`` percent."""
        for label, old in previous.items():
            shift = np.abs(self.centroids[label] - old)
            # Use magnitudes so opposite-sign moves cannot cancel out, and
            # fall back to the absolute shift where the old coordinate is 0
            # to avoid dividing by zero.
            scale = np.where(old == 0, 1.0, np.abs(old))
            if np.sum(shift / scale * 100.0) > self.tol:
                return False
        return True
阅读全文