多变量k-means聚类算法python
时间: 2023-11-21 20:58:18 浏览: 58
以下是多变量K-means聚类算法的Python实现:
```python
import numpy as np
class KMeansClassifier():
    """K-means clustering for multivariate (n-dimensional) samples.

    Parameters
    ----------
    k : int
        Number of clusters.
    initCent : str
        Centroid initialisation method: ``'random'`` (uniform inside the
        bounding box of the data) or ``'kmeans++'``.
    max_iter : int
        Maximum number of assign/update iterations performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    _centroids : ndarray of shape (k, n_features)
        Final cluster centres.
    _clusterAssment : ndarray of shape (n_samples, 2)
        Column 0 is the cluster index of each sample, column 1 the squared
        distance to the centroid it was assigned to.
    _iter_count : int
        Number of iterations actually executed.
    """

    def __init__(self, k=3, initCent='random', max_iter=500):
        self._k = k  # number of clusters
        self._initCent = initCent  # centroid initialisation method
        self._max_iter = max_iter  # maximum number of iterations

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result on the instance.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or contains no samples.
        NameError
            If ``initCent`` is not a recognised initialisation method
            (exception type kept for backward compatibility).
        """
        X = self._as_samples(X)
        m = X.shape[0]
        if m == 0:
            raise ValueError('X must contain at least one sample')

        # Initialise the centroids.
        if self._initCent == 'random':
            centroids = self._randCent(X, self._k)
        elif self._initCent == 'kmeans++':
            centroids = self._kmeansPlusPlus(X, self._k)
        else:
            raise NameError('The initialization method is not recognized')
        # Work on a private, plain float ndarray copy.
        centroids = np.array(centroids, dtype=float)

        # -1 means "not assigned yet", so the very first assignment always
        # counts as a change (initialising with 0 would hide samples whose
        # first cluster happens to be cluster 0).
        labels = np.full(m, -1)
        sq_errors = np.zeros(m)
        iter_count = 0
        changed = True
        while changed and iter_count < self._max_iter:
            iter_count += 1
            # Assignment step.
            new_labels, sq_errors = self._assign(X, centroids)
            changed = bool(np.any(new_labels != labels))
            labels = new_labels
            # Update step.
            for j in range(self._k):
                members = X[labels == j]
                # An empty cluster keeps its previous centroid; taking the
                # mean of zero rows would turn the centroid into NaN.
                if members.shape[0] > 0:
                    centroids[j] = members.mean(axis=0)

        self._centroids = centroids
        self._clusterAssment = np.column_stack((labels, sq_errors)).astype(float)
        self._iter_count = iter_count

    def predict(self, X):
        """Return the index of the nearest fitted centroid for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples,)
            Cluster indices, stored as floats.
        """
        X = self._as_samples(X)
        centroids = np.asarray(self._centroids, dtype=float)
        labels, _ = self._assign(X, centroids)
        return labels.astype(float)

    def _as_samples(self, X):
        """Convert ``X`` to a 2-D float ndarray, rejecting other shapes."""
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError('X must be a 2-D array of shape (n_samples, n_features)')
        return X

    def _assign(self, X, centroids):
        """Return (nearest centroid index, squared distance) for every sample.

        Builds an (n_samples, k, n_features) difference array through
        broadcasting. Ties go to the lowest centroid index.
        """
        diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        sq_dists = np.sum(diffs ** 2, axis=2)
        labels = np.argmin(sq_dists, axis=1)
        return labels, sq_dists[np.arange(X.shape[0]), labels]

    def _randCent(self, X, k):
        """Draw ``k`` centroids uniformly inside the bounding box of ``X``."""
        X = np.asarray(X, dtype=float)
        n = X.shape[1]
        lows = X.min(axis=0)
        spans = X.max(axis=0) - lows
        # Draw as (n, k) and transpose so that each feature column consumes
        # k consecutive random numbers, one column after another.
        return lows + spans * np.random.rand(n, k).T

    def _kmeansPlusPlus(self, X, k):
        """Pick ``k`` initial centroids from the rows of ``X`` (k-means++).

        The first centroid is a uniformly random sample. Each further
        centroid is a sample drawn with probability proportional to its
        squared distance to the closest centroid chosen so far.
        """
        X = np.asarray(X, dtype=float)
        m, n = X.shape
        centroids = np.zeros((k, n))
        centroids[0] = X[np.random.randint(m)]
        # Per-sample squared distance to the closest centroid chosen so far.
        closest_sq = np.sum((X - centroids[0]) ** 2, axis=1)
        for i in range(1, k):
            total = closest_sq.sum()
            if total > 0:
                index = np.random.choice(m, p=closest_sq / total)
            else:
                # Every sample coincides with a chosen centroid; the weights
                # would be 0/0, so fall back to a uniform draw.
                index = np.random.randint(m)
            centroids[i] = X[index]
            closest_sq = np.minimum(
                closest_sq, np.sum((X - centroids[i]) ** 2, axis=1)
            )
        return centroids

    def _euclideanDist(self, A, B):
        """Return the Euclidean distance between two points as one scalar.

        The sum runs over every element of ``A - B``, so this is only a
        point-to-point distance; it does not produce per-row distances.
        """
        return np.sqrt(np.sum(np.power(A - B, 2)))
```
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![py](https://img-home.csdnimg.cn/images/20210720083646.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)