粒kmeans聚类算法的代码
时间: 2023-05-26 20:06:56 浏览: 84
KMeans聚类算法+代码
5星 · 资源好评率100%
以下是粒kmeans聚类算法的Python代码(代码实际实现的是模糊K均值,即模糊C均值 Fuzzy C-Means 算法):
```python
import numpy as np
from scipy.spatial.distance import cdist
def generate_fuzzy_membership(data, centers, m):
    """
    Compute the fuzzy membership matrix for the given cluster centers.

    Implements the update
    ``u[i, j] = 1 / sum_k (d[i, j] / d[i, k]) ** (2 / (m - 1))``
    where ``d`` is the sample-to-center distance matrix returned by ``cdist``.

    A sample that lies exactly on one or more centers (distance 0) would make
    the formula evaluate 0/0; such a sample instead gets its whole membership
    split evenly among the centers it coincides with and 0 for the others.

    :param data: sample data, shape (n_samples, n_features)
    :param centers: cluster centers, shape (n_clusters, n_features)
    :param m: fuzziness exponent, must be greater than 1
    :return: membership matrix, shape (n_samples, n_clusters); rows sum to 1
    :raises ValueError: if ``m`` is not greater than 1
    """
    if m <= 1:
        raise ValueError("m must be greater than 1")

    distance = cdist(data, centers)
    membership = np.zeros_like(distance)
    if distance.size == 0:
        return membership
    exponent = 2.0 / (m - 1)

    # Samples sitting exactly on a center: crisp membership instead of 0/0.
    coincident = distance == 0
    on_center = coincident.any(axis=1)
    if on_center.any():
        hits = coincident[on_center].astype(float)
        membership[on_center] = hits / hits.sum(axis=1, keepdims=True)

    # All other samples: u_ij = d_ij^-p / sum_k d_ik^-p, which is algebraically
    # the same as the ratio formula above. Distances are scaled by the row
    # minimum first so every power lies in (0, 1] and cannot overflow.
    regular = ~on_center
    if regular.any():
        dist = distance[regular]
        scaled = dist / dist.min(axis=1, keepdims=True)
        inverse = np.power(scaled, -exponent)
        membership[regular] = inverse / inverse.sum(axis=1, keepdims=True)
    return membership
def generate_centers(data, membership, m):
    """
    Compute new cluster centers as membership-weighted means of the samples.

    Each center ``j`` is ``sum_i(u[i, j] ** m * x[i]) / sum_i(u[i, j] ** m)``.
    The computation is done for all clusters at once with a single matrix
    product instead of a per-cluster Python loop.

    :param data: sample data, shape (n_samples, n_features); any array-like
    :param membership: fuzzy memberships, shape (n_samples, n_clusters);
        any array-like
    :param m: fuzziness exponent applied to the memberships
    :return: new cluster centers, shape (n_clusters, n_features), float array
    """
    data = np.asarray(data, dtype=float)
    weights = np.power(np.asarray(membership, dtype=float), m)
    # (n_clusters, n_samples) @ (n_samples, n_features) -> weighted feature sums
    weighted_sum = weights.T @ data
    # Per-cluster normaliser, reshaped to a column so it broadcasts over features.
    normaliser = weights.sum(axis=0)[:, np.newaxis]
    return weighted_sum / normaliser
def calculate_obj_func(data, centers, membership, m):
    """
    Evaluate the fuzzy clustering objective function.

    The value is the sum, over every sample/center pair, of the membership
    raised to the power ``m`` multiplied by the squared sample-to-center
    distance (distances come from ``cdist``).

    :param data: sample data, shape (n_samples, n_features)
    :param centers: cluster centers, shape (n_clusters, n_features)
    :param membership: fuzzy memberships, shape (n_samples, n_clusters)
    :param m: fuzziness exponent
    :return: scalar objective value
    """
    squared_distance = np.square(cdist(data, centers))
    weights = membership ** m
    weighted_cost = weights * squared_distance
    return weighted_cost.sum()
def fuzzy_kmeans(data, n_clusters, m=2, max_iter=100, tol=1e-4, random_state=None):
    """
    Cluster samples with the fuzzy k-means (fuzzy C-means) algorithm.

    Initial centers are ``n_clusters`` distinct samples drawn at random. The
    loop then alternates between recomputing the centers and recomputing the
    memberships until the objective function changes by less than ``tol``
    between two iterations or ``max_iter`` iterations have run. The centers
    and memberships of the last completed iteration are the ones returned.

    :param data: sample data, shape (n_samples, n_features)
    :param n_clusters: number of clusters, between 1 and n_samples
    :param m: fuzziness exponent, default 2
    :param max_iter: maximum number of iterations, default 100
    :param tol: convergence tolerance on the objective change, default 1e-4
    :param random_state: optional seed (or ``numpy.random.Generator``) for the
        center initialisation; ``None`` draws from NumPy's global random state
    :return: tuple ``(labels, centers)`` - ``labels`` is the index of the
        highest-membership cluster per sample, ``centers`` has shape
        (n_clusters, n_features)
    :raises ValueError: if ``n_clusters`` is not in ``[1, n_samples]``
    """
    data = np.asarray(data, dtype=float)
    n_samples = data.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            "n_clusters must be between 1 and the number of samples "
            f"({n_samples}), got {n_clusters}"
        )

    # Keep the global-state behaviour when no seed is given so callers that
    # rely on np.random.seed() still get the same draws.
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    seed_rows = rng.choice(n_samples, size=n_clusters, replace=False)
    centers = data[seed_rows, :]

    membership = generate_fuzzy_membership(data, centers, m)
    obj_func = calculate_obj_func(data, centers, membership, m)

    for _ in range(max_iter):
        centers = generate_centers(data, membership, m)
        membership = generate_fuzzy_membership(data, centers, m)
        new_obj_func = calculate_obj_func(data, centers, membership, m)
        converged = np.abs(new_obj_func - obj_func) < tol
        obj_func = new_obj_func
        # State is updated before the convergence check so the final, most
        # refined centers/memberships are what gets returned.
        if converged:
            break

    labels = np.argmax(membership, axis=1)
    return labels, centers
```
使用示例:
```python
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Generate 1000 synthetic samples around 3 blob centers (fixed seed).
points, true_labels = make_blobs(n_samples=1000, centers=3, random_state=10)

# Cluster the samples with the fuzzy k-means routine.
n_clusters = 3
predicted, found_centers = fuzzy_kmeans(
    points, n_clusters, m=2, max_iter=100, tol=1e-4
)

# Visualise: samples coloured by predicted label, centers drawn as red crosses.
plt.scatter(points[:, 0], points[:, 1], c=predicted)
plt.scatter(
    found_centers[:, 0],
    found_centers[:, 1],
    marker='x',
    color='red',
    linewidths=2,
)
plt.show()
```
阅读全文