已知数据集由 X, Y = make_moons(n_samples=400, shuffle=True, noise=0.1, random_state=136) 生成,其标签为 0 或 1。请用手写 K-Means++ 算法构建模型并写出代码;绘制聚类结果的散点图并写出代码;再分别用轮廓系数和准确率对聚类结果进行评估并写出代码。
时间: 2024-03-21 14:43:26 浏览: 60
好的,以下是用手写K-Means++算法进行模型构建的代码,并绘制聚类结果的散点图,以及用轮廓系数和准确率进行评估的代码:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.metrics import silhouette_score, accuracy_score
class KMeansPlusPlus:
    """K-Means clustering with k-means++ seeding, written with NumPy only.

    Args:
        k: Number of clusters.
        max_iters: Upper bound on the number of centroid-update iterations.
        random_state: Optional integer seed. When ``None`` the global
            ``np.random`` state is used, so ``np.random.seed(...)`` called by
            the user still controls the initialisation.

    Attributes set by ``fit``:
        centers: Array of shape ``(k, n_features)`` with the final centroids.
        clusters: List of ``k`` lists; entry ``i`` holds the training points
            whose nearest centroid is ``centers[i]``.
    """

    def __init__(self, k, max_iters=300, random_state=None):
        self.k = k
        self.max_iters = max_iters
        self.random_state = random_state
        # Fall back to the module-level generator so unseeded instances keep
        # honouring the global NumPy seed.
        if random_state is None:
            self._rng = np.random
        else:
            self._rng = np.random.RandomState(random_state)

    @staticmethod
    def _nearest(X, centers):
        """Return, for every row of ``X``, the index of its closest centre."""
        diffs = X[:, None, :] - np.asarray(centers)[None, :, :]
        return np.argmin(np.linalg.norm(diffs, axis=2), axis=1)

    def _init_centers(self, X):
        """Pick ``k`` initial centres from the rows of ``X`` (k-means++).

        The first centre is drawn uniformly; each later one is drawn with
        probability proportional to the squared distance to the nearest
        centre chosen so far.
        """
        X = np.asarray(X, dtype=float)
        rng = self._rng
        first = rng.choice(len(X))
        centers = [X[first]]
        closest_sq = np.sum((X - X[first]) ** 2, axis=1)
        while len(centers) < self.k:
            total = closest_sq.sum()
            if total > 0:
                idx = rng.choice(len(X), p=closest_sq / total)
            else:
                # Every point coincides with a chosen centre: the weights are
                # all zero, so fall back to a uniform draw instead of 0/0.
                idx = rng.choice(len(X))
            centers.append(X[idx])
            closest_sq = np.minimum(
                closest_sq, np.sum((X - X[idx]) ** 2, axis=1)
            )
        return np.array(centers)

    def _assign_clusters(self, X, centers):
        """Group the rows of ``X`` by nearest centre.

        Returns:
            A list of ``k`` lists of points (some may be empty).
        """
        X = np.asarray(X, dtype=float)
        labels = self._nearest(X, centers)
        return [list(X[labels == i]) for i in range(self.k)]

    def _update_centers(self, clusters, old_centers=None):
        """Return the mean of every cluster as the new centres.

        Args:
            clusters: List of ``k`` lists of points.
            old_centers: Previous centres; the centre of an empty cluster is
                carried over from here instead of becoming NaN.

        Raises:
            ValueError: If a cluster is empty and ``old_centers`` is not given.
        """
        centers = []
        for i in range(self.k):
            members = clusters[i]
            if len(members) > 0:
                centers.append(np.mean(members, axis=0))
            elif old_centers is not None:
                centers.append(np.asarray(old_centers[i], dtype=float))
            else:
                raise ValueError(
                    "cluster {} is empty and no previous center is available".format(i)
                )
        return np.array(centers)

    def fit(self, X):
        """Run k-means on ``X`` (shape ``(n_samples, n_features)``).

        Iterates until the centres stop moving (``np.allclose``) or
        ``max_iters`` updates have been performed. Returns ``self``.
        """
        X = np.asarray(X, dtype=float)
        centers = self._init_centers(X)
        # Assign once up front so ``clusters`` exists even when max_iters == 0.
        clusters = self._assign_clusters(X, centers)
        for _ in range(self.max_iters):
            old_centers = centers
            centers = self._update_centers(clusters, old_centers)
            # Re-assign after the update so the stored clusters always match
            # the stored centres.
            clusters = self._assign_clusters(X, centers)
            if np.allclose(centers, old_centers):
                break
        self.centers = centers
        self.clusters = clusters
        return self

    def predict(self, X):
        """Return the index of the nearest fitted centre for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if len(X) == 0:
            return np.array([], dtype=int)
        return self._nearest(X, self.centers)
# Generate the two-moons dataset (labels are 0 or 1).
X, y = make_moons(n_samples=400, shuffle=True, noise=0.1, random_state=136)

# Seed the global NumPy RNG so the random centre initialisation, and therefore
# the reported scores, are reproducible from run to run.
np.random.seed(136)

# Apply KMeans++ clustering with k=2.
kmeans = KMeansPlusPlus(k=2)
kmeans.fit(X)
y_pred = kmeans.predict(X)

# Silhouette score only depends on the partition, not on the label ids.
silhouette = silhouette_score(X, y_pred)
print("Silhouette Score: {:.2f}".format(silhouette))

# Cluster ids are arbitrary (cluster 0 need not correspond to class 0), so map
# every cluster to the majority true label of its members before comparing
# with y; otherwise the accuracy can come out as 1 - p instead of p.
y_aligned = np.zeros_like(y)
for cluster_id in np.unique(y_pred):
    members = y_pred == cluster_id
    y_aligned[members] = np.bincount(y[members]).argmax()
accuracy = accuracy_score(y, y_aligned)
print("Accuracy: {:.2f}%".format(accuracy * 100))

# Plot the clustering result: one colour per cluster, centroids as stars.
fig, ax = plt.subplots(figsize=(8, 6))
colors = ['blue', 'orange']
for i, color in enumerate(colors):
    in_cluster = y_pred == i
    ax.scatter(X[in_cluster, 0], X[in_cluster, 1], c=color, label='Cluster {}'.format(i+1))
ax.scatter(kmeans.centers[:, 0], kmeans.centers[:, 1], marker='*', s=200, c='black', label='Centroids')
ax.set_xlabel('Feature 1')
ax.set_ylabel('Feature 2')
ax.set_title('KMeans++ Clustering Result')
ax.legend()
plt.show()
```
在这个代码中,我定义了一个名为`KMeansPlusPlus`的类,实现了手写K-Means++算法的初始化质心、分配簇和更新质心等过程。在生成`make_moons`数据集之后,我实例化了这个类,并调用`fit`方法进行训练,同时调用`predict`方法对数据进行预测。接着,我计算了轮廓系数和准确率,并将聚类结果绘制成散点图展示出来。
阅读全文