生成kmeans聚类算法代码
时间: 2023-03-10 10:57:18 浏览: 124
KMeans聚类算法的原理是将数据点划分到指定数量的簇中,使相似的数据点被分配到同一个簇,即每个数据点归属于距离最近的簇中心所在的簇。下面是使用Python实现KMeans聚类算法的代码:
from sklearn.cluster import KMeans
import numpy as np
# Configure the estimator: three clusters, fixed seed for reproducible results.
kmeans = KMeans(n_clusters=3, random_state=0)

# Six two-dimensional sample points to cluster.
data = np.array([[1, 2], [1, 4], [1, 0],
                 [4, 2], [4, 4], [4, 0]])

# Fit the model and obtain the cluster label of every sample in one pass;
# fit_predict avoids the redundant second pass of fit() followed by predict()
# on the very same data.
labels = kmeans.fit_predict(data)
print(labels)
相关问题
Kmeans聚类算法代码
以下是KMeans聚类算法的Python代码示例:
```
import numpy as np
import matplotlib.pyplot as plt
# Generate random demo data
def create_data(num):
    """Return ``num`` two-dimensional standard-normal samples in two shifted groups.

    The global NumPy random generator is reseeded with 10 on every call, so
    the output is reproducible. Rows 0-49 are shifted by +5 and rows 50-99
    by -5 on both coordinates; rows from index 100 onwards are left unshifted.
    """
    np.random.seed(10)
    samples = np.random.randn(num, 2)
    samples[:50] += 5
    samples[50:100] -= 5
    return samples
# Compute the Euclidean distance
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``."""
    delta = x1 - x2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)
class KMeans:
    """Minimal k-means clusterer for NumPy arrays of shape (n_samples, n_features).

    ``fit`` alternates between assigning every sample to its nearest centroid
    and moving each centroid to the mean of its assigned samples, until the
    centroids stop changing or ``max_iters`` iterations have run.
    """

    def __init__(self, k=2, max_iters=100):
        """Store the number of clusters ``k`` and the iteration cap ``max_iters``."""
        self.k = k
        self.max_iters = max_iters
        # Final centroids of the most recent fit() call; None before any fit.
        self.centroids = None

    def initialize_centroids(self, data):
        """Pick the initial centroids as ``k`` randomly chosen rows of ``data``.

        Uses the global NumPy random generator, so results are reproducible
        after ``np.random.seed``. ``data`` itself is not modified.
        """
        centroids = data.copy()
        np.random.shuffle(centroids)
        return centroids[:self.k]

    @staticmethod
    def _nearest_centroid(point, centroids):
        """Return the row index of the centroid closest to ``point``."""
        distances = np.linalg.norm(
            np.asarray(centroids, dtype=float) - point, axis=1
        )
        # A NaN centroid (empty cluster without a fallback) must never win:
        # np.argmin would otherwise return the position of the NaN.
        distances = np.where(np.isnan(distances), np.inf, distances)
        return int(np.argmin(distances))

    def create_clusters(self, centroids, data):
        """Group the samples of ``data`` by their nearest centroid.

        Returns a list of ``k`` lists; list ``i`` holds the sample points
        (not their indices) whose nearest centroid is ``centroids[i]``.
        """
        clusters = [[] for _ in range(self.k)]
        for point in data:
            clusters[self._nearest_centroid(point, centroids)].append(point)
        return clusters

    def calculate_centroids(self, clusters, prev_centroids=None):
        """Return a (k, n_features) array holding the mean of every cluster.

        The feature count is inferred from the samples instead of being fixed
        at 2. For an empty cluster the matching row of ``prev_centroids`` is
        reused when given; otherwise that row is NaN.
        """
        filled = [cluster for cluster in clusters if len(cluster) > 0]
        if filled:
            n_features = np.asarray(filled[0]).shape[-1]
        elif prev_centroids is not None:
            n_features = np.asarray(prev_centroids).shape[-1]
        else:
            # Legacy default: nothing is available to infer the width from.
            n_features = 2

        centroids = np.full((self.k, n_features), np.nan)
        for idx, cluster in enumerate(clusters):
            if len(cluster) > 0:
                centroids[idx] = np.mean(cluster, axis=0)
            elif prev_centroids is not None:
                centroids[idx] = prev_centroids[idx]
        return centroids

    def predict_cluster(self, clusters, data):
        """Label every sample of ``data`` with the index of the nearest cluster mean.

        ``clusters`` is a list of lists of sample points as produced by
        ``create_clusters``. The earlier implementation used those points as
        array indices, which raised ``IndexError``. Returns a float array of
        length ``len(data)``.
        """
        centroids = self.calculate_centroids(clusters)
        return np.array(
            [self._nearest_centroid(point, centroids) for point in data],
            dtype=float,
        )

    def fit(self, data):
        """Cluster ``data`` and return one cluster label per sample.

        Args:
            data: array-like of shape (n_samples, n_features).

        Returns:
            Float array of length n_samples with values in ``0 .. k-1``.

        Raises:
            ValueError: if ``data`` is not two-dimensional or contains fewer
                than ``k`` samples.
        """
        data = np.asarray(data, dtype=float)
        if data.ndim != 2:
            raise ValueError("data must be a 2-D array of shape (n_samples, n_features)")
        if len(data) < self.k:
            raise ValueError("data must contain at least k samples")

        centroids = self.initialize_centroids(data)
        for _ in range(self.max_iters):
            clusters = self.create_clusters(centroids, data)
            # Passing the current centroids keeps an empty cluster in place
            # instead of turning its centroid into NaN.
            updated = self.calculate_centroids(clusters, centroids)
            converged = np.array_equal(updated, centroids)
            centroids = updated
            if converged:
                break

        self.centroids = centroids
        # Label from the final centroids so this also works when max_iters == 0.
        return np.array(
            [self._nearest_centroid(point, centroids) for point in data],
            dtype=float,
        )
# Demo: generate 100 samples and cluster them into two groups
data = create_data(100)
model = KMeans(k=2)
y_pred = model.fit(data)

# Visualisation: scatter the two feature columns, coloured by cluster label
plt.scatter(*data.T, c=y_pred, cmap='viridis')
plt.show()
```
注:上述代码实现了一个简单的KMeans聚类算法,用于对二维数据进行聚类,其中KMeans类的fit方法返回的是每个样本所属的簇的标签。
kmeans聚类算法代码jupyter
K-means是一种常用的无监督机器学习算法,用于数据集的分群。下面是一个简单的Python KMeans聚类算法的Jupyter Notebook代码示例:
```python
# 导入必要的库
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Assume a two-dimensional dataset: 100 randomly generated points
X = np.random.rand(100, 2)

# Create the KMeans model with three clusters and train it on X
# (fit returns the estimator itself)
kmeans = KMeans(n_clusters=3).fit(X)

# Cluster index assigned to every training point
labels = kmeans.labels_

# Plot the data points coloured by cluster, then overlay the cluster centres
plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis')
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], s=300, c='red', label='Centroids')
plt.title('K-means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
```
阅读全文