Python使用欧氏距离、曼哈顿距离和夹角余弦距离实现k-means算法
时间: 2024-05-12 18:20:53 浏览: 148
以下是使用Python实现k-means算法,其中包括欧氏距离、曼哈顿距离和夹角余弦距离的实现:
```python
import numpy as np
import math
# 欧氏距离
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two vectors.

    Parameters
    ----------
    x1, x2 : array_like
        Vectors of the same shape. Plain sequences are accepted and
        converted to NumPy arrays before subtracting.

    Returns
    -------
    numpy.floating
        Square root of the sum of squared element-wise differences.
    """
    # Convert first so list inputs do not fail on ``list - list``.
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sqrt(np.sum(diff ** 2))
# 曼哈顿距离
def manhattan_distance(x1, x2):
    """Return the Manhattan (L1) distance between two vectors.

    Parameters
    ----------
    x1, x2 : array_like
        Vectors of the same shape. Plain sequences are accepted and
        converted to NumPy arrays before subtracting.

    Returns
    -------
    numpy number
        Sum of the absolute element-wise differences.
    """
    # Convert first so list inputs do not fail on ``list - list``.
    diff = np.asarray(x1) - np.asarray(x2)
    return np.sum(np.abs(diff))
# 夹角余弦距离
def cosine_distance(x1, x2):
    """Return the cosine distance ``1 - cos(theta)`` between two vectors.

    Parameters
    ----------
    x1, x2 : array_like
        One-dimensional vectors of equal length. Plain sequences are
        accepted.

    Returns
    -------
    float
        ``1 - (x1 . x2) / (|x1| * |x2|)``. When either vector has zero
        norm the angle is undefined; ``1.0`` is returned in that case
        instead of dividing by zero and producing NaN.
    """
    x1 = np.asarray(x1)
    x2 = np.asarray(x2)
    norm_product = np.linalg.norm(x1) * np.linalg.norm(x2)
    if norm_product == 0:
        # A zero vector has no direction; treat it as orthogonal so that
        # callers taking an argmin never see NaN.
        return 1.0
    return 1 - np.dot(x1, x2) / norm_product
class KMeans:
    """K-means clustering with a selectable distance metric.

    Parameters
    ----------
    k : int
        Number of clusters.
    max_iters : int
        Upper bound on the number of assign/update rounds.
    distance : str
        One of ``"euclidean"``, ``"manhattan"`` or ``"cosine"``.

    Attributes
    ----------
    centroids : numpy.ndarray or None
        Centroids produced by the most recent ``predict`` call; ``None``
        until ``predict`` has been called.

    Raises
    ------
    ValueError
        If ``distance`` is not one of the supported metric names.
    """

    # Metric names accepted for the ``distance`` argument.
    SUPPORTED_DISTANCES = ("euclidean", "manhattan", "cosine")

    def __init__(self, k=3, max_iters=100, distance="euclidean"):
        if distance not in self.SUPPORTED_DISTANCES:
            # Fail fast: a misspelled metric used to fall through to cosine.
            raise ValueError(
                f"unknown distance {distance!r}; "
                f"expected one of {self.SUPPORTED_DISTANCES}"
            )
        self.k = k
        self.max_iters = max_iters
        self.distance = distance
        self.centroids = None

    def _distance_function(self):
        """Return the metric callable selected by ``self.distance``."""
        # Looked up at call time so the module-level metric helpers are
        # only needed when a distance is actually computed.
        if self.distance == "euclidean":
            return euclidean_distance
        if self.distance == "manhattan":
            return manhattan_distance
        if self.distance == "cosine":
            return cosine_distance
        raise ValueError(
            f"unknown distance {self.distance!r}; "
            f"expected one of {self.SUPPORTED_DISTANCES}"
        )

    def initialize_centroids(self, X):
        """Pick ``k`` rows of ``X`` at random as the starting centroids.

        Rows are drawn without replacement so the seeds come from distinct
        samples; replacement is only used when ``k`` exceeds the number of
        samples, where distinct picks are impossible.

        Returns a float array of shape ``(k, n_features)``.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        picks = np.random.choice(
            n_samples, size=self.k, replace=bool(self.k > n_samples)
        )
        return X[picks].astype(float)

    def closest_centroid(self, sample, centroids):
        """Return the index of the centroid nearest to ``sample``."""
        metric = self._distance_function()
        distances = [metric(sample, centroid) for centroid in centroids]
        return np.argmin(distances)

    def create_clusters(self, X, centroids):
        """Assign every sample to its nearest centroid.

        Returns a list of ``k`` lists; list ``i`` holds the row indices of
        the samples assigned to centroid ``i``.
        """
        clusters = [[] for _ in range(self.k)]
        for sample_i, sample in enumerate(X):
            centroid_i = self.closest_centroid(sample, centroids)
            clusters[centroid_i].append(sample_i)
        return clusters

    def calculate_centroids(self, X, clusters, prev_centroids=None):
        """Recompute each centroid as the mean of its assigned samples.

        Parameters
        ----------
        X : array_like, shape (n_samples, n_features)
        clusters : list of list of int
            Row indices per cluster, as produced by ``create_clusters``.
        prev_centroids : array_like, optional
            Centroids from the previous round. An empty cluster keeps its
            previous centroid; without ``prev_centroids`` it is re-seeded
            from a random sample. Either way no NaN row is produced.

        Returns a float array of shape ``(k, n_features)``.
        """
        X = np.asarray(X)
        n_features = X.shape[1]
        centroids = np.zeros((self.k, n_features))
        for i, cluster in enumerate(clusters):
            if len(cluster) > 0:
                centroids[i] = np.mean(X[cluster], axis=0)
            elif prev_centroids is not None:
                centroids[i] = prev_centroids[i]
            else:
                centroids[i] = X[np.random.randint(X.shape[0])]
        return centroids

    def get_cluster_labels(self, clusters, X):
        """Flatten ``clusters`` into one label per row of ``X``.

        Returns a float array of length ``n_samples`` whose entry ``j`` is
        the index of the cluster containing sample ``j``.
        """
        y_pred = np.zeros(np.asarray(X).shape[0])
        for cluster_i, cluster in enumerate(clusters):
            for sample_i in cluster:
                y_pred[sample_i] = cluster_i
        return y_pred

    def predict(self, X):
        """Cluster ``X`` and return the cluster label of every sample.

        Alternates assignment and centroid update until the centroids stop
        changing or ``max_iters`` rounds have run. The final centroids are
        stored on ``self.centroids``.
        """
        X = np.asarray(X)
        centroids = self.initialize_centroids(X)
        clusters = None
        for _ in range(self.max_iters):
            clusters = self.create_clusters(X, centroids)
            prev_centroids = centroids
            centroids = self.calculate_centroids(X, clusters, prev_centroids)
            if np.array_equal(centroids, prev_centroids):
                break
        if clusters is None:
            # max_iters <= 0: the loop never ran, so label against the
            # initial centroids instead of hitting an unbound variable.
            clusters = self.create_clusters(X, centroids)
        self.centroids = centroids
        return self.get_cluster_labels(clusters, X)
```
使用示例:
```python
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
# Synthetic data set: 500 samples generated around 3 centres.
X, y = make_blobs(n_samples=500, centers=3, random_state=1)

# Cluster with the Euclidean metric; predict() returns one label per sample.
kmeans = KMeans(distance="euclidean", max_iters=100, k=3)
y_pred = kmeans.predict(X)

# Scatter the first two features, coloured by assigned cluster.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y_pred)
plt.title("K-Means Clustering")
plt.show()
```
其中,distance参数可以设置为"euclidean"(默认值)、"manhattan"或者"cosine",分别表示使用欧氏距离、曼哈顿距离或夹角余弦距离(即 1 − 余弦相似度)。
阅读全文