余弦相似度聚类加速 代码实现
时间: 2023-08-26 16:05:11 浏览: 102
余弦相似度算法实现
5星 · 资源好评率100%
以下是余弦相似度聚类加速的Python代码实现:
```python
import numpy as np
def cosine_similarity(x, y):
    """Return the cosine similarity between two vectors.

    Args:
        x: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        y: Second vector with the same length as ``x``.

    Returns:
        ``dot(x, y) / (||x|| * ||y||)``. When either vector has zero norm
        the direction is undefined, so 0.0 is returned instead of NaN.
    """
    norm_x = np.linalg.norm(x)
    norm_y = np.linalg.norm(y)
    # A zero vector would make this a 0/0 division (NaN plus a
    # RuntimeWarning); treat it as "not similar to anything".
    if norm_x == 0 or norm_y == 0:
        return 0.0
    return np.dot(x, y) / (norm_x * norm_y)
def cluster_cosine_similarities(X):
    """Compute the pairwise cosine similarity between all rows of ``X``.

    The computation is vectorised: every row is scaled to unit length once
    and a single matrix product then yields all pairwise similarities,
    replacing a Python-level double loop.

    Args:
        X: 2-D array-like of shape ``(n_samples, n_features)``.

    Returns:
        A symmetric ``(n_samples, n_samples)`` float array whose entry
        ``[i, j]`` is the cosine similarity between rows ``i`` and ``j``.
        Rows with zero norm have similarity 0 to every row (including
        themselves) rather than NaN.
    """
    X = np.asarray(X, dtype=float)
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    # Zero-norm rows would divide by zero; dividing by 1 instead leaves the
    # row as all zeros, so its similarities come out as 0.
    norms[norms == 0] = 1.0
    unit_rows = X / norms
    # Rounding can push values marginally outside [-1, 1]; clamp them back.
    return np.clip(unit_rows @ unit_rows.T, -1.0, 1.0)
def cluster(data, threshold):
    """Greedily group samples by cosine similarity to a seed sample.

    Samples are scanned in index order. Each sample that has not yet been
    assigned starts a new cluster as its seed, and every later unassigned
    sample whose similarity to that seed is at least ``threshold`` joins it.

    Args:
        data: 2-D array of shape ``(n_samples, n_features)``.
        threshold: Minimum cosine similarity to the seed required to join
            its cluster.

    Returns:
        A list of sets of sample indices. Every sample appears in exactly
        one set and no set is empty.
    """
    similarities = cluster_cosine_similarities(data)
    n_samples = data.shape[0]
    clusters = []
    visited = set()
    for i in range(n_samples):
        if i in visited:
            continue
        # The seed always belongs to its own cluster, even when its
        # self-similarity falls below the threshold (rounding, zero vector,
        # threshold > 1); otherwise an empty cluster would be emitted.
        members = {i}
        visited.add(i)
        for j in range(i + 1, n_samples):
            # Skip samples already claimed by an earlier cluster so that no
            # sample is assigned twice.
            if j not in visited and similarities[i][j] >= threshold:
                members.add(j)
                visited.add(j)
        clusters.append(members)
    return clusters
```
使用示例:
```python
# Six 3-D samples of consecutive integers: [1, 2, 3], [2, 3, 4], ..., [6, 7, 8].
data = np.array([[start, start + 1, start + 2] for start in range(1, 7)])

# Group the samples using a cosine-similarity threshold of 0.8 and show the result.
clusters = cluster(data, 0.8)
print(clusters)
```
输出结果:
```
[{0, 1, 2, 3, 4, 5}]
```
说明所有样本都被聚类在了一起:每个样本与第一个样本 [1, 2, 3] 的余弦相似度都不低于阈值 0.8(其中最小的是与 [6, 7, 8] 的相似度,约为 0.963),因此它们全部被归入同一个簇。
阅读全文