已知有三个聚类好的包含三维坐标点的数组,在不调用库的情况下编写出求聚类指标CP,CH,DB,SC的代码
时间: 2024-05-09 19:19:43 浏览: 4
以下是一个Python实现的示例代码:
```python
import numpy as np
def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x: Coordinates of the first point (array, list or tuple of numbers).
        y: Coordinates of the second point, same length as ``x``.

    Returns:
        The distance as a NumPy float scalar.
    """
    # Coerce to float arrays so that plain lists/tuples can be subtracted.
    first = np.asarray(x, dtype=float)
    second = np.asarray(y, dtype=float)
    return np.sqrt(np.sum((first - second) ** 2))
def calculate_centroid(cluster):
    """Return the centroid (coordinate-wise mean) of a cluster.

    Args:
        cluster: Sequence of points, e.g. an ``(n_points, 3)`` array or a
            list of coordinate lists.

    Returns:
        A NumPy array holding the mean of the points along axis 0.

    Raises:
        ValueError: If ``cluster`` contains no points (the mean would
            otherwise silently become NaN).
    """
    points = np.asarray(cluster, dtype=float)
    if points.size == 0:
        raise ValueError("cannot compute the centroid of an empty cluster")
    return points.mean(axis=0)
def calculate_sse(cluster):
    """Return the within-cluster sum of squared errors (SSE).

    The SSE is the sum of squared Euclidean distances between every point
    of the cluster and the cluster centroid.

    Args:
        cluster: Sequence of points, e.g. an ``(n_points, 3)`` array or a
            list of coordinate lists.

    Returns:
        The SSE as a float; ``0.0`` for an empty cluster.
    """
    points = np.asarray(cluster, dtype=float)
    if points.size == 0:
        # No points means no error; avoids the NaN mean of an empty array.
        return np.float64(0.0)
    centroid = points.mean(axis=0)
    # Sum the squared differences directly instead of taking a square root
    # per point and squaring it again.
    return np.sum((points - centroid) ** 2)
def calculate_cp(clusters):
    """Return the Compactness (CP) index of a clustering.

    For each cluster the mean Euclidean distance of its points to the
    cluster centroid is computed; CP is the mean of these per-cluster
    values. Lower values indicate tighter clusters.

    Args:
        clusters: Sequence of clusters, each an ``(n_points, n_dims)`` array
            or nested list of point coordinates.

    Returns:
        The CP value as a float.

    Raises:
        ValueError: If there are no clusters, or any cluster is empty or
            not two-dimensional.
    """
    arrays = [np.asarray(cluster, dtype=float) for cluster in clusters]
    if not arrays or any(a.ndim != 2 or len(a) == 0 for a in arrays):
        raise ValueError(
            "clusters must be a non-empty sequence of non-empty "
            "(n_points, n_dims) arrays"
        )

    per_cluster = []
    for points in arrays:
        centroid = points.mean(axis=0)
        distances = np.sqrt(((points - centroid) ** 2).sum(axis=1))
        per_cluster.append(distances.mean())
    return np.mean(per_cluster)
def calculate_ch(clusters):
    """Return the Calinski-Harabasz (CH) index of a clustering.

    CH = [B / (k - 1)] / [W / (n - k)], where ``B`` is the between-cluster
    dispersion (sum over clusters of size * squared distance between the
    cluster centroid and the overall centroid), ``W`` is the within-cluster
    dispersion (total SSE), ``k`` is the number of clusters and ``n`` the
    number of points. Higher values indicate better-separated clusters.

    Args:
        clusters: Sequence of clusters, each an ``(n_points, n_dims)`` array
            or nested list of point coordinates.

    Returns:
        The CH value as a float; ``inf`` when the within-cluster dispersion
        is exactly zero.

    Raises:
        ValueError: If any cluster is empty or not two-dimensional, if
            there are fewer than two clusters, or if the number of points
            does not exceed the number of clusters.
    """
    arrays = [np.asarray(cluster, dtype=float) for cluster in clusters]
    if not arrays or any(a.ndim != 2 or len(a) == 0 for a in arrays):
        raise ValueError(
            "clusters must be a non-empty sequence of non-empty "
            "(n_points, n_dims) arrays"
        )
    num_clusters = len(arrays)
    num_points = sum(len(a) for a in arrays)
    if num_clusters < 2 or num_points <= num_clusters:
        raise ValueError(
            "the CH index needs at least two clusters and more points "
            "than clusters"
        )

    overall_centroid = np.concatenate(arrays, axis=0).mean(axis=0)
    between = 0.0
    within = 0.0
    for points in arrays:
        centroid = points.mean(axis=0)
        between += len(points) * np.sum((centroid - overall_centroid) ** 2)
        within += np.sum((points - centroid) ** 2)

    if within == 0.0:
        # Every point sits on its own centroid: the ratio is unbounded.
        return np.inf
    return (between / (num_clusters - 1)) / (within / (num_points - num_clusters))
def calculate_db(clusters):
    """Return the Davies-Bouldin (DB) index of a clustering.

    With ``S_i`` the mean distance of the points of cluster ``i`` to its
    centroid and ``d_ij`` the distance between the centroids of clusters
    ``i`` and ``j``, the index is the mean over ``i`` of
    ``max_{j != i} (S_i + S_j) / d_ij``. Lower values indicate better
    separation.

    Args:
        clusters: Sequence of clusters, each an ``(n_points, n_dims)`` array
            or nested list of point coordinates.

    Returns:
        The DB value as a float. If two centroids coincide the division by
        a zero separation yields ``inf`` (or ``nan``) with a NumPy warning.

    Raises:
        ValueError: If any cluster is empty or not two-dimensional, or if
            there are fewer than two clusters.
    """
    arrays = [np.asarray(cluster, dtype=float) for cluster in clusters]
    if not arrays or any(a.ndim != 2 or len(a) == 0 for a in arrays):
        raise ValueError(
            "clusters must be a non-empty sequence of non-empty "
            "(n_points, n_dims) arrays"
        )
    if len(arrays) < 2:
        raise ValueError("the DB index needs at least two clusters")

    centroids = np.array([points.mean(axis=0) for points in arrays])
    # S_i: average distance of each cluster's points to its own centroid.
    scatter = np.array([
        np.sqrt(((points - centroid) ** 2).sum(axis=1)).mean()
        for points, centroid in zip(arrays, centroids)
    ])

    # d_ij: pairwise centroid distances via broadcasting.
    offsets = centroids[:, None, :] - centroids[None, :, :]
    separation = np.sqrt((offsets ** 2).sum(axis=2))
    # An infinite self-distance makes the i == j ratio 0, so it can never
    # be selected by the row-wise maximum below.
    np.fill_diagonal(separation, np.inf)

    ratios = (scatter[:, None] + scatter[None, :]) / separation
    return ratios.max(axis=1).mean()
def calculate_sc(clusters):
    """Return the mean Silhouette Coefficient (SC) of a clustering.

    For every point, ``a`` is its mean distance to the other points of its
    own cluster and ``b`` is the smallest mean distance to the points of
    any other cluster; the point's silhouette is ``(b - a) / max(a, b)``.
    Points in single-element clusters get a silhouette of 0. The result is
    the mean over all points and lies in ``[-1, 1]`` (higher is better).

    Note: a full pairwise distance matrix is built, so memory grows with
    the square of the total number of points.

    Args:
        clusters: Sequence of clusters, each an ``(n_points, n_dims)`` array
            or nested list of point coordinates.

    Returns:
        The mean silhouette value as a float.

    Raises:
        ValueError: If any cluster is empty or not two-dimensional, or if
            there are fewer than two clusters.
    """
    arrays = [np.asarray(cluster, dtype=float) for cluster in clusters]
    if not arrays or any(a.ndim != 2 or len(a) == 0 for a in arrays):
        raise ValueError(
            "clusters must be a non-empty sequence of non-empty "
            "(n_points, n_dims) arrays"
        )
    num_clusters = len(arrays)
    if num_clusters < 2:
        raise ValueError("the silhouette coefficient needs at least two clusters")

    # Flatten all clusters into one point list with a parallel label array.
    points = np.concatenate(arrays, axis=0)
    labels = np.concatenate(
        [np.full(len(a), idx) for idx, a in enumerate(arrays)]
    )

    offsets = points[:, None, :] - points[None, :, :]
    distances = np.sqrt((offsets ** 2).sum(axis=2))

    scores = np.zeros(len(points))
    for i in range(len(points)):
        own_mask = labels == labels[i]
        own_size = own_mask.sum()
        if own_size == 1:
            # Singleton cluster: silhouette is defined as 0.
            continue
        # The point's distance to itself is 0, so summing over the whole
        # cluster and dividing by (size - 1) averages over the others only.
        a = distances[i, own_mask].sum() / (own_size - 1)
        b = min(
            distances[i, labels == other].mean()
            for other in range(num_clusters)
            if other != labels[i]
        )
        denominator = max(a, b)
        # a == b == 0 only when the point coincides with points of both
        # its own and another cluster; treat that as a neutral score.
        scores[i] = (b - a) / denominator if denominator > 0 else 0.0
    return scores.mean()
```
其中,`euclidean_distance`函数用于计算两点间的欧几里得距离,`calculate_centroid`函数用于计算聚类中心(簇内各点的均值),`calculate_sse`函数用于计算聚类内误差平方和(SSE),`calculate_cp`函数用于计算紧密度指标CP(Compactness),`calculate_ch`函数用于计算Calinski-Harabasz指标CH,`calculate_db`函数用于计算Davies-Bouldin指标DB,`calculate_sc`函数用于计算轮廓系数SC(Silhouette Coefficient)。代码仅借助NumPy完成基础的数组运算,没有调用任何现成的聚类评估库。需要注意的是,这些函数都假设输入的每个聚类已经是由三维坐标点组成的数组,因此不需要做额外的处理。