python 读取csv文件 k中心点算法 可视化
时间: 2023-07-18 21:12:41 浏览: 48
下面是一个使用Python实现K中心点（k-center）算法并可视化的示例代码，其中中心点采用贪心的“最远点优先”策略选取。假设我们的数据集是一个不含表头的CSV文件，每行包含两个数值，代表二维空间中的一个点。
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def read_csv_file(file_path):
    """Load a header-less CSV file into a 2-D NumPy array.

    Args:
        file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        numpy.ndarray: One row per CSV line, one column per field.
    """
    # header=None keeps the first line as data instead of treating it as
    # column names.
    return pd.read_csv(file_path, header=None).to_numpy()
def euclidean_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between (x1, y1) and (x2, y2).

    ``np.hypot`` is used instead of ``sqrt(dx**2 + dy**2)`` so that squaring
    a very large or very small coordinate difference cannot overflow or
    underflow before the square root is taken.
    """
    return np.hypot(x1 - x2, y1 - y2)
def find_furthest_point(points, centers):
    """Return the point whose distance to its nearest center is largest.

    Only the first two coordinates of each point and center are used.

    Args:
        points: Sequence or 2-D array of points, each with at least two
            coordinates.
        centers: Sequence of already chosen centers, laid out like points.

    Returns:
        The element of ``points`` that lies furthest from its nearest center
        (the first such element on ties), or ``None`` when ``points`` is
        empty.
    """
    if len(points) == 0:
        return None
    if len(centers) == 0:
        # With no centers every point is "infinitely" far away, so the first
        # point wins.
        return points[0]

    pts = np.asarray(points, dtype=float)[:, :2]
    ctrs = np.asarray(centers, dtype=float)[:, :2]
    # Pairwise distances via broadcasting: rows are points, columns centers.
    dists = np.hypot(
        pts[:, None, 0] - ctrs[None, :, 0],
        pts[:, None, 1] - ctrs[None, :, 1],
    )
    nearest = dists.min(axis=1)
    # argmax always yields an index, so a point is returned even when every
    # point coincides with a center (all distances are 0).
    return points[int(np.argmax(nearest))]
def k_center(points, k):
    """Cluster points with the greedy (farthest-first) k-center heuristic.

    The first center is drawn at random from ``points``; each further center
    is the point returned by ``find_furthest_point`` for the centers chosen
    so far. Every point is then assigned to its nearest center (the first
    one on ties).

    Args:
        points: Sequence or 2-D array of points with at least two
            coordinates each.
        k: Requested number of centers; must be at least 1.

    Returns:
        tuple: ``(centers, clusters)`` where ``clusters[i]`` is the list of
        points assigned to ``centers[i]``. If ``find_furthest_point`` returns
        ``None`` before ``k`` centers are found, selection stops early and
        fewer than ``k`` centers/clusters are returned.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``points`` is empty.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(points) == 0:
        raise ValueError("points must contain at least one point")

    # Seed the selection with a random point.
    centers = [points[np.random.randint(0, len(points))]]
    while len(centers) < k:
        furthest_point = find_furthest_point(points, centers)
        if furthest_point is None:
            # No further candidate exists; appending None would crash the
            # distance computations below.
            break
        centers.append(furthest_point)

    # Size the clusters by the centers actually chosen so that centers[i]
    # always pairs with clusters[i].
    clusters = [[] for _ in centers]
    for point in points:
        distances = [
            euclidean_distance(point[0], point[1], center[0], center[1])
            for center in centers
        ]
        # argmin returns the first minimum, i.e. the earliest center on ties.
        clusters[int(np.argmin(distances))].append(point)
    return centers, clusters
def plot_result(centers, clusters):
    """Draw each cluster in its own color and mark its center with a star.

    Args:
        centers: Sequence of center points; ``centers[i]`` belongs to
            ``clusters[i]``.
        clusters: Sequence of point collections, one per center.

    Colors come from a fixed 7-entry palette and repeat when there are more
    clusters than colors. The figure is displayed with ``plt.show()``.
    """
    colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
    for i, cluster in enumerate(clusters):
        color = colors[i % len(colors)]
        # One scatter call per cluster rather than one per point keeps the
        # number of matplotlib artists proportional to the cluster count.
        xs = [point[0] for point in cluster]
        ys = [point[1] for point in cluster]
        plt.scatter(xs, ys, c=color)
        center = centers[i]
        plt.scatter(center[0], center[1], marker='*', s=200, c=color)
    plt.show()
# Entry point: load the points from data.csv, group them into k clusters
# with the k-center routine, then display the plot.
file_path = 'data.csv'
k = 3
points = read_csv_file(file_path)
centers, clusters = k_center(points, k)
plot_result(centers, clusters)
```
这段代码中，`read_csv_file`函数用于读取CSV文件，`euclidean_distance`函数用于计算两点之间的欧几里得距离，`find_furthest_point`函数用于寻找距离现有中心点最远的点作为新的中心点，`k_center`函数是K中心点算法的实现（先选出k个中心点，再把每个点归入距离最近的中心点所在的簇），`plot_result`函数用于将结果可视化。最后，我们读取CSV文件，运行K中心点算法，并绘制可视化结果。