# Original request: "Please help me analyze the following code."
"""Fuzzy C-Means (FCM) clustering of the 'V1'/'V2' columns of a CSV file.

Run as a script, this module reads the CSV at ``DATA_PATH``, clusters the
two columns into ``classes`` fuzzy clusters, prints the initial and final
membership matrices, and saves a scatter plot (samples coloured by their
most likely cluster, centroids drawn as black stars) to ``FIGURE_PATH``.
"""
import numpy as np
import pandas as pd

DATA_PATH = '/data/bigfiles/de091ac1-1335-47b3-82ca-077ec40a6a55.csv'
FIGURE_PATH = "/data/workspace/myshixun/task/img/T1.png"

m = 2        # fuzzifier exponent used in the centroid and membership updates
EPS = 1e-7   # stop once no membership changes by more than this
classes = 3  # number of clusters

# Colours for cluster 0, cluster 1 and "any other cluster", in that order.
_PALETTE = ('green', 'blue', 'magenta')


def distance(X, centroid):
    """Return the Euclidean distance from every row of *X* to *centroid*."""
    return np.sqrt(np.sum((X - centroid) ** 2, axis=1))


def init_membership(sample_number, n_classes):
    """Return a random ``(sample_number, n_classes)`` membership matrix.

    Entries are drawn from ``np.random.rand`` and each row is rescaled so
    that it sums to 1.
    """
    U = np.random.rand(sample_number, n_classes)
    return U / np.sum(U, axis=1, keepdims=True)


def _update_centroids(U, X, m):
    """Return the centroids as the ``U ** m``-weighted means of the samples."""
    weights = U ** m
    return (weights.T @ X) / np.sum(weights, axis=0)[:, np.newaxis]


def _update_membership(X, centroids, m):
    """Return the membership matrix implied by *centroids*.

    ``U[n, i] = 1 / sum_k (d[n, i] / d[n, k]) ** (2 / (m - 1))`` where
    ``d[n, i]`` is the distance from sample ``n`` to centroid ``i``.
    """
    # Compute each distance vector once instead of once per (i, k) pair.
    dists = np.stack([distance(X, c) for c in centroids], axis=1)
    # A sample lying exactly on a centroid has distance 0, which would turn
    # the ratios below into 0/0 = NaN.  Clamp to machine epsilon so that such
    # a sample gets membership ~1 for that centroid instead.
    # np.maximum (not np.fmax) is used so that NaNs in the data still show up.
    dists = np.maximum(dists, np.finfo(np.float64).eps)
    ratios = (dists[:, :, np.newaxis] / dists[:, np.newaxis, :]) ** (2 / (m - 1))
    return 1 / np.sum(ratios, axis=2)


def fuzzy_c_means(X, n_classes, m=2, eps=1e-7, U0=None):
    """Cluster the rows of *X* with the Fuzzy C-Means algorithm.

    Args:
        X: 2-D array-like with one sample per row.
        n_classes: number of clusters (at least 1).
        m: fuzzifier exponent; must be greater than 1.
        eps: iteration stops once the largest absolute change of any
            membership value is no longer greater than this.
        U0: optional initial membership matrix of shape
            ``(len(X), n_classes)``; a random one is drawn when omitted.

    Returns:
        ``(U, centroids)``: the final membership matrix of shape
        ``(len(X), n_classes)`` and the centroids used to compute it, of
        shape ``(n_classes, X.shape[1])``.

    Raises:
        ValueError: if *X* is not a non-empty 2-D array, ``n_classes < 1``,
            ``m <= 1`` or *U0* has the wrong shape.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError("X must be a non-empty 2-D array of samples")
    if n_classes < 1:
        raise ValueError("n_classes must be at least 1")
    if m <= 1:
        raise ValueError("m must be greater than 1")

    if U0 is None:
        U = init_membership(X.shape[0], n_classes)
    else:
        U = np.array(U0, dtype=float)
        if U.shape != (X.shape[0], n_classes):
            raise ValueError("U0 must have shape (len(X), n_classes)")

    while True:
        centroids = _update_centroids(U, X, m)
        U_old, U = U, _update_membership(X, centroids, m)
        # Written as "not (delta > eps)" so that a NaN delta also stops the
        # loop rather than spinning forever.
        if not (np.max(np.abs(U - U_old)) > eps):
            return U, centroids


def _plot_clusters(x, y, labels, centroids):
    """Save a scatter plot of the labelled samples and centroids to FIGURE_PATH."""
    # Imported here so the clustering helpers above stay importable on
    # machines without a plotting backend.
    import matplotlib.pyplot as plt

    plt.rcParams['figure.figsize'] = (16, 9)
    # One scatter call for all samples instead of one call per sample.
    colors = [_PALETTE[min(int(label), 2)] for label in labels]
    plt.scatter(x, y, c=colors, alpha=0.5)
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='*', c='black')
    plt.savefig(FIGURE_PATH)


def main():
    """Read the data, run FCM, print the memberships and save the plot."""
    data = pd.read_csv(DATA_PATH)
    x = data['V1']
    y = data['V2']
    X = np.column_stack((x, y))

    U = init_membership(X.shape[0], classes)
    print(U)

    U, centroids = fuzzy_c_means(X, classes, m=m, eps=EPS, U0=U)
    print(U)

    Uc = np.argmax(U, axis=1)  # hard label = cluster with highest membership
    _plot_clusters(x, y, Uc, centroids)

    # NOTE(review): `Image` is not imported anywhere in the visible code, so
    # this line raises NameError as written.  It presumably expects
    # `from PIL import Image` -- confirm and add the import, or drop the line.
    a = Image.open(FIGURE_PATH)


if __name__ == "__main__":
    main()
时间: 2023-06-05 17:08:04 浏览: 146
python数据分析之numpy-pandas-matplotlib-常用代码示例
3星 · 编辑精心推荐
这段代码先导入了numpy、pandas和matplotlib.pyplot三个库,然后使用pandas库中的read_csv函数读取了一个csv文件,将数据存储到了一个名为data的DataFrame对象中。接下来通过data['V1']和data['V2']分别获取了data对象中对应的两列数据,并将它们分别存储到名为x和y的Series对象中。最后,使用numpy库中的array函数将x和y合并为一个矩阵X,其中x和y各自构成X的一列。
阅读全文