import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, MiniBatchKMeans


def _group_names_by_label(names, labels, n_clusters):
    """Group sample names by their cluster label.

    Args:
        names: Iterable of sample names, one per clustered row.
        labels: Iterable of integer cluster labels, parallel to ``names``;
            each label must lie in ``range(n_clusters)``.
        n_clusters: Number of clusters, i.e. the number of groups returned.

    Returns:
        A list of ``n_clusters`` lists; entry ``k`` holds the names whose
        label is ``k``, in input order.
    """
    groups = [[] for _ in range(n_clusters)]
    # Pair names and labels directly instead of going through a dict:
    # a dict keyed by name would silently merge samples that share a name,
    # and re-materialising its keys/values per row is quadratic.
    for name, label in zip(names, labels):
        groups[label].append(name)
    return groups


def main():
    """Cluster the sampling points of an Excel sheet with KMeans and print the result.

    Reads sheet 'Sheet2' of a hard-coded workbook, drops rows containing
    missing values, z-score standardizes columns D1..D14, clusters the rows
    on D1, D2, D3, D9 and D10 into two groups, then prints the cluster
    centers, the per-row labels and the sample names in each cluster.
    """
    file = pd.read_excel('C:/Users/h/Desktop/111.xlsx', 'Sheet2', header=0)  # first row is the header row
    file = file.dropna()
    print(file.head())

    # Z-score each column: subtract the mean, divide by the standard
    # deviation (np.std defaults to ddof=0, the population form).
    z_scaler = lambda x: (x - np.mean(x)) / np.std(x)
    dfScaler = file[
        ['D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7',
         'D8', 'D9', 'D10', 'D11', 'D12', 'D13', 'D14']
    ].apply(z_scaler)  # standardize the data

    # Keep the sample-name column next to the standardized features.
    dfData = pd.concat([file[['文物采样点']], dfScaler], axis=1)
    df = dfData.loc[:, ['D1', 'D2', 'D3', 'D9', 'D10']]
    X = np.array(df)
    print("Shape of cluster data:", X.shape)

    nCluster = 2
    # NOTE: no random_state is set, so the label numbering (and possibly the
    # partition itself) may differ between runs.
    kmCluster = KMeans(n_clusters=nCluster).fit(X)
    print("Cluster centers:\n", kmCluster.cluster_centers_)
    print("Cluster results:\n", kmCluster.labels_)

    listName = dfData['文物采样点'].tolist()
    listCluster = _group_names_by_label(listName, kmCluster.labels_, nCluster)
    for k in range(nCluster):
        print("第 {} 类:{}".format(k, listCluster[k]))


if __name__ == '__main__':
    main()
时间: 2024-01-09 07:06:01 浏览: 135
python 3.74 运行import numpy as np 报错lib\site-packages\numpy\__init__.py
这是一段使用 sklearn 库中的 KMeans 算法对数据进行聚类的 Python 代码。首先,代码读取了一个 Excel 文件,然后对数据进行了预处理,包括删除含有空值的行,以及对 D1–D14 各列做 z-score 标准化(即减去均值后除以标准差,代码注释中称为“数据归一化”)。接着,选取了部分特征列(D1、D2、D3、D9、D10)作为聚类的输入,使用 KMeans 算法将样本聚为 2 类,并输出聚类中心和每个样本的类别标签。此外,该代码还将每个样本点的聚类结果与其对应的名称一起保存到了一个字典中,并将每个类别包含的样本点输出。
阅读全文