import pandas as pd import matplotlib.pyplot as plt from sklearn.cluster import KMeans df = pd.read_csv(r"D:\数学建模\重航数学建模校赛\附件1(前50行).csv",encoding='gbk') # 文件目录加文件名 df.head() #定位数据 X = df.iloc[:,1:] X.head() # 标准化数据 from sklearn.preprocessing import StandardScaler scaler = StandardScaler() standX = scaler.fit_transform(X) standX # 肘部法则的可视化 from sklearn import metrics # 创建遍历,找到最合适的k值 scores = [] for k in range(2,150): labels = KMeans(n_clusters=k,n_init='auto').fit(X).labels_ score = metrics.silhouette_score(X,labels) scores.append(score) # 通过画图找出最合适的K值 plt.plot(list(range(2,150)),scores) plt.xlabel('Number of Clusters Initialized') plt.ylabel('Sihouette Score') plt.show()代码修改
时间: 2024-04-01 17:34:05 浏览: 19
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
df = pd.read_csv(r"D:\数学建模\重航数学建模校赛\附件1(前50行).csv",encoding='gbk') # 文件目录加文件名
X = df.iloc[:,1:]
scaler = StandardScaler()
standX = scaler.fit_transform(X)
scores = []
for k in range(2,150):
labels = KMeans(n_clusters=k,n_init='auto').fit(X).labels_
score = metrics.silhouette_score(X,labels)
scores.append(score)
plt.plot(list(range(2,150)),scores)
plt.xlabel('Number of Clusters Initialized')
plt.ylabel('Sihouette Score')
plt.show()
相关问题
import pandas as pd import numpy as np from sklearn.cluster import DBSCAN import matplotlib.pyplot as plt from sklearn.cluster import KMeans
这段代码导入了pandas、numpy、matplotlib.pyplot和sklearn.cluster库,其中sklearn.cluster库中包括了DBSCAN和KMeans聚类算法。pandas和numpy库常用于数据处理和科学计算,matplotlib.pyplot库常用于数据可视化。你可以通过这些库来进行数据分析和聚类操作。
import pandas as pd import numpy as np from scipy import stats from sklearn.cluster import KMeans import matplotlib.pyplot as plt
这段代码的作用是导入一些常用的机器学习和数据分析库,包括pandas、numpy、scipy、sklearn和matplotlib.pyplot。其中,pandas和numpy用于数据处理和操作,scipy用于科学计算,sklearn用于机器学习算法,matplotlib用于数据可视化。另外,这段代码还使用了KMeans算法来进行聚类分析。