"""Factor analysis of the WHO country indicators stored in ``who1.csv``.

The script extracts five factors with three methods (principal component,
principal factor and maximum likelihood), prints the varimax-rotated
loadings of each, draws a heat map of the maximum-likelihood loadings and
ranks the countries by the sum of their standardised factor scores.
"""

import numpy as np
import pandas as pd

# Raw string: the Windows path contains backslashes that must not be
# interpreted as escape sequences.
DATA_PATH = r"D:\复习资料\MVAPureData\who1.csv"
N_FACTORS = 5


def load_indicators(path):
    """Read the data set and return ``(countries, features)``.

    Args:
        path: Anything ``pandas.read_csv`` accepts (path or file object).

    Returns:
        countries: Series with the ``Country`` column.
        features: DataFrame with every other column converted to numeric
            and missing values replaced by the column mean.

    Raises:
        KeyError: If the file has no ``Country`` column.
        ValueError: If an indicator holds text that is not a number.
    """
    data = pd.read_csv(path)
    # The original script discards the first data row.
    # NOTE(review): presumably a units/sub-header row - confirm against the file.
    data = data.iloc[1:, :].reset_index(drop=True)
    countries = data["Country"]
    # ``drop`` returns a new frame; never combine assignment with
    # ``inplace=True`` (that call returns None).
    features = data.drop(columns="Country").apply(pd.to_numeric)
    # Means are taken over the numeric indicators only, so the string
    # column can no longer break the imputation.
    features = features.fillna(features.mean())
    return countries, features


def standardize(features):
    """Return z-scores (zero mean, unit sample standard deviation) per column.

    Raises:
        ValueError: If a column is constant, because its correlation with
            the other indicators is undefined.
    """
    spread = features.std()
    constant = spread.index[spread == 0].tolist()
    if constant:
        raise ValueError(f"constant columns cannot be standardised: {constant}")
    return (features - features.mean()) / spread


def principal_component_loadings(corr, n_factors):
    """Loadings of the principal component method.

    The loadings are the leading eigenvectors of ``corr`` scaled by the
    square roots of their eigenvalues.

    Args:
        corr: Symmetric (p, p) correlation (or reduced correlation) matrix.
        n_factors: Number of factors to keep.

    Returns:
        Array of shape (p, n_factors); the sign of each column is arbitrary.
    """
    corr = np.asarray(corr, dtype=float)
    eigenvalues, eigenvectors = np.linalg.eigh(corr)
    leading = np.argsort(eigenvalues)[::-1][:n_factors]
    # A reduced correlation matrix may have negative eigenvalues; clip them
    # so the square root stays real.
    scale = np.sqrt(np.clip(eigenvalues[leading], 0.0, None))
    return eigenvectors[:, leading] * scale


def principal_factor_loadings(corr, n_factors):
    """Loadings of the (single-step) principal factor method.

    The diagonal of ``corr`` is replaced by initial communality estimates
    (squared multiple correlations, ``1 - 1 / diag(R^-1)``) and the
    principal component solution of that reduced matrix is returned.

    Args:
        corr: Symmetric (p, p) correlation matrix.
        n_factors: Number of factors to keep.

    Returns:
        Array of shape (p, n_factors); the sign of each column is arbitrary.
    """
    reduced = np.array(corr, dtype=float)
    # pinv keeps the script running when the correlation matrix is singular.
    precision_diag = np.diag(np.linalg.pinv(reduced))
    communalities = np.clip(1.0 - 1.0 / precision_diag, 0.0, 1.0)
    np.fill_diagonal(reduced, communalities)
    return principal_component_loadings(reduced, n_factors)


def varimax(loadings, max_iter=500, tol=1e-6):
    """Rotate ``loadings`` orthogonally with the varimax criterion.

    Args:
        loadings: Array of shape (p, k).
        max_iter: Upper bound on the number of iterations.
        tol: Relative improvement below which the iteration stops.

    Returns:
        Rotated array of shape (p, k). Because the rotation is orthogonal,
        ``L @ L.T`` (and hence every communality) is unchanged.
    """
    loadings = np.asarray(loadings, dtype=float)
    n_variables, n_factors = loadings.shape
    if n_factors < 2:
        return loadings.copy()
    rotation = np.eye(n_factors)
    criterion = 0.0
    for _ in range(max_iter):
        previous = criterion
        rotated = loadings @ rotation
        target = rotated**3 - rotated * (rotated**2).sum(axis=0) / n_variables
        u, singular_values, vt = np.linalg.svd(loadings.T @ target)
        rotation = u @ vt
        criterion = singular_values.sum()
        if previous != 0 and criterion < previous * (1 + tol):
            break
    return loadings @ rotation


def loadings_frame(loadings, variables):
    """Wrap a (p, k) loading matrix in a DataFrame labelled Factor1..Factork."""
    loadings = np.asarray(loadings)
    names = [f"Factor{i + 1}" for i in range(loadings.shape[1])]
    return pd.DataFrame(loadings, columns=names, index=variables)


def rank_countries(scores, countries):
    """Rank countries by the sum of their standardised factor scores.

    Args:
        scores: Array of shape (n, k) with one row of factor scores per country.
        countries: n country names, in the same row order as ``scores``.

    Returns:
        DataFrame with columns Factor1..Factork (standardised), ``Country``
        and ``Score``, sorted by ``Score`` in descending order.
    """
    scores = np.asarray(scores, dtype=float)
    names = [f"Factor{i + 1}" for i in range(scores.shape[1])]
    factors = pd.DataFrame(scores, columns=names)
    standardized = (factors - factors.mean()) / factors.std()
    ranked = standardized.assign(
        # Positional assignment: the country labels carry a different index
        # than the freshly built score frame, so index alignment would
        # shift or drop them.
        Country=np.asarray(countries),
        # Only the factor columns take part in the sum.
        Score=standardized.sum(axis=1),
    )
    return ranked.sort_values(by=["Score"], ascending=False).reset_index(drop=True)


def plot_loadings(loadings):
    """Show an annotated heat map of a loadings DataFrame."""
    # Plotting libraries are imported lazily so that the numeric helpers
    # stay importable on machines without a plotting stack.
    import matplotlib.pyplot as plt
    import seaborn as sns

    plt.figure(figsize=(15, 8))
    sns.heatmap(loadings, cmap='coolwarm', xticklabels=True, yticklabels=True, annot=True)
    plt.title('Factor Loadings')
    plt.xlabel('Factors')
    plt.ylabel('Variables')
    plt.show()


def main():
    """Run the three factor extractions, plot the loadings and rank countries."""
    # scikit-learn's FactorAnalysis only performs maximum-likelihood
    # estimation and has no ``method`` argument; the two principal methods
    # are therefore computed with NumPy above.
    from sklearn.decomposition import FactorAnalysis

    # Reading data, converting to numeric, filling missing values with mean
    countries, features = load_indicators(DATA_PATH)
    # All three methods work on the correlation scale so that their
    # loadings are directly comparable.
    z_scores = standardize(features)
    corr = features.corr().to_numpy()

    # Factor analysis using the principal component method
    loadings = loadings_frame(
        varimax(principal_component_loadings(corr, N_FACTORS)), features.columns
    )
    print('\nFactor Loadings Using Principal Component Analysis:\n', loadings)

    # Factor analysis using the principal factor method
    loadings = loadings_frame(
        varimax(principal_factor_loadings(corr, N_FACTORS)), features.columns
    )
    print('\nFactor Loadings Using Principal Factor Analysis:\n', loadings)

    # Factor analysis using maximum likelihood estimation
    fa = FactorAnalysis(n_components=N_FACTORS, rotation='varimax')
    fa.fit(z_scores)
    loadings = loadings_frame(fa.components_.T, features.columns)
    print('\nFactor Loadings Using Maximum Likelihood Estimation:\n', loadings)

    # Plotting factor loadings (maximum-likelihood solution)
    plot_loadings(loadings)

    # Scoring and ranking countries on the maximum-likelihood factors
    factors = rank_countries(fa.transform(z_scores), countries)
    print('\nRanked Countries:\n', factors[['Country', 'Score']])


if __name__ == "__main__":
    main()
时间: 2023-12-30 16:04:42 浏览: 198
python数据分析与可视化 import pandas as pd import numpy as np import m
这段代码对WHO(世界卫生组织)提供的数据集进行处理,并做因子分析。首先读入数据,将非数值类型的数据转换为数值型数据,并用各列均值填补缺失值;然后分别使用主成分法、主因子法和最大似然估计三种方法进行因子分析,得到因子载荷量;接着绘制因子载荷量热力图,便于观察变量和因子的关系;最后将各因子得分标准化后求和,得到每个国家的综合评分,并据此对国家进行排名。
阅读全文