# Finish the figure: tick labels, title, y-axis label, legend, then display.

# Tick positions come from `index`; the labels are taken from `data.index`
# and rotated 90 degrees.
plt.xticks(index, data.index, rotation=90)

# `font` is passed as the font dictionary of both the title and the y label.
plt.title('The Proportion of Different Areas', fontdict=font)
plt.ylabel('Proportion', fontdict=font)

# Three legend entries laid out in three columns at the top centre, with a
# partially transparent frame.
plt.legend(
    ['NA_Sales', 'JP_Sales', 'EU_Sales'],
    loc='upper center',
    ncol=3,
    framealpha=0.6,
)

plt.show()

这几行代码用于设置图表的 x 轴刻度标签、标题、y 轴标签和图例，最后调用 plt.show() 显示图表：
- plt.xticks(index, data.index, rotation=90)：设置 x 轴刻度。index 是刻度位置，data.index 是刻度标签内容，rotation=90 表示把刻度标签旋转 90 度。
- plt.title('The Proportion of Different Areas', font)：把图表标题设置为 'The Proportion of Different Areas'，font 作为字体属性字典（fontdict）传入，用来设置标题的字体样式。
- plt.ylabel('Proportion', font)：把 y 轴标签设置为 'Proportion'，font 同样用来设置该标签的字体样式。
- plt.legend(['NA_Sales', 'JP_Sales', 'EU_Sales'], loc='upper center', ncol=3, framealpha=0.6)：添加图例。['NA_Sales', 'JP_Sales', 'EU_Sales'] 是图例各条目的标签，loc='upper center' 把图例放在上方居中的位置，ncol=3 表示图例分 3 列排列，framealpha=0.6 把图例背景框的透明度设置为 0.6。
- plt.show()：显示图表。这样就可以看到包含刻度标签、标题、y 轴标签和图例的完整图表了。

# Principal component analysis (PCA) and principal component regression on the
# Boston housing data: standardise 13 columns, fit a full PCA, plot the
# cumulative explained variance, the loadings of the first two components and
# the sample scores, then regress MEDV on the first principal component.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from statsmodels.api import OLS

# Load the data.
data = pd.read_csv('D:/pythonProject/venv/BostonHousing2.csv')

# Extract the 13 indicator columns at positions 5..17.  Their labels are kept
# so that the loading plot is annotated with the names of the columns that
# were actually analysed (previously it used data.columns[0:13]).
# NOTE(review): confirm that positions 5..17 are the intended predictors and
# do not include the target column.
feature_names = data.columns[5:18]
X = data.iloc[:, 5:18].values

# Standardise the features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Fit a PCA that keeps every component.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Eigenvalues and eigenvectors; after the transpose each column of
# `eigenvectors` belongs to one component and each row to one feature.
eigenvalues = pca.explained_variance_
eigenvectors = pca.components_.T

# Cumulative proportion of variance explained, plotted against the number of
# components 1..k (the x axis used to be range(6, 19), i.e. column positions).
# NOTE(review): a classical scree plot shows the eigenvalues themselves; the
# original cumulative curve and its labels are kept here.
variance_explained = np.cumsum(eigenvalues / np.sum(eigenvalues))
component_numbers = np.arange(1, len(variance_explained) + 1)
plt.plot(component_numbers, variance_explained, marker='o')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Proportion of Variance Explained')
plt.title('Scree Plot')
plt.show()

# Choose the smallest number of components whose cumulative share reaches
# 95 %.  A strict `<` avoids taking one component too many when the share is
# hit exactly; the result is capped at the number of available components.
n_components = int(np.sum(variance_explained < 0.95)) + 1
n_components = min(n_components, len(variance_explained))

# Loading plot of the first two components, one labelled point per feature.
loadings = pd.DataFrame(
    eigenvectors[:, 0:2],
    columns=['PC1', 'PC2'],
    index=feature_names,
)
plt.figure(figsize=(10, 6))
plt.scatter(loadings['PC1'], loadings['PC2'], alpha=0.7)
# Iterate over the values directly instead of relying on positional integer
# access (`series[i]`) on a Series that is indexed by feature name.
for feature, pc1, pc2 in zip(loadings.index, loadings['PC1'], loadings['PC2']):
    plt.text(pc1, pc2, feature)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Loading Plot')
plt.grid()
plt.show()

# Score plot.  The plot needs PC1 and PC2, so at least two score columns are
# kept even if a single component already reaches the variance threshold.
n_score_columns = max(n_components, 2)
scores = pd.DataFrame(
    X_pca[:, 0:n_score_columns],
    columns=['PC{}'.format(i + 1) for i in range(n_score_columns)],
)
plt.figure(figsize=(10, 6))
plt.scatter(scores['PC1'], scores['PC2'], alpha=0.7)
# NOTE(review): the column-name case must match the CSV header; some exports
# of this dataset use lowercase 'medv' — confirm.
for pc1, pc2, label in zip(scores['PC1'], scores['PC2'], data['MEDV']):
    plt.text(pc1, pc2, label)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Scores Plot')
plt.grid()
plt.show()

# Overall evaluation: rank the observations by their first-component score.
data['PC1_score'] = X_pca[:, 0]
sorted_data = data.sort_values(by='PC1_score')

# Principal component regression of MEDV on the first component.
Y = data['MEDV'].values.reshape(-1, 1)
X_pca_regression = X_pca[:, 0].reshape(-1, 1)
regression_model = LinearRegression()
regression_model.fit(X_pca_regression, Y)

# Regression equation.
intercept = regression_model.intercept_[0]
slope = regression_model.coef_[0][0]
equation = "MEDV = {:.2f} + {:.2f} * PC1".format(intercept, slope)
print("Regression Equation:", equation)

# Ordinary least squares estimate.  The intercept column is sized from the
# data instead of the hard-coded 506 rows.
X_const = np.concatenate((np.ones((len(Y), 1)), X_pca_regression), axis=1)
ols_model = OLS(Y, X_const).fit()
print("OLS Regression Summary:")
print(ols_model.summary())

这段代码使用 Python 对波士顿房价数据进行主成分分析（PCA）。代码读取名为 "BostonHousing2.csv" 的数据文件，按列位置提取第 6 至第 18 列共 13 个指标（data.iloc[:, 5:18]），先做标准化，再做主成分分析。其中，代码里标题为 "Scree Plot" 的图绘制的是累计方差贡献率随主成分个数的变化，用于选择主成分个数（代码以累计贡献率 95% 作为阈值）；载荷图展示各原始指标在前两个主成分上的载荷；得分图展示每个样本在前两个主成分上的得分，并用房价（MEDV）作为标注，用于观察样本的相对位置。最后，代码以第一主成分为自变量、MEDV 为因变量拟合主成分回归模型，并输出回归方程和最小二乘（OLS）估计结果。

# PCA workflow for the Boston housing data: standardise 13 columns, run a full
# PCA, draw a scree plot, a loading plot and a score plot, then fit a
# regression of medv on the first principal component.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from statsmodels.api import OLS

# Load the data.
data = pd.read_csv('D:\\pythonProject\\venv\\BostonHousing2.csv')

# Extract the 13 indicator columns at positions 5..17 and remember their
# labels, so the loading plot names the columns that were really analysed
# (it previously used data.columns[0:13]).
# NOTE(review): confirm that positions 5..17 are the intended predictors and
# do not include the target column.
feature_names = data.columns[5:18]
X = data.iloc[:, 5:18].values

# Standardise the features.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Principal component analysis with all components retained.
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Eigenvalues and eigenvectors; the transpose puts one component per column.
eigenvalues = pca.explained_variance_
eigenvectors = pca.components_.T

# Scree plot of the eigenvalues themselves.  The cumulative proportion is
# computed further down, where the number of components is chosen, so the
# y label names the eigenvalues rather than a cumulative proportion.
component_numbers = np.arange(1, len(eigenvalues) + 1)
plt.plot(component_numbers, eigenvalues, marker='o')
plt.xlabel('Number of Components')
plt.ylabel('Eigenvalue')
plt.title('Scree Plot')
plt.show()

# Choose the smallest number of components whose cumulative share of the
# variance reaches 95 %.  The strict `<` prevents an extra component when the
# share is met exactly; the count is capped at the components available.
variance_explained = np.cumsum(eigenvalues / np.sum(eigenvalues))
n_components = int(np.sum(variance_explained < 0.95)) + 1
n_components = min(n_components, len(variance_explained))

# Loading plot of the first two components.
loadings = pd.DataFrame(
    eigenvectors[:, 0:2],
    columns=['PC1', 'PC2'],
    index=feature_names,
)
plt.figure(figsize=(10, 6))
plt.scatter(loadings['PC1'], loadings['PC2'], alpha=0.7)
# Walk the rows by value; `series[i]` with an integer on a name-indexed Series
# depends on a positional fallback that pandas has deprecated.
for feature, pc1, pc2 in zip(loadings.index, loadings['PC1'], loadings['PC2']):
    plt.text(pc1, pc2, feature)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Loading Plot')
plt.grid()
plt.show()

# Score plot.  PC1 and PC2 are both required below, so keep at least two
# score columns even when one component already reaches the threshold.
n_score_columns = max(n_components, 2)
scores = pd.DataFrame(
    X_pca[:, 0:n_score_columns],
    columns=['PC{}'.format(i + 1) for i in range(n_score_columns)],
)
plt.figure(figsize=(10, 6))
plt.scatter(scores['PC1'], scores['PC2'], alpha=0.7)
# Each observation is annotated with its medv value.
for pc1, pc2, label in zip(scores['PC1'], scores['PC2'], data['medv']):
    plt.text(pc1, pc2, label)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('Scores Plot')
plt.grid()
plt.show()

# Overall evaluation: order the observations by their first-component score.
data['PC1_score'] = X_pca[:, 0]
sorted_data = data.sort_values(by='PC1_score')

# Principal component regression of medv on the first component.
Y = data['medv'].values.reshape(-1, 1)
X_pca_regression = X_pca[:, 0].reshape(-1, 1)
regression_model = LinearRegression()
regression_model.fit(X_pca_regression, Y)

# Regression equation.
intercept = regression_model.intercept_[0]
slope = regression_model.coef_[0][0]
equation = "medv = {:.2f} + {:.2f} * PC1".format(intercept, slope)
print("Regression Equation:", equation)

# Ordinary least squares estimate; the intercept column follows the number of
# observations instead of assuming exactly 506 rows.
X_const = np.concatenate((np.ones((len(Y), 1)), X_pca_regression), axis=1)
ols_model = OLS(Y, X_const).fit()
print("OLS Regression Summary:")
print(ols_model.summary())

这段代码用于做主成分分析（PCA），目的是把原始数据转换为较少的几个维度，便于分析。具体来说，代码对 Boston 房价数据集中按列位置选取的 13 个指标（第 6 至第 18 列）进行标准化处理，然后使用 PCA 进行降维。在降维的过程中，代码画出了碎石图（Scree Plot，即各主成分的特征值）来帮助选择主成分的个数。然后，代码画出了主成分的载荷图和得分图：载荷图用于观察各原始指标与前两个主成分的关系，得分图用于观察各样本在前两个主成分上的分布。最后，代码以第一主成分为自变量建立主成分回归模型，并用最小二乘估计来评估它对房价（medv）的影响，输出回归方程和估计结果。

阅读全文

# Finish the figure: tick labels, title, y-axis label, legend, then display.

# Tick positions come from `index`; the labels are taken from `data.index`
# and rotated 90 degrees.
plt.xticks(index, data.index, rotation=90)

# `font` is passed as the font dictionary of both the title and the y label.
plt.title('The Proportion of Different Areas', fontdict=font)
plt.ylabel('Proportion', fontdict=font)

# Three legend entries laid out in three columns at the top centre, with a
# partially transparent frame.
plt.legend(
    ['NA_Sales', 'JP_Sales', 'EU_Sales'],
    loc='upper center',
    ncol=3,
    framealpha=0.6,
)

plt.show()

相关推荐

基于plt.title无法显示中文的快速解决

解决python中显示图片的plt.imshow plt.show()内存泄漏问题

解决Python plt.savefig 保存图片时一片空白的问题

The Art of Threshold Tuning: Tips for Enhancing the Performance of Classification Models

Dealing with Imbalanced Data: 7 Strategies to Overcome the Challenge

Demystifying the Confusion Matrix: How to Evaluate the Actual Performance of Classification Models

Beyond Precision and Recall: The Application of F1 Score and ROC Curve

Data Visualization and Chart Display in DBeaver

Data Visualization and Chart Presentation in Jupyter Notebook

【Advanced】Advanced Skills for Data Parsing and Extraction

Practical Applications of Deep Learning in Jupyter Notebook

【Practical Exercise】Design and Simulation of a Basic QPSK Modulation System in MATLAB

7. 传染病模型中的常微分方程数值解法介绍

Numpy.linalg高级应用：奇异值分解（SVD）的深度解析

plt.scatter对于x和y都是二分类 variable的情况，怎么用s展示出聚集在每个y下的x的propor

大家在看

网络游戏中人工智能NPC.pdf

c语言编写的jpeg解码源代码

Noise-Pollution-Monitoring-Device

ggplot_Piper

海康最新视频控件_独立进程.rar

最新推荐

sblim-gather-provider-2.2.8-9.el7.x64-86.rpm.tar.gz

基于pringboot框架的图书进销存管理系统的设计与实现（Java项目编程实战+完整源码+毕设文档+sql文件+学习练手好项目）.zip

2024中国在人工智能领域的创新能力如何研究报告.pdf

安全生产_人脸识别_移动目标跟踪_智能管控平台技术实现与应用_1741777778.zip

人脸识别_TF2_Facenet_训练预测应用仓库_1741778670.zip

虚拟串口软件：实现IP信号到虚拟串口的转换

【Python进阶篇】：掌握这些高级特性，让你的编程能力飞跃提升

后端调用ragflow api

IE6下实现PNG图片背景透明的技术解决方案

【欧姆龙触摸屏故障诊断全攻略】