import pandas as pd
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Load the wine dataset into a DataFrame (features) and a Series (labels).
data = load_wine()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Baseline: logistic regression on all original features.
# NOTE(review): with unscaled features the default solver settings may emit a
# ConvergenceWarning -- consider scaling the data or raising max_iter.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and evaluate.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print('准确率:', accuracy)
# The report was previously computed and then discarded; show it.
print(report)

# Feature selection. The selector is fitted on the training rows only, so the
# held-out rows cannot influence which features are kept (avoids leakage).
selector = SelectKBest(f_classif, k=6)
X_train_selected = selector.fit_transform(X_train, y_train)
X_test_selected = selector.transform(X_test)
print('所选特征:', selector.get_support())

# Dimensionality reduction, likewise fitted on the training rows only.
# The same train/test split is reused for the comparison below: re-splitting
# with the same test_size and random_state would select the same rows anyway.
pca = PCA(n_components=2)
X_train = pca.fit_transform(X_train_selected)
X_test = pca.transform(X_test_selected)


def _fit_and_score(model, X_train, X_test, y_train, y_test, modelname):
    """Fit a fresh ``model()`` instance, print its test accuracy, return it.

    Args:
        model: Estimator class (not an instance); called with no arguments.
        X_train, X_test: Training and test feature matrices.
        y_train, y_test: Training and test labels.
        modelname: Label printed in front of the accuracy.

    Returns:
        The fitted estimator instance.
    """
    mode = model()
    mode.fit(X_train, y_train)
    y_pred = mode.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(modelname, accuracy)
    return mode


def Sf(model, X_train, X_test, y_train, y_test, modelname):
    """Fit and score an estimator, then print its ``feature_importances_``.

    Only usable with estimators that expose ``feature_importances_`` after
    fitting (the script calls it with the decision tree and random forest).
    """
    mode = _fit_and_score(model, X_train, X_test, y_train, y_test, modelname)
    importance = mode.feature_importances_
    print(importance)


def Sf1(model, X_train, X_test, y_train, y_test, modelname):
    """Fit and score an estimator, printing only its test accuracy."""
    _fit_and_score(model, X_train, X_test, y_train, y_test, modelname)


# Compare several classifiers on the two PCA components.
for modelname, estimator in [
    ('支持向量机', SVC),
    ('逻辑回归', LogisticRegression),
    ('高斯朴素贝叶斯算法训练分类器', GaussianNB),
    ('K近邻分类', KNeighborsClassifier),
]:
    Sf1(estimator, X_train, X_test, y_train, y_test, modelname)

# Tree-based models additionally report their feature importances.
for modelname, estimator in [
    ('决策树分类', DecisionTreeClassifier),
    ('随机森林分类', RandomForestClassifier),
]:
    Sf(estimator, X_train, X_test, y_train, y_test, modelname)

# Follow-up request from the original post: add a plot to visualize the results.
时间: 2023-12-24 10:30:35 浏览: 51
抱歉,由于我是一个纯文本AI,无法进行画图展示。但是,你可以使用Python中的matplotlib库进行数据可视化。例如,你可以使用以下代码绘制特征重要性条形图:
import matplotlib.pyplot as plt

# `mode` is the fitted tree-based estimator available inside Sf.
importance = mode.feature_importances_
# The estimator is trained on the PCA output, so there is one importance value
# per principal component -- not one per original column of X. Labelling the
# bars with X.columns would make plt.barh fail on mismatched lengths.
features = [f'PC{i + 1}' for i in range(len(importance))]
plt.barh(features, importance)
plt.title('Feature Importance')
plt.xlabel('Importance')
plt.show()
这段代码可以在Sf函数中的print(importance)下方添加,并在运行时展示特征重要性条形图。
相关问题
import pandas as pd from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.preprocessing import StandardScaler
这段代码主要是导入了一些常用的数据处理和机器学习库,包括pandas、sklearn.model_selection、sklearn.neighbors和sklearn.preprocessing。其中pandas库用于数据处理,sklearn.model_selection库用于模型选择和数据集划分,sklearn.neighbors库用于K近邻分类算法,sklearn.preprocessing库用于数据预处理(例如使用StandardScaler进行标准化)。
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
6 61 62 63 64 65 66 67 68 69 70 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
7 71 72 73 74 75 76 77 78 79 80 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
8 81 82 83 84 85 86 87 88 89 90 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
9 91 92 93 94 95 96 97 98 99 100 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
```
这个结果就是将列名大于 50 小于 60 的部分删除后的新矩阵。
from sklearn.datasets import load_boston import numpy as np import pandas as pd import matplotlib.pyplot as plt from sklearn.linear_model import LassoCV import seaborn as sns from sklearn.preprocessing import StandardScaler from sklearn.model_selection import train_test_split
这段代码主要是导入了一些Python的第三方库,包括:
- sklearn.datasets:用于加载波士顿房价数据集。
- numpy:用于进行科学计算,如数组操作、数值计算等。
- pandas:用于数据处理和分析,如数据读取、转换、合并等。
- matplotlib.pyplot:用于绘制数据可视化图表,如折线图、散点图等。
- sklearn.linear_model:用于线性回归模型的构建和训练。
- seaborn:基于matplotlib的数据可视化库,提供更加美观的图表风格。
- sklearn.preprocessing:用于数据预处理,如标准化、归一化、缺失值处理等。
- sklearn.model_selection:用于模型训练和评估的数据集划分、交叉验证等操作。
其中,load_boston函数用于加载波士顿房价数据集(注意:该函数已在scikit-learn 1.2中被移除,新版本中无法再直接导入),LassoCV是带L1正则化的线性回归模型,它通过交叉验证自动选择正则化强度,并可将部分特征的系数压缩为零,从而起到特征选择的作用,StandardScaler用于对数据进行标准化处理,train_test_split用于将数据集划分为训练集和测试集。