plt.plot(a, train_score, label='train_score')

# Sweep the maximum depth of a decision tree and record the train/test error
# rate at every depth, then plot both curves to show where overfitting starts.
# Expects np, plt, DecisionTreeClassifier, accuracy_score and the
# x_train / x_test / y_train / y_test splits to be defined by earlier code.
depth = np.arange(1, 15)  # depths 1..14
err_train_list = []
err_test_list = []
clf = DecisionTreeClassifier(criterion='entropy')
for d in depth:
    clf.set_params(max_depth=d)
    clf.fit(x_train, y_train)

    # Error rate = 1 - accuracy, on the data the tree was fitted on ...
    y_train_pred = clf.predict(x_train)
    err_train = 1 - accuracy_score(y_train, y_train_pred)
    err_train_list.append(err_train)

    # ... and on the held-out data.
    y_test_pred = clf.predict(x_test)
    err_test = 1 - accuracy_score(y_test, y_test_pred)
    err_test_list.append(err_test)
    print(d, '测试集错误率：%.2f%%' % (100 * err_test))

plt.figure(facecolor='w')
plt.plot(depth, err_test_list, 'ro-', markeredgecolor='k', lw=2, label='测试集错误率')
plt.plot(depth, err_train_list, 'go-', markeredgecolor='k', lw=2, label='训练集错误率')
plt.xlabel('决策树深度', fontsize=13)
plt.ylabel('错误率', fontsize=13)
plt.legend(loc='lower left', fontsize=13)
plt.title('决策树深度与过拟合', fontsize=15)
# The on/off flag is passed positionally: the ``b=`` keyword was renamed to
# ``visible=`` in newer Matplotlib, so the positional form works on both.
plt.grid(True, ls=':', color='#606060')
plt.show()

代码首先定义了一个从1到14的深度范围，并初始化了两个空列表err_train_list和err_test_list来保存训练集和测试集的错误率。然后创建了一个DecisionTreeClassifier对象clf，并设置其criterion参数为'entropy'，即使...

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# 1. Data preparation
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test_noLabel.csv')

# Fill missing values with the column mean; numeric_only avoids a TypeError
# when the frame also holds non-numeric columns.
train_data.fillna(train_data.mean(numeric_only=True), inplace=True)
test_data.fillna(test_data.mean(numeric_only=True), inplace=True)

# 2. Feature engineering
features = train_data.drop(['Label', 'ID'], axis=1)
# Remember the column names now: StandardScaler returns a bare ndarray, so
# ``X_train.columns`` is no longer available after scaling.
feature_names = features.columns
labels = train_data['Label']

# The test file carries no labels, so accuracy cannot be measured on it.
# Hold out part of the labelled data for evaluation instead.
X_train, X_val, y_train, y_val = train_test_split(
    features, labels, test_size=0.2, random_state=42
)
X_test = test_data.drop('ID', axis=1)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# 3. Build the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# 4. Train the model
model.fit(X_train, y_train)

# 5. Predict the unlabelled test set
y_pred = model.predict(X_test)

# 6. Save the predictions
df_result = pd.DataFrame({'ID': test_data['ID'], 'Label': y_pred})
df_result.to_csv('forecast_result.csv', index=False)

# 7. Evaluate on the training part and on the held-out validation part
y_train_pred = model.predict(X_train)
y_val_pred = model.predict(X_val)
print('训练集准确率：', accuracy_score(y_train, y_train_pred))
print('验证集准确率：', accuracy_score(y_val, y_val_pred))
print(classification_report(y_val, y_val_pred))

# 8. Bar chart of feature importances
feature_importances = pd.Series(model.feature_importances_, index=feature_names)
feature_importances = feature_importances.sort_values(ascending=False)
plt.figure(figsize=(10, 6))
sns.barplot(x=feature_importances, y=feature_importances.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.title('Visualizing Important Features')
plt.show()

# 9. Class balance of the labelled data
train_data['Label'].value_counts().plot(kind='bar', color=['blue', 'red'])
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Frequency')
plt.show()

train_data['Label'].value_counts().plot(kind='bar', color=['blue', 'red']) plt.title('Class Distribution') plt.xlabel('Class') plt.ylabel('Frequency') plt.show() 这段代码会生成一个柱形图，展示...

# Reader request attached to this snippet: "add a comment to every line".
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_curve,
    roc_auc_score,
)

# 1. Read the data and split it into features / target, then train / test
data = pd.read_csv('data.csv')
X = data.drop('target', axis=1)
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 2. Fit a logistic-regression classifier on the training split
model = LogisticRegression()
model.fit(X_train, y_train)

# 3. Hard labels and the probability of the second class for the test split
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]

# 4. Draw the 2x2 confusion matrix as a heat map
confusion_mat = confusion_matrix(y_test, y_pred)
plt.imshow(confusion_mat, cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(2)
plt.xticks(tick_marks, ['0', '1'])
plt.yticks(tick_marks, ['0', '1'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
# Write each count into its cell; white text on dark cells, black on light.
half_max = confusion_mat.max() / 2
for row, col in np.ndindex(2, 2):
    count = confusion_mat[row, col]
    text_color = 'white' if count > half_max else 'black'
    plt.text(col, row, count, ha='center', va='center', color=text_color)
plt.show()

# 5. Precision, recall and F1 score
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# 6. AUC and the ROC curve (with the chance diagonal for reference)
auc = roc_auc_score(y_test, y_prob)
fpr, tpr, thresholds = roc_curve(y_test, y_prob)
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc="lower right")
plt.show()

# 7. Print the metrics
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)
print('AUC:', auc)

plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % auc) plt.plot([0, 1], [0, 1], 'k--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate')...

# Reader question attached to this snippet: "how do I predict the sales
# volume for a price of 15 (万)?" -- answered near the end of the script.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import joblib

# Raw string: the original literal relied on invalid escapes such as "\A".
df = pd.read_csv(r'C:\Users\ASUS\Desktop\AI\实训\汽车销量数据new.csv', sep=',', header=0)
plt.rcParams['font.sans-serif'] = ['SimHei']

plt.figure(figsize=(10, 4))
ax1 = plt.subplot(121)
ax1.scatter(df['price'], df['quantity'], c='b')

# Min-max normalise every column, but keep the raw bounds so that new inputs
# can be scaled the same way and predictions mapped back to real units.
raw_min = df.min()
raw_max = df.max()
df = (df - raw_min) / (raw_max - raw_min)
df.to_csv('quantity.txt', sep='\t', index=False)

# 80 / 20 random split
train_data = df.sample(frac=0.8, replace=False)
test_data = df.drop(train_data.index)
x_train = train_data['price'].values.reshape(-1, 1)
y_train = train_data['quantity'].values
x_test = test_data['price'].values.reshape(-1, 1)
y_test = test_data['quantity'].values

# model=SGDRegressor(max_iter=500,learning_rate='constant',eta0=0.01)
model = LinearRegression()
# Train the model
model.fit(x_train, y_train)
# Report the fit on the training data
pre_score = model.score(x_train, y_train)
print('训练集准确性得分=', pre_score)
print('coef=', model.coef_, 'intercept=', model.intercept_)
# Persist the trained model
joblib.dump(model, 'LinearRegression.model')

ax2 = plt.subplot(122)
# This panel shows the training data (the original label said "test set").
ax2.scatter(x_train, y_train, label='训练集')
ax2.plot(x_train, model.predict(x_train), color='blue')
# The axes are price / quantity; the original labels (seniority / salary)
# were left over from a different exercise.
ax2.set_xlabel('价格')
ax2.set_ylabel('销量')
plt.legend(loc='upper left')

model = joblib.load('LinearRegression.model')
y_pred = model.predict(x_test)  # predictions for the test split
print('测试集准确性得分=%.5f' % model.score(x_test, y_test))
# Test loss (mean squared error)
MSE = np.mean((y_test - y_pred) ** 2)
print('损失MSE={:.5f}'.format(MSE))

# Predict the sales volume at a raw price of 15.
# NOTE(review): assumes the price column is expressed in 万 -- confirm.
price_raw = 15
price_scaled = (price_raw - raw_min['price']) / (raw_max['price'] - raw_min['price'])
quantity_scaled = model.predict(np.array([[price_scaled]]))[0]
quantity_pred = (
    quantity_scaled * (raw_max['quantity'] - raw_min['quantity'])
    + raw_min['quantity']
)
print('价格为15万时的预测销量=%.2f' % quantity_pred)

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(figsize=(10, 4))
ax1 = plt.subplot(121)
plt.scatter(x_test, y_test, label='测试集')
plt.plot(x_test, y_pred, 'r', label='预测回归线')
ax1.set_xlabel('价格')
ax1.set_ylabel('销量')
plt.legend(loc='upper left')

ax2 = plt.subplot(122)
x = range(0, len(y_test))
plt.plot(x, y_test, 'g', label='真实值')
plt.plot(x, y_pred, 'r', label='预测值')
ax2.set_xlabel('样本序号')
ax2.set_ylabel('销量')
plt.legend(loc='upper right')
plt.show()

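要预测价格为15万时的销量，可以使用训练好的线性回归模型进行预测。注意：模型是在 min-max 归一化后的数据上训练的，因此应先用归一化之前 price 列的最小值和最大值把15换算到同一尺度，预测结果也需要按 quantity 列的最小值和最大值反归一化，才能得到实际销量。首先需要将价格转换为模型可接受的输入格式，即将其转换为一个形状为(1,1)的二维数组： python price = np.array([[15]]) ...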

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from pyswarm import pso
import matplotlib as mpl  # was missing although mpl.rcParams is used below
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

file = "zhong.xlsx"
data = pd.read_excel(file)  # reading file
X = np.array(data.loc[:, '种植密度':'有效积温'])
# reshape(-1, 1) instead of the hard-coded (185, 1) so any row count works
y = np.array(data.loc[:, '产量']).reshape(-1, 1)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=10)

# Separate scalers for features and target, fitted on the training split only.
# The original refitted one scaler four times, so the test data was scaled
# with its own statistics and inverse_transform used the wrong parameters.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = x_scaler.fit_transform(X_train)
X_test = x_scaler.transform(X_test)
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)
SC = y_scaler  # kept for code that still refers to SC for the target
print("X_train.shape:", X_train.shape)
print("X_test.shape:", X_test.shape)
print("y_train.shape:", y_train.shape)
print("y_test.shape:", y_test.shape)


# BP neural-network model
def nn_model(X):
    """Build and compile a two-hidden-layer regression network.

    X is the PSO particle. When it is a 2-element vector its values are
    rounded and used as the two hidden-layer widths (presumably what the
    [5, 30] bounds below are meant to search -- confirm). Any other input
    falls back to the original fixed widths 8 and 12.
    """
    sizes = np.ravel(X)
    if sizes.size == 2:
        hidden1, hidden2 = (max(1, int(round(float(s)))) for s in sizes)
    else:
        hidden1, hidden2 = 8, 12
    model = Sequential()
    model.add(Dense(hidden1, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(hidden2, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# Fitness function
def fitness_func(X):
    """Train a network for particle X and return its test-set MSE."""
    model = nn_model(X)
    model.fit(X_train, y_train, epochs=60, verbose=2)
    score = model.evaluate(X_test, y_test, verbose=2)
    print(score)
    # pso minimises the returned value; the original returned None.
    return score


# Lower and upper bounds of the two optimised variables
lb = [5, 5]
ub = [30, 30]
# Particle-swarm search over the network configuration.
# NOTE(review): every evaluation trains a network for 60 epochs; consider
# passing smaller swarmsize / maxiter values to pso if this is too slow.
result = pso(fitness_func, lb, ub)
best_position, best_score = result[0], result[1]
# Report the best particle and its objective value
print('最优解:', best_position)
print('最小函数值:', best_score)

mpl.rcParams["font.family"] = "SimHei"
mpl.rcParams["axes.unicode_minus"] = False

# Predicted vs. true values, mapped back to the original target units
model = nn_model(best_position)
model.fit(X_train, y_train, epochs=60, verbose=2)
y_pred = model.predict(X_test)
y_true = y_scaler.inverse_transform(y_test)
y_pred = y_scaler.inverse_transform(y_pred)
plt.figure()
plt.plot(y_true, "bo-", label='真实值')
plt.plot(y_pred, "ro-", label='预测值')
plt.title('神经网络预测展示')
plt.xlabel('序号')
plt.ylabel('产量')
plt.legend(loc='upper right')
plt.show()
# Compare like with like: both arrays are in original units here (the
# original compared scaled y_test against unscaled y_pred).
print("R2 = ", r2_score(y_true, y_pred))  # R2

# Training / validation loss curves
model = nn_model(best_position)
history = model.fit(X_train, y_train, epochs=60, validation_data=(X_test, y_test), verbose=2)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()
mae = mean_absolute_error(y_true, y_pred)
print('MAE: %.3f' % mae)
mse = mean_squared_error(y_true, y_pred)
print('mse: %.3f' % mse)

最后，你在定义适应度函数时，应该将X作为参数传递给nn_model函数，而不是直接使用全局变量X_train和y_train。应该修改为： def fitness_func(X): model = nn_model(X) model.fit(X_train, y_train, ...

# Reader request attached to this snippet: "explain what this code does".
# Reduce X to 17 principal components, train a random forest on them and
# evaluate it with accuracy, a confusion matrix, AUC and a ROC plot.
# Expects X, y, np, metrics, train_test_split, RandomForestClassifier and
# confusion_matrix to be provided by earlier code.
from sklearn.decomposition import PCA
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

pca = PCA(n_components=17)
pca.fit(X)
print(pca.explained_variance_ratio_)
# Output recorded with the original snippet:
# [0.17513053,0.12941834,0.11453698,0.07323991,0.05889187,0.05690304,
#  0.04869476,0.0393374,0.03703477,0.03240863,0.03062932,0.02574137,
#  0.01887462,0.0180381,0.01606983,0.01453912,0.01318003]
# Bare expressions only display inside a notebook, so print them explicitly.
print(sum(pca.explained_variance_ratio_))

X_NEW = pca.transform(X)
print(X_NEW.shape)

X_train, X_test, y_train, y_test = train_test_split(X_NEW, y, test_size=0.20, random_state=123)

rf = RandomForestClassifier(max_depth=5)
rf.fit(X_train, y_train)
y_prob = rf.predict_proba(X_test)[:, 1]
y_pred = np.where(y_prob > 0.5, 1, 0)

# Score against the true labels; scoring against y_pred (as the original
# did) compares the model with its own output.
print(rf.score(X_test, y_test))
print(confusion_matrix(y_test, y_pred))
# AUC is a ranking metric: feed it the probabilities, not thresholded labels.
print(metrics.roc_auc_score(y_test, y_prob))

false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.figure(figsize=(10, 10))
plt.title('ROC')
plt.plot(false_positive_rate, true_positive_rate, color='red', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], linestyle='--')  # chance diagonal
plt.axis('tight')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

接着，使用fit方法对训练数据进行训练，并且使用predict_proba方法获取测试集的预测概率，使用np.where方法将预测概率大于0.5的标记为1，否则标记为0，并且使用score方法计算测试集的准确率，使用confusion_matrix...

# Reader request attached to this snippet: "explain what this code does".
# Train a depth-limited decision tree and evaluate it with accuracy, a
# confusion matrix, AUC and a ROC plot.
# Expects X_train, X_test, y_train, y_test, np, metrics,
# DecisionTreeClassifier and confusion_matrix from earlier code.
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

dt = DecisionTreeClassifier(max_depth=5)
dt.fit(X_train, y_train)
y_prob = dt.predict_proba(X_test)[:, 1]
y_pred = np.where(y_prob > 0.5, 1, 0)

# Score against the true labels; scoring against y_pred (as the original
# did) compares the model with its own output.
print(dt.score(X_test, y_test))
print(confusion_matrix(y_test, y_pred))
# AUC is a ranking metric: feed it the probabilities, not thresholded labels.
print(metrics.roc_auc_score(y_test, y_prob))

false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, y_prob)
roc_auc = auc(false_positive_rate, true_positive_rate)

plt.figure(figsize=(10, 10))
plt.title('ROC')
plt.plot(false_positive_rate, true_positive_rate, color='red', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], linestyle='--')  # chance diagonal
plt.axis('tight')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

其中 max_depth 参数限制了决策树的最大深度，X_train 和 y_train 是训练数据集，X_test 是测试数据集。模型预测结果包括了概率（y_prob）和分类标签（y_pred），在计算模型得分（score）、混淆矩阵（confusion_...

# Reader request attached to this snippet: "what does this code do? explain
# it in detail".
# Learning curve over the regularisation strength C: for 19 values of C in
# [0.05, 1], fit an L1- and an L2-penalised logistic regression and record
# train / test accuracy, then plot the four curves.
# Expects np, plt, LR (LogisticRegression), accuracy_score and the
# X_train / X_test / y_train / y_test splits from earlier code.
c_values = np.linspace(0.05, 1, 19)  # computed once, reused for the x axis

l1 = []
l2 = []
l1_test = []
l2_test = []
for i in c_values:
    # L1 penalty
    lr1 = LR(penalty='l1', C=i, solver='liblinear', max_iter=1000, random_state=0)
    lr1_fit = lr1.fit(X_train, y_train)
    l1.append(accuracy_score(y_train, lr1_fit.predict(X_train)))
    l1_test.append(accuracy_score(y_test, lr1_fit.predict(X_test)))

    # L2 penalty
    lr2 = LR(penalty='l2', C=i, solver='liblinear', max_iter=1000, random_state=0)
    lr2_fit = lr2.fit(X_train, y_train)
    l2.append(accuracy_score(y_train, lr2_fit.predict(X_train)))
    l2_test.append(accuracy_score(y_test, lr2_fit.predict(X_test)))

graph = [l1, l2, l1_test, l2_test]
color = ['red', 'green', 'yellow', 'gray']
label = ['l1', 'l2', 'l1_test', 'l2_test']

fig = plt.figure(figsize=(10, 6))
for scores, line_color, line_label in zip(graph, color, label):
    plt.plot(c_values, scores, color=line_color, label=line_label)
plt.legend(loc=0)
plt.show()

这是一段Python代码，其中定义了四个空列表l1、l2、l1_test和l2_test。接下来使用numpy库中的linspace函数生成一个从0.05到1的等差数列，共19个数，并将每个数依次赋值给变量i。

# Name -> pipeline lookup; the pipeline objects are defined elsewhere.
dict_of_models = {
    'RandomForest': RandomPipeline,
    'AdaBoost': AdaPipeline,
    'SVM': SVMPipeline,
    'KNN': KNNPipeline,
    'LR': LRPipeline,
}


def evaluation(model):
    """Print test-set metrics for a fitted model and plot its learning curve.

    The model must expose ``predict_proba``. Uses the module-level
    X_train / y_train / X_test / y_test splits. Prints accuracy, the
    confusion matrix and the classification report, then shows the mean
    train / validation F1 score for ten training-set sizes (4-fold CV).
    """
    # Class probabilities for the test set
    y_pred_proba = model.predict_proba(X_test)
    # argmax yields a column index, which equals the class label only when
    # the labels happen to be 0..k-1; map through classes_ when available.
    best_column = np.argmax(y_pred_proba, axis=1)
    classes = getattr(model, 'classes_', None)
    y_pred = best_column if classes is None else np.asarray(classes)[best_column]

    print('Accuracy = ', accuracy_score(y_test, y_pred))
    print('-')
    print(confusion_matrix(y_test, y_pred))
    print('-')
    print(classification_report(y_test, y_pred))
    print('-')

    N, train_score, test_score = learning_curve(
        model, X_train, y_train,
        cv=4, scoring='f1', train_sizes=np.linspace(0.1, 1, 10),
    )
    plt.figure(figsize=(5, 5))
    plt.plot(N, train_score.mean(axis=1), label='train score')
    plt.plot(N, test_score.mean(axis=1), label='validation score')
    plt.legend()
    plt.show()

这段代码定义了一个字典 dict_of_models，其中包含了几个分类模型和对应的 Pipeline 对象。然后定义了一个名为 evaluation 的函数，函数接受一个模型对象作为参数。在函数内部，首先使用传入的模型对象对测试集 X_...

# np, pd and plt were used but never imported in the original snippet.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import sklearn.neural_network as net
import matplotlib.cm as cm

file_path = r'D:\anaconda3\temp\邮政编码数据.txt'
data = pd.read_csv(file_path, header=0)
print(data.shape)
print(data.head())
X = data.iloc[:, 1:-1]  # feature columns
Y = data.iloc[:, 0]     # first column is the label
print(X.shape)

# Show 25 randomly chosen samples as 16x16 grey-scale images
np.random.seed(1)
ids = np.random.choice(len(Y), 25)
plt.figure(figsize=(8, 8))
for i, item in enumerate(ids):
    img = np.array(X.iloc[item]).reshape(16, 16)
    plt.subplot(5, 5, i + 1)
    plt.imshow(img, cmap=cm.gray_r)
plt.show()  # the original wrote ``plt.show`` without calling it

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.60, random_state=123)

# Train / test error of a one-hidden-layer MLP for every combination of
# hidden-node count and activation function.
nodes = np.arange(1, 20, 2)
acts = ['relu', 'logistic']
errTrain = np.zeros((len(nodes), 2))
errTest = np.zeros((len(nodes), 2))
for i, node in enumerate(nodes):
    for j, act in enumerate(acts):
        NeuNet = net.MLPClassifier(hidden_layer_sizes=(node,), activation=act,
                                   random_state=1, max_iter=300)
        NeuNet.fit(X_train, Y_train)
        errTrain[i, j] = 1 - NeuNet.score(X_train, Y_train)
        errTest[i, j] = 1 - NeuNet.score(X_test, Y_test)

plt.plot(nodes, errTest[:, 0], label='relu_test', linestyle='-')
plt.plot(nodes, errTest[:, 1], label='logistic_test', linestyle='-.')
plt.plot(nodes, errTrain[:, 0], label='relu_train', linestyle='-', linewidth=0.5)
plt.plot(nodes, errTrain[:, 1], label='logistic_train', linestyle='-', linewidth=0.5)
plt.title('2012010812')
plt.xlabel('hidden node numbers')
plt.ylabel('erros')
plt.xticks(nodes)
plt.legend()
plt.show()

首先，第一行 from sklearn.model_selection import train_test_split import sklearn.neural_network as net import matplotlib.cm as cm 应该换行，每个语句单独占一行。其次，在这个代码块中，没有导入 pandas ...

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from pyswarm import pso
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

file = "zhong.xlsx"
data = pd.read_excel(file)  # reading file
X = np.array(data.loc[:, '种植密度':'有效积温'])
# reshape(-1, 1) instead of the hard-coded (185, 1) so any row count works
y = np.array(data.loc[:, '产量']).reshape(-1, 1)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=10)

# One scaler per array kind, fitted on the training split only. Refitting a
# single scaler four times (as before) scales the test data with its own
# statistics and makes inverse_transform use the wrong parameters.
x_scaler = StandardScaler()
y_scaler = StandardScaler()
X_train = x_scaler.fit_transform(X_train)
X_test = x_scaler.transform(X_test)
y_train = y_scaler.fit_transform(y_train)
y_test = y_scaler.transform(y_test)
SC = y_scaler  # kept for code that still refers to SC for the target
print("X_train.shape:", X_train.shape)
print("X_test.shape:", X_test.shape)
print("y_train.shape:", y_train.shape)
print("y_test.shape:", y_test.shape)


# BP neural-network model
def nn_model(X):
    """Build and compile a two-hidden-layer regression network.

    X is the PSO particle. A 2-element vector is rounded and used as the two
    hidden-layer widths (presumably what the [5, 30] bounds below are meant
    to search -- confirm); any other input keeps the original widths 8 / 12.
    """
    sizes = np.ravel(X)
    if sizes.size == 2:
        hidden1, hidden2 = (max(1, int(round(float(s)))) for s in sizes)
    else:
        hidden1, hidden2 = 8, 12
    model = Sequential()
    model.add(Dense(hidden1, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(hidden2, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model


# Fitness function
def fitness_func(X):
    """Train a network for particle X and return its test-set MSE."""
    model = nn_model(X)
    model.fit(X_train, y_train, epochs=60, verbose=2)
    score = model.evaluate(X_test, y_test, verbose=2)
    return score


# Lower and upper bounds of the two optimised variables
lb = [5, 5]
ub = [30, 30]
# Particle-swarm search over the network configuration
result = pso(fitness_func, lb, ub)
best_position, best_score = result[0], result[1]
# Report the best particle and its objective value
print('最优解:', best_position)
print('最小函数值:', best_score)

# Predicted vs. true values, mapped back to the original target units
model = nn_model(best_position)
model.fit(X_train, y_train, epochs=60, verbose=0)
y_pred = model.predict(X_test)
y_true = y_scaler.inverse_transform(y_test)
y_pred = y_scaler.inverse_transform(y_pred)
plt.figure()
plt.plot(y_true, "bo-", label='真实值')
plt.plot(y_pred, "ro-", label='预测值')
plt.title('神经网络预测展示')
plt.xlabel('序号')
plt.ylabel('产量')
plt.legend(loc='upper right')
plt.show()

# Training / validation loss curves
model = nn_model(best_position)
history = model.fit(X_train, y_train, epochs=60, validation_data=(X_test, y_test), verbose=2)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()

这是一个使用改进的粒子群算法优化BP神经网络预测模型的代码，通过读取Excel文件并将其分为训练集和测试集，利用StandardScaler进行数据标准化处理。然后定义BP神经网络模型和适应度函数，并定义变量的下限和上限。...

# Reader request attached to this snippet: "fix this code" (the text was
# garbled by OCR: l/1/I confusion, "pit" for "plt", broken format braces).
# Fit an L1-penalised logistic regression for the current C, report its
# train / test accuracy and plot the learned coefficients per feature.
# Expects C, marker, X_train, X_test, y_train, y_test, cancer, plt and
# LogisticRegression from the surrounding code.
# NOTE(review): the fit / print / plot lines presumably sit inside a loop
# over several (C, marker) pairs -- confirm against the full script.
lr_l1 = LogisticRegression(
    C=C, solver='liblinear', penalty="l1", max_iter=5000
).fit(X_train, y_train)
print("Training accuracy of l1 logreg with C={:.3f}: {:.2f}".format(
    C, lr_l1.score(X_train, y_train)))
print("Test accuracy of l1 logreg with C={:.3f}: {:.2f}".format(
    C, lr_l1.score(X_test, y_test)))
plt.plot(lr_l1.coef_.T, marker, label="C={:.3f}".format(C))

plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
# Draw the zero line across the current x range, then restore the limits
# that hlines may have widened.
xlims = plt.xlim()
plt.hlines(0, xlims[0], xlims[1])
plt.xlim(xlims)
plt.xlabel("Feature")
plt.ylabel("Coefficient magnitude")
plt.ylim(-5, 5)
plt.legend(loc=3)

plt.plot(lr_11.coef_.T, marker='o', label="C={:.3f}".format(C)) plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90) xlims = plt.xlim() plt.hlines(0, xlims[0], xlims[1]) plt.xlim...

# Reader request attached to this snippet: "please help me improve this".
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_digits
from sklearn import svm, metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
# plot_confusion_matrix was removed from scikit-learn; ConfusionMatrixDisplay
# is its replacement.
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.naive_bayes import GaussianNB
# Let Matplotlib render Chinese characters
from matplotlib import rcParams
rcParams['font.family'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False

# In[2]:
digits = load_digits()
data = digits.data
print(data[0])
print(digits.images[0])
print(digits.target[0])
plt.imshow(digits.images[0])
plt.show()

# In[3]:
train_x, test_x, train_y, test_y = train_test_split(
    data, digits.target, test_size=0.3, random_state=82
)
print(train_x)
gnb = GaussianNB()
gnb.fit(train_x, train_y)
print("训练集得分: %.4lf" % gnb.score(train_x, train_y))
print("测试集得分: %.4lf" % gnb.score(test_x, test_y))
print(gnb.predict(data))
ConfusionMatrixDisplay.from_estimator(gnb, test_x, test_y)
plt.show()

# In[5]:
#################################################################
# Show the first 40 digits with "true ----> predicted" titles. The model is
# already fitted, so predict all 40 in one call instead of refitting it on
# every iteration.
sample_count = 40
predictions = gnb.predict(data[:sample_count])
fig = plt.figure(figsize=(6, 13))
for i in range(sample_count):
    plt.subplot(8, 5, i + 1)
    plt.imshow(digits.images[i], interpolation='none')
    plt.title("%d---->%d" % (digits.target[i], predictions[i]))
plt.show()

plt.colorbar(label="样本数量") plt.show() # 绘制数字和预测结果的子图 fig, axes = plt.subplots(8, 5, figsize=(6, 13)) for i, ax in enumerate(axes.flat): y_pred = gnb.predict([data[i]]) ax.imshow...

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from imblearn.combine import SMOTETomek
from sklearn.metrics import auc, roc_curve, roc_auc_score
from sklearn.feature_selection import SelectFromModel
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# 1. Load the data
df_table_all = pd.read_csv(r"D:\trainafter.csv", index_col=0)

# 2. Separate target and features
X = df_table_all.drop(["Y"], axis=1).values
Y = np.array(df_table_all["Y"])

# 3. Train / test split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

# 4. Balance the training classes (the test set is left untouched)
st = SMOTETomek()
X_train_st, Y_train_st = st.fit_resample(X_train, Y_train)

# 5. Feature selection with an L1-penalised logistic regression
sfm = SelectFromModel(LogisticRegression(penalty='l1', C=1.0, solver="liblinear"))
sfm.fit(X_train, Y_train)
# Keep only the selected features. The original transformed X_train / X_test
# into *_tiny names but then used the undefined *_st_tiny names below.
X_train_st_tiny = sfm.transform(X_train_st)
X_test_st_tiny = sfm.transform(X_test)

# 6. Fit the final model on the balanced, reduced training data
model = LogisticRegression(penalty='l1', C=1.0, solver="liblinear")
model.fit(X_train_st_tiny, Y_train_st)

# 7. Predict probabilities and hard labels for the test set
y_pred = model.predict_proba(X_test_st_tiny)
y_cate = model.predict(X_test_st_tiny)
c = confusion_matrix(Y_test, y_cate)
print(c)


def report_auc(y_true, y_prob, title, out_name="", lw=2):
    """Plot a ROC curve, save it to ``d:\\LR<out_name>`` and display it.

    y_true: true binary labels (positive class is 1).
    y_prob: predicted scores for the positive class.
    title: figure title.
    out_name: suffix appended to the output file name.
    lw: line width of the curves.
    """
    fpr, tpr, _ = roc_curve(y_true, y_prob, pos_label=1)
    print(fpr)
    print(tpr)
    plt.figure()
    # ``label`` was misspelled ``lable``, which Matplotlib rejects
    plt.plot(fpr, tpr, color="darkorange", lw=lw, label="ROC curve")
    plt.plot([0, 1], [0, 1], color="yellow", lw=lw, linestyle="--")
    plt.xlim([0, 1])
    plt.ylim([0, 1.05])
    plt.title(title)
    plt.legend(loc='lower right')
    # Save before showing: once the window is closed the figure may be
    # empty, so the original order could write a blank image.
    plt.savefig(r"d:\LR" + out_name, dpi=800)
    plt.show()
    plt.close("all")


report_auc(Y_test, y_pred[:, 1], "Logistic with L1 panetly", 'LG')

其中，LogisticRegression是用于逻辑回归模型的，SMOTETomek是用于处理样本不平衡问题的，auc、roc_curve、roc_auc_score是用于评估分类模型性能的指标，train_test_split是用于将数据集分为训练集和测试集的，...

plt.plot(range(1, 21), train_score_list, label='train_score');里这些参数分别代表什么意思

这是使用 Matplotlib 库...- label='train_score'：是一个字符串，表示这条折线的标签，用于图例中显示。这行代码的作用是将训练集上的得分绘制成一个折线图，并给这条折线添加一个标签，方便在图例中进行标识。

plt.plot(a, train_score, label='train_score')

# Learning curve over the parameter C: training score vs. test score.
# Expects plt, a (the C values), train_score and score from earlier code.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese
plt.figure()
plt.title('参数C的学习曲线')
plt.plot(a, train_score, label='train_score')
# NOTE(review): the label read 'text_score', presumably a typo for
# 'test_score' -- confirm that ``score`` holds the test-set scores.
plt.plot(a, score, label='test_score')
plt.legend()
plt.show()

相关推荐

plt.plot(a, train_score, label='train_score')

# Learning curve over the parameter C: training score vs. test score.
# Expects plt, a (the C values), train_score and score from earlier code.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render Chinese
plt.figure()
plt.title('参数C的学习曲线')
plt.plot(a, train_score, label='train_score')
# NOTE(review): the label read 'text_score', presumably a typo for
# 'test_score' -- confirm that ``score`` holds the test-set scores.
plt.plot(a, score, label='test_score')
plt.legend()
plt.show()

相关推荐

example.train

ROC.ipynb_PYTHON_鸢尾花_

CNN_卷积神经网络_

plt.plot(range(1, 21), train_score_list, label='train_score');里这些参数分别代表什么意思

大家在看

电法正反演方法和软件使用介绍(“反演”文档)共33张.pptx

IBM DS4700磁盘阵列安装配置指南

Spi_int.rar_dsp spi初始化_spi dsp

海思芯片规格对比.pdf

中南大学943数据结构1997-2020真题&解析

最新推荐

【机器人】将ChatGPT飞书机器人钉钉机器人企业微信机器人公众号部署到vercel及docker_pgj.zip

图数据分析中基于对比学习的异常检测算法的Python实现及应用-含代码及详细解释说明

Python调试器vardbg：动画可视化算法流程

管理建模和仿真的文件

【IT设备维保管理入门指南】：如何制定有效的维护计划，提升设备性能与寿命

python爬取网页链接，url = “https://koubei.16888.com/57233/0-0-0-0”

掌握Web开发：Udacity天气日记项目解析

"互动学习：行动中的多样性与论文攻读经历"

【文献整理高效法】：ENDNOTE软件实用功能及快捷操作揭秘

在使用SQL创建存储过程时，是否可以在定义输入参数时直接为其赋予初始值？