def plotConfMatrix(y_test, y_predict, modelType):
    """Plot the confusion matrix of an instrument classifier as a heatmap.

    Rows are true classes and columns are predicted classes, both labelled
    with the instrument names in ``titles``.

    Args:
        y_test: True class indices, one per sample (integers indexing
            ``titles``). May be a list, a NumPy array or a pandas Series.
        y_predict: Predicted class indices, in the same order as ``y_test``.
        modelType: Model name appended to the plot title.

    Raises:
        ValueError: If ``y_test`` and ``y_predict`` differ in length.
    """
    titles = ["Cello", "Church Organ", "Clarinet", "Flute", "Guitar", "Harp",
              "Marimba", "Perldrop", "Piano", "Synlead3", "Violin"]
    n_classes = len(titles)

    # Flatten to plain arrays so samples are paired strictly by position.
    # Indexing a pandas Series with y_test[i] is label-based, which pairs the
    # wrong rows (or raises KeyError) once the index has been shuffled.
    true_idx = np.asarray(y_test).ravel()
    pred_idx = np.asarray(y_predict).ravel()
    if true_idx.shape != pred_idx.shape:
        raise ValueError(
            "y_test and y_predict must have the same length, got %d and %d"
            % (true_idx.size, pred_idx.size)
        )

    # Confusion matrix
    conf = np.zeros((n_classes, n_classes), dtype=int)
    if true_idx.size:
        # np.add.at accumulates repeated (row, col) pairs; a plain
        # conf[rows, cols] += 1 would count each distinct pair only once.
        np.add.at(conf, (true_idx, pred_idx), 1)

    # Plot confusion matrix
    fig, ax = plt.subplots(figsize=(8, 8))
    sn.set(font_scale=1.5)
    sn.heatmap(conf, annot=True, fmt='d', ax=ax, cmap="YlGnBu",
               xticklabels=titles, yticklabels=titles)
    # Explicit limits keep the first and last heatmap rows from being cut off.
    ax.set_ylim(len(conf), 0)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Confusion Matrix with sklearn for ' + modelType)
    plt.show()
时间: 2024-01-02 12:04:49 浏览: 31
这段代码是用来绘制混淆矩阵(confusion matrix)的,它接受三个参数:y_test表示测试集的真实标签,y_predict表示测试集的预测标签,modelType表示模型的类型。代码首先初始化一个11*11的全零矩阵conf,然后遍历每个测试样本,将真实标签和预测标签对应的位置加1,最后使用seaborn库绘制热力图来展示混淆矩阵。其中xticklabels和yticklabels用来设置横纵坐标的标签,即每个类别的名称。最后通过plt.show()函数将图像显示出来。
相关问题
np.random.seed(42) q=np.array(X1[:2928]) w=np.array(x2[:2928]) e=np.array(x3[:2928]) r=np.array(x4[:2928]) t=np.array(x5[:2928]) p=np.array(x6[:2928]) u=np.array(x7[:2928]) eps=np.random.normal(0,0.05,152) X=np.c_[q,w,e,r,t,p,u] beta=[0.1,0.15,0.2,0.5,0.33,0.45,0.6] y=np.dot(X,beta) ''' X_model=sm.add_constant(X) model=sm.OLS(y,X_model) results=model.fit() print(results.summary()) ''' X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) alpha = 0.1 # 设置岭回归的惩罚参数 ridge = Ridge(alpha=alpha) ridge.fit(X_train, y_train) y_pred = ridge.predict(X_test) mse = mean_squared_error(y_test, y_pred) print('MSE:', mse) 那这个代码要怎么修改,才可以在经过岭回归之后再求出参数呢?
如果您想使用岭回归(Ridge Regression)求解多元线性回归的参数,可以按照以下步骤修改代码:
1. 导入必要的库:
```python
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
```
2. 准备数据:
```python
# Seed NumPy's global random generator so the run is reproducible.
np.random.seed(42)

# Take the first 2928 observations of every predictor series as an array.
q, w, e, r, t, p, u = (
    np.array(series[:2928]) for series in (X1, x2, x3, x4, x5, x6, x7)
)

# Design matrix: one column per predictor, in the order listed above.
X = np.column_stack((q, w, e, r, t, p, u))

# Coefficients used to generate the target, one per column of X.
beta = [0.1, 0.15, 0.2, 0.5, 0.33, 0.45, 0.6]

# The target is the exact linear combination X · beta (no noise term added).
y = X.dot(beta)
```
3. 划分数据集为训练集和测试集:
```python
# Hold out 20% of the samples as a test set; the fixed random_state makes
# the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
```
4. 使用岭回归拟合数据:
```python
# Fit a ridge regression model on the training split.
alpha = 0.1 # penalty (regularization) parameter of the ridge regression
ridge = Ridge(alpha=alpha)
ridge.fit(X_train, y_train)
```
5. 计算岭回归的拟合效果:
```python
# Predict on the held-out test split and report the mean squared error
# between the predictions and the true targets.
y_pred = ridge.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print('MSE:', mse)
```
6. 获取岭回归拟合得到的参数(系数和截距):
```python
# Read the parameters estimated by the fitted ridge model and print them.
coef = ridge.coef_ # coefficients of the fitted ridge regression
intercept = ridge.intercept_ # intercept of the fitted ridge regression
print('Coefficients:', coef)
print('Intercept:', intercept)
```
这样就可以使用岭回归求解多元线性回归的参数了。
def get_feat_importance(dtest,model): # 预测 predict = model.predict(dtest) # 处理结果 predict = pd.DataFrame(predict, columns=['prob']) result = pd.concat([test[['User_id', 'Coupon_id', 'Date_received']], predict], axis=1) train_data=xgb.DMatrix(offline_train,label=get_label) params={'max_width':3} bst=xgb.train(params,train_data,num_boost_round=1) for importance_type in('weight','gain','cover','total_gain','total_cover'): print('%s: ' % importance_type,bst.get_score(importance_type=importance_type)) return result def get_result(model,test):#线上测试集 dtest = xgb.DMatrix(test.drop(['User_id', 'Coupon_id', 'Date_received'], axis=1)) # 预测 predict = model.predict(dtest) # 处理结果 predict = pd.DataFrame(predict, columns=['pred']) result = pd.concat([test[['User_id', 'Coupon_id', 'Date_received']], predict], axis=1) result.to_csv('result/result.csv', index=False, header=None) print("results are saved.")改写代码,不改变功能
def get_feat_importance(dtest, model):
    """Predict on ``dtest`` and print feature-importance scores.

    The importance scores come from a separate booster trained here for a
    single round on ``offline_train``, not from ``model``.

    Args:
        dtest: Data passed to ``model.predict``.
        model: Object exposing ``predict``.

    Returns:
        The id columns of ``test`` concatenated column-wise with a ``prob``
        column holding the predictions.
    """
    id_columns = ['User_id', 'Coupon_id', 'Date_received']

    # Predict and wrap the scores in a one-column frame.
    probabilities = pd.DataFrame(model.predict(dtest), columns=['prob'])

    # NOTE(review): `test` is not a parameter; it is resolved from the
    # enclosing scope. Presumably it is the frame `dtest` was built from.
    result = pd.concat([test[id_columns], probabilities], axis=1)

    # NOTE(review): `offline_train` and `get_label` also come from the
    # enclosing scope; `get_label` is passed as the label as-is. Confirm
    # that it holds label values rather than a function.
    # NOTE(review): 'max_width' does not appear among XGBoost's documented
    # parameters; 'max_depth' may have been intended. Confirm.
    booster = xgb.train(
        {'max_width': 3},
        xgb.DMatrix(offline_train, label=get_label),
        num_boost_round=1,
    )

    # Print the feature importance under every importance type.
    for kind in ('weight', 'gain', 'cover', 'total_gain', 'total_cover'):
        print('%s: ' % kind, booster.get_score(importance_type=kind))

    return result
def get_result(model, test):
    """Predict on the online test set, save the result to CSV and return it.

    Args:
        model: Object exposing ``predict``; it is called with an
            ``xgb.DMatrix`` built from the feature columns of ``test``.
        test: DataFrame containing the id columns ``User_id``,
            ``Coupon_id`` and ``Date_received``; every other column is
            used as a feature.

    Returns:
        DataFrame with the three id columns plus a ``pred`` column, one row
        per row of ``test``, indexed ``0..n-1``. The same table is written
        to ``result/result.csv`` without index or header.
    """
    import os

    id_columns = ['User_id', 'Coupon_id', 'Date_received']

    # Online test set: everything except the id columns is a feature.
    dtest = xgb.DMatrix(test.drop(id_columns, axis=1))

    # Predict
    predict = pd.DataFrame(model.predict(dtest), columns=['pred'])

    # `predict` carries a fresh 0..n-1 index, and concat(axis=1) aligns on
    # index. Reset the id columns' index so rows are paired by position even
    # when `test` was filtered, sorted or sampled beforehand.
    ids = test[id_columns].reset_index(drop=True)
    result = pd.concat([ids, predict], axis=1)

    # Make sure the output directory exists before writing.
    os.makedirs('result', exist_ok=True)
    result.to_csv('result/result.csv', index=False, header=False)
    print("results are saved.")
    return result