import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
# 读取数据
data = pd.read_excel('data.xlsx')
# 分割训练集和验证集
train_data = data.sample(frac=0.8, random_state=1)
test_data = data.drop(train_data.index)
# 定义特征变量和目标变量
features = ['feature1', 'feature2', 'feature3']
target = 'target'
# 训练随机森林模型
rf = RandomForestClassifier(n_estimators=100, random_state=1)
rf.fit(train_data[features], train_data[target])
# 在验证集上进行预测并计算精度和混淆矩阵
pred = rf.predict(test_data[features])
accuracy = accuracy_score(test_data[target], pred)
confusion_mat = confusion_matrix(test_data[target], pred)
print('Accuracy:', accuracy)
print('Confusion matrix:')
print(confusion_mat)
# 读取新数据文件并预测结果
new_data = pd.read_excel('new_data.xlsx')
new_pred = rf.predict(new_data[features])
new_data['predicted_target'] = new_pred
new_data.to_excel('predicted_results.xlsx', index=False)
请改进以上代码,使其输出混淆矩阵图片。
时间: 2024-02-17 13:00:15 浏览: 102
详解pandas库pd.read_excel操作读取excel文件参数整理与实例
好的,以下是改进后的代码,增加了输出混淆矩阵图片的功能:
```python
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
# Train a random-forest classifier on data.xlsx, report validation accuracy
# and a confusion-matrix heatmap, then label the rows of new_data.xlsx and
# write them to predicted_results.xlsx.

# Load the labelled dataset.
data = pd.read_excel('data.xlsx')

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
train_data = data.sample(frac=0.8, random_state=1)
test_data = data.drop(train_data.index)

# Feature columns and target column used for both training and prediction.
features = ['feature1', 'feature2', 'feature3']
target = 'target'

# Fit the random-forest model on the training split.
rf = RandomForestClassifier(n_estimators=100, random_state=1)
rf.fit(train_data[features], train_data[target])

# Predict on the validation split and compute accuracy.
pred = rf.predict(test_data[features])
accuracy = accuracy_score(test_data[target], pred)

# Pin the label order to the classes the model was trained on, so the matrix
# always has one row/column per class (even when a class happens to be absent
# from the validation split) and the tick labels below line up with it.
class_labels = rf.classes_
confusion_mat = confusion_matrix(test_data[target], pred, labels=class_labels)
print('Accuracy:', accuracy)
print('Confusion matrix:')
print(confusion_mat)

# Draw the confusion matrix as a heatmap.
# fmt='d' prints the cell counts as plain integers; seaborn's default '.2g'
# would render counts of 100 or more in scientific notation.
ax = sns.heatmap(
    confusion_mat,
    annot=True,
    fmt='d',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
# confusion_matrix puts true labels on rows and predictions on columns.
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

# Load the unlabelled data, predict its target and save the result.
new_data = pd.read_excel('new_data.xlsx')
new_pred = rf.predict(new_data[features])
new_data['predicted_target'] = new_pred
new_data.to_excel('predicted_results.xlsx', index=False)
```
需要注意的是,为了输出混淆矩阵图片,我们需要额外导入seaborn和matplotlib两个库。在代码中,我们使用seaborn库的heatmap函数绘制混淆矩阵的热力图,并使用matplotlib库的show函数显示图片。另外,如果你的数据文件的格式或列名与示例不同,需要相应地修改代码中的文件名、特征列和目标列。
阅读全文