优化这段代码:import pandas as pd import numpy as np from sklearn.ensemble import RandomForestClassifier from sklearn.feature_selection import SelectKBest, f_classif from sklearn.model_selection import train_test_split, GridSearchCV from sklearn.metrics import accuracy_score # 读取Excel文件 data = pd.read_excel("output.xlsx") # 提取特征和标签 features = data.iloc[:, 1:].values labels = np.where(data.iloc[:, 0] > 59, 1, 0) # 特征选择 selector = SelectKBest(score_func=f_classif, k=11) selected_features = selector.fit_transform(features, labels) # 划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split(selected_features, labels, test_size=0.2, random_state=42) # 创建随机森林分类器 rf_classifier = RandomForestClassifier() # 定义要调优的参数范围 param_grid = { 'n_estimators': [50, 100, 200], # 决策树的数量 'max_depth': [None, 5, 10], # 决策树的最大深度 'min_samples_split': [2, 5, 10], # 拆分内部节点所需的最小样本数 'min_samples_leaf': [1, 2, 4] # 叶节点上所需的最小样本数 } # 使用网格搜索进行调优 grid_search = GridSearchCV(rf_classifier, param_grid, cv=5) grid_search.fit(X_train, y_train) # 输出最佳参数组合和对应的准确率 print("最佳参数组合:", grid_search.best_params_) print("最佳准确率:", grid_search.best_score_) # 使用最佳参数组合训练模型 best_rf_classifier = grid_search.best_estimator_ best_rf_classifier.fit(X_train, y_train) # 预测 y_pred = best_rf_classifier.predict(X_test) # 计算准确率 accuracy = accuracy_score(y_test, y_pred) # 打印最高准确率分类结果 print("最高准确率分类结果:", accuracy)
时间: 2024-04-15 15:26:23 浏览: 170
优化后的代码如下:
```python
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
def optimize_classifier(path="output.xlsx", k=11, threshold=59, random_state=42):
    """Tune a random forest with grid search and report its test accuracy.

    Reads the spreadsheet at ``path``, turns column 0 into a binary label
    (1 where the value is greater than ``threshold``, else 0), and treats
    all remaining columns as features. The data is split 80/20, the ``k``
    best features are selected on the training portion only, and a
    5-fold grid search picks the random-forest hyper-parameters.

    Args:
        path: Excel file to load.
        k: Number of features to keep; clamped to the number of feature
            columns available so a narrow sheet does not raise.
        threshold: Values in column 0 strictly above this become label 1.
        random_state: Seed used for both the train/test split and the
            random forest, so repeated runs give the same result.

    Returns:
        Accuracy of the tuned classifier on the held-out test split.
    """
    data = pd.read_excel(path)

    # Column 0 is thresholded into the label; every other column is a feature.
    features = data.iloc[:, 1:].values
    labels = np.where(data.iloc[:, 0] > threshold, 1, 0)

    # Split BEFORE selecting features: fitting the selector on the full
    # dataset would let test rows influence which features are kept and
    # make the reported test accuracy optimistic.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=random_state
    )

    # The selector is fitted on the training rows only and then merely
    # applied to the test rows.
    selector = SelectKBest(score_func=f_classif, k=min(k, features.shape[1]))
    X_train = selector.fit_transform(X_train, y_train)
    X_test = selector.transform(X_test)

    # Hyper-parameter ranges explored by the grid search.
    param_grid = {
        'n_estimators': [50, 100, 200],    # number of trees
        'max_depth': [None, 5, 10],        # maximum depth of each tree
        'min_samples_split': [2, 5, 10],   # min samples to split a node
        'min_samples_leaf': [1, 2, 4],     # min samples required at a leaf
    }

    # The candidate fits are independent, so n_jobs=-1 runs them in
    # parallel on all cores.
    grid_search = GridSearchCV(
        RandomForestClassifier(random_state=random_state),
        param_grid,
        cv=5,
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    print("最佳参数组合:", grid_search.best_params_)
    print("最佳准确率:", grid_search.best_score_)

    # GridSearchCV refits the best configuration on the whole training set
    # by default (refit=True), so best_estimator_ is ready to use without
    # another fit() call.
    best_rf_classifier = grid_search.best_estimator_
    y_pred = best_rf_classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("最高准确率分类结果:", accuracy)
    return accuracy
# Run the tuning and evaluation workflow defined above.
optimize_classifier()
```
这里将代码封装到函数 `optimize_classifier()` 中，使其更加模块化、便于复用。
阅读全文