用 BalancedBaggingClassifier 融合具有最优参数的 Bagging、随机森林、MLP 及 XGBoost 等算法,并结合过抽样策略(Python)
时间: 2024-05-05 20:16:28 浏览: 175
随机森林(Random Forest)算法Python代码实现
代码如下:
```python
from imblearn.ensemble import BalancedBaggingClassifier
from imblearn.over_sampling import RandomOverSampler
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
# Data preprocessing
# NOTE: ``...`` is a placeholder left by the original snippet. Replace it with
# a real train/test split (for example the result of scikit-learn's
# train_test_split); as written this assignment cannot be unpacked.
X_train, X_test, y_train, y_test = ...
scaler = StandardScaler()
# Fit the scaler on the training split only, then reuse it on the test split
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Oversampling strategy
# A fixed seed makes the resampled training set reproducible between runs
ros = RandomOverSampler(random_state=0)
X_train, y_train = ros.fit_resample(X_train, y_train)
# Candidate models
# The tree wrapped by BaggingClassifier is spelled out (it is also the
# library default) so that its max_depth can be addressed by the grid below.
models = [
    ('bagging', BaggingClassifier(DecisionTreeClassifier())),
    ('rf', RandomForestClassifier()),
    ('mlp', MLPClassifier()),
    ('xgb', XGBClassifier()),
]

# scikit-learn 1.2 renamed BaggingClassifier's ``base_estimator`` parameter to
# ``estimator``; use whichever name the installed version exposes.
bagging_inner = (
    'estimator'
    if 'estimator' in BaggingClassifier().get_params(deep=False)
    else 'base_estimator'
)

# Grid search: one parameter grid per model.
# Each pipeline below contains a single model step, so it must only receive
# the keys that belong to that step; passing the keys of the other models
# makes GridSearchCV fail with "Invalid parameter".
params = {
    'bagging': {
        'bagging__n_estimators': [50, 100, 200],
        'bagging__' + bagging_inner + '__max_depth': [5, 10, None],
    },
    'rf': {
        'rf__n_estimators': [50, 100, 200],
        'rf__max_depth': [5, 10, None],
    },
    'mlp': {
        'mlp__hidden_layer_sizes': [(10,), (20,), (30,), (50,)],
        'mlp__alpha': [0.0001, 0.001, 0.01],
        'mlp__max_iter': [200, 500, 1000],
    },
    'xgb': {
        'xgb__n_estimators': [50, 100, 200],
        'xgb__max_depth': [5, 10, None],
    },
}

# NOTE(review): X_train has already been oversampled before this point, so
# duplicated minority rows can end up in both the training and the validation
# folds and inflate the cross-validated score. Moving the sampler into an
# imblearn Pipeline would confine resampling to the training folds.
for name, model in models:
    pipe = Pipeline([
        ('pca', PCA()),
        (name, model),
    ])
    # scoring='f1' is the binary F1 score, i.e. a two-class target is assumed
    grid = GridSearchCV(pipe, params[name], cv=5, n_jobs=-1, scoring='f1')
    grid.fit(X_train, y_train)
    print(name, 'best params:', grid.best_params_)
    print(name, 'best score:', grid.best_score_)
# Final models, configured with the selected hyper-parameters.
# The wrapped estimator is passed positionally on purpose: the keyword was
# renamed from ``base_estimator`` to ``estimator`` (scikit-learn 1.2,
# imbalanced-learn 0.10) and the old spelling has since been removed, whereas
# the first positional argument works with both old and new releases.
# NOTE(review): this nests a random forest inside bagging inside balanced
# bagging, which is expensive to fit -- confirm that this is intended.
bagging = BaggingClassifier(RandomForestClassifier(max_depth=None), n_estimators=200)
rf = RandomForestClassifier(n_estimators=200, max_depth=None)
mlp = MLPClassifier(hidden_layer_sizes=(50,), alpha=0.01, max_iter=1000)
xgb = XGBClassifier(n_estimators=200, max_depth=5)
bbc = BalancedBaggingClassifier(bagging, sampling_strategy='auto', replacement=False, random_state=0)
# ``bagging`` takes part in the ensemble only through ``bbc``
models = [('rf', rf), ('mlp', mlp), ('xgb', xgb), ('bbc', bbc)]
# Model fusion: fit every final model on the training data, keep its test-set
# predictions for the vote, and report its individual scores.
y_preds = []
for name, model in models:
    y_pred = model.fit(X_train, y_train).predict(X_test)
    y_preds += [y_pred]
    print(name, 'test accuracy:', accuracy_score(y_test, y_pred))
    print(name, 'test f1 score:', f1_score(y_test, y_pred))

# Average voting: the per-sample mean of the predicted labels is thresholded
# at 0.5, so a tie between the models counts as class 1.
y_pred = [int(vote >= 0.5) for vote in sum(y_preds) / len(y_preds)]
print('ensemble test accuracy:', accuracy_score(y_test, y_pred))
print('ensemble test f1 score:', f1_score(y_test, y_pred))
```
阅读全文