python lgb stacking代码
时间: 2023-09-06 12:08:22 浏览: 91
以下是一个基于 LGBMRegressor 的代码示例,供参考。该示例通过 K 折交叉验证生成训练集的袋外(out-of-fold)预测以及测试集的平均预测;这些预测可以作为 Stacking 第二层模型的输入特征:
```python
import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor, early_stopping, log_evaluation
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
# K-fold LightGBM regression script.
#
# Trains one LGBMRegressor per fold, collects out-of-fold (OOF) predictions
# for the training rows and fold-averaged predictions for the test rows,
# reports RMSE, and writes the test predictions to 'submission.csv'.
# The OOF / test prediction vectors are what a second-level (stacking) model
# would consume as features; no second-level model is trained here.

# Load train and test datasets
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

# Define target variable, row-identifier column and features
target = 'target'
id_col = 'id'
# The identifier column is written to the submission below, so it is a row
# key rather than a predictor; keep it out of the feature list.
# NOTE(review): assumes 'id' is a pure row key — confirm against the data.
features = [col for col in train_df.columns if col not in (target, id_col)]

# Slice the feature/target frames once instead of once per fold
X = train_df[features]
y = train_df[target]
X_test = test_df[features]

# Initialize parameters for LGBMRegressor model
lgb_params = {
    'objective': 'regression',
    'boosting_type': 'gbdt',
    'metric': 'rmse',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': -1,
    # Upper bound on boosting rounds. The library default (100) equals the
    # early-stopping patience used below, so early stopping could never
    # trigger; with a high ceiling the validation metric decides when to stop.
    'n_estimators': 10000,
}

# Define number of folds for cross-validation
n_folds = 5

# Out-of-fold predictions for the training rows and fold-averaged
# predictions for the test rows
train_preds = np.zeros(train_df.shape[0])
test_preds = np.zeros(test_df.shape[0])

# Initialize KFold cross-validation
kf = KFold(n_splits=n_folds, shuffle=True, random_state=42)

# Loop through each fold
for fold, (train_idx, val_idx) in enumerate(kf.split(X), start=1):
    print('Fold:', fold)

    # Split train and validation datasets
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]

    # Initialize LGBMRegressor model
    model = LGBMRegressor(**lgb_params)

    # Fit on the training part of the fold. Early stopping and periodic
    # logging are passed as callbacks: the `early_stopping_rounds` and
    # `verbose` keyword arguments of fit() were removed in LightGBM 4.0.
    model.fit(
        X_train,
        y_train,
        eval_set=[(X_val, y_val)],
        callbacks=[
            early_stopping(stopping_rounds=100),
            log_evaluation(period=100),
        ],
    )

    # Store predictions for the held-out rows and accumulate this fold's
    # share of the averaged test prediction
    train_preds[val_idx] = model.predict(X_val)
    test_preds += model.predict(X_test) / n_folds

    # Calculate RMSE for validation dataset
    rmse = np.sqrt(mean_squared_error(y_val, train_preds[val_idx]))
    print('RMSE:', rmse)

# Calculate overall out-of-fold RMSE for train dataset
rmse = np.sqrt(mean_squared_error(y, train_preds))
print('Overall RMSE:', rmse)

# Save predictions to CSV file
submission = pd.DataFrame({id_col: test_df[id_col], target: test_preds})
submission.to_csv('submission.csv', index=False)
```