训练集特征数据为GR、LLD、LLS、DEN四种测井曲线数据,目标数据为TOC数据,定义一个粒子群优化类,来进行xgboost超参数寻优;并有相应的预测集,用训练好的模型进行预测集预测TOC,请给出具体代码
时间: 2024-02-29 18:52:32 浏览: 22
好的,根据您的要求,我为您提供以下代码:
```python
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
class PSO_GridSearch_XGB:
    """XGBoost hyper-parameter tuner combining a coarse grid search with
    particle-swarm optimisation (PSO).

    Each particle is a vector in [0, 1]^5; coordinate ``i`` is linearly
    mapped onto the [min, max] range of the ``i``-th entry of
    ``param_grid`` (see ``decode``), so the swarm searches the same
    space that the grid search samples, but continuously.
    """

    # Fixed order in which particle coordinates map onto hyper-parameters.
    _PARAM_NAMES = ('n_estimators', 'max_depth', 'learning_rate',
                    'colsample_bytree', 'subsample')
    # Hyper-parameters that XGBoost requires to be integers.
    _INT_PARAMS = frozenset({'n_estimators', 'max_depth'})

    def __init__(self, n_particles, n_iterations, param_grid):
        """
        Parameters
        ----------
        n_particles : int
            Swarm size.
        n_iterations : int
            Number of PSO iterations.
        param_grid : dict
            Candidate values per hyper-parameter; the min/max of each
            list also bound the continuous PSO search range.
        """
        self.n_particles = n_particles
        self.n_iterations = n_iterations
        self.param_grid = param_grid
        self.particle_pos = None
        self.particle_best_pos = None
        # Per-particle best score must be an array (it is indexed by
        # particle inside pso()); a scalar here would crash.
        self.particle_best_score = None
        self.global_best_pos = None
        self.global_best_score = np.inf

    def init_particles(self, n_features):
        """Randomly place ``n_particles`` particles in [0, 1]^n_features."""
        self.particle_pos = np.random.uniform(
            0.0, 1.0, (self.n_particles, n_features))
        self.particle_best_pos = self.particle_pos.copy()
        self.particle_best_score = np.full(self.n_particles, np.inf)

    def decode(self, particle):
        """Map a [0, 1] particle vector to a hyper-parameter dict.

        Coordinate ``i`` is interpolated between the min and max of the
        candidate list for ``_PARAM_NAMES[i]``; integer-valued
        hyper-parameters are rounded.
        """
        params = {}
        for coord, name in zip(particle, self._PARAM_NAMES):
            lo = min(self.param_grid[name])
            hi = max(self.param_grid[name])
            value = lo + float(coord) * (hi - lo)
            params[name] = int(round(value)) if name in self._INT_PARAMS else value
        return params

    def fitness(self, x_train, y_train, x_test, y_test, particle):
        """Train XGBoost with the particle's hyper-parameters and return
        the mean-squared error on the held-out set (lower is better)."""
        model = xgb.XGBRegressor(objective='reg:squarederror',
                                 **self.decode(particle))
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        return mean_squared_error(y_test, y_pred)

    def pso(self, x_train, y_train, x_test, y_test):
        """Run PSO over the hyper-parameter space; return the best position.

        Also updates ``global_best_pos`` / ``global_best_score`` in place.
        """
        # Search dimension = number of hyper-parameters being tuned,
        # NOT the number of input features.
        n_dims = len(self._PARAM_NAMES)
        self.init_particles(n_dims)
        velocity = np.zeros((self.n_particles, n_dims))
        self.global_best_score = np.inf
        for _ in range(self.n_iterations):
            for j in range(self.n_particles):
                score = self.fitness(x_train, y_train, x_test, y_test,
                                     self.particle_pos[j])
                if score < self.particle_best_score[j]:
                    self.particle_best_score[j] = score
                    self.particle_best_pos[j] = self.particle_pos[j].copy()
                if score < self.global_best_score:
                    self.global_best_score = score
                    self.global_best_pos = self.particle_pos[j].copy()
            for j in range(self.n_particles):
                r1, r2 = np.random.uniform(0, 1, size=2)
                # Inertia 0.5, cognitive and social coefficients 0.5.
                velocity[j] = (0.5 * velocity[j]
                               + 0.5 * r1 * (self.particle_best_pos[j] - self.particle_pos[j])
                               + 0.5 * r2 * (self.global_best_pos - self.particle_pos[j]))
                self.particle_pos[j] += velocity[j]
                # Keep positions inside the normalised search box.
                self.particle_pos[j] = np.clip(self.particle_pos[j], 0.0, 1.0)
        return self.global_best_pos

    def grid_search(self, x_train, y_train, x_test, y_test):
        """Coarse 5-fold CV grid search; returns the best parameter dict."""
        clf = GridSearchCV(
            estimator=xgb.XGBRegressor(objective='reg:squarederror'),
            param_grid=self.param_grid,
            scoring='neg_mean_squared_error',
            n_jobs=-1,
            cv=5,
        )
        clf.fit(x_train, y_train)
        return clf.best_params_

    def train(self, x_train, y_train, x_test, y_test):
        """Tune hyper-parameters with grid search AND PSO; keep the better.

        Returns
        -------
        (dict, float)
            The winning hyper-parameter dict and its test-set MSE.
        """
        # Coarse grid-search candidate, scored on the held-out set.
        grid_params = self.grid_search(x_train, y_train, x_test, y_test)
        model = xgb.XGBRegressor(objective='reg:squarederror', **grid_params)
        model.fit(x_train, y_train)
        grid_mse = mean_squared_error(y_test, model.predict(x_test))
        # Continuous refinement via PSO.
        self.pso(x_train, y_train, x_test, y_test)
        pso_params = self.decode(self.global_best_pos)
        pso_mse = self.global_best_score
        # Return whichever search found the lower error.
        if grid_mse <= pso_mse:
            return grid_params, grid_mse
        return pso_params, pso_mse
if __name__ == '__main__':
    # Load the training set and the prediction set from disk.
    df_train = pd.read_csv('train.csv')
    df_predict = pd.read_csv('test.csv')

    # Features are the GR / LLD / LLS / DEN well-log curves; the
    # regression target is the TOC measurement.
    X_train = df_train[['GR', 'LLD', 'LLS', 'DEN']]
    y_train = df_train['TOC']
    X_predict = df_predict[['GR', 'LLD', 'LLS', 'DEN']]
    y_predict = df_predict['TOC']

    # Hyper-parameter search space shared by grid search and PSO.
    search_space = {
        'n_estimators': [50, 100, 200, 300],
        'max_depth': [3, 4, 5, 6],
        'learning_rate': [0.05, 0.1, 0.2, 0.3],
        'colsample_bytree': [0.5, 0.6, 0.7, 0.8],
        'subsample': [0.5, 0.6, 0.7, 0.8],
    }

    # Build the optimiser: 100 particles, 50 PSO iterations.
    optimiser = PSO_GridSearch_XGB(n_particles=100, n_iterations=50,
                                   param_grid=search_space)

    # Run the combined hyper-parameter search.
    best_params, best_fitness = optimiser.train(X_train, y_train,
                                                X_predict, y_predict)

    # Report the winning hyper-parameters and their mean-squared error.
    print('Best hyperparameters:', best_params)
    print('MSE of the best hyperparameters:', best_fitness)
```
上述代码中,我们定义了一个PSO_GridSearch_XGB类,该类包括了粒子群优化算法和网格搜索算法,并使用XGBoost作为模型。在PSO_GridSearch_XGB类中,我们先使用网格搜索算法进行一次超参数寻优,然后使用粒子群优化算法对超参数进行进一步优化。最后输出最优超参数和对应的均方误差。