使用遗传算法优化 LightGBM 的波士顿房价预测代码
时间: 2023-09-07 11:13:18 浏览: 108
以下是一个使用遗传算法优化LightGBM模型的波士顿房价预测代码示例:
```python
import pandas as pd
import numpy as np
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from lightgbm import LGBMRegressor
from deap import creator, base, tools, algorithms
# Load the Boston housing data as a feature matrix and a target vector.
# NOTE: load_boston was removed in scikit-learn 1.2, so this script needs an
# older scikit-learn release (or a different data source) to run.
boston = load_boston()
X = boston.data
y = boston.target

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)
# Fitness function
def eval_func(params):
    """Score one hyperparameter vector for the genetic algorithm (lower is better).

    Args:
        params: Sequence whose first seven entries are, in order,
            n_estimators, learning_rate, num_leaves, max_depth,
            min_child_samples, subsample and colsample_bytree.

    Returns:
        A one-element tuple ``(mse,)`` with the mean 5-fold cross-validated
        mean squared error on the training split. DEAP expects fitness
        values as a tuple, hence the trailing comma.
    """
    # Imported locally because the file's top-level imports do not provide it.
    from sklearn.model_selection import cross_val_score

    def _whole(value, floor):
        # Genes can arrive as floats (e.g. after mutation), but LightGBM
        # requires true integers for these settings.
        return max(floor, int(round(value)))

    def _fraction(value):
        # Sampling ratios must lie in (0, 1].
        return min(1.0, max(1e-3, float(value)))

    lgb_reg = LGBMRegressor(
        n_estimators=_whole(params[0], 1),
        learning_rate=max(1e-4, float(params[1])),
        num_leaves=_whole(params[2], 2),
        max_depth=_whole(params[3], 1),
        min_child_samples=_whole(params[4], 1),
        subsample=_fraction(params[5]),
        colsample_bytree=_fraction(params[6]),
    )

    # Score with cross-validation on the training data only. Using the test
    # split as the fitness signal would tune the hyperparameters on the very
    # data that is later used to report generalisation error.
    neg_mse_per_fold = cross_val_score(
        lgb_reg, X_train, y_train,
        scoring="neg_mean_squared_error", cv=5,
    )
    mse = -float(np.mean(neg_mse_per_fold))
    return mse,
# Genetic algorithm setup
creator.create("FitnessMin", base.Fitness, weights=(-1.0,))  # minimise the MSE
creator.create("Individual", list, fitness=creator.FitnessMin)

# One (low, high, is_integer) entry per gene, in the order eval_func reads
# them. The n_estimators and learning_rate ranges are the ones this script
# originally sampled from; the remaining ranges are conventional starting
# points and can be tuned freely.
GENE_SPECS = (
    (50, 500, True),      # n_estimators
    (0.001, 0.1, False),  # learning_rate
    (8, 128, True),       # num_leaves
    (3, 12, True),        # max_depth
    (5, 50, True),        # min_child_samples
    (0.5, 1.0, False),    # subsample
    (0.5, 1.0, False),    # colsample_bytree
)


def _random_genes():
    """Draw one value per gene, uniformly inside that gene's own range."""
    genes = []
    for low, high, is_integer in GENE_SPECS:
        if is_integer:
            genes.append(int(np.random.randint(low, high + 1)))
        else:
            genes.append(float(np.random.uniform(low, high)))
    return genes


def _mutate_within_bounds(individual, indpb, spread):
    """Gaussian mutation that keeps every gene's type and range intact.

    Each gene is perturbed with probability ``indpb`` by noise whose standard
    deviation is ``spread`` times the width of that gene's range, then clipped
    back into range and rounded if the gene is an integer. A single absolute
    sigma cannot suit genes whose scales differ by orders of magnitude, and
    unrounded noise would turn integer hyperparameters into floats.

    Returns the mutated individual in a one-element tuple, as DEAP requires.
    """
    for idx, (low, high, is_integer) in enumerate(GENE_SPECS):
        if np.random.random() >= indpb:
            continue
        moved = individual[idx] + np.random.normal(0.0, spread * (high - low))
        moved = min(high, max(low, moved))
        individual[idx] = int(round(moved)) if is_integer else float(moved)
    return individual,


toolbox = base.Toolbox()
# Seven genes, each generated with the correct type for its hyperparameter.
toolbox.register("individual", tools.initIterate, creator.Individual, _random_genes)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Two-point crossover swaps position-aligned slices, so gene types stay aligned.
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", _mutate_within_bounds, indpb=0.1, spread=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", eval_func)
# Run the genetic algorithm
def main():
    """Evolve LightGBM hyperparameters and score the best set on the test split.

    Prints DEAP's per-generation statistics, the best individual with its
    fitness, and the test-set MSE of a model refitted on the training split
    with the best hyperparameters.
    """
    pop_size = 50
    num_generations = 10
    crossover_prob = 0.5
    mutation_prob = 0.2

    pop = toolbox.population(n=pop_size)
    hof = tools.HallOfFame(1)  # remembers the single best individual seen
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", np.min)
    stats.register("avg", np.mean)

    # The final population and logbook are not needed afterwards: the best
    # individual is read from the hall of fame.
    algorithms.eaSimple(
        pop, toolbox,
        cxpb=crossover_prob, mutpb=mutation_prob,
        ngen=num_generations,
        stats=stats, halloffame=hof, verbose=True,
    )

    best_ind = hof[0]
    print("Best individual is ", best_ind)
    print("Best fitness is ", best_ind.fitness.values[0])

    def _whole(value, floor):
        # Genes may come back as floats; LightGBM needs true integers here.
        return max(floor, int(round(value)))

    def _fraction(value):
        # Sampling ratios must lie in (0, 1].
        return min(1.0, max(1e-3, float(value)))

    # Refit on the training split with the best hyperparameters and report
    # the error on the held-out test split.
    lgb_reg = LGBMRegressor(
        n_estimators=_whole(best_ind[0], 1),
        learning_rate=max(1e-4, float(best_ind[1])),
        num_leaves=_whole(best_ind[2], 2),
        max_depth=_whole(best_ind[3], 1),
        min_child_samples=_whole(best_ind[4], 1),
        subsample=_fraction(best_ind[5]),
        colsample_bytree=_fraction(best_ind[6]),
    )
    lgb_reg.fit(X_train, y_train)
    y_pred = lgb_reg.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("MSE on test set with best individual: ", mse)


if __name__ == "__main__":
    main()
```
代码中使用了 DEAP 库来实现遗传算法，通过定义适应度函数来评估每个个体的表现。使用 `LGBMRegressor` 构建 LightGBM 模型，并使用均方误差作为适应度函数的评价指标。在遗传算法中，共对 7 个超参数进行优化，包括 n_estimators、learning_rate、num_leaves、max_depth、min_child_samples、subsample 和 colsample_bytree。最后，使用最佳超参数训练模型并预测波士顿房价，并计算预测误差。
阅读全文