读取所在文件夹中的“北京市空气质量数据.xlsx”,以 AQI 作为目标变量,分别建立 AdaBoost 回归模型和 GBDT 回归模型。
时间: 2024-05-19 15:12:43 浏览: 130
# 导入所需模块
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Load the air-quality workbook; the relative path is resolved against the
# current working directory.
data = pd.read_excel('北京市空气质量数据.xlsx')

# Discard every row that contains a missing value in any column.
data = data.dropna()

# The six pollutant columns are the predictors; AQI is the regression target.
feature_columns = ['PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3']
X = data[feature_columns]
y = data['AQI']

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Build the AdaBoost regression model with 100 boosting rounds.
# random_state is pinned to the same seed as the train/test split:
# AdaBoostRegressor draws random weighted bootstrap samples while boosting,
# so without a seed the reported error differs from run to run.
ada = AdaBoostRegressor(n_estimators=100, random_state=42)
ada.fit(X_train, y_train)

# Evaluate on the held-out test set using mean squared error.
y_pred_ada = ada.predict(X_test)
mse_ada = mean_squared_error(y_test, y_pred_ada)
print('AdaBoost回归模型的均方误差为:', mse_ada)
# Build the gradient-boosted decision tree (GBDT) regression model with
# 100 boosting stages.
# random_state is pinned to the same seed as the train/test split so that tree
# construction, and therefore the comparison with the other model, is
# reproducible across runs.
gbdt = GradientBoostingRegressor(n_estimators=100, random_state=42)
gbdt.fit(X_train, y_train)

# Evaluate on the held-out test set using mean squared error.
y_pred_gbdt = gbdt.predict(X_test)
mse_gbdt = mean_squared_error(y_test, y_pred_gbdt)
print('GBDT回归模型的均方误差为:', mse_gbdt)
阅读全文