kaggle共享单车案例的回归分析,广义线性模型,非线性回归,可加模型,变系数模型及其衍生模型的python代码
时间: 2024-03-20 22:39:29 浏览: 130
python线性回归模型 详细解析与代码实例.txt
以下是 Kaggle 共享单车(Bike Sharing)案例回归分析的 Python 代码,包括广义线性模型(线性回归、岭回归、Lasso 回归)、非线性回归(多项式回归)、可加模型、变系数模型及其衍生模型的实现:
```python
# 导入必要的库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# Load the data
data = pd.read_csv("Bike-Sharing-Dataset/day.csv")
# Feature engineering: remove the columns that are not used as features,
# then one-hot encode the categorical columns.
unused_columns = ["instant", "dteday", "casual", "registered"]
categorical_columns = ["season", "yr", "mnth", "holiday", "weekday", "workingday", "weathersit"]
data = pd.get_dummies(data.drop(columns=unused_columns), columns=categorical_columns)
# Split into training and test sets: 20% of the rows are held out, and the
# fixed random_state keeps the split reproducible between runs.
features = data.drop(columns=["cnt"])
target = data["cnt"]
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
# Linear regression model (ordinary least squares on every feature column)
lr = LinearRegression().fit(X_train, y_train)
y_train_pred, y_test_pred = lr.predict(X_train), lr.predict(X_test)
# Report MSE and R2 for the training split first, then for the test split.
for label, score in (
    ("Linear Regression Train MSE:", mean_squared_error(y_train, y_train_pred)),
    ("Linear Regression Train R2:", r2_score(y_train, y_train_pred)),
    ("Linear Regression Test MSE:", mean_squared_error(y_test, y_test_pred)),
    ("Linear Regression Test R2:", r2_score(y_test, y_test_pred)),
):
    print(label, score)
# Ridge regression model (L2-penalised linear regression, penalty weight 0.1)
ridge = Ridge(alpha=0.1).fit(X_train, y_train)
y_train_pred = ridge.predict(X_train)
y_test_pred = ridge.predict(X_test)
# Compute the four metrics up front so the reporting below stays a plain list.
ridge_train_mse = mean_squared_error(y_train, y_train_pred)
ridge_train_r2 = r2_score(y_train, y_train_pred)
ridge_test_mse = mean_squared_error(y_test, y_test_pred)
ridge_test_r2 = r2_score(y_test, y_test_pred)
print("Ridge Regression Train MSE:", ridge_train_mse)
print("Ridge Regression Train R2:", ridge_train_r2)
print("Ridge Regression Test MSE:", ridge_test_mse)
print("Ridge Regression Test R2:", ridge_test_r2)
# Lasso regression model (L1-penalised linear regression, penalty weight 0.1)
lasso = Lasso(alpha=0.1).fit(X_train, y_train)
y_train_pred, y_test_pred = lasso.predict(X_train), lasso.predict(X_test)
# Report MSE and R2 for the training split first, then for the test split.
for label, score in (
    ("Lasso Regression Train MSE:", mean_squared_error(y_train, y_train_pred)),
    ("Lasso Regression Train R2:", r2_score(y_train, y_train_pred)),
    ("Lasso Regression Test MSE:", mean_squared_error(y_test, y_test_pred)),
    ("Lasso Regression Test R2:", r2_score(y_test, y_test_pred)),
):
    print(label, score)
# Polynomial regression model: a degree-2 feature expansion followed by
# ordinary least squares. The expansion is applied to every column of
# X_train, which includes the one-hot encoded columns.
polynomial_features = PolynomialFeatures(degree=2)
linear_regression = LinearRegression()
pipeline_steps = [
    ("polynomial_features", polynomial_features),
    ("linear_regression", linear_regression),
]
pipeline = Pipeline(pipeline_steps).fit(X_train, y_train)
y_train_pred, y_test_pred = pipeline.predict(X_train), pipeline.predict(X_test)
# Report MSE and R2 for the training split first, then for the test split.
for label, score in (
    ("Polynomial Regression Train MSE:", mean_squared_error(y_train, y_train_pred)),
    ("Polynomial Regression Train R2:", r2_score(y_train, y_train_pred)),
    ("Polynomial Regression Test MSE:", mean_squared_error(y_test, y_test_pred)),
    ("Polynomial Regression Test R2:", r2_score(y_test, y_test_pred)),
):
    print(label, score)
# Additive model
# NOTE: this fits a plain linear regression on two columns only, i.e. each
# term is a linear function of a single feature; no smooth terms are estimated.
additive_columns = ["temp", "atemp"]
additive_model = LinearRegression().fit(X_train[additive_columns], y_train)
y_train_pred = additive_model.predict(X_train[additive_columns])
y_test_pred = additive_model.predict(X_test[additive_columns])
# Report MSE and R2 for the training split first, then for the test split.
for label, score in (
    ("Additive Model Train MSE:", mean_squared_error(y_train, y_train_pred)),
    ("Additive Model Train R2:", r2_score(y_train, y_train_pred)),
    ("Additive Model Test MSE:", mean_squared_error(y_test, y_test_pred)),
    ("Additive Model Test R2:", r2_score(y_test, y_test_pred)),
):
    print(label, score)
# Varying-coefficient model
# The slope of each continuous covariate is allowed to change with the level
# of a categorical effect modifier (season, year, month, working day, weather).
# This is expressed as a linear model on the products
# "continuous column x one-hot column", using the one-hot columns that
# pd.get_dummies created above (season_*, yr_*, mnth_*, workingday_*,
# weathersit_*). The earlier version indexed columns such as "spring" and
# "cnt_spring" that are never created, so it failed with a KeyError, and it
# used a multi-column target instead of "cnt" alone.
continuous_columns = ["temp", "atemp", "hum", "windspeed"]
modifier_prefixes = ("season_", "yr_", "mnth_", "workingday_", "weathersit_")
modifier_columns = [col for col in data.columns if col.startswith(modifier_prefixes)]
# One interaction column per (continuous covariate, modifier level) pair.
# The one-hot columns may be boolean, so cast them before multiplying.
interaction_terms = pd.DataFrame(
    {
        f"{cont}_x_{mod}": data[cont] * data[mod].astype(float)
        for cont in continuous_columns
        for mod in modifier_columns
    },
    index=data.index,
)
# Keep the main effects next to the interactions; build a new frame instead
# of mutating `data`, and cast to float so the design matrix has one dtype.
vc_features = pd.concat([data.drop(columns=["cnt"]), interaction_terms], axis=1).astype(float)
# Same test_size and random_state as the split used for the other models.
X_train, X_test, y_train, y_test = train_test_split(vc_features, data["cnt"], test_size=0.2, random_state=42)
variable_coef_model = LinearRegression()
variable_coef_model.fit(X_train, y_train)
y_train_pred = variable_coef_model.predict(X_train)
y_test_pred = variable_coef_model.predict(X_test)
print("Variable Coefficient Model Train MSE:", mean_squared_error(y_train, y_train_pred))
print("Variable Coefficient Model Train R2:", r2_score(y_train, y_train_pred))
print("Variable Coefficient Model Test MSE:", mean_squared_error(y_test, y_test_pred))
print("Variable Coefficient Model Test R2:", r2_score(y_test, y_test_pred))
```
这段代码使用了 Scikit-learn 库中的多种回归模型,并进行了特征工程和模型评估。其中,岭回归和 Lasso 回归是带正则化的线性模型,可以缓解过拟合;多项式回归通过对特征做多项式展开来拟合非线性关系;可加模型把响应变量表示为各个特征的函数之和;变系数模型则允许回归系数随其他变量(例如季节或年份)变化,适用于特征效应随条件不同而变化的场景。
阅读全文