import numpy as np import pandas as pd import matplotlib. pyplot as plt from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split from linear_model_03.closed_form_sol.LinearRegression_CFSol import LinearRegressionClosedFormSol # 数据2,读取数据,先进行数据转换 df = pd.read_table('../data/mpg.data', header=0) # data换成txt,即可转化txt格式 df.to_csv('../data/mpg.csv', index=False) mpg = pd. read_csv("../data/mpg.csv"). dropna() # ..表示上两级目录 X, y = np. asarray(mpg. loc[:, "horsepower"]), np. asarray(mpg. loc[:, "displacement"]) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0, shuffle=True) lr_cfs = LinearRegressionClosedFormSol(fit_intercept=True, normalized=True) # 默认训练偏置项和进行标准化 lr_cfs.fit(X_train, y_train) # 训练模型 theta = lr_cfs.get_params() print(theta) y_test_pred = lr_cfs.predict(x_test=X_test) mse, r2, r2c = lr_cfs.cal_mse_r2(y_test, y_test_pred) print("均方误差:%.5f,判决系数:%.5f,修正判决系数:%.5f" % (mse, r2, r2c)) # lr_cfs. plt_predict(y_test, y_test_pred, is_sort=False) lr_cfs.plt_predict(y_test, y_test_pred, is_sort=True) plt. figure(figsize=(7,5)) plt. plot(X_test, y_test, "ro", label="Test Samples") plt. plot(X_test, y_test_pred, "k-", lw=1.5, label="Fit Model") plt. legend(frameon=False) plt. show()
时间: 2023-12-24 18:10:54 浏览: 121
在你的代码中,各条语句被挤在了同一行,需要用换行将它们分隔开。此外,你导入的 LinearRegression_CFSol 模块没有提供,可能需要检查一下文件路径或者确保该文件存在。另外,你在读取 mpg 数据时,使用了 dropna() 函数删除了数据集中的缺失值,这可能会导致数据集变小,影响模型的学习效果。最后,lr_cfs. plt_predict() 这一行代码中句点后面似乎有一个多余的空格,需要去掉。下面是修改后的代码:
```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from linear_model_03.closed_form_sol.LinearRegression_CFSol import LinearRegressionClosedFormSol
# Dataset 2: read the raw table file and convert it before use.
df = pd.read_table('../data/mpg.data', header=0)
# Persist the table as CSV (change the extension to .txt to get a text file).
df.to_csv('../data/mpg.csv', index=False)
mpg = pd.read_csv("../data/mpg.csv")
# Drop only the rows that lack a value in one of the two columns used below.
# Missing entries would otherwise be handed to fit() and the metrics as NaN,
# while rows that are incomplete in unrelated columns are still kept so the
# sample stays as large as possible.
mpg = mpg.dropna(subset=["horsepower", "displacement"])

# Select the feature (horsepower) and the target (displacement).
X = np.asarray(mpg.loc[:, "horsepower"])
y = np.asarray(mpg.loc[:, "displacement"])

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0, shuffle=True
)

# Fit the closed-form linear regression.
# NOTE(review): per the constructor arguments this trains an intercept term
# and standardizes the inputs -- confirm against LinearRegressionClosedFormSol.
lr_cfs = LinearRegressionClosedFormSol(fit_intercept=True, normalized=True)
lr_cfs.fit(X_train, y_train)
theta = lr_cfs.get_params()
print(theta)

# Predict on the held-out samples and report the evaluation metrics.
y_test_pred = lr_cfs.predict(x_test=X_test)
mse, r2, r2c = lr_cfs.cal_mse_r2(y_test, y_test_pred)
print("均方误差:%.5f,判决系数:%.5f,修正判决系数:%.5f" % (mse, r2, r2c))

# Visualize predictions against the true values, unsorted and then sorted.
lr_cfs.plt_predict(y_test, y_test_pred, is_sort=False)
lr_cfs.plt_predict(y_test, y_test_pred, is_sort=True)

# Scatter the test samples and overlay the fitted model's predictions.
plt.figure(figsize=(7, 5))
plt.plot(X_test, y_test, "ro", label="Test Samples")
plt.plot(X_test, y_test_pred, "k-", lw=1.5, label="Fit Model")
plt.legend(frameon=False)
plt.show()
```
阅读全文