import matplotlib as mpl import matplotlib.pyplot as plt import pandas as pd from sklearn.linear_model import LinearRegression import numpy as np from sklearn.metrics import mean_absolute_error from sklearn.metrics import mean_squared_error from sklearn.metrics import r2_score mpl.rcParams['font.sans-serif']=['KaiTi'] mpl.rcParams['axes.unicode_minus']= False data=pd.read_csv('data.csv') #print(data.head) data.dropna(axis=0,how='any',inplace=True) data['单价']=data['单价'].map(lambda d:d.replace('元/平米','')) data['单价']=data['单价'].astype(float) data['总价']=data['总价'].map(lambda e:e.replace('万','')) data['总价']=data['总价'].astype(float) data['建筑面积']=data['建筑面积'].map(lambda p:p.replace('平米','')) data['建筑面积']=data['建筑面积'].astype(float) copy_d=data.copy() copy_d[['室','厅','卫']]=copy_d['户型'].str.extract('(\d+)室(\d+)厅(\d+)卫') copy_d['室']=copy_d['室'].astype(float) new_data=data[['总价','建筑面积']] new_data['室']=copy_d['室'] new_data.dropna(axis=0,how='any',inplace=True) print(new_data) new_data.loc[2583]=[None,180.00,4] data_train=new_data.loc[0:2582] x_list=['建筑面积','室'] ndata_mean=data_train.mean() ndata_std=data_train.std() data_train=(data_train-ndata_mean)/ndata_std x_train=data_train[x_list].values y_train=data_train['总价'].values svr=LinearRegression() svr.fit(x_train,y_train) x_test=((new_data[x_list]-ndata_mean[x_list])/ndata_std[x_list]).values y_test=svr.predict(x_test) print(y_test) new_data['y_pred']=y_test*ndata_std['总价']+ndata_mean['总价'] print(new_data[['总价','y_pred']]) svr_acc=svr.score(x_test,y_test)*100 svr_mae=mean_absolute_error(x_test,y_test) print(svr_mae)
时间: 2024-01-14 21:04:38 浏览: 159
import numpy as np import matplotlib.pyplot as plt import math
在你的代码中,第 39 行出现了错误,因为 `mean_absolute_error` 函数的第二个参数应该传入真实的目标值 `y_true`,而你传入的是测试集的特征值 `x_test`。正确的代码应该是:
```
svr_mae = mean_absolute_error(new_data['总价'], new_data['y_pred'])
```
另外,第 28 行代码中的 `loc` 函数用于添加新行时应该直接写入列名对应的值,而不是使用 `None`,否则会导致类型错误。正确的代码应该是:
```
new_data.loc[2583] = [np.nan, 180.00, 4]
```
最后,第 41 行代码中的 `score` 函数用于评估模型的拟合程度,应该传入测试集的特征值 `x_test` 和真实的目标值 `new_data['总价']`,而你传入的是 `y_test`,这也是引起前面错误的原因。正确的代码应该是:
```
svr_acc = svr.score(x_test, new_data['总价']) * 100
```
阅读全文