import matplotlib as mpl import matplotlib.pyplot as plt import pandas as pd from sklearn.linear_model import LinearRegression import numpy as np from sklearn.metrics import mean_absolute_error from sklearn.metrics import mean_squared_error from sklearn.metrics import r2_score mpl.rcParams['font.sans-serif']=['KaiTi'] mpl.rcParams['axes.unicode_minus']= False data=pd.read_csv('data.csv') #print(data.head) data.dropna(axis=0,how='any',inplace=True) data['单价']=data['单价'].map(lambda d:d.replace('元/平米','')) data['单价']=data['单价'].astype(float) data['总价']=data['总价'].map(lambda e:e.replace('万','')) data['总价']=data['总价'].astype(float) data['建筑面积']=data['建筑面积'].map(lambda p:p.replace('平米','')) data['建筑面积']=data['建筑面积'].astype(float) copy_d=data.copy() copy_d[['室','厅','卫']]=copy_d['户型'].str.extract('(\d+)室(\d+)厅(\d+)卫') copy_d['室']=copy_d['室'].astype(float) new_data=data[['总价','建筑面积']] new_data['室']=copy_d['室'] new_data.dropna(axis=0,how='any',inplace=True) print(new_data) new_data.loc[2583]=[None,180.00,4] data_train=new_data.loc[0:2582] x_list=['建筑面积','室'] ndata_mean=data_train.mean() ndata_std=data_train.std() data_train=(data_train-ndata_mean)/ndata_std x_train=data_train[x_list].values y_train=data_train['总价'].values svr=LinearRegression() svr.fit(x_train,y_train) x_test=((new_data[x_list]-ndata_mean[x_list])/ndata_std[x_list]).values y_test=svr.predict(x_test) print(y_test) new_data['y_pred']=y_test*ndata_std['总价']+ndata_mean['总价'] print(new_data[['总价','y_pred']]) svr_acc=svr.score(x_test,y_test)*100 svr_mae=mean_absolute_error(x_test,y_test) print(svr_mae)
时间: 2024-01-14 10:04:38 浏览: 129
在你的代码中,第 39 行出现了错误,因为 `mean_absolute_error` 函数的第二个参数应该传入真实的目标值 `y_true`,而你传入的是测试集的特征值 `x_test`。正确的代码应该是:
```
svr_mae = mean_absolute_error(new_data['总价'], new_data['y_pred'])
```
另外,第 28 行代码中的 `loc` 函数用于添加新行时应该直接写入列名对应的值,而不是使用 `None`,否则会导致类型错误。正确的代码应该是:
```
new_data.loc[2583] = [np.nan, 180.00, 4]
```
最后,第 41 行代码中的 `score` 函数用于评估模型的拟合程度,应该传入测试集的特征值 `x_test` 和真实的目标值 `new_data['总价']`,而你传入的是 `y_test`,这也是引起前面错误的原因。正确的代码应该是:
```
svr_acc = svr.score(x_test, new_data['总价']) * 100
```
相关问题
在Python中想运行from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA import pandas as pd from sklearn import svm import numpy as np import math import matplotlib.pyplot as plt import matplotlib as mpl from matplotlib import colors from sklearn.model_selection import train_test_split from sklearn import datasets from matplotlib.colors import ListedColormap from sklearn.svm import SVC from sklearn.preprocessing import StandardScaler from sklearn.model_selection import StratifiedShuffleSplit,StratifiedKFold from sklearn.model_selection import GridSearchCV from sklearn.model_selection import GridSearchCV, LeaveOneOut, cross_val_predict from sklearn.model_selection import KFold from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import GaussianNB from sklearn.neighbors import KNeighborsClassifier from sklearn import svm from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import roc_auc_score from sklearn.metrics import roc_auc_score import math import datetime import multiprocessing as mp from sklearn.ensemble import StackingClassifier from sklearn.pipeline import make_pipeline from sklearn.svm import LinearSVC import random怎么操作
要在Python中运行这些代码,你需要确保已经安装了相应的包。大多数包都可以使用`pip`进行安装。
首先,在命令行中运行以下命令来安装所需的包:
```bash
pip install scikit-learn pandas matplotlib numpy
```
然后,在你的Python脚本中导入所需的库:
```python
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import pandas as pd
from sklearn import svm
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import colors
from sklearn.model_selection import train_test_split
from sklearn import datasets
from matplotlib.colors import ListedColormap
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit, StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import GridSearchCV, LeaveOneOut, cross_val_predict
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import datetime
import multiprocessing as mp
from sklearn.ensemble import StackingClassifier
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
import random
```
请确保在运行这些代码之前,已经安装了所需的Python库。如果遇到任何问题,请确保已正确安装这些库,并且版本与代码兼容。
import+numpy+as+np+import+pandas+as+pd+import+matplotlib+as+mpl+import+matplotlib.pyplot+as+plt+from
import numpy as np: 这行代码是将numpy库导入,并将其命名为np,以便在后续代码中使用np来调用numpy库的函数和方法。
import pandas as pd: 这行代码是将pandas库导入,并将其命名为pd,以便在后续代码中使用pd来调用pandas库的函数和方法。
import matplotlib as mpl: 这行代码是将matplotlib库导入,并将其命名为mpl,以便在后续代码中使用mpl来调用matplotlib库的函数和方法。
import matplotlib.pyplot as plt: 这行代码是将matplotlib库中的pyplot模块导入,并将其命名为plt,以便在后续代码中使用plt来调用pyplot模块的函数和方法。
这些导入语句通常出现在Python脚本或Jupyter Notebook中的开头,用于导入所需的库和模块,以便在后续代码中使用它们进行数据处理、分析和可视化等操作。