请根据以上房屋售价预测任务及其数据编写 Python 代码:手动实现梯度下降法,对训练数据进行线性回归拟合,预测测试数据的售价,并计算 R²(决定系数)。
时间: 2023-06-08 15:07:40 浏览: 58
好的,以下是代码:
```python
import numpy as np
import pandas as pd
# Load the training and test sets.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Preprocessing: impute missing numeric values with the training-set column
# means. numeric_only=True is needed because DataFrame.mean() raises a
# TypeError on non-numeric columns in pandas >= 2.0. The same training means
# are applied to the test set so both frames are imputed consistently; mean
# entries for columns absent from test_data are simply ignored by fillna.
numeric_means = train_data.mean(numeric_only=True)
train_data = train_data.fillna(numeric_means)
test_data = test_data.fillna(numeric_means)

# Feature selection.
features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'FullBath', 'YearBuilt']
x_train = train_data[features].values
x_test = test_data[features].values
y_train = train_data['SalePrice'].values.reshape(-1, 1)

# Standardization. The test features must be scaled with the TRAINING mean and
# standard deviation: the fitted weights are only meaningful in the coordinate
# system they were learned in.
x_mean = np.mean(x_train, axis=0)
x_std = np.std(x_train, axis=0)
x_std[x_std == 0] = 1.0  # a constant column would otherwise divide by zero
x_train = (x_train - x_mean) / x_std
x_test = (x_test - x_mean) / x_std

# NOTE: y_train is overwritten with its standardized values (mean 0, std 1).
# The original price scale can still be recovered from train_data['SalePrice'].
y_train = (y_train - np.mean(y_train)) / np.std(y_train)
# Loss function
def loss_function(x, y, w, b):
    """Return the mean squared error of the linear model ``x @ w + b``.

    Args:
        x: Feature matrix; the script passes shape (n_samples, n_features).
        y: Targets; the script passes a column vector (n_samples, 1).
        w: Weights; the script passes a column vector (n_features, 1).
        b: Bias, a scalar or an array broadcastable against ``x @ w``.

    Returns:
        The mean of the squared residuals ``(y - x @ w - b) ** 2``.
    """
    residual = y - np.dot(x, w) - b
    return np.mean(residual ** 2)
# Gradient descent
def gradient_descent(x, y, w, b, learning_rate, num_iter):
    """Fit ``y ~ x @ w + b`` by batch gradient descent on the mean squared error.

    The inputs ``w`` and ``b`` are treated as starting values and are never
    modified in place; updated copies are returned instead.

    Args:
        x: Feature matrix; the script passes shape (n_samples, n_features).
        y: Targets; the script passes a column vector (n_samples, 1).
        w: Initial weights; the script passes shape (n_features, 1).
        b: Initial bias, a scalar or a length-1 array.
        learning_rate: Step size applied to each gradient.
        num_iter: Number of full-batch update steps to run.

    Returns:
        A tuple ``(w, b, losses)`` where ``losses`` holds the mean squared
        error measured after each update step (``num_iter`` entries).
    """
    n_samples = x.shape[0]
    losses = []
    for _ in range(num_iter):
        residual = y - (np.dot(x, w) + b)
        w_grad = -2 * np.dot(x.T, residual) / n_samples
        b_grad = -2 * np.mean(residual)
        # Rebind rather than use `-=`: in-place updates would silently mutate
        # the caller's arrays and fail on integer-dtype initial weights.
        w = w - learning_rate * w_grad
        b = b - learning_rate * b_grad
        # Loss after the update, i.e. with the new parameters.
        losses.append(np.mean((y - np.dot(x, w) - b) ** 2))
    return w, b, losses
# Train the model.
learning_rate = 0.01
num_iter = 1000
w = np.random.rand(x_train.shape[1], 1)
b = np.random.rand(1)
w, b, losses = gradient_descent(x_train, y_train, w, b, learning_rate, num_iter)

# Predict. The model was trained on standardized targets, so its output has to
# be mapped back to price units. y_train itself has already been standardized
# (its mean is 0 and its std is 1), so the original-scale statistics are taken
# from the raw SalePrice column instead.
price_values = train_data['SalePrice'].values
price_mean = np.mean(price_values)
price_std = np.std(price_values)
y_pred = (np.dot(x_test, w) + b) * price_std + price_mean

# Compute R-squared in price units: R^2 = 1 - SS_res / SS_tot, where SS_tot is
# the total sum of squares of the reference values around their own mean.
# NOTE(review): if this is Kaggle's sample_submission.csv, its SalePrice column
# holds placeholder predictions rather than true sale prices -- confirm that it
# is a meaningful reference before trusting the score.
y_test = pd.read_csv('sample_submission.csv')['SalePrice'].values.reshape(-1, 1)
ss_res = np.sum((y_test - y_pred) ** 2)
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
r_squared = 1 - ss_res / ss_tot
print('R方为:', r_squared)
```
注意:运行前请将代码中的文件路径(train.csv、test.csv、sample_submission.csv)替换为实际的数据文件路径。