使用 PyTorch 框架实现波士顿房价预测的线性回归
时间: 2023-11-11 18:03:33 浏览: 97
本文使用 PyTorch 框架，通过线性回归预测波士顿房价。首先，我们需要导入必要的库并加载数据集。
```python
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

try:
    from sklearn.datasets import load_boston
except ImportError:
    # scikit-learn 1.2 removed load_boston, so the import above fails on
    # current releases. Provide a drop-in replacement that rebuilds the same
    # arrays from the original StatLib file, following the recipe published
    # in scikit-learn's deprecation notice.
    # NOTE: scikit-learn dropped this dataset over ethical concerns about one
    # of its features; prefer a different dataset for anything but a demo.
    from types import SimpleNamespace

    def load_boston():
        """Return the Boston housing data with the fields this script reads.

        The returned object exposes ``data`` (feature matrix), ``target``
        (the value stored as MEDV below) and ``feature_names``, mirroring the
        attributes of the removed scikit-learn loader that are used here.
        Downloading the raw file requires network access.
        """
        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record is wrapped over two physical rows in the raw file, so
        # even rows and the first two columns of odd rows form the features;
        # the third column of the odd rows is the target.
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ])
        return SimpleNamespace(
            data=data, target=target, feature_names=feature_names
        )

# Load the dataset into a DataFrame; the target goes in the MEDV column.
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df['MEDV'] = boston.target
```
接下来,我们将数据集划分为训练集和测试集。
```python
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
features = df[boston.feature_names]
target = df['MEDV']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)
```
然后，我们将数据集转换为 PyTorch 张量。
```python
# Standardize the features before training. The raw columns span very
# different ranges, and plain SGD with lr=0.01 diverges to inf/NaN on them.
# The statistics come from the training split only, so nothing about the
# test split leaks into training.
feature_mean = X_train.mean()
# A constant column has zero spread; divide by 1 there to avoid 0/0.
feature_std = X_train.std().replace(0, 1)
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

# Convert to float32 PyTorch tensors; targets become column vectors so their
# shape matches the model output.
X_train = torch.from_numpy(X_train.values).float()
X_test = torch.from_numpy(X_test.values).float()
y_train = torch.from_numpy(y_train.values).float().view(-1, 1)
y_test = torch.from_numpy(y_test.values).float().view(-1, 1)
```
接下来,我们定义线性回归模型。
```python
# Linear regression model.
class LinearRegression(nn.Module):
    """Linear regression implemented as a single fully connected layer."""

    def __init__(self, input_dim, output_dim):
        """Create the layer.

        Args:
            input_dim: Number of input features per sample.
            output_dim: Number of values predicted per sample.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)
# One input per feature column of X_train, and a single regression output.
input_dim, output_dim = X_train.shape[1], 1
model = LinearRegression(input_dim=input_dim, output_dim=output_dim)
```
然后,我们定义损失函数和优化器。
```python
# Mean-squared-error loss, minimized with plain stochastic gradient descent.
learning_rate = 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
```
最后，我们训练模型，并在测试集上计算预测结果的损失。
```python
# Train the model. Every epoch runs one forward/backward pass over the whole
# training set.
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass and loss.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Fail loudly if training diverges instead of printing nan until the
    # last epoch.
    if not torch.isfinite(loss):
        raise RuntimeError(
            f'Loss became non-finite at epoch {epoch + 1}; '
            'standardize the input features or lower the learning rate.'
        )

    # Backward pass and parameter update. Gradients accumulate by default,
    # so they are cleared before each backward call.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the training loss every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate on the test set; no gradients are needed for evaluation.
with torch.no_grad():
    y_pred = model(X_test)
    test_loss = criterion(y_pred, y_test)
print(f'Test Loss: {test_loss.item():.4f}')
```
完整代码如下:
```python
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

try:
    from sklearn.datasets import load_boston
except ImportError:
    # scikit-learn 1.2 removed load_boston, so the import above fails on
    # current releases. Provide a drop-in replacement that rebuilds the same
    # arrays from the original StatLib file, following the recipe published
    # in scikit-learn's deprecation notice.
    # NOTE: scikit-learn dropped this dataset over ethical concerns about one
    # of its features; prefer a different dataset for anything but a demo.
    from types import SimpleNamespace

    def load_boston():
        """Return the Boston housing data with the fields this script reads.

        The returned object exposes ``data`` (feature matrix), ``target``
        (the value stored as MEDV below) and ``feature_names``, mirroring the
        attributes of the removed scikit-learn loader that are used here.
        Downloading the raw file requires network access.
        """
        raw_df = pd.read_csv(
            "http://lib.stat.cmu.edu/datasets/boston",
            sep=r"\s+",
            skiprows=22,
            header=None,
        )
        # Each record is wrapped over two physical rows in the raw file, so
        # even rows and the first two columns of odd rows form the features;
        # the third column of the odd rows is the target.
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]
        feature_names = np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ])
        return SimpleNamespace(
            data=data, target=target, feature_names=feature_names
        )


class LinearRegression(nn.Module):
    """Linear regression implemented as a single fully connected layer."""

    def __init__(self, input_dim, output_dim):
        """Create the layer.

        Args:
            input_dim: Number of input features per sample.
            output_dim: Number of values predicted per sample.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)


# Load the dataset into a DataFrame; the target goes in the MEDV column.
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
df['MEDV'] = boston.target

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df[boston.feature_names], df['MEDV'], test_size=0.2, random_state=42)

# Standardize the features before training. The raw columns span very
# different ranges, and plain SGD with lr=0.01 diverges to inf/NaN on them.
# The statistics come from the training split only, so nothing about the
# test split leaks into training.
feature_mean = X_train.mean()
# A constant column has zero spread; divide by 1 there to avoid 0/0.
feature_std = X_train.std().replace(0, 1)
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std

# Convert to float32 PyTorch tensors; targets become column vectors so their
# shape matches the model output.
X_train = torch.from_numpy(X_train.values).float()
X_test = torch.from_numpy(X_test.values).float()
y_train = torch.from_numpy(y_train.values).float().view(-1, 1)
y_test = torch.from_numpy(y_test.values).float().view(-1, 1)

# One input per feature column of X_train, and a single regression output.
input_dim = X_train.shape[1]
output_dim = 1
model = LinearRegression(input_dim, output_dim)

# Mean-squared-error loss, minimized with plain stochastic gradient descent.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train the model. Every epoch runs one forward/backward pass over the whole
# training set.
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass and loss.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)

    # Fail loudly if training diverges instead of printing nan until the
    # last epoch.
    if not torch.isfinite(loss):
        raise RuntimeError(
            f'Loss became non-finite at epoch {epoch + 1}; '
            'standardize the input features or lower the learning rate.'
        )

    # Backward pass and parameter update. Gradients accumulate by default,
    # so they are cleared before each backward call.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the training loss every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate on the test set; no gradients are needed for evaluation.
with torch.no_grad():
    y_pred = model(X_test)
    test_loss = criterion(y_pred, y_test)
print(f'Test Loss: {test_loss.item():.4f}')
```
阅读全文