PyTorch deep-learning recommender system code for the ml-100k dataset
The following code implements a recommender system in PyTorch using the MovieLens 100K dataset.
First, import the required packages:
```python
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
```
Next, load the dataset and preprocess it:
```python
# Load the dataset (u.data is tab-separated: user id, item id, rating, timestamp)
data = pd.read_csv('ml-100k/u.data', sep='\t', header=None,
                   names=['userId', 'itemId', 'rating', 'timestamp'])

# MovieLens IDs start at 1; shift them to 0-based indices so they can be used
# directly as embedding indices.
data['userId'] = data['userId'] - 1
data['itemId'] = data['itemId'] - 1

# Shuffle, then split 80:10:10 into training, validation, and test sets
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
train_data = data[:int(len(data) * 0.8)]
val_data = data[int(len(data) * 0.8):int(len(data) * 0.9)]
test_data = data[int(len(data) * 0.9):]

# Number of distinct users and items (IDs in ml-100k are contiguous,
# so this also equals the largest index + 1)
num_users = data['userId'].nunique()
num_items = data['itemId'].nunique()

# Dataset class: each sample is a (user, item, rating) triple
class MovieLensDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        row = self.data.iloc[index]
        return {'user': row['userId'], 'item': row['itemId'], 'rating': row['rating']}

# Dataset instances for the three splits
train_dataset = MovieLensDataset(train_data)
val_dataset = MovieLensDataset(val_data)
test_dataset = MovieLensDataset(test_data)

# DataLoaders: shuffle only the training set
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
```
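As a quick sanity check (not part of the original snippet), you can pull a single batch from `train_loader` and confirm that the collated tensors have the expected shapes and integer dtypes before wiring up the model:
```python
# Optional sanity check: the default collate function turns each field into a tensor
batch = next(iter(train_loader))
print(batch['user'].shape, batch['user'].dtype)    # e.g. torch.Size([64]) torch.int64
print(batch['item'].shape, batch['rating'].shape)  # ratings arrive as integer tensors here
```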
Next, define the model:
```python
class RecommenderNet(nn.Module):
    """Embedding + MLP model: concatenates user and item embeddings and regresses the rating."""
    def __init__(self, num_users, num_items, emb_size=64, hidden_size=128):
        super(RecommenderNet, self).__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.fc1 = nn.Linear(emb_size * 2, hidden_size)
        self.fc2 = nn.Linear(hidden_size, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, user, item):
        user_emb = self.user_emb(user)
        item_emb = self.item_emb(item)
        x = torch.cat((user_emb, item_emb), dim=1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        # Squeeze to shape (batch,) so it matches the rating tensor in MSELoss
        return x.squeeze(1)
```
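Before training, a throwaway forward pass with dummy indices is a cheap way to confirm the output shape. This smoke test is an optional addition and assumes the 0-based IDs produced by the dataset class above:
```python
# Optional smoke test with a throwaway model instance and dummy 0-based indices
smoke_model = RecommenderNet(num_users, num_items)
dummy_user = torch.tensor([0, 1, 2])
dummy_item = torch.tensor([10, 20, 30])
with torch.no_grad():
    print(smoke_model(dummy_user, dummy_item).shape)  # expected: torch.Size([3])
```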
Then define the training and validation functions:
```python
def train(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and return the average batch loss."""
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        user = batch['user'].to(device)
        item = batch['item'].to(device)
        rating = batch['rating'].float().to(device)
        optimizer.zero_grad()
        output = model(user, item)        # shape (batch,)
        loss = criterion(output, rating)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    return train_loss / len(train_loader)

def validate(model, val_loader, criterion, device):
    """Evaluate the model on a loader and return the average batch loss."""
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            user = batch['user'].to(device)
            item = batch['item'].to(device)
            rating = batch['rating'].float().to(device)
            output = model(user, item)
            loss = criterion(output, rating)
            val_loss += loss.item()
    return val_loss / len(val_loader)
```
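Since the numbers reported above are averages of per-batch MSE losses, you may also want a proper RMSE over all samples, which is the metric usually quoted for MovieLens. The helper below is a minimal sketch along the same lines as `validate` and is not part of the original code:
```python
def rmse(model, loader, device):
    """Root-mean-square error over every sample in the loader (optional sketch)."""
    model.eval()
    squared_error, count = 0.0, 0
    with torch.no_grad():
        for batch in loader:
            user = batch['user'].to(device)
            item = batch['item'].to(device)
            rating = batch['rating'].float().to(device)
            pred = model(user, item)
            squared_error += torch.sum((pred - rating) ** 2).item()
            count += rating.size(0)
    return (squared_error / count) ** 0.5
```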
Finally, train and test the model:
```python
# Instantiate the model and move it to the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RecommenderNet(num_users, num_items).to(device)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
for epoch in range(10):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    val_loss = validate(model, val_loader, criterion, device)
    print(f'Epoch {epoch+1}, train loss: {train_loss:.4f}, val loss: {val_loss:.4f}')

# Evaluate on the test set
test_loss = validate(model, test_loader, criterion, device)
print(f'Test loss: {test_loss:.4f}')
```
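To actually produce recommendations rather than just a test loss, one simple approach is to score every item for a given user and keep the highest-scoring ones. The sketch below is illustrative only: it assumes the 0-based indices from the dataset class above and does not filter out items the user has already rated.
```python
# Illustrative top-10 recommendation for one user (hypothetical user index 0)
user_index = 0
model.eval()
with torch.no_grad():
    items = torch.arange(num_items, device=device)
    users = torch.full_like(items, user_index)
    scores = model(users, items)                 # predicted rating for every item
    top_scores, top_items = torch.topk(scores, k=10)
print(top_items.tolist())                        # 0-based item indices; add 1 to recover MovieLens IDs
```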
Note: the code above is for reference only; real applications will require further hyperparameter tuning and optimization.