代码实现基于LFM+MLP模型的GRU,包括数据预处理,构建模型,训练模型以及模型预测
时间: 2024-01-05 20:02:56 浏览: 130
好的,下面是完整的代码实现,包括数据预处理、构建模型、训练模型和模型预测,使用Python和PyTorch框架:
```python
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
# Data preprocessing: load the ratings table and build train/test splits.
ratings_df = pd.read_csv('ratings.csv')
# BUG FIX: the raw userId/movieId values are not guaranteed to be contiguous
# (movieId in MovieLens-style dumps is notoriously sparse), so the old
# `movieId - 1` remap could yield indices >= num_items and crash the
# nn.Embedding lookup. factorize() assigns each distinct id a dense 0-based
# code; the returned index arrays also define num_users/num_items exactly.
ratings_df['userId'], user_index = pd.factorize(ratings_df['userId'])
ratings_df['movieId'], item_index = pd.factorize(ratings_df['movieId'])
num_users = len(user_index)
num_items = len(item_index)
# Fixed random_state keeps the split reproducible across runs.
train_df, test_df = train_test_split(ratings_df, test_size=0.2, random_state=42)
class RatingDataset(Dataset):
    """Map-style dataset over parallel (user, item, rating) sequences.

    Index i across the three sequences describes one rating event; each
    item is returned as a dict so DataLoader collation yields named
    tensors per batch.
    """

    def __init__(self, user_ids, item_ids, ratings):
        self.user_ids = user_ids
        self.item_ids = item_ids
        self.ratings = ratings

    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, idx):
        return {
            'user_id': self.user_ids[idx],
            'item_id': self.item_ids[idx],
            'rating': self.ratings[idx],
        }
def _frame_to_dataset(df):
    # Column extraction lives in one place so train and test stay consistent.
    return RatingDataset(df['userId'].values, df['movieId'].values, df['rating'].values)

train_dataset = _frame_to_dataset(train_df)
test_dataset = _frame_to_dataset(test_df)
# Shuffle only the training stream; evaluation order is irrelevant.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)
# 模型构建
class LFM_MLP_GRU(nn.Module):
    """Rating predictor: LFM embeddings -> MLP -> GRU -> linear head.

    User and item latent factors are concatenated, passed through a ReLU
    MLP, then through a single-step GRU (seq_len == 1 — the MLP output is
    treated as a one-element sequence) and a scalar output layer.

    Args:
        num_users: number of distinct users (embedding rows).
        num_items: number of distinct items (embedding rows).
        embedding_size: latent-factor dimension per entity.
        hidden_size: GRU hidden dimension.
        mlp_layers: output widths of the successive MLP layers.
    """

    def __init__(self, num_users, num_items, embedding_size, hidden_size, mlp_layers):
        super(LFM_MLP_GRU, self).__init__()
        # LFM embedding tables.
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)
        # MLP: Linear+ReLU stack shrinking from 2*embedding_size through mlp_layers.
        layers = []
        input_size = 2 * embedding_size
        for width in mlp_layers:
            layers.append(nn.Linear(input_size, width))
            layers.append(nn.ReLU())
            input_size = width
        self.mlp_layers = nn.Sequential(*layers)
        # GRU consumes the MLP output as a length-1 sequence.
        self.gru = nn.GRU(input_size, hidden_size)
        # Scalar rating head.
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, user_ids, item_ids):
        """Return predicted ratings, shape (batch,).

        Args:
            user_ids: LongTensor of user indices, shape (batch,).
            item_ids: LongTensor of item indices, shape (batch,).
        """
        user_embedding = self.user_embedding(user_ids)
        item_embedding = self.item_embedding(item_ids)
        # (batch, 2 * embedding_size)
        lfm_input = torch.cat([user_embedding, item_embedding], dim=1)
        mlp_output = self.mlp_layers(lfm_input)
        # GRU expects (seq_len, batch, features); treat the batch as one time step.
        gru_input = mlp_output.unsqueeze(0)
        gru_output, _ = self.gru(gru_input)
        # (1, batch, 1) after the head.
        output = self.output_layer(gru_output)
        # BUG FIX: a bare .squeeze() also collapsed the batch dimension when
        # batch == 1, yielding a 0-d tensor and a shape mismatch against the
        # (1,) rating target in MSELoss. Squeeze only seq and feature dims.
        return output.squeeze(0).squeeze(-1)
# Model training.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LFM_MLP_GRU(num_users, num_items, 32, 64, [128, 64]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10
for epoch in range(num_epochs):
    epoch_total = 0.0
    for batch in train_loader:
        user_ids = batch['user_id'].to(device)
        item_ids = batch['item_id'].to(device)
        targets = batch['rating'].to(device)
        optimizer.zero_grad()
        preds = model(user_ids, item_ids)
        loss = criterion(preds, targets.float())
        loss.backward()
        optimizer.step()
        # Weight by batch size so the epoch mean is exact even when the
        # final batch is smaller than batch_size.
        epoch_total += loss.item() * len(user_ids)
    train_loss = epoch_total / len(train_dataset)
    print('Epoch [{}/{}], Train Loss: {:.4f}'.format(epoch+1, num_epochs, train_loss))
# Model evaluation on the held-out split; no gradients are needed here.
with torch.no_grad():
    running = 0.0
    for batch in test_loader:
        user_ids = batch['user_id'].to(device)
        item_ids = batch['item_id'].to(device)
        targets = batch['rating'].to(device)
        batch_loss = criterion(model(user_ids, item_ids), targets.float())
        # Same sample-weighted accumulation as training for an exact mean.
        running += batch_loss.item() * len(user_ids)
    test_loss = running / len(test_dataset)
    print('Test Loss: {:.4f}'.format(test_loss))
```
此代码实现中,数据预处理使用了Pandas和Scikit-learn库,模型训练使用了PyTorch框架中的DataLoader、MSELoss、Adam等模块,模型预测使用了PyTorch框架中的no_grad()上下文管理器。在训练过程中,每个epoch都会遍历整个训练集,计算每个batch的损失并更新模型参数。在预测过程中,使用测试集计算模型的平均损失。
阅读全文