使用图神经网络进行商品品类自动识别代码案例
时间: 2024-04-23 12:07:33 浏览: 122
以下是使用图神经网络进行商品品类自动识别的代码案例:
首先,需要安装 `pytorch` 和 `dgl` 库。
```python
# IPython/Jupyter shell escape: the leading "!" runs the rest of the line in
# the system shell, here installing the `torch` and `dgl` packages with pip.
!pip install torch dgl
```
接着,导入必要的库,并下载、解压数据集。
```python
import torch
import dgl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from dgl.data.utils import load_graphs, save_graphs
# Download the dataset.
# Both lines are IPython/Jupyter shell escapes (leading "!"); outside a
# notebook, run them in a shell without the "!".
# `curl -O` saves the archive under its remote file name, and `gunzip`
# replaces it with `reviews_Automotive_5.json`.
!curl -O https://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Automotive_5.json.gz
!gunzip reviews_Automotive_5.json.gz
```
定义数据集类,并实现(重写)`__getitem__` 和 `__len__` 方法。
```python
class AmazonDataset(Dataset):
    """Review records exposed as one single-node DGL graph per review.

    The file is read as JSON Lines into ``self.df``. Each item is a graph
    with one node whose ``ndata`` carries:

    * ``reviewerID`` / ``asin`` -- integer codes (long) for the string IDs,
    * ``overall``    -- the star rating (float32),
    * ``reviewText`` -- a ``(1, feat_dim)`` hashed bag-of-words vector,
    * ``label``      -- a float32 binary target.

    Args:
        filename: Path to a JSON Lines file with ``reviewerID``, ``asin``,
            ``overall`` and ``reviewText`` fields.
        feat_dim: Length of the hashed text feature vector. The default of
            50 matches the ``in_feats`` the training script passes to the
            model.
        label_threshold: Used only when the file has no ``label`` column:
            the label is then ``1.0`` if ``overall >= label_threshold``
            else ``0.0``.

    NOTE(review): the rating-threshold fallback is a stand-in target so the
    pipeline can run on review dumps without a ``label`` field -- confirm
    the intended label source before relying on the reported accuracy.
    """

    def __init__(self, filename, feat_dim=50, label_threshold=4.0):
        self.df = pd.read_json(filename, lines=True)
        self.feat_dim = feat_dim

        # torch.tensor() cannot hold Python strings, so the string IDs are
        # replaced by dense integer codes (in order of first appearance).
        self._reviewer_codes = torch.as_tensor(
            pd.factorize(self.df['reviewerID'])[0], dtype=torch.long)
        self._asin_codes = torch.as_tensor(
            pd.factorize(self.df['asin'])[0], dtype=torch.long)
        self._overall = torch.as_tensor(
            self.df['overall'].to_numpy(dtype='float32'))

        if 'label' in self.df.columns:
            labels = self.df['label'].to_numpy(dtype='float32')
        else:
            labels = (self.df['overall'] >= label_threshold).to_numpy(
                dtype='float32')
        # Float targets, because BCE-style losses require floating labels.
        self._labels = torch.as_tensor(labels)

    @staticmethod
    def _hash_text(text, feat_dim):
        """Return an L2-normalised hashed bag-of-words vector of length feat_dim.

        Non-string or empty input yields an all-zero vector.
        """
        import zlib  # stdlib; imported locally so no file-level import changes

        counts = [0.0] * feat_dim
        if isinstance(text, str):
            for token in text.lower().split():
                # crc32 is stable across runs, unlike the salted built-in hash().
                counts[zlib.crc32(token.encode('utf-8')) % feat_dim] += 1.0
        vec = torch.tensor(counts, dtype=torch.float32)
        norm = vec.norm()
        return vec / norm if norm > 0 else vec

    def __getitem__(self, idx):
        review = self.df.iloc[idx]
        # One node with a self-loop: GraphConv rejects nodes whose in-degree
        # is zero, and an edge-less node would receive no message at all.
        g = dgl.graph(([0], [0]), num_nodes=1)
        g.ndata['reviewerID'] = self._reviewer_codes[idx].reshape(1)
        g.ndata['asin'] = self._asin_codes[idx].reshape(1)
        g.ndata['overall'] = self._overall[idx].reshape(1)
        g.ndata['reviewText'] = self._hash_text(
            review['reviewText'], self.feat_dim).unsqueeze(0)
        g.ndata['label'] = self._labels[idx].reshape(1)
        return g

    def __len__(self):
        return len(self.df)
```
定义图神经网络模型。
```python
class GNNModel(torch.nn.Module):
    """Three stacked ``GraphConv`` layers with ReLU between them.

    Args:
        in_feats: Width of the ``reviewText`` node feature read in ``forward``.
        hidden_feats: Width of the two hidden layers.
        out_feats: Width of the per-node output (raw scores, no activation).
    """

    def __init__(self, in_feats, hidden_feats, out_feats):
        super().__init__()
        self.conv1 = dgl.nn.GraphConv(in_feats, hidden_feats)
        self.conv2 = dgl.nn.GraphConv(hidden_feats, hidden_feats)
        self.conv3 = dgl.nn.GraphConv(hidden_feats, out_feats)

    def forward(self, g):
        """Return per-node outputs of shape ``(num_nodes, out_feats)`` for graph ``g``."""
        # GraphConv raises on nodes with zero in-degree, which is what an
        # edge-less single-node graph produces. Removing and re-adding
        # self-loops gives every node exactly one, whether or not the input
        # already had them.
        g = dgl.add_self_loop(dgl.remove_self_loop(g))
        # The layer weights are floating point; cast in case the feature was
        # stored with another dtype.
        h = g.ndata['reviewText'].float()
        h = torch.relu(self.conv1(g, h))
        h = torch.relu(self.conv2(g, h))
        return self.conv3(g, h)
```
定义训练和预测函数。
```python
def train(model, data_loader, optimizer, criterion, device):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(g)`` on each batch.
        data_loader: Iterable of batches exposing ``.to(device)`` and
            ``.ndata['label']``.
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss called as ``criterion(pred, label)``.
        device: Device each batch is moved to.

    Returns:
        The average of the per-batch loss values as a float, or ``nan`` when
        the loader yields no batches.
    """
    model.train()
    loss_total = 0.0
    num_batches = 0
    for g in data_loader:
        g = g.to(device)
        optimizer.zero_grad()
        pred = model(g)
        label = g.ndata['label'].to(device)
        if label.numel() == pred.numel():
            # Element-wise losses such as BCEWithLogitsLoss need the target to
            # have the prediction's exact shape and a floating dtype; a
            # single-logit head gives (N, 1) while labels are stored as (N,).
            label = label.reshape(pred.shape).to(pred.dtype)
        else:
            # Class-index targets: keep the dtype, flatten to one entry per node.
            label = label.reshape(-1)
        loss = criterion(pred, label)
        loss.backward()
        optimizer.step()
        loss_total += loss.item()
        num_batches += 1
    if num_batches == 0:
        # Mean of nothing: avoid ZeroDivisionError and make the gap visible.
        return float('nan')
    return loss_total / num_batches
def predict(model, data_loader, device):
    """Collect model outputs and labels over ``data_loader`` without gradients.

    Args:
        model: Module called as ``model(g)`` on each batch.
        data_loader: Iterable of batches exposing ``.to(device)`` and
            ``.ndata['label']``.
        device: Device each batch is moved to.

    Returns:
        ``(y_pred, y_true)`` as NumPy arrays concatenated over all batches.
        A trailing dimension of size 1 is dropped from both, so a
        single-output model yields 1-D arrays that compare element-wise.
        Both arrays are empty when the loader yields no batches.
    """
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for g in data_loader:
            g = g.to(device)
            pred = model(g)
            label = g.ndata['label']
            # Drop only a trailing unit axis. A bare .squeeze() would turn a
            # one-node batch into a 0-d array, which np.concatenate rejects,
            # and leaving pred as (N, 1) makes `pred == label` broadcast to
            # an (N, N) matrix.
            if pred.dim() > 1 and pred.shape[-1] == 1:
                pred = pred.squeeze(-1)
            if label.dim() > 1 and label.shape[-1] == 1:
                label = label.squeeze(-1)
            y_pred.append(pred.cpu().numpy())
            y_true.append(label.cpu().numpy())
    if not y_pred:
        # np.concatenate raises on an empty list.
        return np.empty((0,), dtype=np.float32), np.empty((0,), dtype=np.float32)
    return np.concatenate(y_pred), np.concatenate(y_true)
```
最后,读取数据集并训练模型。
```python
# Load the dataset
dataset = AmazonDataset('reviews_Automotive_5.json')

# Split into training (80%) and test (remaining 20%) subsets
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Data loaders. PyTorch's default collate function cannot stack DGLGraph
# objects, so each list of sample graphs is merged with dgl.batch instead.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=128, shuffle=True, collate_fn=dgl.batch)
test_data_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=128, shuffle=False, collate_fn=dgl.batch)

# Model, optimizer and loss function
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GNNModel(50, 100, 1)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()

# Train, recording the mean loss of every epoch
num_epochs = 10
loss_list = []
for epoch in range(num_epochs):
    loss = train(model, train_data_loader, optimizer, criterion, device)
    loss_list.append(loss)
    print(f'Epoch {epoch+1}, loss={loss:.4f}')

# Predict on the test set and compute accuracy
y_pred, y_true = predict(model, test_data_loader, device)
# Flatten both arrays first: comparing an (N, 1) prediction array with an
# (N,) label array would broadcast to (N, N) and give a meaningless mean.
# A logit above 0 corresponds to a sigmoid probability above 0.5.
y_pred = (y_pred.reshape(-1) > 0).astype(int)
y_true = y_true.reshape(-1).astype(int)
accuracy = (y_pred == y_true).mean()
print(f'Accuracy: {accuracy:.4f}')

# Plot the loss curve
plt.plot(loss_list)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()
```
以上代码实现了一个简单的图神经网络模型,并使用 `AmazonDataset` 数据集类进行训练和测试。训练过程中,每个 epoch 的平均损失被保存在 `loss_list` 列表中;训练结束后,先在测试集上计算并输出模型的准确率,最后绘制 loss 曲线。
阅读全文