GCN 实现 graph embedding 的 PyTorch 代码
时间: 2023-09-05 10:13:31 浏览: 154
下面是一个简单的 GCN(图卷积网络)实现,用于图嵌入(graph embedding)任务,代码基于 PyTorch 框架。
``` python
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
class GraphConvolution(nn.Module):
    """Single graph-convolution layer: ``adj @ (input @ weight) + bias``.

    Args:
        in_features: Size of each input feature vector.
        out_features: Size of each output feature vector.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        # Allocated uninitialised; real values are set by reset_parameters().
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        self.bias = nn.Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weight (Xavier uniform) and the bias (zeros)."""
        nn.init.xavier_uniform_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, input, adj):
        """Project the features, aggregate them through ``adj``, add the bias.

        Args:
            input: Feature matrix; its last dimension must equal
                ``in_features`` for the matrix product to be defined.
            adj: Adjacency matrix multiplied on the left of the projected
                features (presumably square with one row per node - confirm
                against the caller).

        Returns:
            Tensor with ``out_features`` columns.
        """
        projected = torch.mm(input, self.weight)
        aggregated = torch.spmm(adj, projected)
        return aggregated + self.bias
class GCN(nn.Module):
    """Two-layer graph convolutional network.

    Args:
        nfeat: Number of input features per node.
        nhid: Width of the hidden layer.
        nclass: Number of output units (one logit per class).
        dropout: Dropout probability applied to the hidden activations.
    """

    def __init__(self, nfeat, nhid, nclass, dropout):
        super().__init__()
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return the raw (un-normalised) output of the second layer.

        Args:
            x: Node feature matrix passed to the first layer.
            adj: Adjacency matrix shared by both layers.
        """
        hidden = self.gc1(x, adj)
        hidden = F.relu(hidden)
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        return self.gc2(hidden, adj)
class GraphDataset(Dataset):
    """Dataset exposing aligned ``(adj, features, labels)`` entries by index.

    Args:
        adj: Indexable container of adjacency entries.
        features: Indexable container of feature entries.
        labels: Indexable container of labels; its length defines the
            dataset size.
    """

    def __init__(self, adj, features, labels):
        self.adj, self.features, self.labels = adj, features, labels

    def __len__(self):
        """Return the number of labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(adj[idx], features[idx], labels[idx])`` triple."""
        adj_item = self.adj[idx]
        feature_item = self.features[idx]
        label_item = self.labels[idx]
        return adj_item, feature_item, label_item
def train(model, optimizer, criterion, train_loader, device):
    """Run one optimisation pass over every batch in ``train_loader``.

    Args:
        model: Module called as ``model(features, adj)``.
        optimizer: Optimizer holding the model's parameters.
        criterion: Loss called as ``criterion(output, labels)``.
        train_loader: Iterable yielding ``(adj, features, labels)`` batches.
        device: Device every batch tensor is moved to before the forward pass.
    """
    model.train()
    for batch in train_loader:
        adj, features, labels = (tensor.to(device) for tensor in batch)
        optimizer.zero_grad()
        batch_loss = criterion(model(features, adj), labels)
        batch_loss.backward()
        optimizer.step()
def evaluate(model, criterion, val_loader, device):
    """Compute the mean loss and accuracy of ``model`` over ``val_loader``.

    The averages are taken over the samples actually yielded by the loader,
    so the result stays correct when the loader skips samples (for example
    ``drop_last=True`` or a custom sampler) and when ``val_loader`` is a plain
    iterable of batches without a ``dataset`` attribute.

    Args:
        model: Module called as ``model(features, adj)``; switched to eval mode.
        criterion: Loss called as ``criterion(output, labels)``. The per-batch
            value is weighted by the batch size, which assumes the criterion
            returns a per-sample mean.
        val_loader: Iterable yielding ``(adj, features, labels)`` batches.
        device: Device every batch tensor is moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)`` as floats; ``(0.0, 0.0)`` when the loader
        yields no samples.
    """
    model.eval()
    total_loss = 0.0
    total_correct = 0
    total_seen = 0
    with torch.no_grad():
        for adj, features, labels in val_loader:
            adj, features, labels = adj.to(device), features.to(device), labels.to(device)
            output = model(features, adj)
            batch_size = len(labels)
            # Undo the criterion's mean so batches of unequal size weigh fairly.
            total_loss += criterion(output, labels).item() * batch_size
            total_correct += output.argmax(dim=1).eq(labels).sum().item()
            total_seen += batch_size
    if total_seen == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError.
        return 0.0, 0.0
    return total_loss / total_seen, total_correct / total_seen
def main():
    """Train a GCN on data loaded from ``.npy`` files and report metrics.

    Reads ``adj.npy``, ``features.npy`` and ``labels.npy`` from the working
    directory, splits the nodes 80/20 into training and validation sets,
    trains for 100 epochs and saves the parameters with the best validation
    accuracy to ``best_model.pt``.
    """
    # load data
    # NOTE(review): assumes adj is a dense (N, N) array, features is (N, F)
    # and labels is a length-N vector of integer class ids - confirm.
    # NOTE(review): adj is used exactly as loaded; GCNs usually expect a
    # normalised adjacency with self-loops - confirm the file is pre-processed.
    adj = np.load('adj.npy')
    features = np.load('features.npy')
    labels = np.load('labels.npy')

    # split data
    num_nodes = len(labels)
    train_idx = np.random.choice(num_nodes, int(num_nodes * 0.8), replace=False)
    val_idx = np.setdiff1d(np.arange(num_nodes), train_idx)

    def build_loader(idx):
        # Take the induced sub-graph (rows AND columns) so the adjacency stays
        # square and aligned with the selected feature rows.
        sub_adj = torch.as_tensor(adj[np.ix_(idx, idx)], dtype=torch.float32)
        sub_features = torch.as_tensor(features[idx], dtype=torch.float32)
        sub_labels = torch.as_tensor(labels[idx], dtype=torch.long)
        dataset = GraphDataset(sub_adj, sub_features, sub_labels)
        # The graph convolution multiplies adj by the feature rows, so each
        # batch must hold the whole sub-graph in its original order: one
        # full-size batch, no shuffling.
        return DataLoader(dataset, batch_size=max(len(dataset), 1), shuffle=False)

    # create data loaders
    train_loader = build_loader(train_idx)
    val_loader = build_loader(val_idx)

    # create model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(nfeat=features.shape[1], nhid=16, nclass=int(labels.max()) + 1, dropout=0.5).to(device)

    # train model
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = nn.CrossEntropyLoss()
    best_val_acc = 0.0
    for epoch in range(100):
        train(model, optimizer, criterion, train_loader, device)
        # Training loss is measured in eval mode, without gradients.
        train_loss, _ = evaluate(model, criterion, train_loader, device)
        val_loss, val_acc = evaluate(model, criterion, val_loader, device)
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pt')
        print('Epoch: {:04d}'.format(epoch+1),
              'loss_train: {:.4f}'.format(train_loss),
              'loss_val: {:.4f}'.format(val_loss),
              'acc_val: {:.4f}'.format(val_acc))


if __name__ == '__main__':
    main()
```
代码实现了一个简单的 GCN 模型:由两个 GraphConvolution 层组成,层间使用 ReLU 激活函数和 dropout。训练时使用交叉熵损失函数和 Adam 优化器,训练集和验证集通过 PyTorch 的 DataLoader 加载。每个 epoch 结束时会输出训练损失、验证损失和验证精度,并把验证精度最高的模型参数保存到 `best_model.pt`。
这份代码需要三个数据文件:`adj.npy`(邻接矩阵)、`features.npy`(特征矩阵)和 `labels.npy`(标签向量)。请根据你自己的数据集修改相应的文件路径和超参数。
阅读全文