在cora数据集上的代码
时间: 2023-12-08 09:03:53 浏览: 107
gcn练习代码-Cora数据集
以下是在Cora数据集上使用GAT进行节点分类的完整代码示例:
``` python
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, optimizers, losses
from sklearn.metrics import accuracy_score
from scipy.sparse import coo_matrix
# Load the Cora dataset
def load_data(path):
    """Read ``<path>cora.content`` and ``<path>cora.cites`` into arrays.

    Each row of the ``.content`` file is read as
    ``<node id> <feature values ...> <label>``; each row of the ``.cites``
    file is read as a pair of node ids.

    Args:
        path: Prefix prepended verbatim to the file names (for example
            ``'cora/'``), so it must already end with a path separator.

    Returns:
        A ``(features, labels, adj)`` tuple:

        * ``features`` -- ``float32`` array with one row per node.
        * ``labels`` -- ``int32`` array with one class index per node.
          Integer labels are kept as they are; any other labels (such as
          class names) are mapped to ``0..k-1`` in sorted order of the
          distinct label strings.
        * ``adj`` -- ``scipy.sparse.coo_matrix`` of ``float32`` with a 1 at
          ``(row, col)`` for every ``.cites`` pair, where ``row``/``col``
          are the positions of the first/second id in the ``.content``
          file. The matrix is not symmetrised and has no self-loops added.
    """
    # atleast_2d keeps the column slicing below valid for a one-row file,
    # for which genfromtxt returns a 1-D array.
    idx_features_labels = np.atleast_2d(
        np.genfromtxt("{}{}.content".format(path, "cora"), dtype=np.dtype(str))
    )
    features = np.array(idx_features_labels[:, 1:-1], dtype=np.float32)

    raw_labels = idx_features_labels[:, -1]
    try:
        labels = np.array(raw_labels, dtype=np.int32)
    except ValueError:
        # Labels are not integers (e.g. class names): encode them instead
        # of failing on the integer conversion.
        _, labels = np.unique(raw_labels, return_inverse=True)
        labels = labels.astype(np.int32)

    # Map the arbitrary node ids in the files to contiguous row indices.
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    idx_map = {j: i for i, j in enumerate(idx)}

    # reshape(-1, 2) also covers a file with a single edge (1-D result).
    edges_unordered = np.genfromtxt(
        "{}{}.cites".format(path, "cora"), dtype=np.int32
    ).reshape(-1, 2)
    edges = np.array(
        list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32
    ).reshape(edges_unordered.shape)

    num_nodes = labels.shape[0]
    adj = coo_matrix(
        (np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
        shape=(num_nodes, num_nodes),
        dtype=np.float32,
    )
    return features, labels, adj
# Define the GAT model
class GAT(layers.Layer):
    """Multi-head attention layer that returns one output row per input node.

    For every head, the input is projected with a ``Dense(units)`` layer,
    a pairwise score ``leaky_relu(s_i + t_j)`` is computed for every pair of
    nodes ``(i, j)``, the scores are softmax-normalised over ``j`` and used
    to take a weighted sum of the projected features. Head outputs are
    summed and passed through ``activation``.

    NOTE(review): the layer receives node features only, so every node
    attends over *all* nodes in the input; no adjacency mask is applied.

    Args:
        units: Output feature size of every head (and of the layer).
        num_heads: Number of attention heads.
        activation: Name or callable accepted by
            ``tf.keras.activations.get``; ``None`` applies no activation.
    """

    def __init__(self, units, num_heads, activation='relu'):
        super().__init__()
        self.units = units
        self.num_heads = num_heads
        self.activation = activation
        # Resolve the requested activation once so that it is actually the
        # one applied in call(); None resolves to the identity function.
        self._activation_fn = tf.keras.activations.get(activation)
        # One feature projection per head.
        self.W = [layers.Dense(units) for _ in range(num_heads)]
        # Per head, two scalars per node: column 0 is the node's score as
        # the attending node (s_i), column 1 its score as the attended
        # node (t_j).
        self.attention = [layers.Dense(2) for _ in range(num_heads)]
        self.dropout = layers.Dropout(0.5)
        self.add = layers.Add()

    def call(self, inputs, training=True):
        """Apply the layer.

        Args:
            inputs: Tensor of shape ``(..., num_nodes, input_dim)``.
            training: Whether dropout on the attention weights is active.

        Returns:
            Tensor of shape ``(..., num_nodes, units)``.
        """
        h = inputs
        outputs = []
        for project, score in zip(self.W, self.attention):
            Wh = project(h)                                    # (..., N, units)
            scores = score(Wh)                                 # (..., N, 2)
            src = scores[..., :1]                              # (..., N, 1)
            dst = tf.linalg.matrix_transpose(scores[..., 1:])  # (..., 1, N)
            # Broadcasting the column against the row yields one score per
            # ordered node pair.
            e = tf.nn.leaky_relu(src + dst)                    # (..., N, N)
            # Normalise over the attended nodes (last axis).
            alpha = tf.nn.softmax(e, axis=-1)
            alpha = self.dropout(alpha, training=training)
            outputs.append(tf.matmul(alpha, Wh))               # (..., N, units)

        # layers.Add needs at least two inputs, hence the single-head case.
        if self.num_heads > 1:
            h_prime = self.add(outputs)
        else:
            h_prime = outputs[0]
        return self._activation_fn(h_prime)
# Define the training function
def train_model(features, labels, adj, hidden_units, num_heads, learning_rate, epochs, batch_size):
    """Build a stack of GAT layers with a softmax head and train it.

    The model takes a 2-D ``(nodes, input_dim)`` array and is expected to
    produce one row of class probabilities per input node; that is how it
    is called in the training loop, in the periodic evaluation below and by
    callers of the returned model.

    Args:
        features: ``(num_nodes, input_dim)`` array of node features.
        labels: ``(num_nodes,)`` array of integer class indices.
        adj: Adjacency matrix of the nodes. Only ``adj.shape[0]`` (the node
            count) is used; see the note in the body.
        hidden_units: Iterable with the output size of each GAT layer.
        num_heads: Number of attention heads per GAT layer.
        learning_rate: Learning rate for the Adam optimizer.
        epochs: Number of passes over the nodes.
        batch_size: Number of nodes per gradient step.

    Returns:
        The trained ``tf.keras.Model``.
    """
    num_nodes = adj.shape[0]
    input_dim = features.shape[1]
    # Plain Python int for the Dense layer size (np.max returns a NumPy scalar).
    num_classes = int(np.max(labels)) + 1

    # Build the model. The leading (batch) axis is the node axis, so the
    # per-sample shape is just the feature vector.
    inputs = layers.Input(shape=(input_dim,))
    x = inputs
    for units in hidden_units:
        x = GAT(units, num_heads)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # Loss and optimizer. The head already applies softmax, so the loss
    # keeps its default of expecting probabilities.
    loss_fn = losses.SparseCategoricalCrossentropy()
    optimizer = optimizers.Adam(learning_rate)

    # NOTE(review): the model built above consumes node features only, so
    # the adjacency matrix is not shuffled or sliced per batch. Doing so
    # was dead work, and it fails outright for a scipy COO matrix, which
    # does not support indexing.
    for epoch in range(epochs):
        # Shuffle the nodes; features and labels stay aligned.
        permutation = np.random.permutation(num_nodes)
        features = features[permutation]
        labels = labels[permutation]

        for start in range(0, num_nodes, batch_size):
            stop = min(start + batch_size, num_nodes)
            batch_features = features[start:stop]
            batch_labels = labels[start:stop]

            with tf.GradientTape() as tape:
                logits = model(batch_features, training=True)
                loss = loss_fn(batch_labels, logits) + sum(model.losses)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Every 10th epoch, report accuracy on the data passed to this
        # function. The message calls it "val_acc", but no separate
        # validation split is available here.
        if (epoch + 1) % 10 == 0:
            logits = model(features, training=False)
            val_acc = accuracy_score(labels, np.argmax(logits, axis=1))
            print("Epoch {}, val_acc: {:.4f}".format(epoch + 1, val_acc))

    return model
# Load the data
features, labels, adj = load_data('cora/')
num_nodes = adj.shape[0]
num_features = features.shape[1]
num_classes = np.max(labels) + 1

# Split into training, validation and test sets by row position.
idx_train = np.arange(140)
idx_val = np.arange(200, 500)
idx_test = np.arange(500, 1500)

# load_data returns a COO matrix, which cannot be indexed; convert once to
# CSR so the per-split sub-matrices can be extracted.
adj_csr = adj.tocsr()

train_features = features[idx_train]
train_labels = labels[idx_train]
train_adj = adj_csr[idx_train][:, idx_train]
val_features = features[idx_val]
val_labels = labels[idx_val]
val_adj = adj_csr[idx_val][:, idx_val]
test_features = features[idx_test]
test_labels = labels[idx_test]
test_adj = adj_csr[idx_test][:, idx_test]

# Train the model on the training split only.
model = train_model(train_features, train_labels, train_adj, [8], 8, 0.01, 200, 16)

# Evaluate the model on the test split.
logits = model(test_features, training=False)
test_acc = accuracy_score(test_labels, np.argmax(logits, axis=1))
print("Test accuracy: {:.4f}".format(test_acc))
```
该代码首先使用`load_data`函数加载Cora数据集,然后定义了一个GAT模型,并使用`train_model`函数对模型进行训练。训练过程中,每10个epoch会计算一次准确率并输出到控制台(注意:该准确率是在传入`train_model`的数据即训练集上计算的,而不是在独立的验证集上)。训练完成后,使用模型在测试集上进行预测,并计算预测准确率。
阅读全文