Writing a Transformer-based image classifier in Python
To classify images with a Transformer, the images must first be converted into sequence data. A common approach is to extract image features with a convolutional neural network (CNN) and then feed those features to a Transformer encoder as a sequence. Below is example code based on PyTorch.
First, import the required libraries and modules:
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
```
Then, define a CNN to extract image features:
```python
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        # Three conv/ReLU/pool stages; each pooling layer halves the spatial size.
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.pool(nn.functional.relu(self.conv3(x)))
        # Flatten to one feature vector per image: (batch, 64 * H/8 * W/8).
        x = x.view(x.size(0), -1)
        return x
```
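As a quick sanity check (a minimal sketch, assuming the 224×224 inputs used in the training code below): three halvings of 224 give 28, so the flattened feature vector has 64 × 28 × 28 = 50176 elements. This is the input_size the Transformer must be given.
```python
cnn = CNN()
features = cnn(torch.randn(1, 3, 224, 224))  # one fake 224x224 RGB image
print(features.shape)  # torch.Size([1, 50176]) = 64 channels * 28 * 28
```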
Next, define a Transformer model to process the sequence data:
```python
class Transformer(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout):
        super(Transformer, self).__init__()
        # The CNN features are continuous values, so project them with a Linear
        # layer; nn.Embedding would fail here because it expects integer indices.
        self.embedding = nn.Linear(input_size, hidden_size)
        # Learned positional embeddings for sequences of up to 1000 positions.
        self.position_embedding = nn.Embedding(1000, hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(hidden_size, num_heads, hidden_size * 4, dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)

    def forward(self, x):
        # x: (batch, seq_len, input_size)
        seq_length = x.size(1)
        positions = torch.arange(seq_length, device=x.device).unsqueeze(0)
        x = self.embedding(x) + self.position_embedding(positions)
        # nn.TransformerEncoder expects (seq_len, batch, hidden) by default.
        x = x.transpose(0, 1)
        x = self.transformer(x)
        x = x.transpose(0, 1)
        # Use the representation at the last position as the sequence summary.
        return x[:, -1, :]
```
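A quick shape check (a sketch, reusing the dummy `features` tensor from the CNN check above and treating it as a length-1 sequence):
```python
seq_model = Transformer(input_size=50176, hidden_size=256, num_layers=4, num_heads=8, dropout=0.1)
out = seq_model(features.unsqueeze(1))  # (1, 1, 50176) -> (1, 256)
print(out.shape)                        # torch.Size([1, 256])
```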
Then, define a classifier that combines the CNN and the Transformer and outputs the class scores:
```python
class Classifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_heads, dropout, output_size):
        super(Classifier, self).__init__()
        self.cnn = CNN()
        self.transformer = Transformer(input_size, hidden_size, num_layers, num_heads, dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.cnn(x)          # (batch, input_size)
        x = x.unsqueeze(1)       # treat the feature vector as a length-1 sequence
        x = self.transformer(x)  # (batch, hidden_size)
        x = self.fc(x)           # (batch, output_size) class scores
        return x
```
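An end-to-end forward pass on random data (a sketch, with shapes matching the training setup below) confirms the pieces fit together:
```python
model = Classifier(input_size=50176, hidden_size=256, num_layers=4,
                   num_heads=8, dropout=0.1, output_size=10)
logits = model(torch.randn(2, 3, 224, 224))  # a fake batch of two 224x224 RGB images
print(logits.shape)  # torch.Size([2, 10]) -> one score per CIFAR-10 class
```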
Finally, train the model:
```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Augment only the training set; the test set gets a deterministic transform.
train_transform = transforms.Compose([transforms.Resize(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
test_transform = transforms.Compose([transforms.Resize(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# input_size must match the flattened CNN output: 64 channels * 28 * 28 = 50176
# for 224x224 inputs (each of the three pooling layers halves the spatial size).
model = Classifier(input_size=50176, hidden_size=256, num_layers=4, num_heads=8,
                   dropout=0.1, output_size=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = StepLR(optimizer, step_size=1, gamma=0.7)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()

    # Evaluate on the test set after each epoch.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('Epoch: %d, Loss: %.3f, Test Accuracy: %.3f%%' %
          (epoch + 1, running_loss / len(train_loader), 100 * correct / total))
```
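After training, you will usually want to persist the weights and classify individual images. A minimal sketch (the checkpoint file name is illustrative):
```python
# Save the trained weights (the file name is just an example).
torch.save(model.state_dict(), 'cnn_transformer_cifar10.pth')

# Classify a single test image; test_dataset already applies test_transform.
model.eval()
with torch.no_grad():
    image, label = test_dataset[0]
    logits = model(image.unsqueeze(0).to(device))  # add a batch dimension
    pred = logits.argmax(dim=1).item()
    print('Predicted: %d, Actual: %d' % (pred, label))
```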