首先,基于图像和数值数据作为共同输入,自己创建一个CNN回归模型,其中图像和数值数据在全连接层进行拼接;然后,对CNN模型进行训练和测试;最后,实现Grad-Cam可视化,计算最后一个卷积层中所有特征图对图片类别的权重,以热力图的形式把特征图映射到原始图片中,显示图像中用于预测的重要区域。PyTorch完整代码实现
时间: 2023-12-24 10:05:32 浏览: 155
CNN-123.rar_cnn数据_flowerkem_一维数据 CNN_一维输入 cnn_图像 cnn
5星 · 资源好评率100%
好的,下面是基于图像和数值数据的CNN回归模型及Grad-Cam可视化的PyTorch代码实现:
```python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
# Dataset pairing each image file with its numerical features and target.
class MyDataset(Dataset):
    def __init__(self, image_paths, numerical_data, targets, transform=None):
        """Store the parallel sample lists; `transform` is applied per image."""
        self.image_paths = image_paths
        self.numerical_data = numerical_data
        self.targets = targets
        self.transform = transform

    def __len__(self):
        """Number of samples, given by the target list."""
        return len(self.targets)

    def __getitem__(self, idx):
        """Load sample `idx` as a (image, numerical, target) triple."""
        sample = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            sample = self.transform(sample)
        return sample, self.numerical_data[idx], self.targets[idx]
# CNN regression model: a 4-stage conv feature extractor for the image whose
# flattened output is concatenated with the numerical features in the FC head.
class MyModel(nn.Module):
    def __init__(self, num_numerical=1):
        """
        num_numerical -- number of numerical features concatenated with the
                         flattened image features (default 1, matching the
                         original hard-coded layout).
        """
        super(MyModel, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # A 224x224 input (see transforms.Resize in this script) is halved by
        # four MaxPool2d stages -> 14x14 maps. The original used 128*7*7,
        # which crashes with a shape mismatch on the very first forward pass.
        self.fc = nn.Sequential(
            nn.Linear(128 * 14 * 14 + num_numerical, 512),
            nn.ReLU(),
            nn.Linear(512, 1)
        )

    def forward(self, x, numerical):
        """x: (B, 3, 224, 224) images; numerical: (B, num_numerical).

        Returns a (B, 1) regression output.
        """
        x = self.conv(x)
        x = x.view(x.size(0), -1)             # flatten conv features
        x = torch.cat((x, numerical), dim=1)  # fuse with numerical features
        x = self.fc(x)
        return x
# Grad-CAM: weight the target layer's feature maps by the spatially pooled
# gradient of the model output w.r.t. those maps, yielding a coarse heatmap
# of the image regions that drive the prediction.
class GradCam:
    def __init__(self, model, target_layer):
        """
        model        -- network whose sub-modules are reachable via
                        model.named_modules()
        target_layer -- dotted name of the conv layer to visualise
                        (e.g. 'conv.9' for the last Conv2d in MyModel)
        """
        self.model = model
        self.target_layer = target_layer
        self.feature_maps = None
        self.gradient = None
        # The original compared `target_layer` against named_children()
        # (which only yields 'conv' and 'fc'), so the hooks were never
        # attached and `.grad` was always None. Resolve the sub-module by
        # its dotted name and register real hooks instead.
        layer = dict(model.named_modules())[target_layer]
        layer.register_forward_hook(self.hook_feature_maps)
        layer.register_full_backward_hook(self.hook_gradient)

    def hook_feature_maps(self, module, input, output):
        # Forward hook: capture the target layer's activations.
        self.feature_maps = output

    def hook_gradient(self, module, grad_in, grad_out):
        # Backward hook: capture d(output)/d(feature maps).
        self.gradient = grad_out[0]

    def get_feature_maps(self, x):
        # Compatibility helper: run the image branch so the forward hook
        # fires, then return the captured maps.
        # NOTE(review): assumes `target_layer` lives inside model.conv.
        self.model.eval()
        with torch.no_grad():
            self.model.conv(x)
        return self.feature_maps

    def get_gradient(self):
        # Gradient captured by the backward hook as a (C, H, W) numpy array.
        return self.gradient.cpu().data.numpy()[0]

    def __call__(self, x, numerical, index=None):
        """Return an (H, W) heatmap in [0, 1] for a single input sample."""
        self.model.eval()
        self.model.zero_grad()
        output = self.model(x, numerical)
        if index is None:
            # For this 1-output regression head argmax is always 0.
            index = torch.argmax(output).item()
        output[0, index].backward(retain_graph=True)
        gradient = self.get_gradient()
        pooled_gradient = np.mean(gradient, axis=(1, 2))  # one weight per channel
        feature_maps = self.feature_maps.cpu().data.numpy()[0]
        cam = np.zeros(feature_maps.shape[1:], dtype=np.float32)
        for i, w in enumerate(pooled_gradient):
            cam += w * feature_maps[i, :, :]
        cam = np.maximum(cam, 0)   # ReLU: keep positively contributing regions
        peak = np.max(cam)
        if peak > 0:               # guard against an all-zero map
            cam /= peak
        return cam
# Run one training epoch; returns the mean per-sample loss.
def train(model, train_loader, criterion, optimizer, device, epoch=0):
    """Train `model` for one epoch over `train_loader`.

    epoch -- 0-based epoch number used only for progress logging. The
             original read an undefined global `epoch` (NameError unless the
             caller happened to define one at module scope).
    """
    model.train()
    running_loss = 0.0
    for i, (images, numerical, targets) in enumerate(train_loader):
        images = images.to(device)
        numerical = numerical.to(device)
        # Reshape to (B, 1) so the loss matches the model's output shape;
        # a (B,) target would silently broadcast to (B, B) inside MSELoss.
        targets = targets.to(device).float().view(-1, 1)
        optimizer.zero_grad()
        outputs = model(images, numerical)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        if (i + 1) % 10 == 0:
            print('Training - Epoch: {} | Iter: {} | Loss: {:.4f}'.format(epoch + 1, i + 1, loss.item()))
    epoch_loss = running_loss / len(train_loader.dataset)
    return epoch_loss
# 测试模型
def test(model, test_loader, criterion, device):
model.eval()
running_loss = 0.0
with torch.no_grad():
for i, (images, numerical, targets) in enumerate(test_loader):
images = images.to(device)
numerical = numerical.to(device)
targets = targets.to(device)
outputs = model(images, numerical)
loss = criterion(outputs, targets)
running_loss += loss.item() * images.size(0)
epoch_loss = running_loss / len(test_loader.dataset)
return epoch_loss
# ---- load the raw dataset: image paths, numerical features, targets ----
train_image_paths = [f'train_images/{i}.jpg' for i in range(1, 2001)]
train_numerical_data = np.load('train_numerical_data.npy')
train_targets = np.load('train_targets.npy')

test_image_paths = [f'test_images/{i}.jpg' for i in range(1, 501)]
test_numerical_data = np.load('test_numerical_data.npy')
test_targets = np.load('test_targets.npy')

# Resize to the 224x224 input the model expects and map pixels to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

train_dataset = MyDataset(train_image_paths, train_numerical_data, train_targets, transform)
test_dataset = MyDataset(test_image_paths, test_numerical_data, test_targets, transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
# ---- optimisation setup and the epoch loop ----
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = MyModel().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, criterion, optimizer, device)
    print(f'Training - Epoch: {epoch + 1} | Loss: {train_loss:.4f}')
    test_loss = test(model, test_loader, criterion, device)
    print(f'Testing - Epoch: {epoch + 1} | Loss: {test_loss:.4f}')
# ---- Grad-CAM visualisation on one test image ----
# 'conv.9' is the last Conv2d in MyModel.conv (the convs sit at indices
# 0/3/6/9); the original 'conv.11' pointed at the final MaxPool2d instead.
grad_cam = GradCam(model, 'conv.9')
img_path = 'test_images/1.jpg'
numerical_data = test_numerical_data[0]
img = Image.open(img_path).convert('RGB')
plt.imshow(img)
plt.axis('off')
plt.show()
img_tensor = transform(img).unsqueeze(0).to(device)
# atleast_1d tolerates a scalar row from a 1-D numerical array
numerical_tensor = torch.from_numpy(np.atleast_1d(numerical_data)).unsqueeze(0).float().to(device)
# The regression head has a single output column, so the "class" index is
# always 0 (the original took argmax over a 1-element tensor, same result
# but misleading).
weighted_feature_maps = grad_cam(img_tensor, numerical_tensor, 0)
plt.imshow(img)
# Stretch the low-resolution CAM over the full image via imshow's extent;
# without it the 14x14 map would be drawn in its own tiny pixel coordinates.
plt.imshow(weighted_feature_maps, alpha=0.5, cmap='jet',
           extent=(0, img.width, img.height, 0))
plt.axis('off')
plt.show()
```
需要注意的是,上述代码假设了一个包含2000张训练图片和500张测试图片的数据集,图片路径和数值数据文件需要自行准备,因此无法直接运行代码;但数据处理部分是通用的,可以根据自己的数据集进行修改。同时,Grad-Cam可视化部分的目标层名称指的是本文自定义模型中 `conv` 序列里的子层索引(并非 VGG16 的层):索引 9 才是最后一个卷积层(Conv2d),索引 11 是最后一个 MaxPool2d 池化层,如果使用其他模型需要根据实际结构修改目标层名称。
阅读全文