Please help me implement the full image semantic segmentation pipeline with an FCN model. My images and mask images are stored in two separate folders, and I'd like to cover dataset splitting, model training, prediction with the trained model, and testing on new data. Please implement the code in PyTorch, thanks!
Sure. Below is the complete PyTorch code for image semantic segmentation with an FCN model, covering dataset handling, model training, evaluation, and prediction on new data. Note that this is a PyTorch-based implementation, so PyTorch and the related libraries must be installed.
First, we import the required libraries and modules:
```python
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
```
Next, we define some constants and hyperparameters:
```python
# Constants
IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp']
CLASSES = ['background', 'person', 'car', 'road']
# Hyperparameters
BATCH_SIZE = 4
NUM_EPOCHS = 50
LEARNING_RATE = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
```
Then we define a dataset class for loading and preprocessing the data:
```python
class ImageSegmentationDataset(Dataset):
def __init__(self, img_folder, mask_folder, classes, transform=None):
self.img_folder = img_folder
self.mask_folder = mask_folder
self.classes = classes
self.transform = transform
        # Sort both lists so that images and masks pair up by filename order
        self.img_filenames = sorted(os.path.join(self.img_folder, f) for f in os.listdir(self.img_folder)
                                    if any(f.endswith(ext) for ext in IMG_EXTENSIONS))
        self.mask_filenames = sorted(os.path.join(self.mask_folder, f) for f in os.listdir(self.mask_folder)
                                     if any(f.endswith(ext) for ext in IMG_EXTENSIONS))
def __len__(self):
return len(self.img_filenames)
    def __getitem__(self, idx):
        img_filename = self.img_filenames[idx]
        mask_filename = self.mask_filenames[idx]
        with Image.open(img_filename) as img:
            # Resize image and mask to a fixed size so batches collate and the
            # mask matches the model output resolution (masks use NEAREST to
            # avoid interpolating class colors)
            img = img.convert('RGB').resize((512, 512), Image.BILINEAR)
        with Image.open(mask_filename) as mask:
            mask = mask.convert('RGB').resize((512, 512), Image.NEAREST)
            mask = np.array(mask)
        # CrossEntropyLoss expects class-index targets of dtype long
        mask = torch.from_numpy(self.encode_mask(mask)).long()
        if self.transform:
            img = self.transform(img)
        return img, mask
def encode_mask(self, mask):
encoded_mask = np.zeros((mask.shape[0], mask.shape[1]), dtype=np.uint8)
for i, class_name in enumerate(self.classes):
encoded_mask[np.where(np.all(mask == COLOR_MAP[class_name], axis=-1))] = i
return encoded_mask
```
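Before wiring the dataset into training, it is worth checking one sample by hand. A minimal sanity check, assuming the `train_images`/`train_masks` folders used later already exist and `COLOR_MAP` (defined at the end of this answer) is in scope:
```python
# Inspect one sample to confirm pairing, shapes, and dtypes
dataset = ImageSegmentationDataset('train_images', 'train_masks', CLASSES,
                                   transform=transforms.ToTensor())
img, mask = dataset[0]
print(img.shape)               # expected: torch.Size([3, 512, 512])
print(mask.shape, mask.dtype)  # expected: torch.Size([512, 512]) torch.int64
print(mask.unique())           # class indices present in this mask
```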
Next, we define the FCN model (the FCN-8s variant, which fuses features from pool3 and pool4):
```python
class FCN(nn.Module):
def __init__(self, num_classes):
super(FCN, self).__init__()
self.conv_block1 = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(64, 64, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.conv_block2 = nn.Sequential(
nn.Conv2d(64, 128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(128, 128, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.conv_block3 = nn.Sequential(
nn.Conv2d(128, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.conv_block4 = nn.Sequential(
nn.Conv2d(256, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.conv_block5 = nn.Sequential(
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(512, 512, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2, stride=2)
)
self.fc_block6 = nn.Sequential(
nn.Conv2d(512, 4096, kernel_size=1),
nn.ReLU(inplace=True),
nn.Dropout()
)
self.fc_block7 = nn.Sequential(
nn.Conv2d(4096, 4096, kernel_size=1),
nn.ReLU(inplace=True),
nn.Dropout()
)
self.score_fr = nn.Conv2d(4096, num_classes, kernel_size=1)
self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)
        self.upscore2 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
        self.upscore_pool4 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=4, stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(num_classes, num_classes, kernel_size=16, stride=8, bias=False)
def forward(self, x):
conv1 = self.conv_block1(x)
conv2 = self.conv_block2(conv1)
conv3 = self.conv_block3(conv2)
conv4 = self.conv_block4(conv3)
conv5 = self.conv_block5(conv4)
fc6 = self.fc_block6(conv5)
fc7 = self.fc_block7(fc6)
        score_fr = self.score_fr(fc7)
        # Upsample 2x and crop to the pool4 feature size before fusing
        upscore2 = self.upscore2(score_fr)
        score_pool4 = self.score_pool4(conv4)
        upscore2 = upscore2[:, :, 1:1 + score_pool4.size(2), 1:1 + score_pool4.size(3)]
        fuse_pool4 = upscore2 + score_pool4
        # Upsample 2x again and crop to the pool3 feature size before fusing
        upscore_pool4 = self.upscore_pool4(fuse_pool4)
        score_pool3 = self.score_pool3(conv3)
        upscore_pool4 = upscore_pool4[:, :, 1:1 + score_pool3.size(2), 1:1 + score_pool3.size(3)]
        fuse_pool3 = upscore_pool4 + score_pool3
        # Final 8x upsampling, cropped back to the input resolution
        upscore8 = self.upscore8(fuse_pool3)
        return upscore8[:, :, 4:4 + x.size(2), 4:4 + x.size(3)]
```
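A quick shape check with a dummy batch confirms that the fused output carries per-pixel class scores at the input resolution. This is just a verification sketch, not part of the pipeline:
```python
# Verify that the output spatial size matches the input
model = FCN(num_classes=len(CLASSES))
dummy = torch.randn(1, 3, 512, 512)
print(model(dummy).shape)  # expected: torch.Size([1, 4, 512, 512])
```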
Then we define some helper functions for training, evaluation, and prediction:
```python
def train(model, train_loader, criterion, optimizer, epoch):
model.train()
running_loss = 0.0
for batch_idx, (inputs, targets) in enumerate(train_loader):
inputs, targets = inputs.to(device), targets.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
running_loss += loss.item()
if batch_idx % 10 == 9:
print('[Epoch %d, Batch %5d] loss: %.3f' %
(epoch + 1, batch_idx + 1, running_loss / 10))
running_loss = 0.0
def test(model, test_loader, criterion):
model.eval()
test_loss = 0.0
with torch.no_grad():
for inputs, targets in test_loader:
inputs, targets = inputs.to(device), targets.to(device)
outputs = model(inputs)
test_loss += criterion(outputs, targets).item()
test_loss /= len(test_loader)
print('Test Loss: %.3f' % test_loss)
def predict(model, img_filename):
with Image.open(img_filename) as img:
img = img.convert('RGB')
transform = transforms.Compose([
transforms.Resize((512, 512)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
inputs = transform(img).unsqueeze(0).to(device)
model.eval()
    with torch.no_grad():
        outputs = model(inputs)
        # The model already returns logits at the input resolution,
        # so no extra upsampling is needed before taking the argmax
        probs = nn.functional.softmax(outputs, dim=1)
        _, preds = torch.max(probs, 1)
    return preds.squeeze(0).cpu().numpy()
def show_results(img_filename, pred_mask):
with Image.open(img_filename) as img:
img = img.convert('RGB')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
ax1.imshow(img)
ax1.axis('off')
ax1.set_title('Input Image')
    # Reuse the global COLOR_MAP so prediction colors match the mask encoding
    mask = np.zeros((pred_mask.shape[0], pred_mask.shape[1], 3), dtype=np.uint8)
    for i, class_name in enumerate(CLASSES):
        mask[pred_mask == i] = COLOR_MAP[class_name]
ax2.imshow(mask)
ax2.axis('off')
ax2.set_title('Predicted Mask')
plt.show()
```
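The question also asks about reusing the trained model for prediction later. A minimal sketch for saving and restoring the weights; the filename `fcn_model.pth` is just an example:
```python
# Save the trained weights (filename is an arbitrary example)
torch.save(model.state_dict(), 'fcn_model.pth')

# Later, rebuild the model and load the weights for inference
model = FCN(len(CLASSES)).to(device)
model.load_state_dict(torch.load('fcn_model.pth', map_location=device))
model.eval()
```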
Finally, we define the main entry point, which trains the model, evaluates it, and runs prediction on a new image:
```python
if __name__ == '__main__':
    # Load the pre-split data (see the folder-split sketch after this block)
    # Spatial augmentations such as random crops and flips are omitted here:
    # torchvision transforms applied to the image alone would no longer match
    # the mask. Resizing is handled inside the dataset class, so only tensor
    # conversion and normalization remain.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
train_dataset = ImageSegmentationDataset('train_images', 'train_masks', CLASSES, transform=transform)
    test_dataset = ImageSegmentationDataset('test_images', 'test_masks', CLASSES, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
    # Model training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FCN(len(CLASSES)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
for epoch in range(NUM_EPOCHS):
train(model, train_loader, criterion, optimizer, epoch)
test(model, test_loader, criterion)
    # Predict on new data
img_filename = 'new_image.jpg'
pred_mask = predict(model, img_filename)
show_results(img_filename, pred_mask)
```
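The main block above assumes the data has already been split into `train_images`/`train_masks` and `test_images`/`test_masks`. If all your data sits in one pair of folders, a minimal split sketch follows; it assumes each mask shares its image's filename, and the source folder names `images` and `masks` are placeholders:
```python
import os
import random
import shutil

def split_dataset(img_dir, mask_dir, test_ratio=0.2, seed=42):
    """Copy a random test_ratio fraction of image/mask pairs into test folders."""
    # IMG_EXTENSIONS is the constant defined at the top of the script
    filenames = sorted(f for f in os.listdir(img_dir)
                       if any(f.endswith(ext) for ext in IMG_EXTENSIONS))
    random.Random(seed).shuffle(filenames)
    n_test = int(len(filenames) * test_ratio)
    splits = {'test': filenames[:n_test], 'train': filenames[n_test:]}
    for split, names in splits.items():
        os.makedirs(f'{split}_images', exist_ok=True)
        os.makedirs(f'{split}_masks', exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(img_dir, name), f'{split}_images')
            shutil.copy(os.path.join(mask_dir, name), f'{split}_masks')

split_dataset('images', 'masks')
```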
Note that we also need a color map that converts the RGB-encoded mask images into integer class labels. It is used by `encode_mask` above, so in a single script it should sit at module level before the dataset is first read (for example, next to `CLASSES` at the top):
```python
COLOR_MAP = {
'background': (0, 0, 0),
'person': (255, 0, 0),
'car': (0, 255, 0),
'road': (0, 0, 255)
}
```
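If you are not sure which RGB values your masks actually contain, you can list them directly; `train_masks/example.png` below is a placeholder path:
```python
# List the distinct RGB values appearing in one mask file
with Image.open('train_masks/example.png') as m:
    colors = np.unique(np.array(m.convert('RGB')).reshape(-1, 3), axis=0)
print(colors)  # each row should correspond to an entry in COLOR_MAP
```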
With that, the full FCN-based image semantic segmentation pipeline is complete.