Deep-Learning-Based Object Detection: Code Example
Below is an example of deep-learning-based object detection code using the TensorFlow and Keras frameworks. It sketches a Faster R-CNN model intended for training and testing on the COCO dataset, including data augmentation, anchor generation, and training-target construction. Treat it as an illustrative skeleton rather than a complete implementation: the dataset wrapper, the loss functions, and the detection head are not shown.
```python
import os

import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, Concatenate, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from pycocotools.coco import COCO  # kept for the COCO dataset wrapper (not shown)
from pycocotools import mask as maskUtils

# Quiet TensorFlow's C++ logging and fix the random seeds for reproducibility.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
np.random.seed(42)
tf.random.set_seed(42)
class Config:
    """Hyper-parameters for the Faster R-CNN pipeline."""
    NAME = "faster_rcnn"
    BACKBONE = "resnet50"
    # Feature-map strides of the pyramid levels (assumed values, matching the
    # standard ResNet/FPN layout); used when generating pyramid anchors below.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1333
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]
    RPN_ANCHOR_STRIDE = 16
    RPN_NMS_THRESHOLD = 0.7
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256
    RPN_POSITIVE_RATIO = 0.5
    # Std-dev used to normalize bbox refinement targets (the value used by
    # most Faster/Mask R-CNN implementations); referenced further down.
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.3
    DETECTION_MAX_INSTANCES = 100
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    EPOCHS = 50
    BATCH_SIZE = 1
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 50
    IMAGES_PER_GPU = 1
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
    NUM_CLASSES = 81  # COCO: 80 classes + background
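
# Usage sketch (hypothetical subclass): override hyper-parameters for a quick
# debugging run without touching the base Config.
class DebugConfig(Config):
    NAME = "faster_rcnn_debug"
    STEPS_PER_EPOCH = 10
    EPOCHS = 1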
class DataGenerator(keras.utils.Sequence):
    """Yields batches of images and ground-truth training targets."""
    def __init__(self, dataset, config, shuffle=True, augment=True):
        self.dataset = dataset
        self.config = config
        self.shuffle = shuffle
        self.augment = augment
        self.image_ids = np.copy(self.dataset.image_ids)
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.dataset.image_ids) / self.config.BATCH_SIZE))

    def __getitem__(self, idx):
        batch_image_ids = self.image_ids[idx * self.config.BATCH_SIZE:(idx + 1) * self.config.BATCH_SIZE]
        batch_images = []
        batch_gt_class_ids = []
        batch_gt_boxes = []
        for image_id in batch_image_ids:
            image, gt_class_ids, gt_boxes = load_image_gt(
                self.dataset, self.config, image_id, augment=self.augment)
            batch_images.append(image)
            batch_gt_class_ids.append(gt_class_ids)
            batch_gt_boxes.append(gt_boxes)
        # BATCH_SIZE is 1 here; with larger batches the per-image GT arrays
        # would need padding to a common length before stacking.
        batch_images = np.array(batch_images)
        batch_gt_class_ids = np.array(batch_gt_class_ids)
        batch_gt_boxes = np.array(batch_gt_boxes)
        rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes = build_rpn_targets(
            batch_images.shape, self.config, batch_gt_class_ids, batch_gt_boxes)
        inputs = [batch_images, batch_gt_class_ids, batch_gt_boxes,
                  rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes]
        outputs = []
        return inputs, outputs

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.image_ids)
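
# Usage sketch (assumes a `CocoDataset` wrapper around pycocotools exposing
# image_ids, load_image, and load_mask -- not defined in this snippet):
# train_generator = DataGenerator(train_dataset, Config(), shuffle=True, augment=True)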
def load_image_gt(dataset, config, image_id, augment=True):
    """Load one image plus its class IDs and boxes, resized and mean-subtracted."""
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    # maskUtils.toBbox expects RLE-encoded masks and returns [N, 4] boxes in
    # (x, y, w, h) format; binary masks would need maskUtils.encode first.
    bbox = maskUtils.toBbox(mask)
    gt_boxes = np.concatenate([bbox, class_ids[:, None]], axis=1)
    if augment:
        image, gt_boxes = augment_image(image, gt_boxes)
    # Convert (x, y, w, h) to the corner format (x1, y1, x2, y2) used downstream.
    gt_boxes[:, 2] += gt_boxes[:, 0]
    gt_boxes[:, 3] += gt_boxes[:, 1]
    image, window, scale, padding = resize_image(
        image, min_dim=config.IMAGE_MIN_DIM, max_dim=config.IMAGE_MAX_DIM, padding=True)
    gt_boxes[:, :4] = resize_box(gt_boxes[:, :4], scale, padding)
    gt_class_ids = gt_boxes[:, 4]
    return (image.astype(np.float32) - config.MEAN_PIXEL,
            gt_class_ids.astype(np.int32),
            gt_boxes[:, :4].astype(np.float32))
def augment_image(image, gt_boxes):
    # Random horizontal flip; boxes are still (x, y, w, h) at this point.
    if np.random.rand() < 0.5:
        image = np.fliplr(image)
        gt_boxes[:, 0] = image.shape[1] - gt_boxes[:, 0] - gt_boxes[:, 2]
    return image, gt_boxes
def resize_image(image, min_dim=None, max_dim=None, padding=False):
    """Resize so the short side reaches min_dim, capped so the long side fits max_dim."""
    rows, cols = image.shape[0], image.shape[1]
    scale = 1.0
    if min_dim:
        scale = max(1, min_dim / min(rows, cols))
    if max_dim:
        scale = min(scale, max_dim / max(rows, cols))
    image = cv2.resize(image, (int(round(cols * scale)), int(round(rows * scale))))
    if padding:
        # Zero-pad to a square max_dim x max_dim canvas, top-left aligned,
        # so the padding offsets are all zero.
        padded_image = np.zeros((max_dim, max_dim, 3), dtype=np.float32)
        padded_image[:image.shape[0], :image.shape[1], :] = image
        window = (0, 0, image.shape[1], image.shape[0])
        return padded_image, window, scale, (0, 0, 0, 0)
    return image, None, scale, None
def resize_box(boxes, scale, padding):
    """Scale corner-format boxes, then shift by the (top, left) padding offset."""
    boxes[:, :4] *= scale
    if padding is not None:
        boxes[:, [0, 2]] += padding[1]  # x offset
        boxes[:, [1, 3]] += padding[0]  # y offset
    return boxes
def overlaps(boxes1, boxes2, eps=1e-8):
    """Element-wise IoU between corner-format (x1, y1, x2, y2) boxes."""
    i_x1 = np.maximum(boxes1[:, 0], boxes2[:, 0])
    i_y1 = np.maximum(boxes1[:, 1], boxes2[:, 1])
    i_x2 = np.minimum(boxes1[:, 2], boxes2[:, 2])
    i_y2 = np.minimum(boxes1[:, 3], boxes2[:, 3])
    i_area = np.maximum(i_x2 - i_x1 + 1, 0) * np.maximum(i_y2 - i_y1 + 1, 0)
    a_area = (boxes1[:, 2] - boxes1[:, 0] + 1) * (boxes1[:, 3] - boxes1[:, 1] + 1)
    b_area = (boxes2[:, 2] - boxes2[:, 0] + 1) * (boxes2[:, 3] - boxes2[:, 1] + 1)
    u_area = a_area + b_area - i_area
    return i_area / (u_area + eps)

def compute_iou(box, boxes, eps=1e-8):
    """IoU of a single box against an array of boxes (via broadcasting)."""
    return overlaps(box[np.newaxis], boxes, eps=eps)
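
# Sanity check (illustrative): identical boxes have IoU ~1.0.
# compute_iou(np.array([0, 0, 10, 10]), np.array([[0, 0, 10, 10]]))  # -> ~1.0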
def compute_backbone_shapes(config, image_shape):
    """Return the [height, width] of each pyramid level's feature map.

    Accepts either an image shape (H, W, C) or a batch shape (B, H, W, C).
    """
    if callable(config.BACKBONE):
        return config.BACKBONE(image_shape)
    assert isinstance(config.BACKBONE, str)
    height = image_shape[-3]  # works for both (H, W, C) and (B, H, W, C)
    if config.BACKBONE in ["resnet50", "resnet101"]:
        # Hard-coded level shapes for the two image-size regimes used here.
        if height >= 800:
            return np.array([[200, 256], [100, 128], [50, 64], [25, 32], [13, 16]])
        else:
            return np.array([[100, 128], [50, 64], [25, 32], [13, 16], [7, 8]])
    raise ValueError("Invalid backbone name")
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
    scales, ratios = scales.flatten(), ratios.flatten()
    heights = scales / np.sqrt(ratios)
    widths = scales * np.sqrt(ratios)
    shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
    shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
    shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
    box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
    box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
    box_centers = np.stack([box_centers_y, box_centers_x], axis=2)
    box_sizes = np.stack([box_heights, box_widths], axis=2)
    box_centers = np.reshape(box_centers, [-1, 2])
    box_sizes = np.reshape(box_sizes, [-1, 2])
    boxes = np.concatenate([box_centers - 0.5 * box_sizes,
                            box_centers + 0.5 * box_sizes], axis=1)
    boxes = np.round(boxes)
    return boxes
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride):
    anchors = []
    for i in range(len(scales)):
        anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
                                        feature_strides[i], anchor_stride))
    return np.concatenate(anchors, axis=0)
def norm_boxes(boxes, shape):
    boxes = boxes.astype(np.float32)
    h, w = shape[:2]
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    boxes = np.divide(boxes - shift, scale)
    return np.maximum(np.minimum(boxes, 1), 0)

def denorm_boxes(boxes, shape):
    h, w = shape[:2]
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return (boxes * scale + shift).astype(np.int32)
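
# Round-trip check (illustrative): pixel -> normalized -> pixel recovers the
# original box up to rounding.
# norm = norm_boxes(np.array([[10, 10, 50, 50]]), (100, 100))
# denorm_boxes(norm, (100, 100))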
def overlaps_graph(boxes1, boxes2):
    """Pairwise IoU matrix [len(boxes1), len(boxes2)], computed with TF ops."""
    # Repeat boxes1 and tile boxes2 so every pair lines up row by row.
    b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), [1, 1, tf.shape(boxes2)[0]]), [-1, 4])
    b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1])
    y1 = tf.maximum(b1[:, 0], b2[:, 0])
    x1 = tf.maximum(b1[:, 1], b2[:, 1])
    y2 = tf.minimum(b1[:, 2], b2[:, 2])
    x2 = tf.minimum(b1[:, 3], b2[:, 3])
    intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0)
    area1 = (b1[:, 2] - b1[:, 0]) * (b1[:, 3] - b1[:, 1])
    area2 = (b2[:, 2] - b2[:, 0]) * (b2[:, 3] - b2[:, 1])
    iou = intersection / (area1 + area2 - intersection)
    return tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]])
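
def box_refinement_graph(box, gt_box):
    # Local stand-in for the keras-rcnn bbox_transform helper the original
    # snippet referenced: the standard (dy, dx, log dh, log dw) box encoding
    # for corner-format boxes.
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)
    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width
    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width
    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.math.log(gt_height / height)
    dw = tf.math.log(gt_width / width)
    return tf.stack([dy, dx, dh, dw], axis=1)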
def detection_target_graph(proposals, gt_class_ids, gt_boxes, config):
    """Subsample proposals and build classifier / bbox-refinement targets."""
    proposals = tf.cast(proposals, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)
    gt_class_ids = tf.cast(gt_class_ids, tf.int64)
    # Compute overlaps matrix [proposals, gt_boxes].
    overlaps = overlaps_graph(proposals, gt_boxes)
    # Proposals whose best IoU clears the threshold are positives. (This
    # snippet reuses RPN_POSITIVE_RATIO, 0.5, as the IoU threshold; a
    # dedicated constant would be cleaner.)
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    positive_roi_bool = (roi_iou_max >= config.RPN_POSITIVE_RATIO)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # Subsample ROIs, keeping the configured positive fraction.
    positive_count = int(config.RPN_TRAIN_ANCHORS_PER_IMAGE * config.RPN_POSITIVE_RATIO)
    positive_indices = tf.random.shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs: add enough to maintain the positive:negative ratio.
    r = 1.0 / config.RPN_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
    negative_indices = tf.where(roi_iou_max < config.RPN_POSITIVE_RATIO)[:, 0]
    negative_count = tf.math.minimum(tf.shape(negative_indices)[0], negative_count)
    negative_indices = tf.random.shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs.
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)
    # Assign each positive ROI to the GT box it overlaps most.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)
    # Bbox refinement targets for positive ROIs, normalized by BBOX_STD_DEV;
    # box_refinement_graph (defined above) stands in for the keras-rcnn
    # bbox_transform helper the original snippet called here.
    deltas = box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= tf.constant(config.BBOX_STD_DEV, dtype=tf.float32)
    # Append negative ROIs and zero-pad the targets that don't apply to them.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.math.maximum(config.RPN_TRAIN_ANCHORS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
    return rois, roi_gt_class_ids, deltas
def build_rpn_targets(image_shape, config, gt_class_ids, gt_boxes):
    """Build RPN matches/deltas and sampled ROI targets for one image batch."""
    feature_shapes = compute_backbone_shapes(config, image_shape)
    anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS,
                                       feature_shapes, config.BACKBONE_STRIDES,
                                       config.RPN_ANCHOR_STRIDE)
    # BATCH_SIZE is 1 in this configuration, so unpack the single image's GT.
    gt_class_ids = gt_class_ids[0]
    gt_boxes = gt_boxes[0]
    # The original snippet called keras_rcnn.backend.anchor.get_best_anchor
    # here; match_anchors_to_gt (a minimal stand-in, sketched after this
    # function) plays the same role.
    rpn_match, rpn_bbox = match_anchors_to_gt(anchors, gt_boxes, config)
    rpn_match = tf.expand_dims(rpn_match, axis=-1)
    rpn_bbox = tf.reshape(rpn_bbox, [-1, 4])
    # Close over `config` instead of passing it through tf.py_function,
    # which only accepts tensors.
    rois, roi_gt_class_ids, deltas = tf.py_function(
        lambda p, c, b: detection_target_graph(p, c, b, config),
        [anchors, gt_class_ids, gt_boxes],
        [tf.float32, tf.int64, tf.float32])
    rois.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4])
    roi_gt_class_ids.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE])
    # Each ROI gets one 4-vector of refinement deltas.
    deltas.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4])
    rpn_match.set_shape([None, 1])
    rpn_bbox.set_shape([None, 4])
    # Targets are constants w.r.t. the network; block gradients through them.
    rois = tf.stop_gradient(rois)
    roi_gt_class_ids = tf.stop_gradient(roi_gt_class_ids)
    deltas = tf.stop_gradient(deltas)
    rpn_match = tf.stop_gradient(rpn_match)
    rpn_bbox = tf.stop_gradient(rpn_bbox)
    return rpn_match, rpn_bbox, rois, roi_gt_class_ids, deltas
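
def match_anchors_to_gt(anchors, gt_boxes, config):
    # Hedged stand-in for keras_rcnn.backend.anchor.get_best_anchor (assumed
    # behavior): label each anchor +1 (IoU >= 0.7), -1 (IoU < 0.3), else 0
    # (neutral), and compute refinement deltas toward its best GT box.
    # Expects a single image's boxes with shape [N, 4]; simplified in that it
    # skips the "best anchor per GT box" guarantee real implementations add.
    anchors = tf.cast(anchors, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)
    iou = overlaps_graph(anchors, gt_boxes)
    best_iou = tf.reduce_max(iou, axis=1)
    best_gt = tf.argmax(iou, axis=1)
    rpn_match = tf.where(best_iou >= 0.7, 1, tf.where(best_iou < 0.3, -1, 0))
    rpn_bbox = box_refinement_graph(anchors, tf.gather(gt_boxes, best_gt))
    return rpn_match, rpn_bbox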
def build_rpn_model(config):
    """Region Proposal Network on ResNet50 feature maps (C3, C4, C5)."""
    input_image = Input(shape=[None, None, 3], name="input_image")
    shared_layers = ResNet50(include_top=False, weights='imagenet', input_tensor=input_image)
    # Output layers of Keras' ResNet50 at strides 8, 16, and 32.
    layer_names = ["conv3_block4_out", "conv4_block6_out", "conv5_block3_out"]
    output_layers = [shared_layers.get_layer(name).output for name in layer_names]
    # One anchor per ratio at each location (scales are handled per level).
    anchors_per_location = len(config.RPN_ANCHOR_RATIOS)
    rpn_class_outputs = []
    rpn_bbox_outputs = []
    for n, layer in enumerate(output_layers):
        rpn = Conv2D(512, (3, 3), padding="same", activation="relu",
                     name="rpn_conv%d" % (n + 1))(layer)
        # 2 scores (object / not object) and 4 box deltas per anchor.
        rpn_class = Conv2D(2 * anchors_per_location, (1, 1), activation="sigmoid",
                           name="rpn_class%d" % (n + 1))(rpn)
        rpn_bbox = Conv2D(4 * anchors_per_location, (1, 1), activation="linear",
                          name="rpn_bbox%d" % (n + 1))(rpn)
        # Flatten the spatial dims so per-level outputs can be concatenated.
        rpn_class = Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(rpn_class)
        rpn_bbox = Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(rpn_bbox)
        rpn_class_outputs.append(rpn_class)
        rpn_bbox_outputs.append(rpn_bbox)
    rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_class_outputs)
    rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_bbox_outputs)
    return Model(inputs=input_image, outputs=[rpn_class, rpn_bbox], name="rpn_model")
```
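
For completeness, here is a minimal sketch of how these pieces could be wired together. The `CocoDataset` wrapper and the exact loss functions are assumptions (nothing above defines them), and the losses passed to `compile` are placeholders: a real RPN loss masks out neutral anchors and weights the class and bbox terms separately.

```python
# Hypothetical driver script; CocoDataset and the real RPN losses are assumed.
config = Config()
rpn_model = build_rpn_model(config)

# train_dataset = CocoDataset("path/to/coco", subset="train")  # assumed wrapper
# val_dataset = CocoDataset("path/to/coco", subset="val")
# train_gen = DataGenerator(train_dataset, config, shuffle=True, augment=True)
# val_gen = DataGenerator(val_dataset, config, shuffle=False, augment=False)

rpn_model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=config.LEARNING_RATE, momentum=0.9),
    loss=["binary_crossentropy", "huber"])  # placeholder losses

callbacks = [
    keras.callbacks.ModelCheckpoint("faster_rcnn_{epoch:02d}.h5", save_weights_only=True),
    keras.callbacks.ReduceLROnPlateau(patience=3),
    keras.callbacks.EarlyStopping(patience=10),
]
# rpn_model.fit(train_gen, validation_data=val_gen, epochs=config.EPOCHS,
#               steps_per_epoch=config.STEPS_PER_EPOCH,
#               validation_steps=config.VALIDATION_STEPS, callbacks=callbacks)
```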