Deep-Learning-Based Object Detection: Code Example
Below is an example of deep-learning-based object detection code using the TensorFlow and Keras frameworks. It sketches a Faster R-CNN model intended for training and testing on the COCO dataset, including data augmentation, anchor generation, and training-target construction. Treat it as an illustrative skeleton rather than a complete implementation: the dataset wrapper, the loss functions, and the detection head are not shown.
```python
import os

import numpy as np
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Conv2D, Concatenate, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50
from pycocotools.coco import COCO  # kept for the COCO dataset wrapper (not shown)
from pycocotools import mask as maskUtils

# Quiet TensorFlow's C++ logging and fix the random seeds for reproducibility.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
np.random.seed(42)
tf.random.set_seed(42)
class Config:
    """Hyper-parameters for the Faster R-CNN pipeline."""
    NAME = "faster_rcnn"
    BACKBONE = "resnet50"
    # Feature-map strides of the pyramid levels (assumed values, matching the
    # standard ResNet/FPN layout); used when generating pyramid anchors below.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1333
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]
    RPN_ANCHOR_STRIDE = 16
    RPN_NMS_THRESHOLD = 0.7
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256
    RPN_POSITIVE_RATIO = 0.5
    # Std-dev used to normalize bbox refinement targets (the value used by
    # most Faster/Mask R-CNN implementations); referenced further down.
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.3
    DETECTION_MAX_INSTANCES = 100
    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    EPOCHS = 50
    BATCH_SIZE = 1
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 50
    IMAGES_PER_GPU = 1
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
    NUM_CLASSES = 81  # COCO: 80 classes + background
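
# Usage sketch (hypothetical subclass): override hyper-parameters for a quick
# debugging run without touching the base Config.
class DebugConfig(Config):
    NAME = "faster_rcnn_debug"
    STEPS_PER_EPOCH = 10
    EPOCHS = 1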
class DataGenerator(keras.utils.Sequence):
    """Yields batches of images and ground-truth training targets."""
    def __init__(self, dataset, config, shuffle=True, augment=True):
        self.dataset = dataset
        self.config = config
        self.shuffle = shuffle
        self.augment = augment
        self.image_ids = np.copy(self.dataset.image_ids)
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.dataset.image_ids) / self.config.BATCH_SIZE))

    def __getitem__(self, idx):
        batch_image_ids = self.image_ids[idx * self.config.BATCH_SIZE:(idx + 1) * self.config.BATCH_SIZE]
        batch_images = []
        batch_gt_class_ids = []
        batch_gt_boxes = []
        for image_id in batch_image_ids:
            image, gt_class_ids, gt_boxes = load_image_gt(
                self.dataset, self.config, image_id, augment=self.augment)
            batch_images.append(image)
            batch_gt_class_ids.append(gt_class_ids)
            batch_gt_boxes.append(gt_boxes)
        # BATCH_SIZE is 1 here; with larger batches the per-image GT arrays
        # would need padding to a common length before stacking.
        batch_images = np.array(batch_images)
        batch_gt_class_ids = np.array(batch_gt_class_ids)
        batch_gt_boxes = np.array(batch_gt_boxes)
        rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes = build_rpn_targets(
            batch_images.shape, self.config, batch_gt_class_ids, batch_gt_boxes)
        inputs = [batch_images, batch_gt_class_ids, batch_gt_boxes,
                  rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes]
        outputs = []
        return inputs, outputs

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.image_ids)
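
# Usage sketch (assumes a `CocoDataset` wrapper around pycocotools exposing
# image_ids, load_image, and load_mask -- not defined in this snippet):
# train_generator = DataGenerator(train_dataset, Config(), shuffle=True, augment=True)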
def load_image_gt(dataset, config, image_id, augment=True):
    """Load one image plus its class IDs and boxes, resized and mean-subtracted."""
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    # maskUtils.toBbox expects RLE-encoded masks and returns [N, 4] boxes in
    # (x, y, w, h) format; binary masks would need maskUtils.encode first.
    bbox = maskUtils.toBbox(mask)
    gt_boxes = np.concatenate([bbox, class_ids[:, None]], axis=1)
    if augment:
        image, gt_boxes = augment_image(image, gt_boxes)
    # Convert (x, y, w, h) to the corner format (x1, y1, x2, y2) used downstream.
    gt_boxes[:, 2] += gt_boxes[:, 0]
    gt_boxes[:, 3] += gt_boxes[:, 1]
    image, window, scale, padding = resize_image(
        image, min_dim=config.IMAGE_MIN_DIM, max_dim=config.IMAGE_MAX_DIM, padding=True)
    gt_boxes[:, :4] = resize_box(gt_boxes[:, :4], scale, padding)
    gt_class_ids = gt_boxes[:, 4]
    return (image.astype(np.float32) - config.MEAN_PIXEL,
            gt_class_ids.astype(np.int32),
            gt_boxes[:, :4].astype(np.float32))
def augment_image(image, gt_boxes):
    # Random horizontal flip; boxes are still (x, y, w, h) at this point.
    if np.random.rand() < 0.5:
        image = np.fliplr(image)
        gt_boxes[:, 0] = image.shape[1] - gt_boxes[:, 0] - gt_boxes[:, 2]
    return image, gt_boxes
def resize_image(image, min_dim=None, max_dim=None, padding=False):
    """Resize so the short side reaches min_dim, capped so the long side fits max_dim."""
    rows, cols = image.shape[0], image.shape[1]
    scale = 1.0
    if min_dim:
        scale = max(1, min_dim / min(rows, cols))
    if max_dim:
        scale = min(scale, max_dim / max(rows, cols))
    image = cv2.resize(image, (int(round(cols * scale)), int(round(rows * scale))))
    if padding:
        # Zero-pad to a square max_dim x max_dim canvas, top-left aligned,
        # so the padding offsets are all zero.
        padded_image = np.zeros((max_dim, max_dim, 3), dtype=np.float32)
        padded_image[:image.shape[0], :image.shape[1], :] = image
        window = (0, 0, image.shape[1], image.shape[0])
        return padded_image, window, scale, (0, 0, 0, 0)
    return image, None, scale, None
def resize_box(boxes, scale, padding):
    """Scale corner-format boxes, then shift by the (top, left) padding offset."""
    boxes[:, :4] *= scale
    if padding is not None:
        boxes[:, [0, 2]] += padding[1]  # x offset
        boxes[:, [1, 3]] += padding[0]  # y offset
    return boxes
def overlaps(boxes1, boxes2, eps=1e-8):
    """Element-wise IoU between corner-format (x1, y1, x2, y2) boxes."""
    i_x1 = np.maximum(boxes1[:, 0], boxes2[:, 0])
    i_y1 = np.maximum(boxes1[:, 1], boxes2[:, 1])
    i_x2 = np.minimum(boxes1[:, 2], boxes2[:, 2])
    i_y2 = np.minimum(boxes1[:, 3], boxes2[:, 3])
    i_area = np.maximum(i_x2 - i_x1 + 1, 0) * np.maximum(i_y2 - i_y1 + 1, 0)
    a_area = (boxes1[:, 2] - boxes1[:, 0] + 1) * (boxes1[:, 3] - boxes1[:, 1] + 1)
    b_area = (boxes2[:, 2] - boxes2[:, 0] + 1) * (boxes2[:, 3] - boxes2[:, 1] + 1)
    u_area = a_area + b_area - i_area
    return i_area / (u_area + eps)

def compute_iou(box, boxes, eps=1e-8):
    """IoU of a single box against an array of boxes (via broadcasting)."""
    return overlaps(box[np.newaxis], boxes, eps=eps)
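
# Sanity check (illustrative): identical boxes have IoU ~1.0.
# compute_iou(np.array([0, 0, 10, 10]), np.array([[0, 0, 10, 10]]))  # -> ~1.0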
def compute_backbone_shapes(config, image_shape):
    """Return the [height, width] of each pyramid level's feature map.

    Accepts either an image shape (H, W, C) or a batch shape (B, H, W, C).
    """
    if callable(config.BACKBONE):
        return config.BACKBONE(image_shape)
    assert isinstance(config.BACKBONE, str)
    height = image_shape[-3]  # works for both (H, W, C) and (B, H, W, C)
    if config.BACKBONE in ["resnet50", "resnet101"]:
        # Hard-coded level shapes for the two image-size regimes used here.
        if height >= 800:
            return np.array([[200, 256], [100, 128], [50, 64], [25, 32], [13, 16]])
        else:
            return np.array([[100, 128], [50, 64], [25, 32], [13, 16], [7, 8]])
    raise ValueError("Invalid backbone name")
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
    scales, ratios = scales.flatten(), ratios.flatten()
    heights = scales / np.sqrt(ratios)
    widths = scales * np.sqrt(ratios)
    shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
    shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
    shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
    box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
    box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
    box_centers = np.stack([box_centers_y, box_centers_x], axis=2)
    box_sizes = np.stack([box_heights, box_widths], axis=2)
    box_centers = np.reshape(box_centers, [-1, 2])
    box_sizes = np.reshape(box_sizes, [-1, 2])
    boxes = np.concatenate([box_centers - 0.5 * box_sizes,
                            box_centers + 0.5 * box_sizes], axis=1)
    boxes = np.round(boxes)
    return boxes
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride):
    anchors = []
    for i in range(len(scales)):
        anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
                                        feature_strides[i], anchor_stride))
    return np.concatenate(anchors, axis=0)
def norm_boxes(boxes, shape):
    boxes = boxes.astype(np.float32)
    h, w = shape[:2]
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    boxes = np.divide(boxes - shift, scale)
    return np.maximum(np.minimum(boxes, 1), 0)

def denorm_boxes(boxes, shape):
    h, w = shape[:2]
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    return (boxes * scale + shift).astype(np.int32)
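
# Round-trip check (illustrative): pixel -> normalized -> pixel recovers the
# original box up to rounding.
# norm = norm_boxes(np.array([[10, 10, 50, 50]]), (100, 100))
# denorm_boxes(norm, (100, 100))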
def overlaps_graph(boxes1, boxes2):
    """Pairwise IoU matrix [len(boxes1), len(boxes2)], computed with TF ops."""
    # Repeat boxes1 and tile boxes2 so every pair lines up row by row.
    b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), [1, 1, tf.shape(boxes2)[0]]), [-1, 4])
    b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1])
    y1 = tf.maximum(b1[:, 0], b2[:, 0])
    x1 = tf.maximum(b1[:, 1], b2[:, 1])
    y2 = tf.minimum(b1[:, 2], b2[:, 2])
    x2 = tf.minimum(b1[:, 3], b2[:, 3])
    intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0)
    area1 = (b1[:, 2] - b1[:, 0]) * (b1[:, 3] - b1[:, 1])
    area2 = (b2[:, 2] - b2[:, 0]) * (b2[:, 3] - b2[:, 1])
    iou = intersection / (area1 + area2 - intersection)
    return tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]])
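
def box_refinement_graph(box, gt_box):
    # Local stand-in for the keras-rcnn bbox_transform helper the original
    # snippet referenced: the standard (dy, dx, log dh, log dw) box encoding
    # for corner-format boxes.
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)
    height = box[:, 2] - box[:, 0]
    width = box[:, 3] - box[:, 1]
    center_y = box[:, 0] + 0.5 * height
    center_x = box[:, 1] + 0.5 * width
    gt_height = gt_box[:, 2] - gt_box[:, 0]
    gt_width = gt_box[:, 3] - gt_box[:, 1]
    gt_center_y = gt_box[:, 0] + 0.5 * gt_height
    gt_center_x = gt_box[:, 1] + 0.5 * gt_width
    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.math.log(gt_height / height)
    dw = tf.math.log(gt_width / width)
    return tf.stack([dy, dx, dh, dw], axis=1)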
def detection_target_graph(proposals, gt_class_ids, gt_boxes, config):
    """Subsample proposals and build classifier / bbox-refinement targets."""
    proposals = tf.cast(proposals, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)
    gt_class_ids = tf.cast(gt_class_ids, tf.int64)
    # Compute overlaps matrix [proposals, gt_boxes].
    overlaps = overlaps_graph(proposals, gt_boxes)
    # Proposals whose best IoU clears the threshold are positives. (This
    # snippet reuses RPN_POSITIVE_RATIO, 0.5, as the IoU threshold; a
    # dedicated constant would be cleaner.)
    roi_iou_max = tf.reduce_max(overlaps, axis=1)
    positive_roi_bool = (roi_iou_max >= config.RPN_POSITIVE_RATIO)
    positive_indices = tf.where(positive_roi_bool)[:, 0]
    # Subsample ROIs, keeping the configured positive fraction.
    positive_count = int(config.RPN_TRAIN_ANCHORS_PER_IMAGE * config.RPN_POSITIVE_RATIO)
    positive_indices = tf.random.shuffle(positive_indices)[:positive_count]
    positive_count = tf.shape(positive_indices)[0]
    # Negative ROIs: add enough to maintain the positive:negative ratio.
    r = 1.0 / config.RPN_POSITIVE_RATIO
    negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
    negative_indices = tf.where(roi_iou_max < config.RPN_POSITIVE_RATIO)[:, 0]
    negative_count = tf.math.minimum(tf.shape(negative_indices)[0], negative_count)
    negative_indices = tf.random.shuffle(negative_indices)[:negative_count]
    # Gather selected ROIs.
    positive_rois = tf.gather(proposals, positive_indices)
    negative_rois = tf.gather(proposals, negative_indices)
    # Assign each positive ROI to the GT box it overlaps most.
    positive_overlaps = tf.gather(overlaps, positive_indices)
    roi_gt_box_assignment = tf.cond(
        tf.greater(tf.shape(positive_overlaps)[1], 0),
        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
        false_fn=lambda: tf.cast(tf.constant([]), tf.int64))
    roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
    roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)
    # Bbox refinement targets for positive ROIs, normalized by BBOX_STD_DEV;
    # box_refinement_graph (defined above) stands in for the keras-rcnn
    # bbox_transform helper the original snippet called here.
    deltas = box_refinement_graph(positive_rois, roi_gt_boxes)
    deltas /= tf.constant(config.BBOX_STD_DEV, dtype=tf.float32)
    # Append negative ROIs and zero-pad the targets that don't apply to them.
    rois = tf.concat([positive_rois, negative_rois], axis=0)
    N = tf.shape(negative_rois)[0]
    P = tf.math.maximum(config.RPN_TRAIN_ANCHORS_PER_IMAGE - tf.shape(rois)[0], 0)
    rois = tf.pad(rois, [(0, P), (0, 0)])
    roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
    roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
    deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
    return rois, roi_gt_class_ids, deltas
def build_rpn_targets(image_shape, config, gt_class_ids, gt_boxes):
    """Build RPN matches/deltas and sampled ROI targets for one image batch."""
    feature_shapes = compute_backbone_shapes(config, image_shape)
    anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS,
                                       feature_shapes, config.BACKBONE_STRIDES,
                                       config.RPN_ANCHOR_STRIDE)
    # BATCH_SIZE is 1 in this configuration, so unpack the single image's GT.
    gt_class_ids = gt_class_ids[0]
    gt_boxes = gt_boxes[0]
    # The original snippet called keras_rcnn.backend.anchor.get_best_anchor
    # here; match_anchors_to_gt (a minimal stand-in, sketched after this
    # function) plays the same role.
    rpn_match, rpn_bbox = match_anchors_to_gt(anchors, gt_boxes, config)
    rpn_match = tf.expand_dims(rpn_match, axis=-1)
    rpn_bbox = tf.reshape(rpn_bbox, [-1, 4])
    # Close over `config` instead of passing it through tf.py_function,
    # which only accepts tensors.
    rois, roi_gt_class_ids, deltas = tf.py_function(
        lambda p, c, b: detection_target_graph(p, c, b, config),
        [anchors, gt_class_ids, gt_boxes],
        [tf.float32, tf.int64, tf.float32])
    rois.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4])
    roi_gt_class_ids.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE])
    # Each ROI gets one 4-vector of refinement deltas.
    deltas.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4])
    rpn_match.set_shape([None, 1])
    rpn_bbox.set_shape([None, 4])
    # Targets are constants w.r.t. the network; block gradients through them.
    rois = tf.stop_gradient(rois)
    roi_gt_class_ids = tf.stop_gradient(roi_gt_class_ids)
    deltas = tf.stop_gradient(deltas)
    rpn_match = tf.stop_gradient(rpn_match)
    rpn_bbox = tf.stop_gradient(rpn_bbox)
    return rpn_match, rpn_bbox, rois, roi_gt_class_ids, deltas
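
def match_anchors_to_gt(anchors, gt_boxes, config):
    # Hedged stand-in for keras_rcnn.backend.anchor.get_best_anchor (assumed
    # behavior): label each anchor +1 (IoU >= 0.7), -1 (IoU < 0.3), else 0
    # (neutral), and compute refinement deltas toward its best GT box.
    # Expects a single image's boxes with shape [N, 4]; simplified in that it
    # skips the "best anchor per GT box" guarantee real implementations add.
    anchors = tf.cast(anchors, tf.float32)
    gt_boxes = tf.cast(gt_boxes, tf.float32)
    iou = overlaps_graph(anchors, gt_boxes)
    best_iou = tf.reduce_max(iou, axis=1)
    best_gt = tf.argmax(iou, axis=1)
    rpn_match = tf.where(best_iou >= 0.7, 1, tf.where(best_iou < 0.3, -1, 0))
    rpn_bbox = box_refinement_graph(anchors, tf.gather(gt_boxes, best_gt))
    return rpn_match, rpn_bbox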
def build_rpn_model(config):
    """Region Proposal Network on ResNet50 feature maps (C3, C4, C5)."""
    input_image = Input(shape=[None, None, 3], name="input_image")
    shared_layers = ResNet50(include_top=False, weights='imagenet', input_tensor=input_image)
    # Output layers of Keras' ResNet50 at strides 8, 16, and 32.
    layer_names = ["conv3_block4_out", "conv4_block6_out", "conv5_block3_out"]
    output_layers = [shared_layers.get_layer(name).output for name in layer_names]
    # One anchor per ratio at each location (scales are handled per level).
    anchors_per_location = len(config.RPN_ANCHOR_RATIOS)
    rpn_class_outputs = []
    rpn_bbox_outputs = []
    for n, layer in enumerate(output_layers):
        rpn = Conv2D(512, (3, 3), padding="same", activation="relu",
                     name="rpn_conv%d" % (n + 1))(layer)
        # 2 scores (object / not object) and 4 box deltas per anchor.
        rpn_class = Conv2D(2 * anchors_per_location, (1, 1), activation="sigmoid",
                           name="rpn_class%d" % (n + 1))(rpn)
        rpn_bbox = Conv2D(4 * anchors_per_location, (1, 1), activation="linear",
                          name="rpn_bbox%d" % (n + 1))(rpn)
        # Flatten the spatial dims so per-level outputs can be concatenated.
        rpn_class = Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(rpn_class)
        rpn_bbox = Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(rpn_bbox)
        rpn_class_outputs.append(rpn_class)
        rpn_bbox_outputs.append(rpn_bbox)
    rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_class_outputs)
    rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_bbox_outputs)
    return Model(inputs=input_image, outputs=[rpn_class, rpn_bbox], name="rpn_model")
```
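
For completeness, here is a minimal sketch of how these pieces could be wired together. The `CocoDataset` wrapper and the exact loss functions are assumptions (nothing above defines them), and the losses passed to `compile` are placeholders: a real RPN loss masks out neutral anchors and weights the class and bbox terms separately.

```python
# Hypothetical driver script; CocoDataset and the real RPN losses are assumed.
config = Config()
rpn_model = build_rpn_model(config)

# train_dataset = CocoDataset("path/to/coco", subset="train")  # assumed wrapper
# val_dataset = CocoDataset("path/to/coco", subset="val")
# train_gen = DataGenerator(train_dataset, config, shuffle=True, augment=True)
# val_gen = DataGenerator(val_dataset, config, shuffle=False, augment=False)

rpn_model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=config.LEARNING_RATE, momentum=0.9),
    loss=["binary_crossentropy", "huber"])  # placeholder losses

callbacks = [
    keras.callbacks.ModelCheckpoint("faster_rcnn_{epoch:02d}.h5", save_weights_only=True),
    keras.callbacks.ReduceLROnPlateau(patience=3),
    keras.callbacks.EarlyStopping(patience=10),
]
# rpn_model.fit(train_gen, validation_data=val_gen, epochs=config.EPOCHS,
#               steps_per_epoch=config.STEPS_PER_EPOCH,
#               validation_steps=config.VALIDATION_STEPS, callbacks=callbacks)
```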