目标检测中的gt框是什么意思
时间: 2023-10-11 18:06:51 浏览: 211
在目标检测中,"gt"代表"ground truth",意为"真实值"或"参考值"。gt框指的是在训练数据集中由人工标注者绘制的边界框,用于标记图像中目标的位置和大小。这些gt框表示了模型应该学习到的目标位置。在训练期间,模型会与gt框进行比较,通过调整预测框的位置和大小来最小化预测与真实值之间的差异。gt框是目标检测算法中评估和优化模型性能的重要工具。
相关问题
目标检测python计算tp、fp代码
目标检测是计算机视觉中的重要任务,它旨在检测图像或视频中的特定目标,并给出它们的位置和边界框。在目标检测中,通常会使用一些评估指标,如真阳性(True Positive,TP)和假阳性(False Positive,FP)来衡量算法的性能。
TP表示正确检测到的目标数目,而FP表示算法错误检测到的目标数目。下面是一个使用Python计算TP和FP的示例代码:
```python
def calculate_tp_fp(ground_truth, detection_result, iou_threshold=0.5):
    """Count true-positive and false-positive detections.

    A detection is a true positive when its IoU with at least one
    ground-truth box reaches ``iou_threshold``; otherwise it is a false
    positive.

    Args:
        ground_truth: Sequence of ground-truth boxes. It is iterated once
            per detection, so it must be re-iterable (e.g. a list).
        detection_result: Iterable of predicted boxes.
        iou_threshold: Minimum IoU for a detection to count as matched.
            Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        A ``(tp, fp)`` tuple of integer counts.

    Note:
        A ground-truth box is not consumed once matched, so several
        detections that overlap the same ground-truth box are all counted
        as true positives.
    """
    tp = 0
    fp = 0
    for bbox in detection_result:
        # any() stops at the first ground-truth box that matches.
        matched = any(
            calculate_iou(bbox, gt_bbox) >= iou_threshold
            for gt_bbox in ground_truth
        )
        if matched:
            tp += 1
        else:
            fp += 1
    return tp, fp
def calculate_iou(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: Box given as ``[x1, y1, x2, y2]`` (min corner, max corner).
        box2: Box in the same format as ``box1``.

    Returns:
        IoU as a float in ``[0, 1]``. Returns 0.0 when the union has no
        area (e.g. both boxes are degenerate) instead of dividing by zero.
    """
    ax1, ay1, ax2, ay2 = box1[:4]
    bx1, by1, bx2, by2 = box2[:4]

    # Clamp at zero so disjoint boxes yield an empty intersection.
    inter_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    inter_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    intersection = inter_w * inter_h

    area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
    area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
    union = area_a + area_b - intersection
    if union <= 0:
        return 0.0
    return intersection / union
# Example usage: two ground-truth boxes and three detections.
ground_truth = [
    [10, 10, 50, 50],
    [70, 80, 120, 150],
]
detection_result = [
    [15, 20, 45, 55],
    [80, 90, 110, 140],
    [200, 200, 250, 250],
]
tp, fp = calculate_tp_fp(ground_truth, detection_result)
print("True Positives: {}".format(tp))
print("False Positives: {}".format(fp))
```
在上述示例代码中,`calculate_tp_fp`函数接收两个参数:`ground_truth`表示真实目标的边界框列表,`detection_result`表示算法检测到的边界框列表。函数对每个检测框计算其与各个真实框的IoU:只要与任意一个真实框的IoU大于等于0.5,该检测框就计为TP,否则计为FP。最后,返回的`tp`和`fp`即为计算得到的真阳性和假阳性的数目。
这只是一个简单的示例代码,实际情况中可能需要考虑更多的细节和优化。希望以上回答对你有帮助!
基于深度学习的目标检测 代码
以下是一个基于深度学习的目标检测代码示例,使用的是 TensorFlow 和 Keras 框架。这个代码示例使用的是 Faster R-CNN 模型,可以在 COCO 数据集上进行训练和测试,同时还包括了数据增强和模型评估等功能。
```python
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Cropping2D
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Softmax
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import plot_model
import numpy as np
import os
import cv2
import time
import argparse
from tqdm import tqdm
from pycocotools.coco import COCO
from pycocotools import mask as maskUtils
# Set TensorFlow's native log-level environment variable to '3'
# (presumably to silence C++-side log output — confirm against the TF docs).
# NOTE(review): this runs after `import tensorflow` at the top of the listing;
# verify the variable is still honoured when it is set this late.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Seed NumPy's and TensorFlow's global random generators with the same fixed value.
np.random.seed(42)
tf.random.set_seed(42)
class Config:
    """Hyper-parameters read by the data pipeline and the target builders.

    All values are class attributes, so the class can be used directly or
    subclassed to override individual settings.
    """

    NAME = "faster_rcnn"
    BACKBONE = "resnet50"

    # Image resizing bounds passed to resize_image() by load_image_gt().
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1333

    # Anchor generation: one scale per pyramid level, every ratio at each level.
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]
    RPN_ANCHOR_STRIDE = 16
    RPN_NMS_THRESHOLD = 0.7

    # Sampling budget and positive share used by detection_target_graph().
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256
    RPN_POSITIVE_RATIO = 0.5

    DETECTION_MIN_CONFIDENCE = 0.7
    DETECTION_NMS_THRESHOLD = 0.3
    DETECTION_MAX_INSTANCES = 100

    LEARNING_RATE = 0.001
    WEIGHT_DECAY = 0.0001
    EPOCHS = 50
    BATCH_SIZE = 1
    STEPS_PER_EPOCH = 1000
    VALIDATION_STEPS = 50
    IMAGES_PER_GPU = 1

    # Per-channel mean subtracted from every image in load_image_gt().
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
    NUM_CLASSES = 81  # COCO has 80 classes + background

    # Pixel stride of each of the five feature levels. The values match the
    # shape table in compute_backbone_shapes() (e.g. 800 / 200 == 4).
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]
    # build_rpn_targets() reads this name and forwards it as the
    # feature_strides argument of generate_pyramid_anchors(), so it must hold
    # strides despite the name. It was previously undefined (AttributeError).
    BACKBONE_SHAPES = BACKBONE_STRIDES

    # Divisor applied to box deltas in detection_target_graph(); previously
    # undefined (AttributeError).
    # NOTE(review): these are the values conventionally used by R-CNN
    # reference implementations — confirm they suit this model.
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that yields batches of images with training targets.

    Each batch is assembled from ``config.BATCH_SIZE`` image ids; images and
    ground truth come from ``load_image_gt`` and the RPN/ROI targets from
    ``build_rpn_targets``.
    """

    def __init__(self, dataset, config, shuffle=True, augment=True):
        """Store the settings, copy the dataset's image ids and shuffle once.

        Args:
            dataset: Object exposing ``image_ids``; it is also handed to
                ``load_image_gt``.
            config: Settings object; ``BATCH_SIZE`` is read here.
            shuffle: Whether to reshuffle the ids at the end of each epoch.
            augment: Forwarded to ``load_image_gt``.
        """
        self.dataset = dataset
        self.config = config
        self.shuffle = shuffle
        self.augment = augment
        # Work on a copy so shuffling never reorders the dataset's own ids.
        self.image_ids = np.copy(self.dataset.image_ids)
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (last one may be short)."""
        num_images = len(self.dataset.image_ids)
        return int(np.ceil(num_images / self.config.BATCH_SIZE))

    def __getitem__(self, idx):
        """Build batch ``idx``.

        Returns:
            ``(inputs, outputs)`` where ``inputs`` is the list
            ``[images, gt_class_ids, gt_boxes, rpn_match, rpn_bbox, rois,
            roi_gt_class_ids, roi_gt_boxes]`` and ``outputs`` is an empty
            list.
        """
        first = idx * self.config.BATCH_SIZE
        last = (idx + 1) * self.config.BATCH_SIZE
        samples = [
            load_image_gt(self.dataset, self.config, image_id, augment=self.augment)
            for image_id in self.image_ids[first:last]
        ]
        batch_images = np.array([sample[0] for sample in samples])
        batch_gt_class_ids = np.array([sample[1] for sample in samples])
        batch_gt_boxes = np.array([sample[2] for sample in samples])

        targets = build_rpn_targets(
            batch_images.shape, self.config, batch_gt_class_ids, batch_gt_boxes
        )
        rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes = targets

        inputs = [
            batch_images,
            batch_gt_class_ids,
            batch_gt_boxes,
            rpn_match,
            rpn_bbox,
            rois,
            roi_gt_class_ids,
            roi_gt_boxes,
        ]
        outputs = []
        return inputs, outputs

    def on_epoch_end(self):
        """Reshuffle the image ids in place when shuffling is enabled."""
        if not self.shuffle:
            return
        np.random.shuffle(self.image_ids)
def load_image_gt(dataset, config, image_id, augment=True):
    """Load one image with its ground-truth class ids and boxes.

    Args:
        dataset: Object providing ``load_image(image_id)`` and
            ``load_mask(image_id)``; the latter returns ``(mask, class_ids)``.
        config: Settings object; ``IMAGE_MIN_DIM``, ``IMAGE_MAX_DIM`` and
            ``MEAN_PIXEL`` are read.
        image_id: Identifier forwarded to the dataset loaders.
        augment: When true, the sample goes through ``augment_image``.

    Returns:
        ``(image, gt_class_ids, gt_boxes)``: the resized, mean-subtracted
        float32 image, the int32 class ids and the float32 boxes
        (four columns).
    """
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)

    # NOTE(review): pycocotools' toBbox presumably expects RLE-encoded masks
    # and yields [x, y, w, h] rows — confirm load_mask returns that encoding.
    # Force (N, 4) / (N, 1) so the two arrays share a rank. The previous
    # code expanded bbox to three dimensions, which made the concatenate
    # below fail on a rank mismatch.
    bbox = np.reshape(maskUtils.toBbox(mask), (-1, 4))
    class_ids = np.reshape(class_ids, (-1, 1))

    # Carry the class id as a fifth column so that augmentation and resizing
    # keep every box paired with its label.
    gt_boxes = np.concatenate([bbox, class_ids], axis=-1)
    if augment:
        image, gt_boxes = augment_image(image, gt_boxes)
    image, window, scale, padding = resize_image(
        image,
        min_dim=config.IMAGE_MIN_DIM,
        max_dim=config.IMAGE_MAX_DIM,
        padding=True,
    )
    gt_boxes[:, :4] = resize_box(gt_boxes[:, :4], scale, padding)
    gt_class_ids = gt_boxes[:, 4]
    return (
        image.astype(np.float32) - config.MEAN_PIXEL,
        gt_class_ids.astype(np.int32),
        gt_boxes[:, :4].astype(np.float32),
    )
def augment_image(image, gt_boxes):
    """Mirror the image left-to-right with probability 0.5.

    When the flip happens, column 0 of ``gt_boxes`` is rewritten in place as
    ``width - column0 - column2`` (which mirrors boxes stored as
    ``[x, y, w, h]`` — presumably the layout used here; confirm with the
    caller). Otherwise both arguments are returned untouched.

    Returns:
        ``(image, gt_boxes)``.
    """
    # Guard clause: one random draw decides whether anything changes.
    if np.random.rand() >= 0.5:
        return image, gt_boxes

    mirrored = np.fliplr(image)
    img_width = mirrored.shape[1]
    gt_boxes[:, 0] = img_width - gt_boxes[:, 0] - gt_boxes[:, 2]
    return mirrored, gt_boxes
def resize_image(image, min_dim=None, max_dim=None, padding=False):
    """Scale an image to fit the given bounds and optionally pad it square.

    Args:
        image: Array whose first two axes are rows and columns.
        min_dim: If set, scale up so the shorter side reaches this size
            (the image is never scaled down for this bound).
        max_dim: If set, cap the scale so the longer side does not exceed
            this size. Required when ``padding`` is true.
        padding: When true, copy the result into the top-left corner of a
            zero-filled ``(max_dim, max_dim, 3)`` float32 canvas.

    Returns:
        ``(image, window, scale, padding)``. With padding, ``window`` is
        ``(0, 0, width, height)`` of the resized image inside the canvas and
        the padding tuple is ``(0, 0, 0, 0)``; without padding both are
        ``None``.

    Raises:
        ValueError: If ``padding`` is requested without ``max_dim``.
    """
    rows, cols = image.shape[0], image.shape[1]

    # Start from the identity scale so the function also works when min_dim
    # is not given (scale used to be unbound in that case).
    scale = 1
    if min_dim:
        scale = max(1, min_dim / min(rows, cols))
    if max_dim:
        scale = min(scale, max_dim / max(rows, cols))

    # Resizing to the same size would be a no-op, so skip the call.
    if scale != 1:
        image = cv2.resize(
            image, (int(round(cols * scale)), int(round(rows * scale)))
        )

    if not padding:
        return image, None, scale, None

    if not max_dim:
        raise ValueError("padding=True requires max_dim")
    padded_image = np.zeros((max_dim, max_dim, 3), dtype=np.float32)
    padded_image[:image.shape[0], :image.shape[1], :] = image
    window = (0, 0, image.shape[1], image.shape[0])
    return padded_image, window, scale, (0, 0, 0, 0)
def resize_box(boxes, scale, padding):
    """Shift and scale boxes in place to follow an image resize.

    When ``padding`` is given, ``padding[1]`` is added to column 0 and
    ``padding[0]`` to column 1; afterwards the first four columns are
    multiplied by ``scale``. The input array is modified and returned.
    """
    has_padding = padding is not None
    if has_padding:
        pad_first, pad_second = padding[0], padding[1]
        boxes[:, 0] += pad_second  # x1
        boxes[:, 1] += pad_first  # y1
    boxes[:, :4] *= scale
    return boxes
def overlaps(boxes1, boxes2):
    """Return the element-wise IoU between two box arrays.

    Columns 0 and 1 are treated as the minimum corner and columns 2 and 3 as
    the maximum corner. Extents use the inclusive ``max - min + 1``
    convention. The per-column arrays are combined with NumPy broadcasting,
    so a single-row ``boxes1`` can be compared against many ``boxes2`` rows.
    """

    def _area(b):
        return (b[:, 2] - b[:, 0] + 1) * (b[:, 3] - b[:, 1] + 1)

    lo_0 = np.maximum(boxes1[:, 0], boxes2[:, 0])
    lo_1 = np.maximum(boxes1[:, 1], boxes2[:, 1])
    hi_0 = np.minimum(boxes1[:, 2], boxes2[:, 2])
    hi_1 = np.minimum(boxes1[:, 3], boxes2[:, 3])

    # Clamp each extent at zero so disjoint boxes get an empty intersection.
    extent_0 = np.maximum(hi_0 - lo_0 + 1, 0)
    extent_1 = np.maximum(hi_1 - lo_1 + 1, 0)
    intersection = extent_0 * extent_1

    union = _area(boxes1) + _area(boxes2) - intersection
    return intersection / union
def compute_iou(box, boxes, eps=1e-8):
    """Return the IoU of one box against each row of ``boxes``.

    ``box`` gets a leading axis so it broadcasts against ``boxes`` inside
    ``overlaps``. ``eps`` is accepted but not used.
    """
    # np.newaxis is None, so this prepends an axis exactly as before.
    return overlaps(box[None], boxes)
def compute_backbone_shapes(config, image_shape):
    """Return the feature-map shape of each backbone level.

    If ``config.BACKBONE`` is callable it is invoked with ``image_shape``
    and its result returned. Otherwise it must be the string ``"resnet50"``
    or ``"resnet101"``, and one of two fixed five-level tables is chosen
    depending on whether ``image_shape[0]`` is at least 800.

    Raises:
        ValueError: For any other backbone name.
    """
    backbone = config.BACKBONE
    if callable(backbone):
        return backbone(image_shape)

    assert isinstance(backbone, str)
    if backbone not in ("resnet50", "resnet101"):
        raise ValueError("Invalid backbone name")

    if image_shape[0] >= 800:
        table = [[200, 256], [100, 128], [50, 64], [25, 32], [13, 16]]
    else:
        table = [[100, 128], [50, 64], [25, 32], [13, 16], [7, 8]]
    return np.array(table)
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Generate anchor boxes for one feature map.

    Args:
        scales: Anchor size(s); combined with every entry of ``ratios``.
        ratios: Aspect ratios; height is ``scale / sqrt(ratio)`` and width
            is ``scale * sqrt(ratio)``.
        shape: ``(rows, cols)`` of the feature map.
        feature_stride: Multiplier that turns a feature-map position into
            an anchor centre coordinate.
        anchor_stride: Step between feature-map positions that get anchors.

    Returns:
        Float array of shape ``(positions * combinations, 4)`` holding
        rounded ``[centre - size/2, centre + size/2]`` corners, with the
        row axis before the column axis.
    """
    # Every (scale, ratio) combination, flattened to 1-D.
    scale_grid, ratio_grid = np.meshgrid(np.array(scales), np.array(ratios))
    flat_scales = scale_grid.flatten()
    root_ratios = np.sqrt(ratio_grid.flatten())
    anchor_heights = flat_scales / root_ratios
    anchor_widths = flat_scales * root_ratios

    # Anchor centre coordinates along each axis.
    centre_rows = np.arange(0, shape[0], anchor_stride) * feature_stride
    centre_cols = np.arange(0, shape[1], anchor_stride) * feature_stride
    grid_cols, grid_rows = np.meshgrid(centre_cols, centre_rows)

    # Pair every centre with every anchor size.
    width_grid, col_grid = np.meshgrid(anchor_widths, grid_cols)
    height_grid, row_grid = np.meshgrid(anchor_heights, grid_rows)

    centres = np.stack([row_grid, col_grid], axis=2).reshape([-1, 2])
    half_sizes = 0.5 * np.stack([height_grid, width_grid], axis=2).reshape([-1, 2])

    corners = np.concatenate([centres - half_sizes, centres + half_sizes], axis=1)
    return np.round(corners)
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride):
    """Generate anchors for every pyramid level and stack them.

    Level ``i`` uses ``scales[i]``, ``feature_shapes[i]`` and
    ``feature_strides[i]``; ``ratios`` and ``anchor_stride`` are shared.

    Returns:
        All per-level anchor arrays concatenated along axis 0.
    """
    per_level = [
        generate_anchors(
            level_scale,
            ratios,
            feature_shapes[level],
            feature_strides[level],
            anchor_stride,
        )
        for level, level_scale in enumerate(scales)
    ]
    return np.concatenate(per_level, axis=0)
def norm_boxes(boxes, shape):
    """Convert pixel boxes to normalized coordinates clipped to ``[0, 1]``.

    Args:
        boxes: Array with four columns of pixel coordinates.
        shape: Image shape; only the first two entries (height, width) are
            used.

    Returns:
        ``(boxes - [0, 0, 1, 1]) / [h - 1, w - 1, h - 1, w - 1]`` with every
        value clipped into ``[0, 1]``.
    """
    height, width = shape[:2]
    offset = np.array([0, 0, 1, 1])
    extent = np.array([height - 1, width - 1, height - 1, width - 1])
    normalized = (boxes.astype(np.float32) - offset) / extent
    return np.clip(normalized, 0, 1)
def denorm_boxes(boxes, shape):
    """Convert normalized boxes back to integer pixel coordinates.

    Args:
        boxes: Array with four columns of normalized coordinates.
        shape: Image shape; only the first two entries (height, width) are
            used.

    Returns:
        int32 array computed as
        ``round(boxes * [h - 1, w - 1, h - 1, w - 1] + [0, 0, 1, 1])``.
    """
    h, w = shape[:2]
    scale = np.array([h - 1, w - 1, h - 1, w - 1])
    shift = np.array([0, 0, 1, 1])
    # Round before casting: astype() truncates, so a value such as
    # 0.29 * 100 == 28.999999999999996 would otherwise become 28, not 29.
    return np.around(boxes * scale + shift).astype(np.int32)
def overlaps_graph(boxes1, boxes2):
    """Compute the pairwise IoU matrix between two box sets with TF ops.

    The previous version transposed the tiled ``boxes2`` (which mixes up the
    coordinates of different boxes) and delegated to the NumPy helper
    ``compute_iou``, which prepends an extra axis. This version stays in
    TensorFlow and uses broadcasting instead.

    The box convention matches ``overlaps``: columns 0 and 1 are the minimum
    corner, columns 2 and 3 the maximum corner, and extents are inclusive
    (``max - min + 1``).

    Args:
        boxes1: Tensor of shape ``[N, 4]``.
        boxes2: Tensor of shape ``[M, 4]``.

    Returns:
        float32 tensor of shape ``[N, M]``; entry ``(i, j)`` is the IoU of
        ``boxes1[i]`` and ``boxes2[j]``.
    """
    boxes1 = tf.cast(boxes1, tf.float32)
    boxes2 = tf.cast(boxes2, tf.float32)

    # [N, 1, 4] against [1, M, 4] broadcasts to every (i, j) pair.
    b1 = tf.expand_dims(boxes1, 1)
    b2 = tf.expand_dims(boxes2, 0)

    lower = tf.maximum(b1[..., :2], b2[..., :2])
    upper = tf.minimum(b1[..., 2:4], b2[..., 2:4])
    # Clamp at zero so disjoint boxes get an empty intersection.
    inter_extent = tf.maximum(upper - lower + 1.0, 0.0)
    intersection = inter_extent[..., 0] * inter_extent[..., 1]

    area1 = (b1[..., 2] - b1[..., 0] + 1.0) * (b1[..., 3] - b1[..., 1] + 1.0)
    area2 = (b2[..., 2] - b2[..., 0] + 1.0) * (b2[..., 3] - b2[..., 1] + 1.0)
    union = area1 + area2 - intersection
    return intersection / union
# Select and label training ROIs from a set of proposals.
#
# Reads config.RPN_POSITIVE_RATIO, config.RPN_TRAIN_ANCHORS_PER_IMAGE and
# config.BBOX_STD_DEV. Returns (rois, roi_gt_class_ids, deltas), each
# zero-padded along axis 0 as described near the end.
#
# NOTE(review): indentation was lost in this listing; every statement from the
# `def` line down to the final `return` is presumably the function body.
# NOTE(review): `keras_rcnn` does not appear in the import block of this
# listing, and the Config class shown here defines no BBOX_STD_DEV — confirm
# where both are supposed to come from.
# NOTE(review): RPN_POSITIVE_RATIO is used below both as the IoU threshold that
# separates positives from negatives and as the share of positives to sample —
# confirm that sharing one setting is intended.
def detection_target_graph(proposals, gt_class_ids, gt_boxes, config):
proposals = tf.cast(proposals, tf.float32)
gt_boxes = tf.cast(gt_boxes, tf.float32)
gt_class_ids = tf.cast(gt_class_ids, tf.int64)
# Compute overlaps matrix [proposals, gt_boxes]
overlaps = overlaps_graph(proposals, gt_boxes)
# Best overlap of each proposal with any GT box; proposals at or above the
# threshold are positives.
roi_iou_max = tf.reduce_max(overlaps, axis=1)
positive_roi_bool = (roi_iou_max >= config.RPN_POSITIVE_RATIO)
positive_indices = tf.where(positive_roi_bool)[:, 0]
# Subsample ROIs. The positive budget is
# RPN_TRAIN_ANCHORS_PER_IMAGE * RPN_POSITIVE_RATIO.
# Positive ROIs
positive_count = int(config.RPN_TRAIN_ANCHORS_PER_IMAGE * config.RPN_POSITIVE_RATIO)
positive_indices = tf.random.shuffle(positive_indices)[:positive_count]
# Re-read the count: fewer positives than the budget may exist.
positive_count = tf.shape(positive_indices)[0]
# Negative ROIs. Add enough to maintain positive:negative ratio.
r = 1.0 / config.RPN_POSITIVE_RATIO
negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
negative_indices = tf.where(roi_iou_max < config.RPN_POSITIVE_RATIO)[:, 0]
# Never ask for more negatives than are available.
negative_count = tf.math.minimum(tf.shape(negative_indices)[0], negative_count)
negative_indices = tf.random.shuffle(negative_indices)[:negative_count]
# Gather selected ROIs
positive_rois = tf.gather(proposals, positive_indices)
negative_rois = tf.gather(proposals, negative_indices)
# Assign positive ROIs to GT boxes.
positive_overlaps = tf.gather(overlaps, positive_indices)
# With zero GT boxes the overlaps matrix has no columns, so argmax is skipped
# and an empty int64 index tensor is used instead.
roi_gt_box_assignment = tf.cond(
tf.greater(tf.shape(positive_overlaps)[1], 0),
true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
false_fn=lambda: tf.cast(tf.constant([]), tf.int64)
)
roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)
# Compute bbox refinement for positive ROIs
deltas = keras_rcnn.backend.boxutils.bbox_transform(positive_rois, roi_gt_boxes)
deltas /= tf.constant(config.BBOX_STD_DEV, dtype=tf.float32)
# Append negative ROIs and pad bbox deltas and masks that
# are not used for negative ROIs with zeros.
rois = tf.concat([positive_rois, negative_rois], axis=0)
# N = number of negatives; P = rows still missing to reach
# RPN_TRAIN_ANCHORS_PER_IMAGE. The per-positive tensors get N + P zero rows so
# they line up with `rois`.
N = tf.shape(negative_rois)[0]
P = tf.math.maximum(config.RPN_TRAIN_ANCHORS_PER_IMAGE - tf.shape(rois)[0], 0)
rois = tf.pad(rois, [(0, P), (0, 0)])
roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
# Return rois and deltas
# NOTE(review): the padded roi_gt_boxes is computed but not returned.
return rois, roi_gt_class_ids, deltas
# Build RPN match/bbox targets and sampled ROI targets for a batch.
#
# Returns (rpn_match, rpn_bbox, rois, roi_gt_class_ids, deltas), all wrapped in
# tf.stop_gradient.
#
# NOTE(review): indentation was lost in this listing; every statement from the
# `def` line down to the final `return` is presumably the function body.
# NOTE(review): `keras_rcnn` does not appear in the import block of this
# listing — confirm where get_best_anchor is supposed to come from.
def build_rpn_targets(image_shape, config, gt_class_ids, gt_boxes):
# NOTE(review): DataGenerator passes batch_images.shape here, while
# compute_backbone_shapes reads image_shape[0] — confirm which axis is meant.
feature_shapes = compute_backbone_shapes(config, image_shape)
# NOTE(review): config.BACKBONE_SHAPES is passed in the feature_strides
# position of generate_pyramid_anchors, and the Config class shown in this
# listing does not define it — verify the intended attribute.
anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS, feature_shapes, config.BACKBONE_SHAPES, config.RPN_ANCHOR_STRIDE)
rpn_match, rpn_bbox = keras_rcnn.backend.anchor.get_best_anchor(anchors, gt_boxes, config)
# Give rpn_match a trailing unit axis and flatten rpn_bbox to rows of four.
rpn_match = tf.expand_dims(rpn_match, axis=-1)
rpn_bbox = tf.reshape(rpn_bbox, [-1, 4])
# NOTE(review): `config` is listed among the tf.py_function inputs; confirm a
# plain settings object can be passed that way.
rois, roi_gt_class_ids, deltas = tf.py_function(detection_target_graph, [anchors, gt_class_ids, gt_boxes, config], [tf.float32, tf.int64, tf.float32])
# Declare static shapes for the py_function outputs.
rois.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4])
roi_gt_class_ids.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE])
# NOTE(review): the width declared here is 4 * NUM_CLASSES; check that it
# matches the deltas actually produced by detection_target_graph.
deltas.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4 * config.NUM_CLASSES])
rpn_match.set_shape([None, 1])
rpn_bbox.set_shape([None, 4])
# Targets are constants for training: block gradients through all of them.
rois = tf.stop_gradient(rois)
roi_gt_class_ids = tf.stop_gradient(roi_gt_class_ids)
deltas = tf.stop_gradient(deltas)
rpn_match = tf.stop_gradient(rpn_match)
rpn_bbox = tf.stop_gradient(rpn_bbox)
return rpn_match, rpn_bbox, rois, roi_gt_class_ids, deltas
def build_rpn_model(config):
input_image = Input(shape=[None, None, 3], name="input_image")
shared_layers = ResNet50(include_top=False, weights='imagenet', input_tensor=input_image)
layer_names = ["conv4_block6_out", "conv5_block3_out", "conv6_relu"]
layers = [shared_layers.get_layer(name).output for name in layer_names]
output_layers = layers
rpn_layers = []
for n, layer in enumerate(output_layers):
rpn = Conv2D(512, (3, 3), padding="same", activation="relu", name="rpn_conv%d" % (n + 1))(layer)
rpn_class = Conv2D(2 * config.RPN_ANCHOR_SCALES[0], (1, 1), activation="sigmoid", name="rpn_class%d" % (n + 1))(rpn)
rpn_bbox = Conv2D(4 * config.RPN_ANCHOR_SCALES[0], (1, 1), activation="linear", name="rpn_bbox%d" % (n + 1))(rpn)
rpn_layers.append(rpn_class)
rpn_layers.append(rpn_bbox)
rpn_class_logits = Concatenate(axis=1, name="rpn_class_logits")(rpn_layers[:len(config.RPN_ANCHOR_SCALES)])
rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_layers[len(config.RPN_ANCHOR_SCALES):])
rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_layers[len(config.R
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)