# Side length used to scale the normalized region coordinates. The value 640 is
# hard-coded in the original code; presumably it is the detector's square
# input size -- TODO confirm against the caller.
_INPUT_SIZE = 640


def _load_json_dict(path):
    """Return the JSON content of *path*, or an empty dict if the file is absent."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as f:
        return json.load(f)


def _to_pixel_polygons(regions, h1, img_h):
    """Scale 8-value normalized regions (x0, y0, ..., x3, y3) to pixel polygons.

    x values are multiplied by ``_INPUT_SIZE``; y values are mapped to
    ``h1 + y * img_h`` (vertical offset plus scaled height).
    """
    return [
        [
            region[k] * _INPUT_SIZE if k % 2 == 0 else h1 + region[k] * img_h
            for k in range(8)
        ]
        for region in regions
    ]


def _overlaps_any(box, polygons):
    """Return True if ``calculate_iou`` reports a value > 0 for any polygon."""
    x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
    # Corner order is kept exactly as the original passed it to calculate_iou.
    corners = [x1, y1, x2, y2, x2, y1, x1, y2]
    return any(calculate_iou(corners, poly) > 0 for poly in polygons)


def IOU(boxes, classes, scores, controlRectangle, controlInvalid, url):
    """Filter detections by include regions and exclude regions.

    Args:
        boxes: numpy array of boxes; the first four values of each row are
            used as x1, y1, x2, y2.
        classes: numpy array with one class entry per box.
        scores: numpy array with one score entry per box.
        controlRectangle: list of 8-value normalized regions. When non-empty,
            only boxes whose ``calculate_iou`` with at least one region is > 0
            are kept.
        controlInvalid: list of 8-value normalized regions. When non-empty,
            boxes whose ``calculate_iou`` with at least one region is > 0 are
            dropped.
        url: key into ``cameraId_dict.txt`` used to find the camera id, which
            in turn selects the ``"w,h"`` string in ``stream_dict.txt``.

    Returns:
        Tuple ``(boxes, classes, scores)`` of numpy arrays holding the
        surviving detections, in their original order.

    Notes:
        * If the stream size cannot be resolved (missing files, unknown url
          or camera id, malformed or non-positive size) 1920x1080 is assumed.
          The original raised ``KeyError`` for an unknown url even though it
          deliberately tolerated missing config files.
        * The original exclusion pass deleted from the result list while
          indexing by positions of a copy, which removed the wrong entries or
          raised ``IndexError`` after the first deletion. The lists are now
          rebuilt instead of mutated in place.
        * ``calculate_iou`` is defined elsewhere; only the sign of its result
          is used here.
    """
    # numpy -> list
    boxes = boxes.tolist()
    classes = classes.tolist()
    scores = scores.tolist()

    # Read the configuration files.
    stream_dict = _load_json_dict('stream_dict.txt')
    cameraId_dict = _load_json_dict('cameraId_dict.txt')

    try:
        size = stream_dict[cameraId_dict[url]][1].split(',')
        w, h = int(size[0]), int(size[1])
        if w <= 0 or h <= 0:
            raise ValueError("non-positive stream size")
    except (KeyError, IndexError, TypeError, ValueError, AttributeError):
        w, h = 1920, 1080

    # Vertical offset and effective height of the frame inside the square.
    h1 = (w - h) * 0.5 / w * _INPUT_SIZE
    img_h = _INPUT_SIZE - h1 * 2

    include = controlRectangle if controlRectangle is not None else []
    exclude = controlInvalid if controlInvalid is not None else []

    kept = list(zip(boxes, classes, scores))

    if kept and len(include) > 0:
        polygons = _to_pixel_polygons(include, h1, img_h)
        kept = [det for det in kept if _overlaps_any(det[0], polygons)]

    if kept and len(exclude) > 0:
        polygons = _to_pixel_polygons(exclude, h1, img_h)
        kept = [det for det in kept if not _overlaps_any(det[0], polygons)]

    boxes1 = np.array([det[0] for det in kept])
    classes1 = np.array([det[1] for det in kept])
    scores1 = np.array([det[2] for det in kept])
    return boxes1, classes1, scores1
时间: 2023-12-31 22:07:09 浏览: 114
这段代码主要定义了一个名为IOU的函数,函数的输入参数包括boxes, classes, scores, controlRectangle, controlInvalid, url。函数的功能是根据控制矩形和控制无效区域,计算并返回一组经过过滤的边界框、类别和得分。具体实现过程如下:
1. 将输入的numpy数组转换成列表格式;
2. 如果当前目录下存在名为stream_dict.txt和cameraId_dict.txt的文件,则分别读取它们的内容,否则创建一个空字典;
3. 从cameraId_dict中获取当前url对应的摄像头id,然后从stream_dict中获取该摄像头的宽和高,如果解析失败则默认宽为1920,高为1080;
4. 根据控制矩形和控制无效区域,过滤边界框、类别和得分,具体的实现过程如下:
* 如果控制矩形和控制无效区域均为空,则不做任何处理,直接返回输入的边界框、类别和得分;
* 如果控制矩形不为空,则遍历每一个边界框,判断其是否与控制矩形重叠,如果重叠则加入到输出列表中;
* 如果控制无效区域不为空,则在上一步保留下来的边界框中,再次遍历每一个边界框,判断其是否与控制无效区域重叠(IoU 大于 0),如果重叠则从输出列表中删除该边界框;
5. 将输出的列表转换回numpy数组,并返回。
翻译每行代码的意思 def IOU(boxes, classes, scores, controlRectangle, controlInvalid, url): # numpy转list boxes = boxes.tolist() classes = classes.tolist() scores = scores.tolist() # 读取配置文件 if os.path.exists('stream_dict.txt'): with open('stream_dict.txt', 'r') as f: stream_dict = json.load(f) else: stream_dict = {} if os.path.exists('cameraId_dict.txt'): with open('cameraId_dict.txt', 'r') as f: cameraId_dict = json.load(f) else: cameraId_dict = {} cameraId = cameraId_dict[url] try: w = int(stream_dict[cameraId][1].split(',')[0]) h = int(stream_dict[cameraId][1].split(',')[1]) except: w = 1920 h = 1080 h1 = (w - h) * 0.5 / w * 640 img_h = 640 - h1 * 2 if controlRectangle == [] and controlInvalid == []: boxes1 = boxes classes1 = classes scores1 = scores if controlRectangle != []: boxes1 = [] classes1 = [] scores1 = [] for i in range(len(boxes)): for j in range(len(controlRectangle)): a = boxes[i] c = classes[i] s = scores[i] x1 = a[0] y1 = a[1] x2 = a[2] y2 = a[3] d = [x1, y1, x2, y2, x2, y1, x1, y2] b = [controlRectangle[j][0]*640, h1 + controlRectangle[j][1]*img_h, controlRectangle[j][2]*640, h1 + controlRectangle[j][3]*img_h, controlRectangle[j][4]*640, h1 + controlRectangle[j][5]*img_h, controlRectangle[j][6]*640, h1 + controlRectangle[j][7]*img_h] iou = calculate_iou(d, b) if iou > 0: boxes1.append(a) classes1.append(c) scores1.append(s) break else: boxes1 = boxes classes1 = classes scores1 = scores if controlInvalid != []: boxes2 = copy.deepcopy(boxes1) classes2 = copy.deepcopy(classes1) scores2 = copy.deepcopy(scores1) for i in range(len(boxes2)): for j in range(len(controlInvalid)): a = boxes2[i] c = classes2[i] s = scores2[i] x1 = a[0] y1 = a[1] x2 = a[2] y2 = a[3] d = [x1, y1, x2, y2, x2, y1, x1, y2] b = [controlInvalid[j][0]*640, h1 + controlInvalid[j][1]*img_h, controlInvalid[j][2]*640, h1 + controlInvalid[j][3]*img_h, controlInvalid[j][4]*640, h1 + controlInvalid[j][5]*img_h, controlInvalid[j][6]*640, h1 + controlInvalid[j][7]*img_h] iou = calculate_iou(d, b) if 
iou > 0: del boxes1[i] del classes1[i] del scores1[i] break boxes1 = np.array(boxes1) classes1 = np.array(classes1) scores1 = np.array(scores1) return boxes1, classes1, scores1
这段代码定义了一个名为 IOU 的函数,它接受六个参数:boxes, classes, scores, controlRectangle, controlInvalid 和 url。
首先,函数将 numpy 数组转换成列表,然后读取两个文件,并将它们存储在 stream_dict 和 cameraId_dict 变量中。接着,函数从 cameraId_dict 中获取与 url 对应的相机 ID,并从 stream_dict 中获取与相机 ID 对应的宽度和高度。如果获取失败,则使用默认值 1920 和 1080。
然后,函数计算出图像的实际高度 img_h(即 640 减去上下各 h1 的偏移),并检查 controlRectangle 和 controlInvalid 是否为空。如果 controlRectangle 不为空,则遍历 boxes 中的每个边界框,计算它与 controlRectangle 中各个区域的交并比(IoU);只要与任一区域的 IoU 大于 0,就将该边界框添加到 boxes1、classes1 和 scores1 列表中。如果 controlInvalid 不为空,则从 boxes1、classes1 和 scores1 列表中删除与 controlInvalid 中任一区域重叠(IoU 大于 0)的边界框。
最后,函数将 boxes1、classes1 和 scores1 转换回 numpy 数组,并将它们作为结果返回。
首先,需要安装 `torch`、`opencv-python` 和 `numpy` 库。然后,可以使用以下代码实现你所需的功能:
import torch
import cv2
import numpy as np
class YOLOv7:
    """Thin wrapper around a YOLO-style checkpoint: preprocess, forward, decode, NMS.

    The original snippet was damaged (lost indentation, truncated
    ``torch.cat`` calls, missing ``else``/``continue`` lines). This version
    restores a runnable class and fixes the shape and indexing errors that
    would have crashed it. Places where the intended semantics cannot be
    determined from the code are marked ``NOTE(review)``.
    """

    def __init__(self, model_path, device='cuda'):
        """Load the checkpoint at *model_path* onto *device*.

        The checkpoint is expected to be a mapping with a ``'model'`` entry.
        """
        # NOTE(review): torch.load unpickles arbitrary objects; only load
        # checkpoints from a trusted source.
        self.model = torch.load(model_path, map_location=device)['model'].float()
        # Inference only: switch off dropout / batch-norm updates.
        self.model.eval()
        self.device = device
        self.anchors = torch.tensor([[10,13], [16,30], [33,23], [30,61], [62,45], [59,119], [116,90], [156,198], [373,326]]).to(device)
        self.stride = torch.tensor([8, 16, 32]).to(device)
        self.grid_size = 0
        self.img_size = 0

    def detect(self, img):
        """Run the model on one BGR image array and return the NMS result.

        Raises:
            ValueError: if *img* is None (e.g. a failed ``cv2.imread``).
        """
        if img is None:
            raise ValueError("img is None; was the image loaded correctly?")
        # (width, height) of the original image.
        self.img_size = img.shape[1], img.shape[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_LINEAR)
        img = img.astype(np.float32) / 255.
        # HWC -> NCHW with a batch dimension of 1.
        img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).to(self.device)
        self.grid_size = img.shape[2] // self.stride
        with torch.no_grad():
            pred = self.model(img)
        outputs = self.postprocess(pred)
        return outputs

    def postprocess(self, pred):
        """Decode every output head, concatenate the boxes and apply NMS.

        Args:
            pred: iterable of head tensors shaped ``(batch, 3 * attrs, g, g)``.

        Returns:
            The list produced by :meth:`non_max_suppression` (empty list if
            *pred* is empty).
        """
        outputs = []
        for i, p in enumerate(pred):
            # NOTE(review): the source showed only these two anchor groups and
            # the branch keyword between them was lost; anchors 6-8 are never
            # selected. Confirm the head-to-anchor mapping against the model.
            if i == 0:
                anchor_idx = [3, 4, 5]
            else:
                anchor_idx = [0, 1, 2]
            grid_size = p.shape[2]
            # NOTE(review): stride is derived from the ORIGINAL image width,
            # not the 640-pixel network input, and is applied to both axes.
            stride = self.img_size[0] // grid_size
            scaled_anchors = self.anchors[anchor_idx] / stride
            prediction = self.decode(p, scaled_anchors)
            prediction[..., :4] *= stride
            outputs.append(prediction)
        if not outputs:
            return []
        outputs = torch.cat(outputs, 1)
        return self.non_max_suppression(outputs)

    def decode(self, pred, anchors):
        """Convert one raw head tensor into corner-format boxes.

        Args:
            pred: tensor ``(batch, 3 * attrs, g, g)`` with
                ``attrs = 5 + num_classes`` laid out as x, y, w, h, obj, cls...
            anchors: tensor of 3 ``(w, h)`` anchors in grid units.

        Returns:
            Tensor ``(batch, 3 * g * g, attrs)`` whose last axis holds
            x1, y1, x2, y2, objectness, then per-class scores (grid units).

        Raises:
            ValueError: if the head carries no class channel.
        """
        batch_size, _, grid_size, _ = pred.shape
        pred = pred.reshape(batch_size, 3, -1, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()
        num_attrs = pred.shape[-1]
        num_classes = num_attrs - 5
        if num_classes < 1:
            raise ValueError("prediction must carry at least one class channel")
        # torch.split needs explicit sizes; -1 is not accepted.
        x, y, w, h, obj, cls = torch.split(pred, [1, 1, 1, 1, 1, num_classes], dim=-1)
        x = torch.sigmoid(x)
        y = torch.sigmoid(y)
        obj = torch.sigmoid(obj)
        cls = torch.sigmoid(cls)
        # Cell offsets via broadcasting: x varies along the last grid axis
        # (columns), y along the first grid axis (rows).
        cells = torch.arange(grid_size, device=pred.device, dtype=pred.dtype)
        x = x + cells.view(1, 1, 1, grid_size, 1)
        y = y + cells.view(1, 1, grid_size, 1, 1)
        anchors = anchors.reshape(1, 3, 1, 1, 2).to(pred.device)
        w = torch.exp(w) * anchors[..., 0:1]
        h = torch.exp(h) * anchors[..., 1:2]
        x1 = x - w / 2
        y1 = y - h / 2
        x2 = x1 + w
        y2 = y1 + h
        prediction = torch.cat((x1, y1, x2, y2, obj, cls), dim=-1)
        # Keep every attribute column (the original hard-coded 6, which only
        # fits a single-class model).
        return prediction.view(batch_size, -1, num_attrs)

    def non_max_suppression(self, prediction):
        """Greedy per-class NMS with fixed 0.5 confidence and IoU thresholds.

        Args:
            prediction: tensor ``(batch, n, 5 + num_classes)`` in corner format.

        Returns:
            List with one tensor per image, shaped ``(k, 7)``:
            x1, y1, x2, y2, objectness, class confidence, class index.
            Images without surviving boxes get an empty ``(0, 7)`` tensor.
        """
        output = []
        for image_pred in prediction:
            # Filter out confidence scores below threshold
            image_pred = image_pred[image_pred[:, 4] >= 0.5]
            # If none are remaining => process next image
            if not image_pred.size(0):
                output.append(image_pred.new_zeros((0, 7)))
                continue
            # Object confidence times class confidence
            score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
            # Sort by it
            image_pred = image_pred[(-score).argsort()]
            class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
            detections = torch.cat(
                (
                    image_pred[:, :5],
                    class_confs.to(image_pred.dtype),
                    class_preds.to(image_pred.dtype),
                ),
                1,
            )
            kept = []
            # Iterate over detections class by class; rows stay score-sorted.
            for c in detections[:, -1].unique():
                detections_class = detections[detections[:, -1] == c]
                while detections_class.size(0):
                    # Row 0 is the best remaining box of this class.
                    invalid = self.bbox_iou(detections_class[:1, :4], detections_class[:, :4])[0] > 0.5
                    # Always drop the reference box itself so the loop ends
                    # even for degenerate (zero-area) boxes.
                    invalid[0] = True
                    kept.append(detections_class[0])
                    detections_class = detections_class[~invalid]
            # Append detections for this image
            output.append(torch.stack(kept))
        return output

    def bbox_iou(self, box1, box2):
        """Returns the pairwise IoU of two sets of corner-format boxes.

        Args:
            box1: tensor ``(n, 4)`` of x1, y1, x2, y2.
            box2: tensor ``(m, 4)`` of x1, y1, x2, y2.

        Returns:
            Tensor ``(n, m)`` of IoU values.
        """
        box1_area = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
        box2_area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
        inter_min = torch.max(box1[:, None, :2], box2[:, :2])
        inter_max = torch.min(box1[:, None, 2:4], box2[:, 2:4])
        inter_size = torch.clamp((inter_max - inter_min), min=0)
        inter_area = inter_size[:, :, 0] * inter_size[:, :, 1]
        union = box1_area[:, None] + box2_area - inter_area
        # Guard against 0/0 for zero-area boxes.
        iou = inter_area / union.clamp(min=1e-16)
        return iou
# Example usage: load a checkpoint and run detection on a single image.
model_path = 'path/to/your/yolov5.pth'
yolov7 = YOLOv7(model_path)
img_path = 'path/to/your/image.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports an unreadable or missing file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image: {img_path}")
outputs = yolov7.detect(img)
输出的 `outputs` 是一个列表,其中的检测框信息包括边界框的左上角和右下角坐标(x1, y1, x2, y2)、置信度和类别。