```python
import copy
import json
import os

import numpy as np

# calculate_iou(quad_a, quad_b) is assumed to be defined elsewhere
# (see the sketch further down the page)

def IOU(boxes, classes, scores, controlRectangle, controlInvalid, url):
    # Convert numpy arrays to plain lists
    boxes = boxes.tolist()
    classes = classes.tolist()
    scores = scores.tolist()
    # Read the configuration files
    if os.path.exists('stream_dict.txt'):
        with open('stream_dict.txt', 'r') as f:
            stream_dict = json.load(f)
    else:
        stream_dict = {}
    if os.path.exists('cameraId_dict.txt'):
        with open('cameraId_dict.txt', 'r') as f:
            cameraId_dict = json.load(f)
    else:
        cameraId_dict = {}
    cameraId = cameraId_dict[url]
    try:
        w = int(stream_dict[cameraId][1].split(',')[0])
        h = int(stream_dict[cameraId][1].split(',')[1])
    except (KeyError, IndexError, ValueError):
        w = 1920
        h = 1080
    # Vertical letterbox padding (in 640x640 space) and the height
    # actually covered by image content
    h1 = (w - h) * 0.5 / w * 640
    img_h = 640 - h1 * 2
    if controlRectangle == [] and controlInvalid == []:
        boxes1 = boxes
        classes1 = classes
        scores1 = scores
    if controlRectangle != []:
        # Keep only boxes that overlap at least one monitored region
        boxes1 = []
        classes1 = []
        scores1 = []
        for i in range(len(boxes)):
            for j in range(len(controlRectangle)):
                a = boxes[i]
                c = classes[i]
                s = scores[i]
                x1, y1, x2, y2 = a[0], a[1], a[2], a[3]
                d = [x1, y1, x2, y2, x2, y1, x1, y2]
                b = [controlRectangle[j][0] * 640, h1 + controlRectangle[j][1] * img_h,
                     controlRectangle[j][2] * 640, h1 + controlRectangle[j][3] * img_h,
                     controlRectangle[j][4] * 640, h1 + controlRectangle[j][5] * img_h,
                     controlRectangle[j][6] * 640, h1 + controlRectangle[j][7] * img_h]
                iou = calculate_iou(d, b)
                if iou > 0:
                    boxes1.append(a)
                    classes1.append(c)
                    scores1.append(s)
                    break
    else:
        boxes1 = boxes
        classes1 = classes
        scores1 = scores
    if controlInvalid != []:
        # Drop boxes that overlap an excluded region; rebuild the lists rather
        # than deleting by index mid-iteration, which would shift the indices
        boxes2 = []
        classes2 = []
        scores2 = []
        for i in range(len(boxes1)):
            a = boxes1[i]
            c = classes1[i]
            s = scores1[i]
            x1, y1, x2, y2 = a[0], a[1], a[2], a[3]
            d = [x1, y1, x2, y2, x2, y1, x1, y2]
            hit = False
            for j in range(len(controlInvalid)):
                b = [controlInvalid[j][0] * 640, h1 + controlInvalid[j][1] * img_h,
                     controlInvalid[j][2] * 640, h1 + controlInvalid[j][3] * img_h,
                     controlInvalid[j][4] * 640, h1 + controlInvalid[j][5] * img_h,
                     controlInvalid[j][6] * 640, h1 + controlInvalid[j][7] * img_h]
                if calculate_iou(d, b) > 0:
                    hit = True
                    break
            if not hit:
                boxes2.append(a)
                classes2.append(c)
                scores2.append(s)
        boxes1 = boxes2
        classes1 = classes2
        scores1 = scores2
    boxes1 = np.array(boxes1)
    classes1 = np.array(classes1)
    scores1 = np.array(scores1)
    return boxes1, classes1, scores1
```
This code defines a function named IOU. Its parameters are boxes, classes, scores, controlRectangle, controlInvalid, and url, and its job is to filter a set of bounding boxes, classes, and scores against a set of monitored regions (controlRectangle) and excluded regions (controlInvalid), returning the filtered results. It works as follows:
1. Convert the input numpy arrays to lists.
2. If files named stream_dict.txt and cameraId_dict.txt exist in the current directory, load their JSON contents; otherwise fall back to empty dictionaries (a hypothetical layout for both files is sketched after this list).
3. Look up the camera id for the given url in cameraId_dict, then parse that camera's width and height from stream_dict; if parsing fails, default to 1920x1080. From these, h1 = (w - h) * 0.5 / w * 640 is the vertical padding added when a w x h frame is fitted into the 640x640 network input, and img_h = 640 - 2 * h1 is the height actually covered by image content; for the 1920x1080 default, h1 = 140 and img_h = 360.
4. Filter the boxes, classes, and scores against the two region lists:
   * If both controlRectangle and controlInvalid are empty, return the inputs unchanged.
   * If controlRectangle is non-empty, iterate over the boxes and keep only those that overlap at least one monitored region.
   * If controlInvalid is non-empty, go over the remaining boxes once more and drop any box that overlaps an excluded region.
5. Convert the surviving lists back to numpy arrays and return them.
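The exact contents of the two configuration files do not appear in the question. Judging from the parsing code, `cameraId_dict.txt` maps a stream URL to a camera id, and `stream_dict.txt` maps a camera id to a list whose second element is a `"width,height"` string. A hypothetical way to produce both files (the URL and id are placeholders):
```python
import json

# Hypothetical contents inferred from the parsing code; key names are placeholders
with open('cameraId_dict.txt', 'w') as f:
    json.dump({'rtsp://192.168.1.10/stream1': 'cam01'}, f)
with open('stream_dict.txt', 'w') as f:
    # Element [1] is parsed as a "width,height" string
    json.dump({'cam01': ['rtsp://192.168.1.10/stream1', '1920,1080']}, f)
```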
Here calculate_iou computes the intersection over union of two quadrilaterals, each given as a flat list of four (x, y) corner points.
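`calculate_iou` itself is not shown. Since both arguments are flat lists of four (x, y) corner points, a minimal sketch is possible with `shapely` (an assumption; any polygon-intersection routine would do):
```python
from shapely.geometry import Polygon

def calculate_iou(quad_a, quad_b):
    """IoU of two quadrilaterals given as flat [x0, y0, ..., x3, y3] lists."""
    # convex_hull tolerates the corner ordering used above,
    # which is not a proper ring order
    pa = Polygon(list(zip(quad_a[0::2], quad_a[1::2]))).convex_hull
    pb = Polygon(list(zip(quad_b[0::2], quad_b[1::2]))).convex_hull
    inter = pa.intersection(pb).area
    union = pa.area + pb.area - inter
    return inter / union if union > 0 else 0.0
```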
Related questions
Explain the meaning of each line of the `IOU` function quoted at the top of this page.
This code defines a function named IOU that takes six parameters: boxes, classes, scores, controlRectangle, controlInvalid, and url.
First, the function converts the numpy arrays to lists, then reads the two configuration files and stores their contents in the stream_dict and cameraId_dict variables. Next, it looks up the camera ID for the given url in cameraId_dict and fetches that camera's width and height from stream_dict. If the lookup or parsing fails, the defaults 1920 and 1080 are used.
The function then computes the effective image height img_h and checks whether controlRectangle and controlInvalid are empty. If controlRectangle is non-empty, it compares every bounding box in boxes against each region defined there, computing their intersection over union (IoU); boxes with an IoU greater than 0 are added to the boxes1, classes1, and scores1 lists. If controlInvalid is non-empty, boxes that overlap any region defined there are then removed from boxes1, classes1, and scores1.
Finally, the function converts boxes1, classes1, and scores1 back to numpy arrays and returns them as the result.
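For illustration, a minimal call to `IOU` might look like the sketch below; the stream URL, region coordinates, and detections are placeholders, and the two configuration files plus `calculate_iou` must already exist:
```python
import numpy as np

boxes = np.array([[100.0, 200.0, 300.0, 400.0]])  # x1, y1, x2, y2 in 640x640 space
classes = np.array([0])
scores = np.array([0.9])
# One monitored quadrilateral in normalized coordinates (four corners, eight values)
controlRectangle = [[0.1, 0.1, 0.9, 0.9, 0.9, 0.1, 0.1, 0.9]]

# The URL must be a key in cameraId_dict.txt
boxes1, classes1, scores1 = IOU(boxes, classes, scores,
                                controlRectangle, [], 'rtsp://192.168.1.10/stream1')
```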
In an environment with Python 3.6, PyTorch 1.10.2, CUDA 11.3, and numpy 1.19.5, write a class named yolov7 that loads a locally stored .pth YOLOv5 model trained on a custom dataset, runs detection on an image, and outputs the classes and the four corner positions of each detection box as a list, wrapped as a callable function.
First, the `torch` and `opencv-python` packages need to be installed. The requested functionality can then be implemented with the following code:
```python
import torch
import cv2
import numpy as np
class YOLOv7:
def __init__(self, model_path, device='cuda'):
self.model = torch.load(model_path, map_location=device)['model'].float()
self.model.to(device).eval()
self.device = device
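        # Standard YOLOv5 COCO anchors, three per output scale (strides 8, 16, 32)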
self.anchors = torch.tensor([[10,13], [16,30], [33,23], [30,61], [62,45], [59,119], [116,90], [156,198], [373,326]]).to(device)
self.stride = torch.tensor([8, 16, 32]).to(device)
self.grid_size = 0
self.img_size = 0
def detect(self, img):
self.img_size = img.shape[1], img.shape[0]
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
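        # Resize straight to the 640x640 network input (no letterbox padding)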
img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_LINEAR)
img = img.astype(np.float32) / 255.
img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).to(self.device)
self.grid_size = img.shape[2] // self.stride
with torch.no_grad():
pred = self.model(img)
outputs = self.postprocess(pred)
return outputs
def postprocess(self, pred):
outputs = []
        for i, p in enumerate(pred):
            # Each output scale has its own three anchors:
            # 0-2 for stride 8, 3-5 for stride 16, 6-8 for stride 32
            anchor_idx = [3 * i, 3 * i + 1, 3 * i + 2]
grid_size = p.shape[2]
            # The network input is 640x640, so the stride is relative to 640
            stride = 640 // grid_size
            scaled_anchors = self.anchors[anchor_idx] / stride
            prediction = self.decode(p, scaled_anchors)
            # Back to 640-space pixels, then rescale to the original image size
            prediction[..., :4] *= stride
            prediction[..., [0, 2]] *= self.img_size[0] / 640
            prediction[..., [1, 3]] *= self.img_size[1] / 640
outputs.append(prediction)
outputs = torch.cat(outputs, 1)
return self.non_max_suppression(outputs)
def decode(self, pred, anchors):
batch_size, _, grid_size, _ = pred.shape
pred = pred.view(batch_size, 3, -1, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()
        # Split the last dim into x, y, w, h, objectness and per-class scores
        x, y, w, h, obj, cls = torch.split(pred, [1, 1, 1, 1, 1, pred.shape[-1] - 5], dim=-1)
x = torch.sigmoid(x)
y = torch.sigmoid(y)
obj = torch.sigmoid(obj)
cls = torch.sigmoid(cls)
grid_y, grid_x = torch.meshgrid(torch.arange(grid_size), torch.arange(grid_size))
xy_grid = torch.stack((grid_x, grid_y), dim=-1).to(self.device).float()
xy_grid = xy_grid.view(1, 1, grid_size, grid_size, 2)
xy_grid = xy_grid.repeat(batch_size, 3, 1, 1, 1)
x += xy_grid[..., 0:1]
y += xy_grid[..., 1:2]
anchors = anchors.view(1, 3, 1, 1, 2).repeat(batch_size, 1, grid_size, grid_size, 1)
w = torch.exp(w) * anchors[..., 0:1]
h = torch.exp(h) * anchors[..., 1:2]
x1 = x - w / 2
y1 = y - h / 2
x2 = x1 + w
y2 = y1 + h
prediction = torch.cat((x1, y1, x2, y2, obj, cls), dim=-1)
        return prediction.view(batch_size, -1, prediction.shape[-1])
def non_max_suppression(self, prediction):
output = []
for i, image_pred in enumerate(prediction):
# Filter out confidence scores below threshold
conf_mask = (image_pred[:, 4] >= 0.5).squeeze()
image_pred = image_pred[conf_mask]
# If none are remaining => process next image
if not image_pred.size(0):
continue
# Object confidence times class confidence
score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
# Sort by it
image_pred = image_pred[(-score).argsort()]
class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
# Iterate over detections
for c in detections[:, -1].unique():
detections_class = detections[detections[:, -1] == c]
# Sort by score
                # Greedy NMS: keep the top-scoring box, suppress same-class boxes
                # that overlap it heavily, and repeat on the remainder
                keep = []
                while detections_class.size(0):
                    large_overlap = self.bbox_iou(detections_class[:1, :4],
                                                  detections_class[:, :4]).squeeze(0) > 0.5
                    label_match = detections_class[0, -1] == detections_class[:, -1]
                    # Boxes with lower scores, large IoU and matching labels are dropped
                    invalid = large_overlap & label_match
                    keep.append(detections_class[:1])
                    detections_class = detections_class[~invalid]
                # Append detections for this image
                if keep:
                    output.extend(torch.cat(keep, 0).tolist())
return output
def bbox_iou(self, box1, box2):
"""
Returns the IoU of two bounding boxes
"""
box1_area = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
box2_area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
inter_min = torch.max(box1[:, None, :2], box2[:, :2])
inter_max = torch.min(box1[:, None, 2:], box2[:, 2:])
inter_size = torch.clamp((inter_max - inter_min), min=0)
inter_area = inter_size[:, :, 0] * inter_size[:, :, 1]
iou = inter_area / (box1_area[:, None] + box2_area - inter_area)
return iou
```
The class can then be called like this:
```python
model_path = 'path/to/your/yolov5.pth'
yolov7 = YOLOv7(model_path)
img_path = 'path/to/your/image.jpg'
img = cv2.imread(img_path)
outputs = yolov7.detect(img)
print(outputs)
```
Each element of the returned `outputs` list describes one detection as `[x1, y1, x2, y2, objectness, class_confidence, class_id]`; the four corner points of the axis-aligned box follow directly from the opposite corners `(x1, y1)` and `(x2, y2)`.
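Since the task asks for the class together with all four corner points, a small helper can reshape each row; `format_outputs` below is a hypothetical addition, not part of the class above:
```python
def format_outputs(outputs):
    # Each row: [x1, y1, x2, y2, objectness, class_confidence, class_id]
    results = []
    for x1, y1, x2, y2, obj, conf, cls_id in outputs:
        # Four corners of the axis-aligned box, clockwise from top-left
        corners = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
        results.append([int(cls_id), corners])
    return results

print(format_outputs(outputs))
```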