yolov5 p.stem
时间: 2023-11-16 20:59:48 浏览: 73
yolov5 detect.py中打开摄像头的代码在哪
def detect():
    """Run detection on a camera/stream or on local files, then show or save the annotated frames.

    Takes no arguments. Configuration is read from names defined outside this
    function: ``opt``, ``weights``, ``device``, ``half``, ``imgsz``, ``out``,
    ``conf_thres``, ``iou_thres``, ``agnostic_nms``, ``max_det``, ``save_txt``
    and ``colors``.

    Raises:
        StopIteration: when ``q`` is pressed while a preview window is shown.
    """
    t0 = time.time()  # start of the whole run, reported by the final print

    # Load the model before the dataset so the loaders receive the checked image size.
    model = attempt_load(weights, map_location=device)  # load FP32 model
    # Bound to a new local name on purpose: assigning to ``imgsz`` inside this
    # function would make it local and the read on the right-hand side would fail.
    img_size = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Camera index '0' or an rtsp/http URL is treated as a live stream; anything else as local files.
    webcam = opt.source == '0' or opt.source.startswith(('rtsp', 'http'))
    if webcam:
        # Live stream: preview on screen, nothing is written to disk.
        view_img, save_img = True, False
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(opt.source, img_size=img_size)
    else:
        # Local file(s): save the annotated result, no preview.
        view_img, save_img = False, True
        dataset = LoadImages(opt.source, img_size=img_size)

    # Class names live on ``model.module`` when the model carries a ``module`` attribute.
    names = model.module.names if hasattr(model, 'module') else model.names

    # ``frame_i`` numbers the items yielded by the dataset; it is used in the label file name.
    for frame_i, (path, img, im0s, vid_cap) in enumerate(dataset):
        # img  : resized version of the current frame
        # im0s : original (un-resized) current frame
        t1 = torch_utils.time_synchronized()
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # add the batch dimension

        # Forward pass followed by non-maximum suppression
        pred = model(img, augment=opt.augment)[0]
        pred = non_max_suppression(pred, conf_thres=conf_thres, iou_thres=iou_thres, classes=opt.classes,
                                   agnostic=agnostic_nms, max_det=max_det)
        t2 = torch_utils.time_synchronized()

        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + (f'_{frame_i:06d}' if save_img else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Append the per-class detection counts to the log string
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += f"{n} {names[int(c)]}{'s' * (n > 1)}, "  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format
                    if save_img or view_img:  # Add bbox to image
                        label = f'{names[int(cls)]} {conf:.2f}'
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print(f'{s}Done. ({t2 - t1:.3f}s)')

            # Stream results
            if view_img:
                cv2.imshow(str(p), im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)

    print(f'Done. ({time.time() - t0:.3f}s)')
在上面的代码中，如果 `opt.source` 为字符串 `'0'`，或者以 `rtsp`、`http` 开头，则表示使用摄像头或网络视频流，此时代码通过 `LoadStreams` 加载视频流；否则通过 `LoadImages` 读取本地文件。在 `for path, img, im0s, vid_cap in dataset:` 这一行中，`img` 是当前帧缩放后的图片，`im0s` 是当前帧的原图。循环体对当前帧进行目标检测，经过非极大值抑制（NMS）后处理，把检测框坐标还原到原图尺寸，然后将结果打印到终端、显示到屏幕或保存到本地。如果需要显示视频流（`view_img` 为真），则调用 `cv2.imshow` 显示已经画上检测框的当前帧 `im0`，按 `q` 键可退出。
yolov7 test.py详解
1. 导入必要的库和模块
import argparse
import os
import platform
import shutil
import time
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, check_requirements, check_imshow, \
    non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, strip_optimizer, set_logging, \
    increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
2. 定义输入参数
# Command-line options, declared as (flag, add_argument keyword arguments) pairs
# and registered in this order.
_cli_options = (
    ('--weights', {'nargs': '+', 'type': str, 'default': 'yolov5s.pt', 'help': 'model.pt path(s)'}),
    ('--source', {'type': str, 'default': 'data/images', 'help': 'source'}),
    ('--img-size', {'type': int, 'default': 640, 'help': 'inference size (pixels)'}),
    ('--conf-thres', {'type': float, 'default': 0.25, 'help': 'object confidence threshold'}),
    ('--iou-thres', {'type': float, 'default': 0.45, 'help': 'IOU threshold for NMS'}),
    ('--device', {'default': '', 'help': 'cuda device, i.e. 0 or 0,1,2,3 or cpu'}),
    ('--view-img', {'action': 'store_true', 'help': 'display results'}),
    ('--save-txt', {'action': 'store_true', 'help': 'save results to *.txt'}),
    ('--save-conf', {'action': 'store_true', 'help': 'save confidences in --save-txt labels'}),
    ('--save-crop', {'action': 'store_true', 'help': 'save cropped prediction boxes'}),
    ('--nosave', {'action': 'store_true', 'help': 'do not save images/videos'}),
    ('--classes', {'nargs': '+', 'type': int, 'help': 'filter by class: --class 0, or --class 0 2 3'}),
    ('--agnostic-nms', {'action': 'store_true', 'help': 'class-agnostic NMS'}),
    ('--augment', {'action': 'store_true', 'help': 'augmented inference'}),
    ('--update', {'action': 'store_true', 'help': 'update all models'}),
    ('--project', {'default': 'runs/detect', 'help': 'save results to project/name'}),
    ('--name', {'default': 'exp', 'help': 'save results to project/name'}),
    ('--exist-ok', {'action': 'store_true', 'help': 'existing project/name ok, do not increment'}),
)

parser = argparse.ArgumentParser()
for _flag, _kwargs in _cli_options:
    parser.add_argument(_flag, **_kwargs)

# Parsed options; read throughout the rest of the script as ``opt.<name>``.
opt = parser.parse_args()
3. 加载模型
# Load the model
# Resolve the compute device first: both attempt_load() and the run-once pass
# below read ``device``, which was previously used here before being assigned.
device = select_device(opt.device)
model = attempt_load(opt.weights, map_location=device)  # load FP32 model
# Validate the requested inference size against the model's largest stride.
imgsz = check_img_size(opt.img_size, s=model.stride.max())  # check img_size
if device.type != 'cpu':
    # Push one all-zero batch through the model, cast to the dtype of its parameters.
    model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
4. 设置计算设备
# Select the compute device
device = select_device(opt.device)
half = device.type != 'cpu'  # half precision only supported on CUDA
# Initialize model
if half:
    # The inference loop converts its input tensors to FP16 when ``half`` is set,
    # so the model weights must be FP16 as well or the forward pass fails on a dtype mismatch.
    model.half()  # to FP16
5. 加载数据集
# Load the dataset
# A directory source is read with LoadImages; any other source is opened with LoadStreams.
# Without the ``else`` the LoadStreams call always replaced the LoadImages dataset.
if os.path.isdir(opt.source):
    dataset = LoadImages(opt.source, img_size=imgsz)
else:
    dataset = LoadStreams(opt.source, img_size=imgsz)
6. 定义类别和颜色
# Class names and one random colour per class
# Names are read from ``model.module`` when that attribute exists, otherwise from the model itself.
names = getattr(model, 'module', model).names
# Three random integers per class name, drawn one at a time.
colors = [
    [np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)]
    for _ in names
]
7. 定义输出文件夹
# Output directory
# Run directory is <project>/<name>, passed through increment_path together with --exist-ok.
save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
# Create the run directory; with --save-txt create its 'labels' sub-folder (and thereby the parent).
if opt.save_txt:
    (save_dir / 'labels').mkdir(parents=True, exist_ok=True)
else:
    save_dir.mkdir(parents=True, exist_ok=True)
8. 开始推理
# Run inference
# Mirrors the dataset selection: every non-directory source is opened as a stream,
# and the stream branch below indexes ``path`` / ``im0s`` per image in the batch.
webcam = not os.path.isdir(opt.source)
vid_path, vid_writer = None, None  # output video currently being written, if any
if opt.save_conf:
    # Label files are written whenever --save-conf is set, so their folder must
    # exist even when --save-txt (which normally creates it) was not given.
    (save_dir / 'labels').mkdir(parents=True, exist_ok=True)

# ``counter`` numbers the items yielded by the dataset; it is used in label file names.
for counter, (path, img, im0s, vid_cap) in enumerate(dataset):
    t1 = time_synchronized()

    # Pre-process: numpy -> tensor on the device, cast, scale to 0.0 - 1.0, add batch dimension
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference (honours --augment, which was previously parsed but ignored)
    pred = model(img, augment=opt.augment)[0]

    # Post-process: non-maximum suppression with optional class filtering
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
    t2 = time_synchronized()

    # Handle the detections of each image in the batch
    for i, det in enumerate(pred):  # detections per image
        if webcam:  # batch_size >= 1
            p, s, im0 = path[i], f'{i}: ', im0s[i].copy()
        else:
            p, s, im0 = path, '', im0s
        p = Path(p)  # .name / .stem below need a Path, not a plain string

        # NOTE(review): the original tested 'image' for the label name and 'images' when
        # saving; unified on 'image' - confirm against the dataset loader's mode values.
        is_image = dataset.mode == 'image'
        save_path = str(save_dir / p.name)
        txt_path = str(save_dir / 'labels' / p.stem) + ('' if is_image else f'_{counter}') + '.txt'

        # Crops are cut from an untouched copy so they do not contain the drawn boxes and labels.
        crop_src = im0.copy() if opt.save_crop else None

        if det is not None and len(det):
            # Rescale boxes from the network input size to the original frame size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                c = int(cls)
                label = f'{names[c]} {conf:.2f}'
                plot_one_box(xyxy, im0, label=label, color=colors[c], line_thickness=3)
                if opt.save_conf:
                    with open(txt_path, 'a') as f:
                        f.write(f'{names[c]} {conf:.2f}\n')
                if opt.save_crop:
                    x1, y1, x2, y2 = (int(v) for v in xyxy)
                    crop_img = crop_src[y1:y2, x1:x2]
                    # NOTE(review): the name holds only image index and class, so several
                    # boxes of one class in the same image overwrite each other.
                    crop_path = save_path + f'_{i}_{c}.jpg'
                    cv2.imwrite(crop_path, crop_img)

        # Save results
        if not opt.nosave:
            if is_image:
                cv2.imwrite(save_path, im0)
            else:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fourcc = 'mp4v'  # output video codec
                    if vid_cap is not None:
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    else:
                        # No capture object to query: use the frame's own size and an
                        # assumed 30 fps - TODO confirm a suitable rate for stream sources.
                        fps, w, h = 30, im0.shape[1], im0.shape[0]
                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                vid_writer.write(im0)  # the writer was previously created but never written to

        # Print results
        print(f'{s}Done. ({t2 - t1:.3f}s)')

        # Display results when --view-img is given
        if opt.view_img:
            cv2.imshow(str(p), im0)

    # Keyboard control: poll once per frame so a single key press is tested against both keys
    key = cv2.waitKey(1)
    if key == ord('q'):  # q to quit
        break
    elif key == ord('p'):  # p to pause
        cv2.waitKey(0)  # block until another key is pressed

# Release resources: finalise the last output video, if one was opened
if isinstance(vid_writer, cv2.VideoWriter):
    vid_writer.release()
- 图像预处理:将图像转换为PyTorch张量,并进行归一化和类型转换。
- 推理:将图像张量传入模型进行推理,得到预测结果。
- 后处理:对预测结果进行非极大值抑制、类别筛选等后处理操作,得到最终的检测结果。
- 处理结果:对每个检测框进行标签和颜色的绘制,同时可以选择保存检测结果的图片或视频以及标签信息的TXT文件。
- 释放资源:根据按键输入决定是否退出或暂停程序。
9. 总结