How should these two lines of code be understood: x = torch.sigmoid(prediction[..., 0]) and y = torch.sigmoid(prediction[..., 1])?
These two lines apply PyTorch's sigmoid function to part of the network's raw output, squashing the values into the range 0 to 1. Here `prediction` is the network output tensor, and `[..., 0]` and `[..., 1]` index the first and second elements along its last dimension. In a YOLO-style detection head, those two channels are the predicted x and y offsets of the box center within its grid cell, so applying sigmoid constrains each offset to (0, 1) and keeps the predicted center inside that cell.
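For intuition, here is a minimal, self-contained sketch (the tensor shape and the 13x13 grid are made up for illustration) of how these sigmoid-squashed offsets are typically combined with the grid cell indices:

```python
import torch

# Hypothetical head output: batch 1, 3 anchors, 13x13 grid, 85 values per anchor (x, y, w, h, obj, 80 classes)
prediction = torch.randn(1, 3, 13, 13, 85)

x = torch.sigmoid(prediction[..., 0])  # center-x offset within each grid cell, in (0, 1)
y = torch.sigmoid(prediction[..., 1])  # center-y offset within each grid cell, in (0, 1)

# Adding the cell indices turns the offsets into absolute centers measured in grid units
grid_y, grid_x = torch.meshgrid(torch.arange(13), torch.arange(13))
cx = x + grid_x
cy = y + grid_y
print(cx.shape, cy.shape)  # torch.Size([1, 3, 13, 13]) torch.Size([1, 3, 13, 13])
```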
Related questions
YOLOv7 common.py source code
Below is a simplified common.py in the style of YOLOv7:
```python
import math
import torch.nn.functional as F
import torch.nn as nn
import torch
def make_divisible(x, divisor):
    # Returns x rounded up to the nearest multiple of divisor
    return math.ceil(x / divisor) * divisor


def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p
class Conv(nn.Module):
    # Standard convolution: Conv2d -> BatchNorm -> activation
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, stride=s, padding=autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.Hardswish() if act else nn.Identity()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))
class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        super(Bottleneck, self).__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        # residual add only when input and output channel counts match
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
class SPP(nn.Module):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super(SPP, self).__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
class DWConv(nn.Module):
    # Depthwise separable convolution: depthwise conv followed by a 1x1 pointwise projection
    def __init__(self, c1, c2, k=1, s=1, p=None, dilation=1):
        super(DWConv, self).__init__()
        if p is None:
            p = dilation * (k // 2)  # 'same' padding for an integer kernel, accounting for dilation
        self.conv = nn.Conv2d(c1, c1, k, stride=s, padding=p, dilation=dilation, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)
        self.act = nn.Hardswish()
        self.project = nn.Conv2d(c1, c2, 1, bias=False)
        self.bn2 = nn.BatchNorm2d(c2)
        self.act2 = nn.Hardswish()

    def forward(self, x):
        return self.act2(self.bn2(self.project(self.act(self.bn(self.conv(x))))))
class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)

    def forward(self, x):
        # x(b,c,w,h) -> cat of 4 pixel-shuffled slices (b,4c,w/2,h/2) -> conv -> (b,c2,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
class Concat(nn.Module):
    # Concatenate a list of tensors along a dimension
    def __init__(self, dimension=1):
        super(Concat, self).__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)
class Detect(nn.Module):
    # Detection layer: reshapes raw conv output into per-anchor predictions
    def __init__(self, nc, anchors):
        super(Detect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor (x, y, w, h, obj, classes)
        self.na = len(anchors)  # number of anchors
        anchors = torch.tensor(anchors).float().view(self.na, -1)
        self.register_buffer("anchors", anchors)
        self.register_buffer("anchor_grid", anchors.clone().view(1, self.na, 1, 1, -1))
        self.m = nn.Conv2d(self.no * self.na, self.no * self.na, 1)  # prediction conv

    def forward(self, x):
        # x(bs, na*no, ny, nx) -> x(bs, na, ny, nx, no)
        bs, _, ny, nx = x.shape
        x = self.m(x).view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
        if not self.training:
            # at inference, squash xy offsets, objectness and class scores into (0, 1)
            x[..., :2] = x[..., :2].sigmoid()
            x[..., 4:] = x[..., 4:].sigmoid()
        return x
class Model(nn.Module):
    # YOLOv7 model https://github.com/WongKinYiu/yolov7
    def __init__(self, nc=80, anchors=((10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                                       (116, 90), (156, 198), (373, 326)),
                 ch=[64, 128, 256, 256, 512, 512, 1024, 1024], depth=0.33):
        # ch lists one channel width per backbone stage (illustrative values)
        super(Model, self).__init__()
        assert depth in [0.33, 0.67, 1.0]
        self.depth = depth  # model depth multiplier
        self.grid = [torch.zeros(1)] * 5  # init grid
        self.stride = torch.tensor([8., 16., 32., 64., 128.])
        self.create_backbone(ch)
        self.create_neck()
        self.create_head(nc, anchors)

    def forward(self, x):
        z = []
        for i in range(5):
            x = self.backbone[i](x)
            z.append(x)
        x = self.neck(z)
        return self.head(x)

    def create_backbone(self, ch):
        # darknet-style backbone
        self.backbone = nn.ModuleList([Focus(3, ch[0], 3),
                                       Conv(ch[0], ch[1], 3, 2),
                                       Bottleneck(ch[1], ch[2]),
                                       Conv(ch[2], ch[3], 3, 2),
                                       Bottleneck(ch[3], ch[4]),
                                       Conv(ch[4], ch[5], 3, 2),
                                       SPP(ch[5], ch[5]),
                                       Bottleneck(ch[5], ch[6]),
                                       Conv(ch[6], ch[7], 1)])
        c2 = make_divisible(ch[7] * self.depth, 8)  # ch_last, rounded up to a multiple of 8
        self.backbone.append(Bottleneck(ch[7], c2, False))
        self.out_channels = [c2, ch[4], ch[2], ch[0]]

    def create_neck(self):
        # FPN-like attentional output
        self.neck = nn.Sequential(
            Concat(),
            Conv(self.out_channels[0], self.out_channels[0], 1),
            DWConv(self.out_channels[0], self.out_channels[1], 3, s=2),
            DWConv(self.out_channels[1], self.out_channels[2], 3, s=2),
            DWConv(self.out_channels[2], self.out_channels[3], 3, s=2),
            SPP(self.out_channels[3], self.out_channels[3]),
            DWConv(self.out_channels[3], self.out_channels[3], 3, dilation=3),
            DWConv(self.out_channels[3], self.out_channels[3], 3, dilation=3),
            DWConv(self.out_channels[3], self.out_channels[3], 3, dilation=3),
        )

    def create_head(self, nc, anchors):
        # detection head
        self.head = nn.Sequential(
            DWConv(self.out_channels[3], self.out_channels[3], 3, dilation=3),
            DWConv(self.out_channels[3], self.out_channels[3], 3, dilation=3),
            DWConv(self.out_channels[3], self.out_channels[3], 3, dilation=3),
            Concat(),
            Conv(self.out_channels[3] * 4, self.out_channels[3], 1),
            nn.Conv2d(self.out_channels[3], len(anchors) * (nc + 5), 1, bias=True),
            Detect(nc, anchors))
def attempt_load(weights, map_location=None, inplace=True):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    if isinstance(weights, (list, tuple)):
        # Load a list of checkpoints (ensemble)
        ensemble = nn.ModuleList()
        for w in weights:
            model = Model()
            try:
                ckpt = torch.load(w, map_location=map_location)  # load checkpoint
                state_dict = ckpt['model'].float().state_dict()  # to FP32
                # keep only parameters whose shapes match the freshly built model
                state_dict = {k: v for k, v in state_dict.items()
                              if k in model.state_dict() and model.state_dict()[k].shape == v.shape}
                model.load_state_dict(state_dict, strict=False)
                print(f"Transferred {len(state_dict)} items from {w}")
            except Exception as e:
                print(f"Error loading {w}: {e}")
            ensemble.append(model.eval())
        return ensemble
    else:
        # Load a single checkpoint
        model = Model()
        try:
            ckpt = torch.load(weights, map_location=map_location)  # load checkpoint
            state_dict = ckpt['model'].float().state_dict()  # to FP32
            state_dict = {k: v for k, v in state_dict.items()
                          if k in model.state_dict() and model.state_dict()[k].shape == v.shape}
            model.load_state_dict(state_dict, strict=False)
            print(f"Transferred {len(state_dict)} items from {weights}")
        except Exception as e:
            print(f"Error loading {weights}: {e}")
        return model.eval()
```
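As a quick sanity check, the standalone blocks above can be exercised with a dummy tensor in the same module; the channel counts and input size below are illustrative, and this does not exercise the full Model class:

```python
dummy = torch.randn(1, 3, 64, 64)  # NCHW input
focus = Focus(3, 32, k=3)          # 3 -> 32 channels, halves the spatial size
conv = Conv(32, 64, k=3, s=2)      # strided conv halves it again
out = conv(focus(dummy))
print(out.shape)                   # expected: torch.Size([1, 64, 16, 16])
```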
In a Python 3.6 / PyTorch 1.10.2 / CUDA 11.3 / NumPy 1.19.5 environment, write a class named yolov7 that locally loads a yolov5 .pth model trained on your own dataset, runs detection on an image, and returns, as a list, the classes and the four corner coordinates of the detection boxes, exposed as callable functions.
First, install the `torch` and `opencv-python` packages. The required functionality can then be implemented with code along the following lines:
```python
import torch
import cv2
import numpy as np
class YOLOv7:
    def __init__(self, model_path, device='cuda'):
        self.device = device
        self.model = torch.load(model_path, map_location=device)['model'].float()
        self.model.to(device).eval()
        self.anchors = torch.tensor([[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119],
                                     [116, 90], [156, 198], [373, 326]]).to(device)
        self.stride = torch.tensor([8, 16, 32]).to(device)
        self.input_size = 640  # network input resolution
        self.img_size = 0      # original image size (w, h)

    def detect(self, img):
        self.img_size = img.shape[1], img.shape[0]  # original (w, h)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.input_size, self.input_size), interpolation=cv2.INTER_LINEAR)
        img = img.astype(np.float32) / 255.
        img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).to(self.device)
        with torch.no_grad():
            pred = self.model(img)
        outputs = self.postprocess(pred)
        return outputs
    def postprocess(self, pred):
        outputs = []
        for i, p in enumerate(pred):
            # one anchor group per output scale (assumes pred is ordered from the finest grid to the coarsest)
            anchor_idx = list(range(3 * i, 3 * i + 3))
            grid_size = p.shape[2]
            stride = self.input_size // grid_size
            scaled_anchors = self.anchors[anchor_idx] / stride
            prediction = self.decode(p, scaled_anchors)
            prediction[..., :4] *= stride  # back to network-input (640x640) pixels
            outputs.append(prediction)
        outputs = torch.cat(outputs, 1)
        return self.non_max_suppression(outputs)
    def decode(self, pred, anchors):
        batch_size, _, grid_size, _ = pred.shape
        pred = pred.view(batch_size, 3, -1, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()
        nc = pred.shape[-1] - 5  # number of classes
        x, y, w, h, obj, cls = torch.split(pred, [1, 1, 1, 1, 1, nc], dim=-1)
        x = torch.sigmoid(x)
        y = torch.sigmoid(y)
        obj = torch.sigmoid(obj)
        cls = torch.sigmoid(cls)
        grid_y, grid_x = torch.meshgrid(torch.arange(grid_size), torch.arange(grid_size))
        xy_grid = torch.stack((grid_x, grid_y), dim=-1).to(self.device).float()
        xy_grid = xy_grid.view(1, 1, grid_size, grid_size, 2)
        xy_grid = xy_grid.repeat(batch_size, 3, 1, 1, 1)
        x = x + xy_grid[..., 0:1]
        y = y + xy_grid[..., 1:2]
        anchors = anchors.view(1, 3, 1, 1, 2).repeat(batch_size, 1, grid_size, grid_size, 1)
        w = torch.exp(w) * anchors[..., 0:1]
        h = torch.exp(h) * anchors[..., 1:2]
        x1 = x - w / 2
        y1 = y - h / 2
        x2 = x1 + w
        y2 = y1 + h
        prediction = torch.cat((x1, y1, x2, y2, obj, cls), dim=-1)
        return prediction.view(batch_size, -1, 5 + nc)
    def non_max_suppression(self, prediction, conf_thres=0.5, iou_thres=0.5):
        output = []
        for image_pred in prediction:
            # Filter out detections with objectness below threshold
            image_pred = image_pred[image_pred[:, 4] >= conf_thres]
            # If none remain => process next image
            if not image_pred.size(0):
                continue
            # Object confidence times class confidence
            score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
            # Sort by it
            image_pred = image_pred[(-score).argsort()]
            class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
            detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
            # Greedy NMS, performed per class
            for c in detections[:, -1].unique():
                detections_class = detections[detections[:, -1] == c]
                keep = []
                while detections_class.size(0):
                    # Keep the highest-scoring box, drop everything that overlaps it heavily
                    keep.append(detections_class[0])
                    large_overlap = self.bbox_iou(detections_class[:1, :4], detections_class[:, :4]).squeeze(0) > iou_thres
                    detections_class = detections_class[~large_overlap]
                # Each kept row is [x1, y1, x2, y2, obj, class_conf, class_id]
                output.extend(torch.stack(keep).tolist())
        return output
    def bbox_iou(self, box1, box2):
        """
        Returns the IoU between two sets of boxes in (x1, y1, x2, y2) format
        """
        box1_area = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
        box2_area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
        inter_min = torch.max(box1[:, None, :2], box2[:, :2])
        inter_max = torch.min(box1[:, None, 2:], box2[:, 2:])
        inter_size = torch.clamp(inter_max - inter_min, min=0)
        inter_area = inter_size[:, :, 0] * inter_size[:, :, 1]
        iou = inter_area / (box1_area[:, None] + box2_area - inter_area)
        return iou
```
The class can then be called like this:
```python
model_path = 'path/to/your/yolov5.pth'
yolov7 = YOLOv7(model_path)
img_path = 'path/to/your/image.jpg'
img = cv2.imread(img_path)
outputs = yolov7.detect(img)
print(outputs)
```
The returned `outputs` is a list in which each element describes one detection box: its corner coordinates, objectness score, class confidence, and class index.
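For example, continuing from the snippet above, the boxes can be scaled back from the 640x640 network input to the original image and drawn with OpenCV; `class_names` below is a placeholder for your own label list:

```python
class_names = ['person', 'car']  # placeholder: replace with your dataset's classes

h, w = img.shape[:2]
sx, sy = w / 640., h / 640.  # scale factors from network input back to the original image

for x1, y1, x2, y2, obj, cls_conf, cls_id in outputs:
    p1 = (int(x1 * sx), int(y1 * sy))
    p2 = (int(x2 * sx), int(y2 * sy))
    cv2.rectangle(img, p1, p2, (0, 255, 0), 2)
    label = f"{class_names[int(cls_id)]} {obj * cls_conf:.2f}"
    cv2.putText(img, label, (p1[0], max(p1[1] - 5, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
cv2.imwrite('detections.jpg', img)
```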