The difference between y_preds.append(preds.numpy()) and y_preds.append(tf.sigmoid(preds).numpy().argmax(axis=1).tolist())
y_preds.append(preds.numpy()) converts a TensorFlow tensor to a NumPy array and appends the raw model outputs (logits). y_preds.append(tf.sigmoid(preds).numpy().argmax(axis=1).tolist()) first applies the sigmoid function to map those values into (0, 1) probabilities, then takes argmax along axis 1 to pick the index of the highest-scoring class for each sample, and finally converts those indices to a Python list. The two therefore append different things: the former stores the raw output values, the latter stores a list of predicted class labels. (Since sigmoid is monotonic, the argmax result is the same with or without it; applying sigmoid only matters if you also want the probability values themselves.)
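A minimal illustration of the difference (the logits tensor here is invented for the example):
```python
import tensorflow as tf

preds = tf.constant([[2.0, -1.0], [-0.5, 1.5]])  # raw model outputs (logits)

raw_values = preds.numpy()
# array([[ 2. , -1. ],
#        [-0.5,  1.5]], dtype=float32)

labels = tf.sigmoid(preds).numpy().argmax(axis=1).tolist()
# [0, 1] -- one predicted class index per row
```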
Related questions
What does np.append(np.array(y_score), tf.sigmoid(preds).numpy(), axis=0) mean?
This line concatenates np.array(y_score) and tf.sigmoid(preds).numpy() along axis=0 (i.e., vertically, stacking rows) and returns the combined array. Both operands are NumPy arrays: the first converts y_score, the second evaluates the sigmoid of a TensorFlow tensor and extracts the result as an array. np.append() is a NumPy function for array concatenation; note that it returns a new array rather than extending its input in place, so the result must be assigned back if you want to accumulate scores.
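A minimal sketch of the shape behaviour (the array contents are made up for illustration):
```python
import numpy as np

y_score = [[0.1, 0.9], [0.8, 0.2]]    # previously accumulated scores, shape (2, 2)
batch_probs = np.array([[0.3, 0.7]])  # e.g. sigmoid outputs for one batch, shape (1, 2)

y_score = np.append(np.array(y_score), batch_probs, axis=0)
print(y_score.shape)  # (3, 2): the new rows are stacked below the old ones
```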
In a python3.6, pytorch1.10.2, cuda11.3, numpy1.19.5 environment, implement a class named yolov7 that loads a locally stored yolov5 .pth model trained on a custom dataset, runs detection on an image, and returns the classes and the corner coordinates of each detection box as a list, wrapped as a callable function
First, install the `torch` and `opencv-python` libraries. The functionality can then be implemented as follows:
```python
import torch
import cv2
import numpy as np


class YOLOv7:
    def __init__(self, model_path, device='cuda'):
        # YOLOv5-style checkpoints store the network under the 'model' key
        ckpt = torch.load(model_path, map_location=device)
        self.model = ckpt['model'].float()
        self.model.to(device).eval()
        self.device = device
        self.input_size = 640  # network input resolution
        # Default COCO anchors: three per detection scale (strides 8, 16, 32)
        self.anchors = torch.tensor([[10, 13], [16, 30], [33, 23],
                                     [30, 61], [62, 45], [59, 119],
                                     [116, 90], [156, 198], [373, 326]]).to(device)
        self.img_size = (0, 0)  # original image (width, height), set per call

    def detect(self, img):
        # Remember the original size so boxes can be mapped back later
        self.img_size = img.shape[1], img.shape[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (self.input_size, self.input_size), interpolation=cv2.INTER_LINEAR)
        img = img.astype(np.float32) / 255.
        img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).to(self.device)
        with torch.no_grad():
            pred = self.model(img)
        return self.postprocess(pred)

    def postprocess(self, pred):
        outputs = []
        for i, p in enumerate(pred):
            # Scale i uses anchors 3*i .. 3*i+2 (strides 8, 16, 32 in order)
            anchor_idx = [3 * i, 3 * i + 1, 3 * i + 2]
            grid_size = p.shape[2]
            stride = self.input_size // grid_size
            scaled_anchors = self.anchors[anchor_idx] / stride
            prediction = self.decode(p, scaled_anchors)
            # Map grid units back to input-image pixels
            prediction[..., :4] *= stride
            outputs.append(prediction)
        outputs = torch.cat(outputs, 1)
        # Rescale boxes from the 640x640 input to the original image size
        outputs[..., [0, 2]] *= self.img_size[0] / self.input_size
        outputs[..., [1, 3]] *= self.img_size[1] / self.input_size
        return self.non_max_suppression(outputs)

    def decode(self, pred, anchors):
        batch_size, _, grid_size, _ = pred.shape
        pred = pred.view(batch_size, 3, -1, grid_size, grid_size).permute(0, 1, 3, 4, 2).contiguous()
        num_classes = pred.shape[-1] - 5
        x, y, w, h, obj, cls = torch.split(pred, [1, 1, 1, 1, 1, num_classes], dim=-1)
        x = torch.sigmoid(x)
        y = torch.sigmoid(y)
        obj = torch.sigmoid(obj)
        cls = torch.sigmoid(cls)
        # Build a (grid_size, grid_size, 2) grid of cell offsets
        grid_y, grid_x = torch.meshgrid(torch.arange(grid_size), torch.arange(grid_size))
        xy_grid = torch.stack((grid_x, grid_y), dim=-1).to(self.device).float()
        xy_grid = xy_grid.view(1, 1, grid_size, grid_size, 2).repeat(batch_size, 3, 1, 1, 1)
        x = x + xy_grid[..., 0:1]
        y = y + xy_grid[..., 1:2]
        anchors = anchors.view(1, 3, 1, 1, 2).repeat(batch_size, 1, grid_size, grid_size, 1)
        w = torch.exp(w) * anchors[..., 0:1]
        h = torch.exp(h) * anchors[..., 1:2]
        # Convert center/size to corner coordinates
        x1 = x - w / 2
        y1 = y - h / 2
        x2 = x1 + w
        y2 = y1 + h
        prediction = torch.cat((x1, y1, x2, y2, obj, cls), dim=-1)
        return prediction.view(batch_size, -1, 5 + num_classes)

    def non_max_suppression(self, prediction, conf_thres=0.5, iou_thres=0.5):
        output = []
        for image_pred in prediction:
            # Drop boxes whose objectness score is below the threshold
            image_pred = image_pred[image_pred[:, 4] >= conf_thres]
            # If none remain, process the next image
            if not image_pred.size(0):
                continue
            # Objectness times best class confidence, sorted descending
            score = image_pred[:, 4] * image_pred[:, 5:].max(1)[0]
            image_pred = image_pred[(-score).argsort()]
            class_confs, class_preds = image_pred[:, 5:].max(1, keepdim=True)
            detections = torch.cat((image_pred[:, :5], class_confs.float(), class_preds.float()), 1)
            # Greedy NMS, performed independently for each class
            for c in detections[:, -1].unique():
                detections_class = detections[detections[:, -1] == c]
                keep = []
                while detections_class.size(0):
                    # Keep the highest-scoring box, then suppress heavy overlaps
                    keep.append(detections_class[:1])
                    overlap = self.bbox_iou(detections_class[:1, :4], detections_class[:, :4]) > iou_thres
                    detections_class = detections_class[~overlap.squeeze(0)]
                output.extend(torch.cat(keep).tolist())
        return output

    def bbox_iou(self, box1, box2):
        """Return the pairwise IoU between two sets of (x1, y1, x2, y2) boxes."""
        box1_area = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])
        box2_area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])
        inter_min = torch.max(box1[:, None, :2], box2[:, :2])
        inter_max = torch.min(box1[:, None, 2:], box2[:, 2:])
        inter_size = torch.clamp(inter_max - inter_min, min=0)
        inter_area = inter_size[:, :, 0] * inter_size[:, :, 1]
        return inter_area / (box1_area[:, None] + box2_area - inter_area)
```
The class can then be called like this:
```python
model_path = 'path/to/your/yolov5.pth'
yolov7 = YOLOv7(model_path)
img_path = 'path/to/your/image.jpg'
img = cv2.imread(img_path)
outputs = yolov7.detect(img)
print(outputs)
```
The returned `outputs` is a list in which each element describes one detection as `[x1, y1, x2, y2, objectness, class_confidence, class_id]`: the box corners in the original image's pixel coordinates, followed by the confidence scores and the predicted class.
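For example, to turn each detection into a readable label and box (the class-name list is a placeholder you would replace with your own dataset's names):
```python
class_names = ['person', 'car']  # placeholder: use your dataset's class names

for x1, y1, x2, y2, obj_conf, cls_conf, cls_id in outputs:
    print(class_names[int(cls_id)], round(cls_conf, 2), (x1, y1), (x2, y2))
```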