diff --git a/README.md b/README.md
index 7d0463ac314..138d6cfe6ce 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,11 @@
+#### Get a model optimized for RKNN
+
+To export a detection/segmentation model optimized for RKNN, please refer to [RKOPT_README.md](RKOPT_README.md). The optimization applies only at export time and does not affect the training stage.
+
+For the Chinese version of this guide, see [RKOPT_README.zh-CN.md](RKOPT_README.zh-CN.md).
+
+---
+

diff --git a/README.zh-CN.md b/README.zh-CN.md
index ab182093133..650140552db 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -1,3 +1,9 @@
+#### 导出适配 RKNPU 的模型
+
+关于如何导出适配 RKNPU 的分割/检测模型,请参考 [RKOPT_README.zh-CN.md](RKOPT_README.zh-CN.md),该优化只在导出模型时生效,训练代码按照原仓库的指引即可。
+
+---
+

diff --git a/RKOPT_README.md b/RKOPT_README.md
new file mode 100644
index 00000000000..60c7d0ff248
--- /dev/null
+++ b/RKOPT_README.md
@@ -0,0 +1,39 @@
+# RKNN optimization for exporting models
+
+## Source
+Based on https://github.com/ultralytics/ultralytics at commit c9be1f3cce89778f79fb462797b8ca0300e3813d
+
+
+
+
+## What's different
+With the inference results unchanged, the following optimizations were applied:
+- Change the output nodes and remove the post-processing from the model. (the in-model post-processing block is unfriendly to quantization)
+- Remove the DFL structure at the end of the model. (it slows down inference on NPU devices)
+- Add a score-sum output branch to speed up post-processing.
+
+All removed operations are performed on the CPU. (the CPU post-processing code can be found in **RKNN_Model_Zoo**)
+
+
+
+
+## Export ONNX model
+
+After meeting the environment requirements specified in "./requirements.txt", execute the following commands to export the model (detection and segmentation models are supported):
+
+```
+# Adjust the model file path in "./ultralytics/cfg/default.yaml". If you trained your own model, provide the corresponding path.
+# For example, set yolov8n.pt to export a detection model,
+# or yolov8n-seg.pt to export a segmentation model.
+
+export PYTHONPATH=./
+python ./ultralytics/engine/exporter.py
+
+# Upon completion, the ".onnx" model will be generated. If the original model is "yolov8n.pt", the generated model will be "yolov8n.onnx".
+```
+
+
+
+## Convert to RKNN model, Python demo, C demo
+
+Please refer to https://github.com/airockchip/rknn_model_zoo.
\ No newline at end of file
diff --git a/RKOPT_README.zh-CN.md b/RKOPT_README.zh-CN.md
new file mode 100644
index 00000000000..28fbcdc51e3
--- /dev/null
+++ b/RKOPT_README.zh-CN.md
@@ -0,0 +1,45 @@
+# 导出 RKNPU 适配模型说明
+
+## Source
+
+本仓库基于 https://github.com/ultralytics/ultralytics 仓库的 c9be1f3cce89778f79fb462797b8ca0300e3813d commit 进行修改、验证。
+
+
+
+## 模型差异
+
+在不影响输出结果、不需要重新训练模型的前提下,有以下改动:
+
+- 修改输出结构,移除后处理结构。(后处理结构对量化不友好)
+
+- DFL 结构在 NPU 上处理性能不佳,移至模型外部的后处理阶段,此操作大部分情况下可提升推理性能。
+
+
+- 模型输出分支新增置信度的总和,用于后处理阶段加速阈值筛选。
+
+
+以上移除的操作,均需要在外部使用 CPU 进行相应的处理。(对应的后处理代码可以在 **RKNN_Model_Zoo** 中找到)
+
+
+
+## 导出 ONNX 模型
+
+在满足 ./requirements.txt 的环境要求后,执行以下语句导出模型:
+
+```
+# 调整 ./ultralytics/cfg/default.yaml 中 model 文件路径,若自己训练模型,请调整至对应的路径。支持检测、分割模型。
+# 如填入 yolov8n.pt 导出检测模型
+# 如填入 yolov8n-seg.pt 导出分割模型
+
+export PYTHONPATH=./
+python ./ultralytics/engine/exporter.py
+
+# 执行完毕后,会生成 ONNX 模型。假如原始模型为 yolov8n.pt,则生成 yolov8n.onnx 模型。
+```
+
+
+
+## 转 RKNN 模型、Python demo、C demo
+
+请参考 https://github.com/airockchip/rknn_model_zoo
+
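To make the relocated CPU post-processing concrete, here is a minimal NumPy sketch of what the READMEs above describe for one detection branch. The tensor names and layouts (`box_dist`, `cls_prob`, `score_sum`) are assumptions for illustration; the reference implementation lives in RKNN_Model_Zoo.

```python
import numpy as np

def decode_branch(box_dist, cls_prob, score_sum, stride, conf_thres=0.25, reg_max=16):
    """Decode one (box, cls, score_sum) triple emitted by the RKNN-optimized graph.

    Assumed layouts: box_dist (1, 4*reg_max, h, w) raw DFL logits, cls_prob
    (1, nc, h, w) already sigmoid-ed in-graph, score_sum (1, 1, h, w).
    """
    _, nc, h, w = cls_prob.shape
    box_dist = box_dist.reshape(4, reg_max, h * w)  # split channels into 4 sides x reg_max bins
    cls_prob = cls_prob.reshape(nc, h * w)

    # Cheap pre-filter: the in-graph score-sum output bounds the best class score
    # from above, so cells below the threshold can be skipped before per-class work.
    keep = score_sum.reshape(h * w) >= conf_thres
    box_dist, cls_prob = box_dist[..., keep], cls_prob[:, keep]

    # The DFL step removed from the graph: softmax over reg_max bins, then expectation.
    e = np.exp(box_dist - box_dist.max(axis=1, keepdims=True))
    dist = (e / e.sum(axis=1, keepdims=True) * np.arange(reg_max).reshape(1, reg_max, 1)).sum(axis=1)

    # Anchor centers of the kept cells; ltrb distances -> xyxy boxes in pixels.
    cx, cy = np.meshgrid(np.arange(w) + 0.5, np.arange(h) + 0.5)
    cx, cy = cx.reshape(-1)[keep], cy.reshape(-1)[keep]
    boxes = np.stack((cx - dist[0], cy - dist[1], cx + dist[2], cy + dist[3]), -1) * stride
    return boxes, cls_prob.T  # (n, 4) boxes and (n, nc) scores, ready for NMS
```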
diff --git a/ultralytics/cfg/default.yaml b/ultralytics/cfg/default.yaml
index 5babd254a3e..d435dbdaef1 100644
--- a/ultralytics/cfg/default.yaml
+++ b/ultralytics/cfg/default.yaml
@@ -5,7 +5,7 @@ task: detect # (str) YOLO task, i.e. detect, segment, classify, pose
 mode: train # (str) YOLO mode, i.e. train, val, predict, export, track, benchmark
 
 # Train settings -------------------------------------------------------------------------------------------------------
-model: # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
+model: yolov8m-seg.pt # (str, optional) path to model file, i.e. yolov8n.pt, yolov8n.yaml
 data: # (str, optional) path to data file, i.e. coco128.yaml
 epochs: 100 # (int) number of epochs to train for
 patience: 50 # (int) epochs to wait for no observable improvement for early stopping of training
@@ -68,7 +68,7 @@ retina_masks: False # (bool) use high-resolution segmentation masks
 boxes: True # (bool) Show boxes in segmentation predictions
 
 # Export settings ------------------------------------------------------------------------------------------------------
-format: torchscript # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
+format: rknn # (str) format to export to, choices at https://docs.ultralytics.com/modes/export/#export-formats
 keras: False # (bool) use Kera=s
 optimize: False # (bool) TorchScript: optimize for mobile
 int8: False # (bool) CoreML/TF INT8 quantization
diff --git a/ultralytics/data/augment.py b/ultralytics/data/augment.py
index 12d09cff2cc..964c9f2f30a 100644
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@@ -369,7 +369,7 @@ def apply_bboxes(self, bboxes, M):
         # Create new boxes
         x = xy[:, [0, 2, 4, 6]]
         y = xy[:, [1, 3, 5, 7]]
-        return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
+        return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
 
     def apply_segments(self, segments, M):
         """
diff --git a/ultralytics/data/base.py b/ultralytics/data/base.py
index bfc3cc19e22..bb386302bbb 100644
--- a/ultralytics/data/base.py
+++ b/ultralytics/data/base.py
@@ -1,4 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
+import cv2
 
 import glob
 import math
@@ -9,7 +10,6 @@
 from pathlib import Path
 from typing import Optional
 
-import cv2
 import numpy as np
 import psutil
 from torch.utils.data import Dataset
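The two default.yaml edits above pin `model` and `format` so that running the exporter script picks them up from the config. As an alternative, assuming the stock Ultralytics Python API in this fork still routes through the modified exporter shown next, the same export may be reachable per run without editing the YAML:

```python
# Hypothetical usage; assumes YOLO().export() in this fork dispatches on the
# 'rknn' entry that the exporter below registers in export_formats().
from ultralytics import YOLO

model = YOLO('yolov8n.pt')   # or 'yolov8n-seg.pt' for a segmentation model
model.export(format='rknn')  # writes yolov8n.onnx, to be fed to RKNN-Toolkit2
```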
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 02cacf0dbd2..ab065c99310 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -57,6 +57,7 @@
 from copy import deepcopy
 from datetime import datetime
 from pathlib import Path
+import cv2
 
 import torch
 
@@ -88,8 +89,10 @@ def export_formats():
         ['TensorFlow Lite', 'tflite', '.tflite', True, False],
         ['TensorFlow Edge TPU', 'edgetpu', '_edgetpu.tflite', True, False],
         ['TensorFlow.js', 'tfjs', '_web_model', True, False],
-        ['PaddlePaddle', 'paddle', '_paddle_model', True, True],
-        ['ncnn', 'ncnn', '_ncnn_model', True, True], ]
+        ['PaddlePaddle', 'paddle', '_paddle_model', True, True],
+        ['ncnn', 'ncnn', '_ncnn_model', True, True],
+        ['RKNN', 'rknn', '_rknnopt.torchscript', True, False],
+    ]
     return pandas.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])
@@ -157,7 +160,8 @@ def __call__(self, model=None):
         flags = [x == format for x in fmts]
         if sum(flags) != 1:
             raise ValueError(f"Invalid export format='{format}'. Valid formats are {fmts}")
-        jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn = flags  # export booleans
+        jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn, rknn = flags  # export booleans
+
         # Load PyTorch model
         self.device = select_device('cpu' if self.args.device is None else self.args.device)
@@ -262,6 +266,8 @@ def __call__(self, model=None):
             f[10], _ = self.export_paddle()
         if ncnn:  # ncnn
             f[11], _ = self.export_ncnn()
+        if rknn:
+            f[12], _ = self.export_rknn()
 
         # Finish
         f = [str(x) for x in f if x]  # filter out '' and None
@@ -297,6 +303,31 @@ def __call__(self, model=None):
         ts.save(str(f), _extra_files=extra_files)
         return f, None
 
+    @try_export
+    def export_rknn(self, prefix=colorstr('RKNN:')):
+        """YOLOv8 RKNN model export."""
+        LOGGER.info(f'\n{prefix} starting export with torch {torch.__version__}...')
+
+        # ts = torch.jit.trace(self.model, self.im, strict=False)
+        # f = str(self.file).replace(self.file.suffix, f'_rknnopt.torchscript')
+        # torch.jit.save(ts, str(f))
+
+        f = str(self.file).replace(self.file.suffix, '.onnx')
+        # NOTE: the opset is pinned to 12 here, which is what RKNN-Toolkit2 expects
+        torch.onnx.export(
+            self.model,
+            self.im[0:1, :, :, :],
+            f,
+            verbose=False,
+            opset_version=12,
+            do_constant_folding=True,  # WARNING: DNN inference with torch>=1.12 may require do_constant_folding=False
+            input_names=['images'])
+
+        LOGGER.info(f'\n{prefix} feed {f} to RKNN-Toolkit or RKNN-Toolkit2 to generate RKNN model.\n'
+                    'Refer to https://github.com/airockchip/rknn_model_zoo/tree/main/models/CV/object_detection/yolo')
+        return f, None
+
+
     @try_export
     def export_onnx(self, prefix=colorstr('ONNX:')):
         """YOLOv8 ONNX export."""
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index e1c7ea82268..62febbfded4 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -80,7 +80,7 @@ def __init__(self,
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         nn_module = isinstance(weights, torch.nn.Module)
-        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn, triton = \
+        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, ncnn, rknn, triton = \
             self._model_type(w)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
         nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
@@ -385,6 +385,8 @@ def forward(self, im, augment=False, visualize=False):
             mat_out = self.pyncnn.Mat()
             ex.extract(output_name, mat_out)
             y.append(np.array(mat_out)[None])
+        elif getattr(self, 'rknn', False):
+            raise NotImplementedError('for inference, please refer to https://github.com/airockchip/rknn_model_zoo/')
         elif self.triton:  # NVIDIA Triton Inference Server
             y = self.model(im)
         else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
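After `export_rknn()` above writes the ONNX file, conversion happens offline with Rockchip's toolkit. A minimal sketch, assuming the `rknn-toolkit2` package and its documented `RKNN` API; the target platform, normalization values and calibration list are placeholders to adapt to your board and data:

```python
# Conversion sketch (assumption: rknn-toolkit2 is installed on an x86 Linux host).
from rknn.api import RKNN

rknn = RKNN(verbose=True)
rknn.config(mean_values=[[0, 0, 0]], std_values=[[255, 255, 255]], target_platform='rk3588')
rknn.load_onnx(model='yolov8n.onnx')                       # file produced by export_rknn() above
rknn.build(do_quantization=True, dataset='./dataset.txt')  # dataset.txt: list of calibration images
rknn.export_rknn('yolov8n.rknn')
rknn.release()
```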
diff --git a/ultralytics/nn/modules.py b/ultralytics/nn/modules.py
new file mode 100644
index 00000000000..95bdec13ed3
--- /dev/null
+++ b/ultralytics/nn/modules.py
@@ -0,0 +1,597 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+"""
+Common modules
+"""
+
+import math
+
+import torch
+import torch.nn as nn
+
+from ultralytics.yolo.utils.tal import dist2bbox, make_anchors
+
+
+def autopad(k, p=None, d=1):  # kernel, padding, dilation
+    """Pad to 'same' shape outputs."""
+    if d > 1:
+        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
+    if p is None:
+        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
+    return p
+
+
+class Conv(nn.Module):
+    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
+    default_act = nn.SiLU()  # default activation
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
+        """Initialize Conv layer with given arguments including activation."""
+        super().__init__()
+        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
+        self.bn = nn.BatchNorm2d(c2)
+        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
+
+    def forward(self, x):
+        """Apply convolution, batch normalization and activation to input tensor."""
+        return self.act(self.bn(self.conv(x)))
+
+    def forward_fuse(self, x):
+        """Apply fused convolution and activation (batch norm folded into the conv)."""
+        return self.act(self.conv(x))
+
+
+class DWConv(Conv):
+    """Depth-wise convolution."""
+
+    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
+        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
+
+
+class DWConvTranspose2d(nn.ConvTranspose2d):
+    """Depth-wise transpose convolution."""
+
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
+
+
+class ConvTranspose(nn.Module):
+    """Convolution transpose 2d layer."""
+    default_act = nn.SiLU()  # default activation
+
+    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
+        """Initialize ConvTranspose2d layer with batch normalization and activation function."""
+        super().__init__()
+        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
+        self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
+        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
+
+    def forward(self, x):
+        """Applies transposed convolutions, batch normalization and activation to input."""
+        return self.act(self.bn(self.conv_transpose(x)))
+
+    def forward_fuse(self, x):
+        """Applies activation and convolution transpose operation to input."""
+        return self.act(self.conv_transpose(x))
+
+
+class DFL(nn.Module):
+    """
+    Integral module of Distribution Focal Loss (DFL).
+    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
+    """
+
+    def __init__(self, c1=16):
+        """Initialize a convolutional layer with a given number of input channels."""
+        super().__init__()
+        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
+        x = torch.arange(c1, dtype=torch.float)
+        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
+        self.c1 = c1
+
+    def forward(self, x):
+        """Apply softmax over the c1 distribution bins and take the expectation, returning box distances."""
+        b, c, a = x.shape  # batch, channels, anchors
+        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
+        # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
+
+
+class TransformerLayer(nn.Module):
+    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
+
+    def __init__(self, c, num_heads):
+        """Initializes a self-attention mechanism using linear transformations and multi-head attention."""
+        super().__init__()
+        self.q = nn.Linear(c, c, bias=False)
+        self.k = nn.Linear(c, c, bias=False)
+        self.v = nn.Linear(c, c, bias=False)
+        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
+        self.fc1 = nn.Linear(c, c, bias=False)
+        self.fc2 = nn.Linear(c, c, bias=False)
+
+    def forward(self, x):
+        """Apply a transformer layer to the input x and return the output."""
+        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
+        x = self.fc2(self.fc1(x)) + x
+        return x
+
+
+class TransformerBlock(nn.Module):
+    """Vision Transformer https://arxiv.org/abs/2010.11929."""
+
+    def __init__(self, c1, c2, num_heads, num_layers):
+        """Initialize a Transformer module with position embedding and specified number of heads and layers."""
+        super().__init__()
+        self.conv = None
+        if c1 != c2:
+            self.conv = Conv(c1, c2)
+        self.linear = nn.Linear(c2, c2)  # learnable position embedding
+        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
+        self.c2 = c2
+
+    def forward(self, x):
+        """Forward propagate the input through the transformer block."""
+        if self.conv is not None:
+            x = self.conv(x)
+        b, _, w, h = x.shape
+        p = x.flatten(2).permute(2, 0, 1)
+        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
+
+
+class Bottleneck(nn.Module):
+    """Standard bottleneck."""
+
+    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):  # ch_in, ch_out, shortcut, groups, kernels, expand
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, k[0], 1)
+        self.cv2 = Conv(c_, c2, k[1], 1, g=g)
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        """Apply the bottleneck with an optional shortcut connection."""
+        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
+
+
+class BottleneckCSP(nn.Module):
+    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+        self.cv4 = Conv(2 * c_, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+        self.act = nn.SiLU()
+        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
+
+    def forward(self, x):
+        """Applies a CSP bottleneck with 3 convolutions."""
+        y1 = self.cv3(self.m(self.cv1(x)))
+        y2 = self.cv2(x)
+        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
+
+
+class C3(nn.Module):
+    """CSP Bottleneck with 3 convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        c_ = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = Conv(c1, c_, 1, 1)
+        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
+        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
+
+    def forward(self, x):
+        """Forward pass through the CSP bottleneck with 3 convolutions."""
+        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
+
+
+class C2(nn.Module):
+    """CSP Bottleneck with 2 convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        self.c = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
+        self.cv2 = Conv(2 * self.c, c2, 1)  # optional act=FReLU(c2)
+        # self.attention = ChannelAttention(2 * self.c)  # or SpatialAttention()
+        self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
+
+    def forward(self, x):
+        """Forward pass through the CSP bottleneck with 2 convolutions."""
+        a, b = self.cv1(x).chunk(2, 1)
+        return self.cv2(torch.cat((self.m(a), b), 1))
+
+
+class C2f(nn.Module):
+    """CSP Bottleneck with 2 convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        self.c = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
+        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
+        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
+
+    def forward(self, x):
+        """Forward pass through the C2f layer."""
+        y = list(self.cv1(x).chunk(2, 1))
+        y.extend(m(y[-1]) for m in self.m)
+        return self.cv2(torch.cat(y, 1))
+
+    def forward_split(self, x):
+        """Forward pass using split() instead of chunk()."""
+        y = list(self.cv1(x).split((self.c, self.c), 1))
+        y.extend(m(y[-1]) for m in self.m)
+        return self.cv2(torch.cat(y, 1))
+
+
+class ChannelAttention(nn.Module):
+    """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
+
+    def __init__(self, channels: int) -> None:
+        super().__init__()
+        self.pool = nn.AdaptiveAvgPool2d(1)
+        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
+        self.act = nn.Sigmoid()
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return x * self.act(self.fc(self.pool(x)))
+
+
+class SpatialAttention(nn.Module):
+    """Spatial-attention module."""
+
+    def __init__(self, kernel_size=7):
+        """Initialize Spatial-attention module with kernel size argument."""
+        super().__init__()
+        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
+        padding = 3 if kernel_size == 7 else 1
+        self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
+        self.act = nn.Sigmoid()
+
+    def forward(self, x):
+        """Apply spatial attention to the input for feature recalibration."""
+        return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))
+
+
+class CBAM(nn.Module):
+    """Convolutional Block Attention Module."""
+
+    def __init__(self, c1, kernel_size=7):  # ch_in, kernels
+        super().__init__()
+        self.channel_attention = ChannelAttention(c1)
+        self.spatial_attention = SpatialAttention(kernel_size)
+
+    def forward(self, x):
+        """Apply channel attention followed by spatial attention."""
+        return self.spatial_attention(self.channel_attention(x))
+
+
+class C1(nn.Module):
+    """CSP Bottleneck with 1 convolution."""
+
+    def __init__(self, c1, c2, n=1):  # ch_in, ch_out, number
+        super().__init__()
+        self.cv1 = Conv(c1, c2, 1, 1)
+        self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
+
+    def forward(self, x):
+        """Apply convolutions with a residual connection."""
+        y = self.cv1(x)
+        return self.m(y) + y
+
+
+class C3x(C3):
+    """C3 module with cross-convolutions."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initialize C3x instance with cross-convolution kernels."""
+        super().__init__(c1, c2, n, shortcut, g, e)
+        self.c_ = int(c2 * e)
+        self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)))
+
+
+class C3TR(C3):
+    """C3 module with TransformerBlock()."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initialize C3TR module with a TransformerBlock()."""
+        super().__init__(c1, c2, n, shortcut, g, e)
+        c_ = int(c2 * e)
+        self.m = TransformerBlock(c_, c_, 4, n)
+
+
+class C3Ghost(C3):
+    """C3 module with GhostBottleneck()."""
+
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initialize C3Ghost module with GhostBottleneck()."""
+        super().__init__(c1, c2, n, shortcut, g, e)
+        c_ = int(c2 * e)  # hidden channels
+        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
+
+
+class SPP(nn.Module):
+    """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
+
+    def __init__(self, c1, c2, k=(5, 9, 13)):
+        """Initialize the SPP layer with input/output channels and pooling kernel sizes."""
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
+        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+
+    def forward(self, x):
+        """Forward pass of the SPP layer, performing spatial pyramid pooling."""
+        x = self.cv1(x)
+        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
+
+
+class SPPF(nn.Module):
+    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
+
+    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
+        super().__init__()
+        c_ = c1 // 2  # hidden channels
+        self.cv1 = Conv(c1, c_, 1, 1)
+        self.cv2 = Conv(c_ * 4, c2, 1, 1)
+        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
+
+    def forward(self, x):
+        """Forward pass through the SPPF layer."""
+        x = self.cv1(x)
+        y1 = self.m(x)
+        y2 = self.m(y1)
+        return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
+
+
+class Focus(nn.Module):
+    """Focus wh information into c-space."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__()
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
+        # self.contract = Contract(gain=2)
+
+    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
+        # return self.conv(self.contract(x))
+
+
+class GhostConv(nn.Module):
+    """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
+
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+        super().__init__()
+        c_ = c2 // 2  # hidden channels
+        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
+        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
+
+    def forward(self, x):
+        """Apply Ghost Convolution to the input tensor."""
+        y = self.cv1(x)
+        return torch.cat((y, self.cv2(y)), 1)
+
+
+class GhostBottleneck(nn.Module):
+    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""
+
+    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
+        super().__init__()
+        c_ = c2 // 2
+        self.conv = nn.Sequential(
+            GhostConv(c1, c_, 1, 1),  # pw
+            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
+            GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
+        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1,
+                                                                            act=False)) if s == 2 else nn.Identity()
+
+    def forward(self, x):
+        """Sum the Ghost bottleneck branch and the shortcut branch."""
+        return self.conv(x) + self.shortcut(x)
+
+
+class Concat(nn.Module):
+    """Concatenate a list of tensors along dimension."""
+
+    def __init__(self, dimension=1):
+        """Concatenates a list of tensors along a specified dimension."""
+        super().__init__()
+        self.d = dimension
+
+    def forward(self, x):
+        """Concatenate the list of tensors along dimension self.d."""
+        return torch.cat(x, self.d)
+
+
+class Proto(nn.Module):
+    """YOLOv8 mask Proto module for segmentation models."""
+
+    def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
+        super().__init__()
+        self.cv1 = Conv(c1, c_, k=3)
+        self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True)  # nn.Upsample(scale_factor=2, mode='nearest')
+        self.cv2 = Conv(c_, c_, k=3)
+        self.cv3 = Conv(c_, c2)
+
+    def forward(self, x):
+        """Performs a forward pass through layers using an upsampled input image."""
+        return self.cv3(self.cv2(self.upsample(self.cv1(x))))
+
+
+class Ensemble(nn.ModuleList):
+    """Ensemble of models."""
+
+    def __init__(self):
+        """Initialize an ensemble of models."""
+        super().__init__()
+
+    def forward(self, x, augment=False, profile=False, visualize=False):
+        """Run inference on all models in the ensemble and combine their outputs."""
+        y = [module(x, augment, profile, visualize)[0] for module in self]
+        # y = torch.stack(y).max(0)[0]  # max ensemble
+        # y = torch.stack(y).mean(0)  # mean ensemble
+        y = torch.cat(y, 2)  # nms ensemble, y shape(B, HW, C)
+        return y, None  # inference, train output
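Since the RKNN export path removes the `DFL` block defined above from the graph, it helps to see that the block is nothing more than a softmax expectation over `reg_max` bins. A self-contained check (shapes follow the `Detect` head that consumes it):

```python
import torch
import torch.nn as nn

# Recreate the frozen DFL conv defined above and verify it matches a plain
# softmax expectation; reg_max=16 and the (batch, 4*reg_max, anchors) layout
# mirror what Detect feeds into it.
reg_max = 16
conv = nn.Conv2d(reg_max, 1, 1, bias=False).requires_grad_(False)
conv.weight.data[:] = torch.arange(reg_max, dtype=torch.float).view(1, reg_max, 1, 1)

x = torch.randn(2, 4 * reg_max, 8400)  # raw box logits for 8400 anchors
y_dfl = conv(x.view(2, 4, reg_max, 8400).transpose(2, 1).softmax(1)).view(2, 4, 8400)
bins = torch.arange(reg_max, dtype=torch.float).view(1, 1, reg_max, 1)
y_ref = (x.view(2, 4, reg_max, 8400).softmax(2) * bins).sum(2)  # expectation over bins
assert torch.allclose(y_dfl, y_ref, atol=1e-5)
```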
+
+
+# Model heads below ----------------------------------------------------------------------------------------------------
+
+
+class Detect(nn.Module):
+    """YOLOv8 Detect head for detection models."""
+    dynamic = False  # force grid reconstruction
+    export = False  # export mode
+    shape = None
+    anchors = torch.empty(0)  # init
+    strides = torch.empty(0)  # init
+
+    def __init__(self, nc=80, ch=()):  # detection layer
+        super().__init__()
+        self.nc = nc  # number of classes
+        self.nl = len(ch)  # number of detection layers
+        self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
+        self.no = nc + self.reg_max * 4  # number of outputs per anchor
+        self.stride = torch.zeros(self.nl)  # strides computed during build
+        c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], self.nc)  # channels
+        self.cv2 = nn.ModuleList(
+            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch)
+        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
+        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
+
+    def forward(self, x):
+        """Concatenates and returns predicted bounding boxes and class probabilities."""
+        shape = x[0].shape  # BCHW
+        if self.export and self.format == 'rknn':
+            y = []
+            for i in range(self.nl):
+                y.append(self.cv2[i](x[i]))
+                cls = torch.sigmoid(self.cv3[i](x[i]))
+                cls_sum = torch.clamp(cls.sum(1, keepdim=True), 0, 1)
+                y.append(cls)
+                y.append(cls_sum)
+            return y
+
+        for i in range(self.nl):
+            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
+        if self.training:
+            return x
+        elif self.dynamic or self.shape != shape:
+            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
+            self.shape = shape
+
+        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
+        if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'):  # avoid TF FlexSplitV ops
+            box = x_cat[:, :self.reg_max * 4]
+            cls = x_cat[:, self.reg_max * 4:]
+        else:
+            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
+        dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
+        y = torch.cat((dbox, cls.sigmoid()), 1)
+        return y if self.export else (y, x)
+
+    def bias_init(self):
+        """Initialize Detect() biases, WARNING: requires stride availability."""
+        m = self  # self.model[-1]  # Detect() module
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
+        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
+        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
+            a[-1].bias.data[:] = 1.0  # box
+            b[-1].bias.data[:m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
+
+
+class Segment(Detect):
+    """YOLOv8 Segment head for segmentation models."""
+
+    def __init__(self, nc=80, nm=32, npr=256, ch=()):
+        """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
+        super().__init__(nc, ch)
+        self.nm = nm  # number of masks
+        self.npr = npr  # number of protos
+        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
+        self.detect = Detect.forward
+
+        c4 = max(ch[0] // 4, self.nm)
+        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
+
+    def forward(self, x):
+        """Return detections, mask coefficients and prototypes; the layout depends on training/export mode."""
+        p = self.proto(x[0])  # mask protos
+        bs = p.shape[0]  # batch size
+
+        if self.export and self.format == 'rknn':
+            mc = [self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)]
+        else:
+            mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
+        x = self.detect(self, x)
+        if self.training:
+            return x, mc, p
+
+        if self.export and self.format == 'rknn':
+            outputs = []
+            out_per_detect_branch = len(x) // self.nl
+            for i in range(self.nl):
+                outputs.extend(x[i * out_per_detect_branch:(i + 1) * out_per_detect_branch])
+                outputs.append(mc[i])
+            outputs.append(p)
+            return outputs
+
+        return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
+
+
+class Pose(Detect):
+    """YOLOv8 Pose head for keypoints models."""
+
+    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
+        """Initialize YOLO network with default parameters and Convolutional Layers."""
+        super().__init__(nc, ch)
+        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
+        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
+        self.detect = Detect.forward
+
+        c4 = max(ch[0] // 4, self.nk)
+        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
+
+    def forward(self, x):
+        """Perform forward pass through YOLO model and return predictions."""
+        bs = x[0].shape[0]  # batch size
+        kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
+        x = self.detect(self, x)
+        if self.training:
+            return x, kpt
+        pred_kpt = self.kpts_decode(kpt)
+        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
+
+    def kpts_decode(self, kpts):
+        """Decodes keypoints."""
+        ndim = self.kpt_shape[1]
+        y = kpts.clone()
+        if ndim == 3:
+            y[:, 2::3].sigmoid_()  # inplace sigmoid
+        y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
+        y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
+        return y
+
+
+class Classify(nn.Module):
+    """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
+
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__()
+        c_ = 1280  # efficientnet_b0 size
+        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
+        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
+        self.drop = nn.Dropout(p=0.0, inplace=True)
+        self.linear = nn.Linear(c_, c2)  # to x(b,c2)
+
+    def forward(self, x):
+        """Performs a forward pass of the YOLO model on input image data."""
+        if isinstance(x, list):
+            x = torch.cat(x, 1)
+        x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
+        return x if self.training else x.softmax(1)
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index fffd102a3cf..5bf3a119fbc 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -43,6 +43,17 @@ def __init__(self, nc=80, ch=()):  # detection layer
     def forward(self, x):
         """Concatenates and returns predicted bounding boxes and class probabilities."""
         shape = x[0].shape  # BCHW
+
+        if self.export and self.format == 'rknn':
+            y = []
+            for i in range(self.nl):
+                y.append(self.cv2[i](x[i]))
+                cls = torch.sigmoid(self.cv3[i](x[i]))
+                cls_sum = torch.clamp(cls.sum(1, keepdim=True), 0, 1)
+                y.append(cls)
+                y.append(cls_sum)
+            return y
+
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
         if self.training:
@@ -100,10 +111,22 @@ def forward(self, x):
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
 
-        mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
+        if self.export and self.format == 'rknn':
+            mc = [self.cv4[i](x[i]) for i in range(self.nl)]
+        else:
+            mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
+
         x = self.detect(self, x)
         if self.training:
             return x, mc, p
+        if self.export and self.format == 'rknn':
+            bo = len(x) // 3
+            relocated = []
+            for i in range(len(mc)):
+                relocated.extend(x[i * bo:(i + 1) * bo])
+                relocated.extend([mc[i]])
+            relocated.extend([p])
+            return relocated
         return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
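For downstream consumers, the rknn branches above flatten the outputs into a list ordered per detection level. A sketch of how a caller might regroup them, with the ordering inferred from the code and shapes that should be verified against the exported graph (e.g. in Netron):

```python
# Regroup the flat rknn-mode output list from the heads above.
# Assumptions: nl=3 levels, reg_max=16, 640x640 input; Segment appends the
# prototype tensor once at the end.
def split_outputs(outputs, nl=3, with_masks=False):
    """Group the flat per-level output list into dicts; proto comes last for Segment."""
    per_branch = 4 if with_masks else 3  # box_dist, cls, score_sum (+ mask coefficients)
    levels = []
    for i in range(nl):
        box_dist, cls, score_sum = outputs[i * per_branch:i * per_branch + 3]
        level = {'box_dist': box_dist,    # (1, 4*reg_max, h, w) raw DFL logits
                 'cls': cls,              # (1, nc, h, w) sigmoid class scores
                 'score_sum': score_sum}  # (1, 1, h, w) clamped per-cell score sum
        if with_masks:
            level['mask_coef'] = outputs[i * per_branch + 3]  # flattened or 4-D, depending on head variant
        levels.append(level)
    proto = outputs[-1] if with_masks else None  # (1, nm, 160, 160) mask prototypes
    return levels, proto
```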
diff --git a/ultralytics/utils/torch_utils.py b/ultralytics/utils/torch_utils.py
index c593ac27a41..41b3c780832 100644
--- a/ultralytics/utils/torch_utils.py
+++ b/ultralytics/utils/torch_utils.py
@@ -106,7 +106,7 @@ def select_device(device='', batch=0, newline=False, verbose=True):
         s += f'MPS ({get_cpu_info()})\n'
         arg = 'mps'
     else:  # revert to CPU
-        s += f'CPU ({get_cpu_info()})\n'
+        s += f'CPU ()\n'
         arg = 'cpu'
 
     if verbose and RANK == -1: