diff --git a/README.md b/README.md
index 2f270a04b..38708a067 100644
--- a/README.md
+++ b/README.md
@@ -58,15 +58,23 @@
 yolo predict model=yolov10n/s/m/b/l/x.pt
 ```
 
 ## Export
-
-
-## Latency Measurement
-
-
+```
+# End-to-End ONNX
+yolo export model=yolov10n/s/m/b/l/x.pt format=onnx opset=13 simplify
+# Predict with ONNX
+yolo predict model=yolov10n/s/m/b/l/x.onnx
+
+# End-to-End TensorRT
+yolo export model=yolov10n/s/m/b/l/x.pt format=engine half=True simplify opset=13 workspace=16
+# Or
+trtexec --onnx=onnxs/yolov10n/s/m/b/l/x.onnx --saveEngine=engines/yolov10n/s/m/b/l/x.engine --fp16
+# Predict with TensorRT
+yolo predict model=yolov10n/s/m/b/l/x.engine
+```
 
 ## Acknowledgement
 
-The code base is built with [ultralytics](https://github.com/ultralytics/ultralytics)
+The code base is built with [ultralytics](https://github.com/ultralytics/ultralytics) and [RT-DETR](https://github.com/lyuwenyu/RT-DETR)
 
 Thanks for the great implementations!
diff --git a/requirements.txt b/requirements.txt
index ff7ba9a04..7ce1843e8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,6 @@ onnx
 onnxruntime
 pycocotools
 PyYAML
-scipy
\ No newline at end of file
+scipy
+onnxsim
+onnxruntime-gpu
\ No newline at end of file
diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index 859106a1a..1fa3f2e10 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -67,7 +67,7 @@
 from ultralytics.data.dataset import YOLODataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.nn.autobackend import check_class_names, default_class_names
-from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder
+from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder, v10Detect
 from ultralytics.nn.tasks import DetectionModel, SegmentationModel, WorldModel
 from ultralytics.utils import (
     ARM64,
@@ -229,6 +229,9 @@ def __call__(self, model=None):
                 m.dynamic = self.args.dynamic
                 m.export = True
                 m.format = self.args.format
+                if isinstance(m, v10Detect):
+                    m.max_det = self.args.max_det
+
             elif isinstance(m, C2f) and not any((saved_model, pb, tflite, edgetpu, tfjs)):
                 # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph
                 m.forward = m.forward_split
diff --git a/ultralytics/models/yolov10/predict.py b/ultralytics/models/yolov10/predict.py
index 83f7bd413..7310db3d0 100644
--- a/ultralytics/models/yolov10/predict.py
+++ b/ultralytics/models/yolov10/predict.py
@@ -9,10 +9,13 @@ def postprocess(self, preds, img, orig_imgs):
 
         if isinstance(preds, (list, tuple)):
             preds = preds[0]
 
-        preds = preds.transpose(-1, -2)
-        bboxes, scores, labels = ops.v10postprocess(preds, self.args.max_det)
-        bboxes = ops.xywh2xyxy(bboxes)
-        preds = torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
+        if preds.shape[-1] == 6:
+            pass
+        else:
+            preds = preds.transpose(-1, -2)
+            bboxes, scores, labels = ops.v10postprocess(preds, self.args.max_det)
+            bboxes = ops.xywh2xyxy(bboxes)
+            preds = torch.cat([bboxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
 
         mask = preds[..., 4] > self.args.conf
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 6b2c3eded..5e6af102e 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -13,6 +13,7 @@
 from .transformer import MLP, DeformableTransformerDecoder, DeformableTransformerDecoderLayer
 from .utils import bias_init_with_prob, linear_init
 import copy
+from ultralytics.utils import ops
 
 __all__ = "Detect", "Segment", "Pose", "Classify", "OBB", "RTDETRDecoder"
@@ -51,7 +52,6 @@ def inference(self, x):
         shape = x[0].shape  # BCHW
         x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
         if self.dynamic or self.shape != shape:
-            assert(not self.export)
             self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
             self.shape = shape
@@ -501,6 +501,8 @@ def _reset_parameters(self):
 
 
 class v10Detect(Detect):
+    max_det = -1
+
     def __init__(self, nc=80, ch=()):
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))  # channels
@@ -515,7 +517,12 @@ def forward(self, x):
         one2one = self.forward_feat([xi.detach() for xi in x], self.one2one_cv2, self.one2one_cv3)
         if not self.training:
             one2one = self.inference(one2one)
-            return one2one
+            if not self.export:
+                return one2one
+            else:
+                assert(self.max_det != -1)
+                boxes, scores, labels = ops.v10postprocess(one2one.permute(0, 2, 1), self.max_det)
+                return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
         else:
             one2many = super().forward(x)
             return {"one2many": one2many, "one2one": one2one}
diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index e6aaf1031..a539fe50f 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -848,8 +848,8 @@ def clean_str(s):
     """
     return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
 
-def v10postprocess(preds, max_det):
-    nc = preds.shape[-1] - 4
+def v10postprocess(preds, max_det, nc=80):
+    assert(4 + nc == preds.shape[-1])
     boxes, scores = preds.split([4, nc], dim=-1)
     max_scores = scores.amax(dim=-1)
     max_scores, index = torch.topk(max_scores, max_det, axis=-1)