From e5250549ec9709b5a67f339104df8a96923a2873 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Kozlovsk=C3=BD?=
Date: Wed, 9 Oct 2024 01:40:03 +0200
Subject: [PATCH] Inference Fix (#100)

---
 .github/workflows/ci.yaml                     |   3 +
 .gitignore                                    |   1 +
 .../attached_modules/base_attached_module.py  |   9 +-
 .../metrics/object_keypoint_similarity.py     |   1 -
 .../visualizers/base_visualizer.py            |   2 +-
 .../visualizers/bbox_visualizer.py            |  25 +-
 .../visualizers/classification_visualizer.py  |  32 ++-
 .../visualizers/keypoint_visualizer.py        |  21 +-
 .../visualizers/multi_visualizer.py           |   6 +-
 .../visualizers/segmentation_visualizer.py    |  28 +-
 .../attached_modules/visualizers/utils.py     |   6 +-
 luxonis_train/core/core.py                    |  35 +--
 luxonis_train/core/utils/infer_utils.py       | 254 ++++++++++--------
 luxonis_train/core/utils/tune_utils.py        |   4 +-
 luxonis_train/models/luxonis_lightning.py     |   1 -
 .../nodes/backbones/mobileone/mobileone.py    |   2 +-
 .../nodes/backbones/repvgg/repvgg.py          |   8 +-
 luxonis_train/nodes/blocks/blocks.py          |   2 +-
 tests/integration/test_simple.py              |  65 ++++-
 19 files changed, 306 insertions(+), 199 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 847601c3..0d7dbdc7 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -43,6 +43,9 @@ jobs:
         with:
           ref: ${{ github.head_ref }}
 
+      - name: Install pre-commit
+        run: python -m pip install 'pre-commit<4.0.0'
+
       - name: Run pre-commit
         uses: pre-commit/action@v3.0.1
 
diff --git a/.gitignore b/.gitignore
index 03ba884c..36d3b3e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -153,4 +153,5 @@ mlruns
 wandb
 tests/_data
 tests/integration/save-directory
+tests/integration/infer-save-directory
 data
diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py
index a4ac8e8f..c65a4b7d 100644
--- a/luxonis_train/attached_modules/base_attached_module.py
+++ b/luxonis_train/attached_modules/base_attached_module.py
@@ -275,7 +275,7 @@ def get_input_tensors(
         return inputs[self.node_tasks[self.required_labels[0]]]
 
     def prepare(
-        self, inputs: Packet[Tensor], labels: Labels
+        self, inputs: Packet[Tensor], labels: Labels | None
     ) -> tuple[Unpack[Ts]]:
         """Prepares node outputs for the forward pass of the module.
 
@@ -287,8 +287,9 @@ def prepare(
 
         @type inputs: L{Packet}[Tensor]
         @param inputs: Output from the node, inputs to the attached module.
-        @type labels: L{Labels}
-        @param labels: Labels from the dataset.
+        @type labels: L{Labels} | None
+        @param labels: Labels from the dataset. If not provided, empty labels are used.
+            This is useful in visualizers for working with standalone images.
         @rtype: tuple[Unpack[Ts]]
         @return: Prepared inputs. Should allow the following usage with the
@@ -325,6 +326,8 @@ def prepare(
                 set(self.supported_tasks) & set(self.node_tasks)
             )
         x = self.get_input_tensors(inputs)
+        if labels is None:
+            return x, None  # type: ignore
         label, task_type = self._get_label(labels)
         if task_type in [TaskType.CLASSIFICATION, TaskType.SEGMENTATION]:
             if len(x) == 1:
diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py
index 248ebe10..d291e7e0 100644
--- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py
+++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py
@@ -36,7 +36,6 @@ class ObjectKeypointSimilarity(
 
     def __init__(
         self,
-        n_keypoints: int | None = None,
         sigmas: list[float] | None = None,
         area_factor: float | None = None,
         use_cocoeval_oks: bool = True,
diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py
index 817a09d5..a02aa933 100644
--- a/luxonis_train/attached_modules/visualizers/base_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py
@@ -66,7 +66,7 @@ def run(
         label_canvas: Tensor,
         prediction_canvas: Tensor,
         inputs: Packet[Tensor],
-        labels: Labels,
+        labels: Labels | None,
     ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]:
         return self(
             label_canvas, prediction_canvas, *self.prepare(inputs, labels)
diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py
index 8dded134..87a3fd2d 100644
--- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py
@@ -173,8 +173,8 @@ def forward(
         label_canvas: Tensor,
         prediction_canvas: Tensor,
         predictions: list[Tensor],
-        targets: Tensor,
-    ) -> tuple[Tensor, Tensor]:
+        targets: Tensor | None,
+    ) -> tuple[Tensor, Tensor] | Tensor:
         """Creates a visualization of the bounding box predictions and
         labels.
 
@@ -189,26 +189,29 @@ def forward(
         @type targets: Tensor
         @param targets: The target bounding boxes.
""" - targets_viz = self.draw_targets( - label_canvas, - targets, - color_dict=self.colors, + predictions_viz = self.draw_predictions( + prediction_canvas, + predictions, label_dict=self.bbox_labels, + color_dict=self.colors, draw_labels=self.draw_labels, fill=self.fill, font=self.font, font_size=self.font_size, width=self.width, ) - predictions_viz = self.draw_predictions( - prediction_canvas, - predictions, - label_dict=self.bbox_labels, + if targets is None: + return predictions_viz + + targets_viz = self.draw_targets( + label_canvas, + targets, color_dict=self.colors, + label_dict=self.bbox_labels, draw_labels=self.draw_labels, fill=self.fill, font=self.font, font_size=self.font_size, width=self.width, ) - return targets_viz, predictions_viz.to(targets_viz.device) + return targets_viz, predictions_viz diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index 91096f54..3ba5ce8c 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -5,6 +5,7 @@ from torch import Tensor from luxonis_train.enums import TaskType +from luxonis_train.utils import Labels, Packet from .base_visualizer import BaseVisualizer from .utils import figure_to_torch, numpy_to_torch_img, torch_img_to_numpy @@ -56,29 +57,38 @@ def _generate_plot( ax.grid(True) return figure_to_torch(fig, width, height) + def prepare( + self, inputs: Packet[Tensor], labels: Labels | None + ) -> tuple[Tensor, Tensor]: + predictions, targets = super().prepare(inputs, labels) + if isinstance(predictions, list): + predictions = predictions[0] + return predictions, targets + def forward( self, label_canvas: Tensor, prediction_canvas: Tensor, predictions: Tensor, - labels: Tensor, + targets: Tensor | None, ) -> Tensor | tuple[Tensor, Tensor]: overlay = torch.zeros_like(label_canvas) plots = torch.zeros_like(prediction_canvas) for i in range(len(overlay)): prediction = predictions[i] - gt = self._get_class_name(labels[i]) arr = torch_img_to_numpy(label_canvas[i].clone()) curr_class = self._get_class_name(prediction) - arr = cv2.putText( - arr, - f"GT: {gt}", - (5, 10), - cv2.FONT_HERSHEY_SIMPLEX, - self.font_scale, - self.color, - self.thickness, - ) + if targets is not None: + gt = self._get_class_name(targets[i]) + arr = cv2.putText( + arr, + f"GT: {gt}", + (5, 10), + cv2.FONT_HERSHEY_SIMPLEX, + self.font_scale, + self.color, + self.thickness, + ) arr = cv2.putText( arr, f"Pred: {curr_class}", diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index da4ddc7c..8c7252ee 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -94,16 +94,9 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, predictions: list[Tensor], - targets: Tensor, + targets: Tensor | None, **kwargs, - ) -> tuple[Tensor, Tensor]: - target_viz = self.draw_targets( - label_canvas, - targets, - colors=self.visible_color, - connectivity=self.connectivity, - **kwargs, - ) + ) -> tuple[Tensor, Tensor] | Tensor: pred_viz = self.draw_predictions( prediction_canvas, predictions, @@ -113,4 +106,14 @@ def forward( visibility_threshold=self.visibility_threshold, **kwargs, ) + if targets is None: + return pred_viz + + target_viz = self.draw_targets( + label_canvas, + 
+            targets,
+            colors=self.visible_color,
+            connectivity=self.connectivity,
+            **kwargs,
+        )
         return target_viz, pred_viz
diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py
index b7ecbfbb..dd884c8d 100644
--- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py
@@ -42,8 +42,8 @@ def forward(
         label_canvas: Tensor,
         prediction_canvas: Tensor,
         outputs: Packet[Tensor],
-        labels: Labels,
-    ) -> tuple[Tensor, Tensor]:
+        labels: Labels | None,
+    ) -> tuple[Tensor, Tensor] | Tensor:
         for visualizer in self.visualizers:
             match visualizer.run(
                 label_canvas, prediction_canvas, outputs, labels
@@ -57,4 +57,6 @@ def forward(
                    raise NotImplementedError(
                        "Unexpected return type from visualizer."
                    )
+        if labels is None:
+            return prediction_canvas
         return label_canvas, prediction_canvas
diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
index e1b22e0d..7194a506 100644
--- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
+++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py
@@ -4,6 +4,7 @@
 from torch import Tensor
 
 from luxonis_train.enums import TaskType
+from luxonis_train.utils import Labels, Packet
 
 from .base_visualizer import BaseVisualizer
 from .utils import (
@@ -95,14 +96,22 @@ def draw_targets(
 
         return viz
 
+    def prepare(
+        self, inputs: Packet[Tensor], labels: Labels | None
+    ) -> tuple[Tensor, Tensor]:
+        predictions, targets = super().prepare(inputs, labels)
+        if isinstance(predictions, list):
+            predictions = predictions[0]
+        return predictions, targets
+
     def forward(
         self,
         label_canvas: Tensor,
         prediction_canvas: Tensor,
         predictions: Tensor,
-        targets: Tensor,
+        targets: Tensor | None,
         **kwargs,
-    ) -> tuple[Tensor, Tensor]:
+    ) -> tuple[Tensor, Tensor] | Tensor:
         """Creates a visualization of the segmentation predictions and
         labels.
 
@@ -118,18 +127,21 @@ def forward(
         @return: A tuple of the label and prediction visualizations.
""" - targets_vis = self.draw_targets( - label_canvas, - targets, + predictions_vis = self.draw_predictions( + prediction_canvas, + predictions, colors=self.colors, alpha=self.alpha, background_class=self.background_class, background_color=self.background_color, **kwargs, ) - predictions_vis = self.draw_predictions( - prediction_canvas, - predictions, + if targets is None: + return predictions_vis + + targets_vis = self.draw_targets( + label_canvas, + targets, colors=self.colors, alpha=self.alpha, background_class=self.background_class, diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index 76478421..a8965020 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -232,11 +232,7 @@ def get_unnormalized_images(cfg: Config, inputs: dict[str, Tensor]) -> Tensor: if cfg.trainer.preprocessing.normalize.active: mean = normalize_params.get("mean", [0.485, 0.456, 0.406]) std = normalize_params.get("std", [0.229, 0.224, 0.225]) - return preprocess_images( - images, - mean=mean, - std=std, - ) + return preprocess_images(images, mean=mean, std=std) def number_to_hsl(seed: int) -> tuple[float, float, float]: diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 46405b0c..04f006d5 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -37,9 +37,9 @@ from .utils.infer_utils import ( IMAGE_FORMATS, VIDEO_FORMATS, - process_dataset_images, - process_images, - process_video, + infer_from_dataset, + infer_from_directory, + infer_from_video, ) from .utils.train_utils import create_trainer @@ -466,25 +466,30 @@ def infer( weights = weights or self.cfg.model.weights with replace_weights(self.lightning_module, weights): - if source_path: - source_path_obj = Path(source_path) - if source_path_obj.suffix.lower() in VIDEO_FORMATS: - process_video(self, source_path_obj, view, save_dir) - elif source_path_obj.is_file(): - process_images(self, [source_path_obj], view, save_dir) - elif source_path_obj.is_dir(): - image_files = [ + if save_dir is not None: + save_dir = Path(save_dir) + save_dir.mkdir(parents=True, exist_ok=True) + if source_path is not None: + source_path = Path(source_path) + if source_path.suffix.lower() in VIDEO_FORMATS: + infer_from_video( + self, video_path=source_path, save_dir=save_dir + ) + elif source_path.is_file(): + infer_from_directory(self, [source_path], save_dir) + elif source_path.is_dir(): + image_files = ( f - for f in source_path_obj.iterdir() + for f in source_path.iterdir() if f.suffix.lower() in IMAGE_FORMATS - ] - process_images(self, image_files, view, save_dir) + ) + infer_from_directory(self, image_files, save_dir) else: raise ValueError( f"Source path {source_path} is not a valid file or directory." 
                     )
             else:
-                process_dataset_images(self, view, save_dir)
+                infer_from_dataset(self, view, save_dir)
 
     def tune(self) -> None:
         """Runs Optuna tunning of hyperparameters."""
diff --git a/luxonis_train/core/utils/infer_utils.py b/luxonis_train/core/utils/infer_utils.py
index 0240e5fc..ffeaa1cb 100644
--- a/luxonis_train/core/utils/infer_utils.py
+++ b/luxonis_train/core/utils/infer_utils.py
@@ -1,14 +1,15 @@
 from collections import defaultdict
+from collections.abc import Iterable
 from pathlib import Path
+from typing import Literal
 
 import cv2
 import numpy as np
 import torch
-import tqdm
 from torch import Tensor
 
+import luxonis_train
 from luxonis_train.attached_modules.visualizers import get_unnormalized_images
-from luxonis_train.enums import TaskType
 
 IMAGE_FORMATS = {
     ".bmp",
@@ -22,51 +23,33 @@
     ".mpo",
     ".pfm",
 }
-VIDEO_FORMATS = {".mp4", ".mov", ".avi", ".mkv"}
+VIDEO_FORMATS = {".mp4", ".mov", ".avi", ".mkv", ".webm"}
 
 
-def render_visualizations(
-    visualizations: dict[str, dict[str, Tensor]],
-    save_dir: str | Path | None,
-    show: bool = True,
-) -> dict[str, list[np.ndarray]]:
+def process_visualizations(
+    visualizations: dict[str, dict[str, Tensor]], batch_size: int
+) -> dict[tuple[str, str], list[np.ndarray]]:
     """Render or save visualizations."""
-    save_dir = Path(save_dir) if save_dir is not None else None
-    if save_dir is not None:
-        save_dir.mkdir(exist_ok=True, parents=True)
-
-    rendered_visualizations = defaultdict(list)
-    i = 0
-    for node_name, vzs in visualizations.items():
-        for viz_name, viz_batch in vzs.items():
-            for i, viz in enumerate(viz_batch):
+    renders = defaultdict(list)
+
+    for i in range(batch_size):
+        for node_name, vzs in visualizations.items():
+            for viz_name, viz_batch in vzs.items():
+                viz = viz_batch[i]
                 viz_arr = viz.detach().cpu().numpy().transpose(1, 2, 0)
                 viz_arr = cv2.cvtColor(viz_arr, cv2.COLOR_RGB2BGR)
-                name = f"{node_name}/{viz_name}/{i}"
-                if save_dir is not None:
-                    name = name.replace("/", "_")
-                    cv2.imwrite(str(save_dir / f"{name}_{i}.png"), viz_arr)
-                    i += 1
-                elif show:
-                    cv2.imshow(name, viz_arr)
-                else:
-                    rendered_visualizations[name].append(viz_arr)
-
-    if save_dir is None and show:
-        if cv2.waitKey(0) == ord("q"):
-            exit()
+                renders[(node_name, viz_name)].append(viz_arr)
 
-    return rendered_visualizations
+    return renders
 
 
-def prepare_and_infer_image(model, img: np.ndarray, labels: dict, view: str):
+def prepare_and_infer_image(
+    model: "luxonis_train.core.LuxonisModel",
+    img: np.ndarray,
+):
     """Prepares the image for inference and runs the model."""
     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-    img, _ = (
-        model.train_augmentations([(img, {})])
-        if view == "train"
-        else model.val_augmentations([(img, {})])
-    )
+    img, _ = model.val_augmentations([(img, {})])
 
     inputs = {
         "image": torch.tensor(img).unsqueeze(0).permute(0, 3, 1, 2).float()
@@ -74,112 +57,147 @@ def prepare_and_infer_image(model, img: np.ndarray, labels: dict, view: str):
     }
 
     images = get_unnormalized_images(model.cfg, inputs)
     outputs = model.lightning_module.forward(
-        inputs, labels, images=images, compute_visualizations=True
+        inputs, images=images, compute_visualizations=True
     )
 
     return outputs
 
 
-def process_video(
-    model,
+def window_closed() -> bool:  # pragma: no cover
+    return cv2.waitKey(0) in {27, ord("q")}
+
+
+def infer_from_video(
+    model: "luxonis_train.core.LuxonisModel",
     video_path: str | Path,
-    view: str,
-    save_dir: str | Path | None,
-    show: bool = False,
+    save_dir: Path | None,
 ) -> None:
-    """Handles inference on a video."""
-    cap = cv2.VideoCapture(filename=str(video_path))  # type: ignore
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    progress_bar = tqdm.tqdm(
-        total=total_frames, position=0, leave=True, desc="Processing video"
-    )
+    """Runs inference on individual frames from a video.
+
+    @type model: L{LuxonisModel}
+    @param model: The model to use for inference.
+    @type video_path: str | Path
+    @param video_path: The path to the video.
+    @type save_dir: Path | None
+    @param save_dir: The directory to save the visualizations to.
+    """
 
-    if save_dir is not None:
-        out_writers = {}
-        save_dir = Path(save_dir)
-        save_dir.mkdir(exist_ok=True, parents=True)
+    cap = cv2.VideoCapture(filename=str(video_path))  # type: ignore
 
-    labels = create_dummy_labels(
-        model, view, (int(cap.get(4)), int(cap.get(3)), 3)
-    )
+    writers: dict[str, cv2.VideoWriter] = {}
 
     while cap.isOpened():
         ret, frame = cap.read()
-        if not ret:
+        if not ret:  # pragma: no cover
             break
 
-        outputs = prepare_and_infer_image(model, frame, labels, view)
-        rendered_visualizations = render_visualizations(
-            outputs.visualizations, None, show
-        )
-        if save_dir is not None:
-            for name, viz_arrs in rendered_visualizations.items():
-                if name not in out_writers:
-                    out_writers[name] = cv2.VideoWriter(
-                        filename=str(  # type: ignore
-                            save_dir / f"{name.replace('/', '-')}.mp4"
-                        ),
+        # TODO: batched inference
+        outputs = prepare_and_infer_image(model, frame)
+        renders = process_visualizations(outputs.visualizations, batch_size=1)
+
+        for (node_name, viz_name), [viz] in renders.items():
+            if save_dir is not None:
+                name = f"{node_name}_{viz_name}"
+                if name not in writers:
+                    w, h = viz.shape[1], viz.shape[0]
+                    writers[name] = cv2.VideoWriter(
+                        filename=str(save_dir / f"{name}.mp4"),  # type: ignore
                         fourcc=cv2.VideoWriter_fourcc(*"mp4v"),  # type: ignore
                         fps=cap.get(cv2.CAP_PROP_FPS),  # type: ignore
-                        frameSize=(viz_arrs[0].shape[1], viz_arrs[0].shape[0]),  # type: ignore
-                    )  # type: ignore
-                for viz_arr in viz_arrs:
-                    out_writers[name].write(viz_arr)
-
-        progress_bar.update(1)
-
-    if save_dir is not None:
-        for writer in out_writers.values():
-            writer.release()
+                        frameSize=(w, h),  # type: ignore
+                    )
+                if name in writers:
+                    writers[name].write(viz)
+            else:  # pragma: no cover
+                cv2.imshow(f"{node_name}/{viz_name}", viz)
+
+        if not save_dir and window_closed():  # pragma: no cover
+            break
 
     cap.release()
-    progress_bar.close()
+    cv2.destroyAllWindows()
+
+    for writer in writers.values():
+        writer.release()
 
 
-def process_images(
-    model, img_paths: list[Path], view: str, save_dir: str | Path | None
+def infer_from_directory(
+    model: "luxonis_train.core.LuxonisModel",
+    img_paths: Iterable[Path],
+    save_dir: Path | None,
 ) -> None:
-    """Handles inference on one or more images."""
-    first_image = cv2.cvtColor(
-        cv2.imread(str(img_paths[0])), cv2.COLOR_BGR2RGB
-    )
-    labels = create_dummy_labels(model, view, first_image.shape)
+    """Runs inference on individual images from a directory.
+
+    @type model: L{LuxonisModel}
+    @param model: The model to use for inference.
+    @type img_paths: Iterable[Path]
+    @param img_paths: Iterable of paths to the images.
+    @type save_dir: Path | None
+    @param save_dir: The directory to save the visualizations to.
+    """
     for img_path in img_paths:
         img = cv2.imread(str(img_path))
-        outputs = prepare_and_infer_image(model, img, labels, view)
-        render_visualizations(outputs.visualizations, save_dir)
+        outputs = prepare_and_infer_image(model, img)
+        renders = process_visualizations(outputs.visualizations, batch_size=1)
+
+        for (node_name, viz_name), [viz] in renders.items():
+            if save_dir is not None:
+                cv2.imwrite(
+                    str(
+                        save_dir
+                        / f"{img_path.stem}_{node_name}_{viz_name}.png"
+                    ),
+                    viz,
+                )
+            else:  # pragma: no cover
+                cv2.imshow(f"{node_name}/{viz_name}", viz)
+
+        if not save_dir and window_closed():  # pragma: no cover
+            break
+
+    cv2.destroyAllWindows()
 
 
-def process_dataset_images(
-    model, view: str, save_dir: str | Path | None
+def infer_from_dataset(
+    model: "luxonis_train.core.LuxonisModel",
+    view: Literal["train", "val", "test"],
+    save_dir: Path | None,
 ) -> None:
-    """Handles the inference on dataset images."""
-    for inputs, labels in model.pytorch_loaders[view]:
+    """Runs inference on images from the dataset.
+
+    @type model: L{LuxonisModel}
+    @param model: The model to use for inference.
+    @type view: Literal["train", "val", "test"]
+    @param view: The view of the dataset to use.
+    @type save_dir: Path | None
+    @param save_dir: The directory to save the visualizations to.
+    """
+    broken = False
+    for i, (inputs, labels) in enumerate(model.pytorch_loaders[view]):
+        if broken:  # pragma: no cover
+            break
+
         images = get_unnormalized_images(model.cfg, inputs)
+        batch_size = images.shape[0]
         outputs = model.lightning_module.forward(
             inputs, labels, images=images, compute_visualizations=True
         )
-        render_visualizations(outputs.visualizations, save_dir)
-
-
-def create_dummy_labels(model, view: str, img_shape: tuple) -> dict:
-    """Prepares the labels for different tasks (classification,
-    keypoints, etc.)."""
-    tasks = list(model.loaders["train"].get_classes().keys())
-    h, w, _ = img_shape
-    labels = {}
-
-    for task in tasks:
-        if task == "classification":
-            labels[task] = [-1, TaskType.CLASSIFICATION]
-        elif task == "keypoints":
-            nk = model.loaders[view].get_n_keypoints()["keypoints"]
-            labels[task] = [torch.zeros((1, nk * 3 + 2)), TaskType.KEYPOINTS]
-        elif task == "segmentation":
-            labels[task] = [torch.zeros((1, h, w)), TaskType.SEGMENTATION]
-        elif task == "boundingbox":
-            labels[task] = [
-                torch.tensor([[-1, 0, 0, 0, 0, 0]]),
-                TaskType.BOUNDINGBOX,
-            ]
-
-    return labels
+        renders = process_visualizations(
+            outputs.visualizations,
+            batch_size=batch_size,
+        )
+        for j in range(batch_size):
+            for (node_name, viz_name), visualizations in renders.items():
+                viz = visualizations[j]
+                if save_dir is not None:
+                    name = f"{node_name}_{viz_name}"
+                    cv2.imwrite(
+                        str(save_dir / f"{name}_{i * batch_size + j}.png"), viz
+                    )
+                else:
+                    cv2.imshow(f"{node_name}/{viz_name}", viz)
+
+            if not save_dir and window_closed():  # pragma: no cover
+                broken = True
+                break
+
+    cv2.destroyAllWindows()
diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py
index d9d6c4c0..ead8a3dd 100644
--- a/luxonis_train/core/utils/tune_utils.py
+++ b/luxonis_train/core/utils/tune_utils.py
@@ -12,7 +12,7 @@ def _augs_to_indices(all_augs: list[str], aug_names: list[str]) -> list[int]:
     aug_indices = []
     for aug_name in aug_names:
         if aug_name == "Normalize":
-            logger.warn(
+            logger.warning(
                 f"'{aug_name}' should be tuned directly by adding '...normalize.active_categorical' to the tuner params, skipping."
             )
             continue
@@ -20,7 +20,7 @@ def _augs_to_indices(all_augs: list[str], aug_names: list[str]) -> list[int]:
             index = all_augs.index(aug_name)
             aug_indices.append(index)
         except ValueError:
-            logger.warn(
+            logger.warning(
                 f"Augmentation '{aug_name}' not found under trainer augemntations, skipping."
             )
             continue
diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py
index 459b20d1..2ca59117 100644
--- a/luxonis_train/models/luxonis_lightning.py
+++ b/luxonis_train/models/luxonis_lightning.py
@@ -415,7 +415,6 @@ def forward(
                 compute_visualizations
                 and node_name in self.visualizers
                 and images is not None
-                and labels is not None
             ):
                 for viz_name, visualizer in self.visualizers[
                     node_name
diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py
index 8180f960..2047e474 100644
--- a/luxonis_train/nodes/backbones/mobileone/mobileone.py
+++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py
@@ -67,7 +67,7 @@ def __init__(
         @type n_conv_branches: int | None
         @param n_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values.
         @type use_se: bool | None
-        @param use_se: Whether to use SE blocks in the network. If provided, overrides the variant value.
+        @param use_se: Whether to use C{Squeeze-and-Excitation} blocks in the network. If provided, overrides the variant value.
         """
         super().__init__(**kwargs)
 
diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py
index fd8a5e67..ac1407a2 100644
--- a/luxonis_train/nodes/backbones/repvgg/repvgg.py
+++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py
@@ -46,16 +46,16 @@ def __init__(
 
         @type variant: Literal["A0", "A1", "A2"]
         @param variant: RepVGG model variant. Defaults to "A0".
+        @type n_blocks: tuple[int, int, int, int] | None
+        @param n_blocks: Number of blocks in each stage.
+        @type width_multiplier: tuple[float, float, float, float] | None
+        @param width_multiplier: Width multiplier for each stage.
         @type override_groups_map: dict[int, int] | None
         @param override_groups_map: Dictionary mapping layer index to number of groups. The layers are indexed starting from 0.
         @type use_se: bool
         @param use_se: Whether to use Squeeze-and-Excitation blocks.
         @type use_checkpoint: bool
         @param use_checkpoint: Whether to use checkpointing.
-        @type n_blocks: tuple[int, int, int, int] | None
-        @param n_blocks: Number of blocks in each stage.
-        @type width_multiplier: tuple[float, float, float, float] | None
-        @param width_multiplier: Width multiplier for each stage.
         """
         super().__init__(**kwargs)
         var = get_variant(variant)
diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py
index 5059c651..25bea7c5 100644
--- a/luxonis_train/nodes/blocks/blocks.py
+++ b/luxonis_train/nodes/blocks/blocks.py
@@ -862,7 +862,7 @@ class DropPath(nn.Module):
         ...     def forward(self, x):
         ...         return x + self.drop_path(self.conv_bn_act(x))
 
-    @see U{Original code (TIMM) }
+    @see: U{Original code (TIMM) }
     @license: U{Apache License 2.0 }
     """
 
diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py
index 3d489c4a..d4c5b46f 100644
--- a/tests/integration/test_simple.py
+++ b/tests/integration/test_simple.py
@@ -6,8 +6,9 @@
 from pathlib import Path
 from typing import Any
 
+import cv2
 import pytest
-from luxonis_ml.data import LuxonisDataset
+from luxonis_ml.data import LuxonisDataset, LuxonisLoader
 from luxonis_ml.utils import environ
 
 from luxonis_train.core import LuxonisModel
@@ -19,6 +20,14 @@
 STUDY_PATH = Path("study_local.db")
 
 
+@pytest.fixture
+def infer_path() -> Path:
+    if INFER_PATH.exists():
+        shutil.rmtree(INFER_PATH)
+    INFER_PATH.mkdir()
+    return INFER_PATH
+
+
 @pytest.fixture
 def opts(test_output_dir: Path) -> dict[str, Any]:
     return {
@@ -33,11 +42,9 @@ def opts(test_output_dir: Path) -> dict[str, Any]:
 
 @pytest.fixture(scope="function", autouse=True)
 def clear_files():
-    # todo
     yield
     STUDY_PATH.unlink(missing_ok=True)
     ONNX_PATH.unlink(missing_ok=True)
-    shutil.rmtree(INFER_PATH, ignore_errors=True)
 
 
 @pytest.mark.parametrize(
@@ -72,7 +79,7 @@ def test_predefined_models(
     model.test()
 
 
-def test_multi_input(opts: dict[str, Any]):
+def test_multi_input(opts: dict[str, Any], infer_path: Path):
     config_file = "tests/configs/multi_input.yaml"
     model = LuxonisModel(config_file, opts)
     model.train()
@@ -82,9 +89,9 @@ def test_multi_input(opts: dict[str, Any]):
     model.export(str(ONNX_PATH))
     assert ONNX_PATH.exists()
 
-    assert not INFER_PATH.exists()
-    model.infer(view="val", save_dir=INFER_PATH)
-    assert INFER_PATH.exists()
+    assert len(list(infer_path.iterdir())) == 0
+    model.infer(view="val", save_dir=infer_path)
+    assert infer_path.exists()
 
 
 def test_custom_tasks(
@@ -149,6 +156,46 @@ def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset):
     assert STUDY_PATH.exists()
 
 
+def test_infer(coco_dataset: LuxonisDataset, infer_path: Path):
+    loader = LuxonisLoader(coco_dataset)
+    img_dir = Path("tests/data/img_dir")
+    video_writer = cv2.VideoWriter(
+        "tests/data/video.avi",  # type: ignore
+        cv2.VideoWriter_fourcc(*"XVID"),
+        1,
+        (256, 256),
+    )
+    if img_dir.exists():
+        shutil.rmtree(img_dir)
+    img_dir.mkdir()
+    for i, (img, _) in enumerate(loader):
+        img = cv2.resize(img, (256, 256))
+        cv2.imwrite(str(img_dir / f"{i}.jpg"), img)
+        video_writer.write(img)
+    video_writer.release()
+
+    opts = {
+        "loader.params.dataset_name": coco_dataset.identifier,
+        "trainer.preprocessing.augmentations": [],
+    }
+    model = LuxonisModel("configs/complex_model.yaml", opts)
+
+    model.infer(source_path=img_dir / "0.jpg", save_dir=infer_path)
+    assert len(list(infer_path.glob("*.png"))) == 3
+
+    model.infer(source_path=img_dir, save_dir=infer_path)
+    assert len(list(infer_path.glob("*.png"))) == len(loader) * 3
+
+    model.infer(source_path="tests/data/video.avi", save_dir=infer_path)
+    assert len(list(infer_path.glob("*.mp4"))) == 3
+
+    model.infer(save_dir=infer_path, view="train")
+    assert len(list(infer_path.glob("*.png"))) == len(loader) * 3 * 2
+
+    with pytest.raises(ValueError):
+        model.infer(source_path="tests/data/invalid.jpg", save_dir=infer_path)
+
+
 def test_archive(test_output_dir: Path, coco_dataset: LuxonisDataset):
     opts = {
         "tracker.save_directory": str(test_output_dir),
@@ -187,6 +234,10 @@ def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset):
         {
             "name": "ExportOnTrainEnd",
         },
+        {
+            "name": "ExportOnTrainEnd",
+            "params": {"preferred_checkpoint": "loss"},
+        },
         {
             "name": "ArchiveOnTrainEnd",
             "params": {"preferred_checkpoint": "loss"},