Tiling and TilesPatcher #89

Open · wants to merge 2 commits into main
3 changes: 3 additions & 0 deletions depthai_nodes/ml/postprocessing/__init__.py
@@ -0,0 +1,3 @@
from .tiles_patcher import TilesPatcher

__all__ = ["TilesPatcher"]
168 changes: 168 additions & 0 deletions depthai_nodes/ml/postprocessing/tiles_patcher.py
@@ -0,0 +1,168 @@
import depthai as dai

from ..preprocessing.tiling import Tiling
from .utils.nms import nms_detections


class TilesPatcher(dai.node.HostNode):
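    """Collects the per-tile NN outputs that belong to one frame, maps each detection
    from tile-local, letterboxed coordinates back to normalized full-frame coordinates,
    merges everything with class-wise NMS, and emits a single dai.ImgDetections
    message per frame."""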
def __init__(self) -> None:
super().__init__()
self.name = "Patcher"
self.tile_manager = None
self.conf_thresh = 0.3
self.iou_thresh = 0.4

self.tile_buffer = []
self.current_timestamp = None
self.expected_tiles_count = 0

def set_conf_thresh(self, conf_thresh: float) -> None:
self.conf_thresh = conf_thresh

def set_iou_thresh(self, iou_thresh: float) -> None:
self.iou_thresh = iou_thresh

def build(self, tile_manager: Tiling, nn: dai.Node.Output):
self.tile_manager = tile_manager
if (
self.tile_manager.x is None
or self.tile_manager.grid_size is None
or self.tile_manager.overlap is None
):
raise ValueError("Tile dimensions, grid size, or overlap not initialized.")
self.expected_tiles_count = len(self.tile_manager.tile_positions)
self.sendProcessingToPipeline(True)
self.link_args(nn)
return self

def process(self, nn_output: dai.ImgDetections) -> None:
timestamp = nn_output.getTimestamp()
device_timestamp = nn_output.getTimestampDevice()

if self.current_timestamp is None:
self.current_timestamp = timestamp

if self.current_timestamp != timestamp and len(self.tile_buffer) > 0:
# new frame started, send the output for the previous frame
self._send_output(self.current_timestamp, device_timestamp)
self.tile_buffer = []

self.current_timestamp = timestamp
tile_index = nn_output.getSequenceNum()

bboxes: list[dai.ImgDetection] = nn_output.detections
mapped_bboxes = self._map_bboxes_to_global_frame(bboxes, tile_index)
self.tile_buffer.append(mapped_bboxes)

if len(self.tile_buffer) == self.expected_tiles_count:
self._send_output(timestamp, device_timestamp)
self.tile_buffer = []

def _map_bboxes_to_global_frame(
self, bboxes: list[dai.ImgDetection], tile_index: int
):
tile_info = self._get_tile_info(tile_index)
        if (
            self.tile_manager is None
            or self.tile_manager.nn_shape is None
            or tile_info is None
        ):
            return []

# Original tile coordinates in the global frame
tile_x1, tile_y1, tile_x2, tile_y2 = tile_info["coords"]
tile_actual_width = tile_x2 - tile_x1
tile_actual_height = tile_y2 - tile_y1

# Scaled dimensions (after resizing to fit NN input)
scaled_width, scaled_height = tile_info["scaled_size"]
nn_width, nn_height = self.tile_manager.nn_shape

# Offsets due to padding
x_offset = (nn_width - scaled_width) // 2
y_offset = (nn_height - scaled_height) // 2

# Scaling factors from scaled tile back to original tile dimensions
scale_x = tile_actual_width / scaled_width
scale_y = tile_actual_height / scaled_height

global_bboxes = []
for bbox in bboxes:
# Convert bbox coordinates from normalized to NN input dimensions
bbox_xmin_nn = bbox.xmin * nn_width
bbox_ymin_nn = bbox.ymin * nn_height
bbox_xmax_nn = bbox.xmax * nn_width
bbox_ymax_nn = bbox.ymax * nn_height

# Adjust for padding offsets to get coordinates in the scaled tile
bbox_xmin_scaled = bbox_xmin_nn - x_offset
bbox_ymin_scaled = bbox_ymin_nn - y_offset
bbox_xmax_scaled = bbox_xmax_nn - x_offset
bbox_ymax_scaled = bbox_ymax_nn - y_offset

# Ensure coordinates are within the scaled tile dimensions
bbox_xmin_scaled = max(0, min(bbox_xmin_scaled, scaled_width))
bbox_ymin_scaled = max(0, min(bbox_ymin_scaled, scaled_height))
bbox_xmax_scaled = max(0, min(bbox_xmax_scaled, scaled_width))
bbox_ymax_scaled = max(0, min(bbox_ymax_scaled, scaled_height))

# Map to original tile coordinates
bbox_xmin_tile = bbox_xmin_scaled * scale_x
bbox_ymin_tile = bbox_ymin_scaled * scale_y
bbox_xmax_tile = bbox_xmax_scaled * scale_x
bbox_ymax_tile = bbox_ymax_scaled * scale_y

# Map to global image coordinates
x1_global = tile_x1 + bbox_xmin_tile
y1_global = tile_y1 + bbox_ymin_tile
x2_global = tile_x1 + bbox_xmax_tile
y2_global = tile_y1 + bbox_ymax_tile

# Normalize global coordinates
img_width, img_height = self.tile_manager.img_shape
normalized_bbox = dai.ImgDetection()
normalized_bbox.label = bbox.label
normalized_bbox.confidence = bbox.confidence
normalized_bbox.xmin = x1_global / img_width
normalized_bbox.ymin = y1_global / img_height
normalized_bbox.xmax = x2_global / img_width
normalized_bbox.ymax = y2_global / img_height

global_bboxes.append(normalized_bbox)

return global_bboxes

def _get_tile_info(self, tile_index: int):
"""Retrieves the tile's coordinates and scaled dimensions based on the tile
index."""
if self.tile_manager is None or self.tile_manager.tile_positions is None:
raise ValueError("Tile manager or tile positions not initialized.")
if tile_index >= len(self.tile_manager.tile_positions):
return None
return self.tile_manager.tile_positions[tile_index]

def _send_output(self, timestamp, device_timestamp):
"""Send the final combined bounding boxes as output when all tiles for a frame
are processed."""
combined_bboxes: list[dai.ImgDetection] = []
for bboxes in self.tile_buffer:
combined_bboxes.extend(bboxes)

if combined_bboxes:
detection_list = nms_detections(
combined_bboxes,
conf_thresh=self.conf_thresh,
iou_thresh=self.iou_thresh,
)
else:
detection_list = []

# Create ImgDetections message
detections = dai.ImgDetections()
detections.setTimestamp(timestamp)
detections.setTimestampDevice(device_timestamp)
detections.detections = detection_list

self.out.send(detections)
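
To help review the mapping in `_map_bboxes_to_global_frame`, here is a self-contained numeric walk-through of the same arithmetic (not part of the diff). The tile, NN, and image sizes are made up for illustration, and the clamping step is skipped because the values stay in range:

# Hypothetical sizes: a 640x480 tile taken at (0, 0) from a 1920x1080 frame,
# letterboxed into a 512x512 NN input (resized to 512x384, padded vertically).
nn_w, nn_h = 512, 512
scaled_w, scaled_h = 512, 384
tile_x1, tile_y1, tile_w, tile_h = 0, 0, 640, 480
img_w, img_h = 1920, 1080

x_off, y_off = (nn_w - scaled_w) // 2, (nn_h - scaled_h) // 2  # 0, 64
scale_x, scale_y = tile_w / scaled_w, tile_h / scaled_h        # 1.25, 1.25

# A detection normalized to the NN input.
xmin, ymin, xmax, ymax = 0.25, 0.25, 0.50, 0.50

# NN-input pixels -> unpadded (scaled-tile) pixels -> original-tile pixels.
x1 = (xmin * nn_w - x_off) * scale_x  # (128 - 0)  * 1.25 = 160.0
y1 = (ymin * nn_h - y_off) * scale_y  # (128 - 64) * 1.25 = 80.0
x2 = (xmax * nn_w - x_off) * scale_x  # (256 - 0)  * 1.25 = 320.0
y2 = (ymax * nn_h - y_off) * scale_y  # (256 - 64) * 1.25 = 240.0

# Original-tile pixels -> full-frame pixels -> normalized full-frame coords.
print((tile_x1 + x1) / img_w, (tile_y1 + y1) / img_h)  # ~0.0833 ~0.0741
print((tile_x1 + x2) / img_w, (tile_y1 + y2) / img_h)  # ~0.1667 ~0.2222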
0 changes: 0 additions & 0 deletions depthai_nodes/ml/postprocessing/utils/__init__.py
Empty file.
96 changes: 96 additions & 0 deletions depthai_nodes/ml/postprocessing/utils/nms.py
@@ -0,0 +1,96 @@
import depthai as dai
import numpy as np


def nms_detections(detections: list[dai.ImgDetection], conf_thresh=0.3, iou_thresh=0.4):
"""Applies Non-Maximum Suppression (NMS) on a list of dai.ImgDetection objects.

Parameters:
- detections: List of dai.ImgDetection objects.
- conf_thresh: Confidence threshold for filtering boxes.
- iou_thresh: IoU threshold for NMS.

Returns:
- A list of dai.ImgDetection objects after NMS.
"""
if len(detections) == 0:
return []

# Filter out detections below confidence threshold
detections = [det for det in detections if det.confidence >= conf_thresh]
if len(detections) == 0:
return []

# Organize detections by class
detections_by_class = {}
for det in detections:
label = det.label
if label not in detections_by_class:
detections_by_class[label] = []
detections_by_class[label].append(det)

final_detections = []
for _, dets in detections_by_class.items():
boxes = []
scores = []
for det in dets:
# Coordinates are normalized between 0 and 1
boxes.append([det.xmin, det.ymin, det.xmax, det.ymax])
scores.append(det.confidence)

boxes = np.array(boxes)
scores = np.array(scores)

# Perform NMS
keep_indices = nms(boxes, scores, iou_thresh)

# Keep the detections after NMS
final_dets = [dets[i] for i in keep_indices]
final_detections.extend(final_dets)

return final_detections


def nms(boxes, scores, iou_thresh):
"""Perform Non-Maximum Suppression (NMS).

Parameters:
- boxes: ndarray of shape (N, 4), where each row is [xmin, ymin, xmax, ymax].
- scores: ndarray of shape (N,), scores for each box.
- iou_thresh: float, IoU threshold for NMS.

Returns:
- List of indices of boxes to keep.
"""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
# Compute area of each box
areas = (x2 - x1) * (y2 - y1)
# Sort the boxes by scores in descending order
order = scores.argsort()[::-1]

keep = []
while order.size > 0:
i = order[0]
keep.append(i)
# Compute IoU of the kept box with the rest
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])

w = np.maximum(0.0, xx2 - xx1)
h = np.maximum(0.0, yy2 - yy1)
inter = w * h
union = (
areas[i] + areas[order[1:]] - inter + 1e-6
) # Add a small epsilon to prevent division by zero
iou = inter / union

        # Keep boxes with IoU less than or equal to the threshold
inds = np.where(iou <= iou_thresh)[0]
order = order[inds + 1]

return keep
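
A quick self-contained sanity check of the `nms` helper above, using hand-made boxes (the values are illustrative only, not from the PR):

import numpy as np

from depthai_nodes.ml.postprocessing.utils.nms import nms

boxes = np.array(
    [
        [0.10, 0.10, 0.40, 0.40],  # A
        [0.12, 0.11, 0.41, 0.42],  # B, overlaps A with IoU ~0.82
        [0.60, 0.60, 0.90, 0.90],  # C, disjoint from both
    ]
)
scores = np.array([0.9, 0.8, 0.7])

keep = nms(boxes, scores, iou_thresh=0.4)
print(keep)  # indices 0 and 2 survive; B is suppressed by the higher-scoring A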
3 changes: 3 additions & 0 deletions depthai_nodes/ml/preprocessing/__init__.py
@@ -0,0 +1,3 @@
from .tiling import Tiling

__all__ = ["Tiling"]
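
Finally, a minimal wiring sketch of how the two new nodes are meant to fit together in a DepthAI v3 pipeline (not part of the diff). Only `TilesPatcher.build`, `set_conf_thresh`, and `set_iou_thresh` come from this PR; the Tiling construction is a placeholder because tiling.py is not shown here, and the pipeline and queue calls follow the usual DepthAI v3 host-node pattern, which may differ in your version:

import depthai as dai

from depthai_nodes.ml.postprocessing import TilesPatcher
from depthai_nodes.ml.preprocessing import Tiling

with dai.Pipeline() as pipeline:
    # Placeholders: a configured Tiling host node that crops/letterboxes tiles,
    # and a NeuralNetwork node that runs inference on each tile.
    tiler = pipeline.create(Tiling)  # hypothetical setup; see tiling.py
    nn = pipeline.create(dai.node.NeuralNetwork)

    # From this PR: merge per-tile detections back into full-frame coordinates.
    patcher = pipeline.create(TilesPatcher).build(tile_manager=tiler, nn=nn.out)
    patcher.set_conf_thresh(0.3)
    patcher.set_iou_thresh(0.4)

    detections_q = patcher.out.createOutputQueue()
    pipeline.start()
    while pipeline.isRunning():
        msg: dai.ImgDetections = detections_q.get()
        # msg.detections holds NMS-merged boxes normalized to the full frame.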