Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Force NCHW tensor order. #135

Merged
merged 4 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions depthai_nodes/ml/messages/creators/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,8 @@ def create_detection_message(
if len(masks.shape) != 2:
raise ValueError(f"Masks should be of shape (H, W), got {masks.shape}.")

if masks.dtype != np.int8:
masks = masks.astype(np.int8)
if masks.dtype != np.int16:
masks = masks.astype(np.int16)

detections = []
for detection_idx in range(n_bboxes):
Expand Down
6 changes: 3 additions & 3 deletions depthai_nodes/ml/messages/img_detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def masks(self) -> NDArray[np.int8]:
return self._masks.mask

@masks.setter
def masks(self, value: NDArray[np.int8]):
def masks(self, value: NDArray[np.int16]):
"""Sets the segmentation mask.

@param value: Segmentation mask.
Expand All @@ -219,8 +219,8 @@ def masks(self, value: NDArray[np.int8]):
raise TypeError("Mask must be a numpy array.")
if value.ndim != 2:
raise ValueError("Mask must be 2D.")
if value.dtype != np.int8:
raise ValueError("Mask must be an array of int8.")
if value.dtype != np.int16:
raise ValueError("Mask must be an array of int16.")
if np.any((value < -1)):
raise ValueError("Mask must be an array values larger or equal to -1.")
masks_msg = SegmentationMask()
Expand Down
18 changes: 4 additions & 14 deletions depthai_nodes/ml/parsers/fastsam.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
point_prompt,
process_single_mask,
)
from .utils.masks_utils import get_segmentation_outputs, reshape_seg_outputs
from .utils.masks_utils import get_segmentation_outputs


class FastSAMParser(BaseParser):
Expand Down Expand Up @@ -286,7 +286,9 @@ def run(self):

outputs_names = sorted([name for name in self.yolo_outputs])
outputs_values = [
output.getTensor(o, dequantize=True).astype(np.float32)
output.getTensor(
o, dequantize=True, storageOrder=dai.TensorInfo.StorageOrder.NCHW
).astype(np.float32)
for o in outputs_names
]
# Get the segmentation outputs
Expand All @@ -296,18 +298,6 @@ def run(self):
protos_len,
) = get_segmentation_outputs(output, self.mask_outputs, self.protos_output)

if (
len(outputs_values[0].shape) == 4
and outputs_values[0].shape[-1] == outputs_values[1].shape[-1]
):
# RVC4
outputs_values = [o.transpose((0, 3, 1, 2)) for o in outputs_values]
(
protos_output,
protos_len,
masks_outputs_values,
) = reshape_seg_outputs(protos_output, protos_len, masks_outputs_values)

# determine the input shape of the model from the first output
width = outputs_values[0].shape[3] * 8
height = outputs_values[0].shape[2] * 8
Expand Down
11 changes: 5 additions & 6 deletions depthai_nodes/ml/parsers/mlsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,18 +152,17 @@ def run(self):
except dai.MessageQueue.QueueException:
break # Pipeline was stopped

tpMap = output.getTensor(self.output_layer_tpmap, dequantize=True).astype(
np.float32
)
tpMap = output.getTensor(
self.output_layer_tpmap,
dequantize=True,
storageOrder=dai.TensorInfo.StorageOrder.NCHW,
).astype(np.float32)
heat_np = output.getTensor(self.output_layer_heat, dequantize=True).astype(
np.float32
)

if len(tpMap.shape) != 4:
raise ValueError("Invalid shape of the tpMap tensor. Should be 4D.")
if tpMap.shape[3] == 9:
# We have NHWC format, transform to NCHW
tpMap = np.transpose(tpMap, (0, 3, 1, 2))

pts, pts_score, vmap = decode_scores_and_points(tpMap, heat_np, self.topk_n)
lines, scores = get_lines(
Expand Down
3 changes: 0 additions & 3 deletions depthai_nodes/ml/parsers/superanimal_landmarker.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,6 @@ def run(self):
np.float32
)

if len(heatmaps.shape) == 3:
heatmaps = heatmaps.reshape((1,) + heatmaps.shape)

heatmaps_scale_factor = (
self.scale_factor / heatmaps.shape[1],
self.scale_factor / heatmaps.shape[2],
Expand Down
21 changes: 7 additions & 14 deletions depthai_nodes/ml/parsers/utils/masks_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,22 +76,15 @@ def get_segmentation_outputs(
layer_names = mask_output_layer_names or output.getAllLayerNames()
mask_outputs = sorted([name for name in layer_names if "mask" in name])
masks_outputs_values = [
output.getTensor(o, dequantize=True).astype(np.float32) for o in mask_outputs
output.getTensor(
o, dequantize=True, storageOrder=dai.TensorInfo.StorageOrder.NCHW
).astype(np.float32)
for o in mask_outputs
]
protos_output = output.getTensor(
protos_output_layer_name or "protos_output", dequantize=True
protos_output_layer_name or "protos_output",
dequantize=True,
storageOrder=dai.TensorInfo.StorageOrder.NCHW,
).astype(np.float32)
protos_len = protos_output.shape[1]
return masks_outputs_values, protos_output, protos_len


def reshape_seg_outputs(
    protos_output: np.ndarray,
    protos_len: int,
    masks_outputs_values: List[np.ndarray],
) -> Tuple[np.ndarray, int, List[np.ndarray]]:
    """Reshape the segmentation outputs.

    Every tensor is permuted with the axes order (0, 3, 1, 2), i.e. the last axis
    of a 4D array is moved to position 1. Presumably this converts channels-last
    tensors to channels-first ones; confirm against the callers.

    @param protos_output: 4D protos tensor to permute.
    @type protos_output: np.ndarray
    @param protos_len: Ignored on input; kept for interface compatibility. The
        returned value is always recomputed from the permuted protos tensor.
    @type protos_len: int
    @param masks_outputs_values: 4D mask tensors to permute.
    @type masks_outputs_values: List[np.ndarray]
    @return: The permuted protos tensor, the size of its axis 1, and the list of
        permuted mask tensors (in the original order).
    @rtype: Tuple[np.ndarray, int, List[np.ndarray]]
    """
    # One permutation shared by every tensor so they cannot drift apart.
    axes = (0, 3, 1, 2)

    protos_output = protos_output.transpose(axes)
    # Axis 1 only holds the protos count after the permutation above, which is
    # why the caller-supplied value is discarded.
    protos_len = protos_output.shape[1]
    masks_outputs_values = [mask.transpose(axes) for mask in masks_outputs_values]
    return protos_output, protos_len, masks_outputs_values
20 changes: 3 additions & 17 deletions depthai_nodes/ml/parsers/yolo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from .utils.masks_utils import (
get_segmentation_outputs,
process_single_mask,
reshape_seg_outputs,
)
from .utils.yolo import (
YOLOSubtype,
Expand Down Expand Up @@ -223,7 +222,9 @@ def run(self):
[name for name in layer_names if "_yolo" in name or "yolo-" in name]
)
outputs_values = [
output.getTensor(o, dequantize=True).astype(np.float32)
output.getTensor(
o, dequantize=True, storageOrder=dai.TensorInfo.StorageOrder.NCHW
).astype(np.float32)
for o in outputs_names
]

Expand Down Expand Up @@ -254,21 +255,6 @@ def run(self):
else:
mode = self._DET_MODE

if (
len(outputs_values[0].shape) == 4
and outputs_values[0].shape[-1] == outputs_values[1].shape[-1]
):
# RVC4
outputs_values = [o.transpose((0, 3, 1, 2)) for o in outputs_values]
if mode == self._SEG_MODE:
(
protos_output,
protos_len,
masks_outputs_values,
) = reshape_seg_outputs(
protos_output, protos_len, masks_outputs_values
)

# Get the model's input shape
strides = (
[8, 16, 32]
Expand Down
Loading