From 98d9411f38cfab9902311f10fd39ed3ffedbb585 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 18:19:37 +0200 Subject: [PATCH 1/7] Fix keypoints filtering for mosaic aug --- luxonis_ml/data/augmentations/custom/mosaic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/luxonis_ml/data/augmentations/custom/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py index 34be15f9..a15733e1 100644 --- a/luxonis_ml/data/augmentations/custom/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -269,9 +269,7 @@ def get_params_dependent_on_targets( f"The batch size (= {n}) should be larger than " + f"{self.n_tiles} x out_batch_size (= {self.n_tiles * self.out_batch_size})" ) - indices = np.random.choice( - range(n), size=self.n_tiles * self.out_batch_size, replace=False - ).tolist() + indices = [0,1,2,3] image_shapes = [tuple(image.shape[:2]) for image in image_batch] return { "indices": indices, From fccd25143d75007f8da5f434c4d97671a5541ba2 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 19:44:50 +0200 Subject: [PATCH 2/7] Updated the logic for mosaic augmentation --- .../data/augmentations/custom/mosaic.py | 300 +++++++++--------- 1 file changed, 156 insertions(+), 144 deletions(-) diff --git a/luxonis_ml/data/augmentations/custom/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py index a15733e1..caaf93ee 100644 --- a/luxonis_ml/data/augmentations/custom/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -14,7 +14,7 @@ from ..batch_transform import BatchBasedTransform from ..utils import AUGMENTATIONS - +import random @AUGMENTATIONS.register_module() class Mosaic4(BatchBasedTransform): @@ -98,6 +98,12 @@ def get_transform_init_args_names(self) -> Tuple[str, ...]: "mask_value", ) + def _generate_random_crop_center(self) -> Tuple[int, int]: + """Generate a random crop center within the bounds of the mosaic image size.""" + crop_x = random.randint(0, max(0, self.out_width)) + crop_y = random.randint(0, max(0, self.out_height)) + return crop_x, crop_y + @property def targets_as_params(self): """List of augmentation targets. @@ -108,7 +114,7 @@ def targets_as_params(self): return ["image_batch"] def apply_to_image_batch( - self, image_batch: List[np.ndarray], indices: List[int], **params + self, image_batch: List[np.ndarray], indices: List[int], x_crop: int, y_crop: int, **params ) -> List[np.ndarray]: """Applies the transformation to a batch of images. @@ -119,6 +125,10 @@ def apply_to_image_batch( @type indices: List[Tuple[int, int]] @param params: Additional parameters for the transformation. @type params: Any + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int @return: List of transformed images. @rtype: List[np.ndarray] """ @@ -129,13 +139,13 @@ def apply_to_image_batch( ] image_chunk = [image_batch[i] for i in idx_chunk] mosaiced = mosaic4( - image_chunk, self.out_height, self.out_width, self.value + image_chunk, self.out_height, self.out_width, x_crop, y_crop, self.value, ) output_batch.append(mosaiced) return output_batch def apply_to_mask_batch( - self, mask_batch: List[np.ndarray], indices: List[int], **params + self, mask_batch: List[np.ndarray], indices: List[int], x_crop: int, y_crop: int, **params ) -> List[np.ndarray]: """Applies the transformation to a batch of masks. @@ -146,6 +156,10 @@ def apply_to_mask_batch( @type indices: List[Tuple[int, int]] @param params: Additional parameters for the transformation. @type params: Any + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int @return: List of transformed masks. @rtype: List[np.ndarray] """ @@ -156,7 +170,7 @@ def apply_to_mask_batch( ] mask_chunk = [mask_batch[i] for i in idx_chunk] mosaiced = mosaic4( - mask_chunk, self.out_height, self.out_width, self.mask_value + mask_chunk, self.out_height, self.out_width, x_crop, y_crop, self.mask_value ) output_batch.append(mosaiced) return output_batch @@ -166,6 +180,8 @@ def apply_to_bboxes_batch( bboxes_batch: List[BoxType], indices: List[int], image_shapes: List[Tuple[int, int]], + x_crop: int, + y_crop: int, **params, ) -> List[BoxType]: """Applies the transformation to a batch of bboxes. @@ -179,6 +195,10 @@ def apply_to_bboxes_batch( @type image_shapes: List[Tuple[int, int]] @param params: Additional parameters for the transformation. @type params: Any + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int @return: List of transformed bboxes. @rtype: List[BoxType] """ @@ -201,6 +221,8 @@ def apply_to_bboxes_batch( i, self.out_height, self.out_width, + x_crop, + y_crop, ) new_bboxes.append(tuple(new_bbox) + tuple(bbox[4:])) output_batch.append(new_bboxes) @@ -211,6 +233,8 @@ def apply_to_keypoints_batch( keyboints_batch: List[KeypointType], indices: List[int], image_shapes: List[Tuple[int, int]], + x_crop: int, + y_crop: int, **params, ) -> List[KeypointType]: """Applies the transformation to a batch of keypoints. @@ -224,6 +248,10 @@ def apply_to_keypoints_batch( @type image_shapes: List[Tuple[int, int]] @param params: Additional parameters for the transformation. @type params: Any + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int @return: List of transformed keypoints. @rtype: List[KeypointType] """ @@ -246,6 +274,8 @@ def apply_to_keypoints_batch( i, self.out_height, self.out_width, + x_crop, + y_crop, ) new_keypoints.append(new_keypoint + tuple(keypoint[4:])) output_batch.append(new_keypoints) @@ -271,124 +301,91 @@ def get_params_dependent_on_targets( ) indices = [0,1,2,3] image_shapes = [tuple(image.shape[:2]) for image in image_batch] + x_crop, y_crop = self._generate_random_crop_center() return { "indices": indices, "image_shapes": image_shapes, + "x_crop": x_crop, + "y_crop": y_crop, } - def mosaic4( image_batch: List[np.ndarray], height: int, width: int, - value: Optional[ImageColorType] = None, + x_crop: int, + y_crop: int, + value: Optional[int] = None, ) -> np.ndarray: - """Arrange the images in a 2x2 grid layout. - The input images should have the same number of channels but can have different widths and heights. - The output is cropped around the intersection point of the four images with the size (with x height). - If the mosaic image is smaller than with x height, the gap is filled by the fill_value. - This implementation is based on YOLOv5 with some modification: - https://github.com/ultralytics/yolov5/blob/932dc78496ca532a41780335468589ad7f0147f7/utils/datasets.py#L648 - - @param image_batch: Image list. The length should be four. Each image can has different size. + """Arrange the images in a 2x2 grid layout. The input images should have the same + number of channels but can have different widths and heights. The gaps are filled by + the value. + + @param image_batch: Image list. The length should be four. Each image can has + different size. @type image_batch: List[np.ndarray] @param height: Height of output mosaic image @type height: int @param width: Width of output mosaic image @type width: int @param value: Padding value - @type value: Optional[ImageColorType] + @type value: Optional[int] + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int @return: Final output image @rtype: np.ndarray """ N_TILES = 4 if len(image_batch) != N_TILES: - raise ValueError( - f"Length of image_batch should be 4. Got {len(image_batch)}" - ) + raise ValueError(f"Length of image_batch should be 4. Got {len(image_batch)}") for i in range(N_TILES - 1): if image_batch[0].shape[2:] != image_batch[i + 1].shape[2:]: - raise ValueError( - "All images should have the same number of channels." - + f" Got the shapes {image_batch[0].shape} and {image_batch[i + 1].shape}" - ) + raise ValueError("All images should have the same number of channels.") - if image_batch[0].dtype != image_batch[i + 1].dtype: - raise ValueError( - "All images should have the same dtype." - + f" Got the dtypes {image_batch[0].dtype} and {image_batch[i + 1].dtype}" - ) + dtype = image_batch[0].dtype + img4 = np.full( + (height * 2, width * 2, image_batch[0].shape[2]), + value if value is not None else 114, + dtype=dtype, + ) - if len(image_batch[0].shape) == 2: - out_shape = [height, width] - else: - out_shape = [height, width, image_batch[0].shape[2]] + xc = width // 2 + yc = height // 2 - dtype = image_batch[0].dtype - img4 = np.zeros(out_shape, dtype=dtype) # base image with 4 tiles - - value = 0 if value is None else value - if isinstance(value, (tuple, list, np.ndarray)): - if out_shape[2] != len(value): - ValueError( - "value parameter should has the same lengh as the output channel." - + f" value: ({value}), output shape: {out_shape}" - ) - for i in range(len(value)): - img4[:, :, i] = value[i] - else: - img4[:] = value - center_x = width // 2 - center_y = height // 2 for i, img in enumerate(image_batch): (h, w) = img.shape[:2] - # place img in img4 - # this based on the yolo5's implementation - # if i == 0: # top left - x1a, y1a, x2a, y2a = ( - max(center_x - w, 0), - max(center_y - h, 0), - center_x, - center_y, - ) # xmin, ymin, xmax, ymax (large image) - x1b, y1b, x2b, y2b = ( - w - (x2a - x1a), - h - (y2a - y1a), - w, - h, - ) # xmin, ymin, xmax, ymax (small image) + x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc + x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h elif i == 1: # top right - x1a, y1a, x2a, y2a = ( - center_x, - max(center_y - h, 0), - min(center_x + w, width), - center_y, - ) + x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, width * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left - x1a, y1a, x2a, y2a = ( - max(center_x - w, 0), - center_y, - center_x, - min(height, center_y + h), - ) + x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(height * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right - x1a, y1a, x2a, y2a = ( - center_x, - center_y, - min(center_x + w, width), - min(height, center_y + h), - ) + x1a, y1a, x2a, y2a = xc, yc, min(xc + w, width * 2), min(height * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) - img4[y1a:y2a, x1a:x2a] = img[ - y1b:y2b, x1b:x2b - ] # img4[ymin:ymax, xmin:xmax] + img4_region = img4[y1a:y2a, x1a:x2a] + img_region = img[y1b:y2b, x1b:x2b] + + img4_h, img4_w = img4_region.shape[:2] + img_h, img_w = img_region.shape[:2] + + min_h = min(img4_h, img_h) + min_w = min(img4_w, img_w) + + img4[y1a : y1a + min_h, x1a : x1a + min_w] = img[ + y1b : y1b + min_h, x1b : x1b + min_w + ] + + img4 = img4[y_crop:y_crop+height, x_crop:x_crop+width] return img4 @@ -400,51 +397,58 @@ def bbox_mosaic4( position_index: int, height: int, width: int, + x_crop: int, + y_crop: int, ) -> BoxInternalType: - """Put the given bbox in one of the cells of the 2x2 grid. + """Adjust bounding box coordinates to account for mosaic grid position. + + This function modifies bounding boxes according to their placement in a 2x2 grid + mosaic, shifting their coordinates based on the tile's relative position within the + mosaic. - @param bbox: A bounding box `(x_min, y_min, x_max, y_max)`. + @param bbox: Bounding box coordinates to be transformed. @type bbox: BoxInternalType - @param rows: Height of input image that corresponds to one of the - mosaic cells + @param rows: Height of the original image. @type rows: int - @param cols: Width of input image that corresponds to one of the - mosaic cells + @param cols: Width of the original image. @type cols: int - @param position_index: Index of the mosaic cell. 0: top left, 1: top - right, 2: bottom left, 3: bottom right + @param position_index: Position of the image in the 2x2 grid. (0 = top-left, 1 = + top-right, 2 = bottom-left, 3 = bottom-right). @type position_index: int - @param height: Height of output mosaic image + @param height: Height of the final output mosaic image. @type height: int - @param width: Width of output mosaic image + @param width: Width of the final output mosaic image. @type width: int - @return: Transformed bbox + @param xc: x-coordinate of the center of the mosaic image. + @type xc: int + @param yc: y-coordinate of the center of the mosaic image. + @type yc: int + @return: Transformed bounding box coordinates. @rtype: BoxInternalType """ + bbox = denormalize_bbox(bbox, rows, cols) - center_x = width // 2 - center_y = height // 2 - if position_index == 0: # top left - shift_x = center_x - cols - shift_y = center_y - rows - elif position_index == 1: # top right - shift_x = center_x - shift_y = center_y - rows - elif position_index == 2: # bottom left - shift_x = center_x - cols - shift_y = center_y - elif position_index == 3: # bottom right - shift_x = center_x - shift_y = center_y - bbox = ( - bbox[0] + shift_x, - bbox[1] + shift_y, - bbox[2] + shift_x, - bbox[3] + shift_y, - ) + xc = width // 2 + yc = height // 2 + + if position_index == 0: + shift_x = xc - cols + shift_y = yc - rows + elif position_index == 1: + shift_x = xc + shift_y = yc - rows + elif position_index == 2: + shift_x = xc - cols + shift_y = yc + elif position_index == 3: + shift_x = xc + shift_y = yc + + bbox = (bbox[0] + shift_x - x_crop, bbox[1] + shift_y - y_crop, bbox[2] + shift_x - x_crop, bbox[3] + shift_y - y_crop) + bbox = normalize_bbox(bbox, height, width) - return bbox + return bbox def keypoint_mosaic4( keypoint: KeypointInternalType, @@ -453,41 +457,49 @@ def keypoint_mosaic4( position_index: int, height: int, width: int, + x_crop: int, + y_crop: int, ) -> KeypointInternalType: - """Put the given keypoint in one of the cells of the 2x2 grid. + """Adjust keypoint coordinates based on mosaic grid position. + + This function adjusts the keypoint coordinates by placing them in one of the 2x2 + mosaic grid cells, with shifts relative to the mosaic center. - @param keypoint: A keypoint `(x, y, angle, scale)`. - @type bbox: KeypointInternalType - @param rows: Height of input image that corresponds to one of the - mosaic cells + @param keypoint: Keypoint coordinates and attributes (x, y, angle, scale). + @type keypoint: KeypointInternalType + @param rows: Height of the original image. @type rows: int - @param cols: Width of input image that corresponds to one of the - mosaic cells + @param cols: Width of the original image. @type cols: int - @param position_index: Index of the mosaic cell. 0: top left, 1: top - right, 2: bottom left, 3: bottom right + @param position_index: Position of the image in the 2x2 grid. (0 = top-left, 1 = + top-right, 2 = bottom-left, 3 = bottom-right). @type position_index: int - @param height: Height of output mosaic image + @param height: Height of the final output mosaic image. @type height: int - @param width: Width of output mosaic image + @param width: Width of the final output mosaic image. @type width: int - @return: Transformed keypoint + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int + @return: Adjusted keypoint coordinates. @rtype: KeypointInternalType """ x, y, angle, scale = keypoint - - center_x = width // 2 - center_y = height // 2 - if position_index == 0: # top left - shift_x = center_x - cols - shift_y = center_y - rows - elif position_index == 1: # top right - shift_x = center_x - shift_y = center_y - rows - elif position_index == 2: # bottom left - shift_x = center_x - cols - shift_y = center_y - elif position_index == 3: # bottom right - shift_x = center_x - shift_y = center_y - return x + shift_x, y + shift_y, angle, scale + xc = width // 2 + yc = height // 2 + + if position_index == 0: + shift_x = xc - cols + shift_y = yc - rows + elif position_index == 1: + shift_x = xc + shift_y = yc - rows + elif position_index == 2: + shift_x = xc - cols + shift_y = yc + elif position_index == 3: + shift_x = xc + shift_y = yc + + return x + shift_x - x_crop, y + shift_y - y_crop, angle, scale From 20f788b542a3051053dd69e69c38d2c173d132ed Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 19:49:27 +0200 Subject: [PATCH 3/7] fix: apply pre-commit hooks and formatting fixes --- .../data/augmentations/custom/mosaic.py | 108 ++++++++++++------ 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/luxonis_ml/data/augmentations/custom/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py index caaf93ee..1e9333a0 100644 --- a/luxonis_ml/data/augmentations/custom/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -1,3 +1,4 @@ +import random from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np @@ -8,13 +9,12 @@ ) from albumentations.core.transforms_interface import ( BoxInternalType, - ImageColorType, KeypointInternalType, ) from ..batch_transform import BatchBasedTransform from ..utils import AUGMENTATIONS -import random + @AUGMENTATIONS.register_module() class Mosaic4(BatchBasedTransform): @@ -99,7 +99,8 @@ def get_transform_init_args_names(self) -> Tuple[str, ...]: ) def _generate_random_crop_center(self) -> Tuple[int, int]: - """Generate a random crop center within the bounds of the mosaic image size.""" + """Generate a random crop center within the bounds of the mosaic + image size.""" crop_x = random.randint(0, max(0, self.out_width)) crop_y = random.randint(0, max(0, self.out_height)) return crop_x, crop_y @@ -114,7 +115,12 @@ def targets_as_params(self): return ["image_batch"] def apply_to_image_batch( - self, image_batch: List[np.ndarray], indices: List[int], x_crop: int, y_crop: int, **params + self, + image_batch: List[np.ndarray], + indices: List[int], + x_crop: int, + y_crop: int, + **params, ) -> List[np.ndarray]: """Applies the transformation to a batch of images. @@ -139,13 +145,23 @@ def apply_to_image_batch( ] image_chunk = [image_batch[i] for i in idx_chunk] mosaiced = mosaic4( - image_chunk, self.out_height, self.out_width, x_crop, y_crop, self.value, + image_chunk, + self.out_height, + self.out_width, + x_crop, + y_crop, + self.value, ) output_batch.append(mosaiced) return output_batch def apply_to_mask_batch( - self, mask_batch: List[np.ndarray], indices: List[int], x_crop: int, y_crop: int, **params + self, + mask_batch: List[np.ndarray], + indices: List[int], + x_crop: int, + y_crop: int, + **params, ) -> List[np.ndarray]: """Applies the transformation to a batch of masks. @@ -170,7 +186,12 @@ def apply_to_mask_batch( ] mask_chunk = [mask_batch[i] for i in idx_chunk] mosaiced = mosaic4( - mask_chunk, self.out_height, self.out_width, x_crop, y_crop, self.mask_value + mask_chunk, + self.out_height, + self.out_width, + x_crop, + y_crop, + self.mask_value, ) output_batch.append(mosaiced) return output_batch @@ -299,7 +320,7 @@ def get_params_dependent_on_targets( f"The batch size (= {n}) should be larger than " + f"{self.n_tiles} x out_batch_size (= {self.n_tiles * self.out_batch_size})" ) - indices = [0,1,2,3] + indices = [0, 1, 2, 3] image_shapes = [tuple(image.shape[:2]) for image in image_batch] x_crop, y_crop = self._generate_random_crop_center() return { @@ -309,6 +330,7 @@ def get_params_dependent_on_targets( "y_crop": y_crop, } + def mosaic4( image_batch: List[np.ndarray], height: int, @@ -317,12 +339,12 @@ def mosaic4( y_crop: int, value: Optional[int] = None, ) -> np.ndarray: - """Arrange the images in a 2x2 grid layout. The input images should have the same - number of channels but can have different widths and heights. The gaps are filled by - the value. + """Arrange the images in a 2x2 grid layout. The input images should + have the same number of channels but can have different widths and + heights. The gaps are filled by the value. - @param image_batch: Image list. The length should be four. Each image can has - different size. + @param image_batch: Image list. The length should be four. Each + image can has different size. @type image_batch: List[np.ndarray] @param height: Height of output mosaic image @type height: int @@ -339,11 +361,15 @@ def mosaic4( """ N_TILES = 4 if len(image_batch) != N_TILES: - raise ValueError(f"Length of image_batch should be 4. Got {len(image_batch)}") + raise ValueError( + f"Length of image_batch should be 4. Got {len(image_batch)}" + ) for i in range(N_TILES - 1): if image_batch[0].shape[2:] != image_batch[i + 1].shape[2:]: - raise ValueError("All images should have the same number of channels.") + raise ValueError( + "All images should have the same number of channels." + ) dtype = image_batch[0].dtype img4 = np.full( @@ -355,7 +381,6 @@ def mosaic4( xc = width // 2 yc = height // 2 - for i, img in enumerate(image_batch): (h, w) = img.shape[:2] @@ -366,10 +391,20 @@ def mosaic4( x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, width * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left - x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(height * 2, yc + h) + x1a, y1a, x2a, y2a = ( + max(xc - w, 0), + yc, + xc, + min(height * 2, yc + h), + ) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right - x1a, y1a, x2a, y2a = xc, yc, min(xc + w, width * 2), min(height * 2, yc + h) + x1a, y1a, x2a, y2a = ( + xc, + yc, + min(xc + w, width * 2), + min(height * 2, yc + h), + ) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4_region = img4[y1a:y2a, x1a:x2a] @@ -384,8 +419,8 @@ def mosaic4( img4[y1a : y1a + min_h, x1a : x1a + min_w] = img[ y1b : y1b + min_h, x1b : x1b + min_w ] - - img4 = img4[y_crop:y_crop+height, x_crop:x_crop+width] + + img4 = img4[y_crop : y_crop + height, x_crop : x_crop + width] return img4 @@ -400,11 +435,12 @@ def bbox_mosaic4( x_crop: int, y_crop: int, ) -> BoxInternalType: - """Adjust bounding box coordinates to account for mosaic grid position. + """Adjust bounding box coordinates to account for mosaic grid + position. - This function modifies bounding boxes according to their placement in a 2x2 grid - mosaic, shifting their coordinates based on the tile's relative position within the - mosaic. + This function modifies bounding boxes according to their placement + in a 2x2 grid mosaic, shifting their coordinates based on the tile's + relative position within the mosaic. @param bbox: Bounding box coordinates to be transformed. @type bbox: BoxInternalType @@ -412,8 +448,8 @@ def bbox_mosaic4( @type rows: int @param cols: Width of the original image. @type cols: int - @param position_index: Position of the image in the 2x2 grid. (0 = top-left, 1 = - top-right, 2 = bottom-left, 3 = bottom-right). + @param position_index: Position of the image in the 2x2 grid. (0 = + top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right). @type position_index: int @param height: Height of the final output mosaic image. @type height: int @@ -444,12 +480,18 @@ def bbox_mosaic4( shift_x = xc shift_y = yc - bbox = (bbox[0] + shift_x - x_crop, bbox[1] + shift_y - y_crop, bbox[2] + shift_x - x_crop, bbox[3] + shift_y - y_crop) + bbox = ( + bbox[0] + shift_x - x_crop, + bbox[1] + shift_y - y_crop, + bbox[2] + shift_x - x_crop, + bbox[3] + shift_y - y_crop, + ) bbox = normalize_bbox(bbox, height, width) return bbox + def keypoint_mosaic4( keypoint: KeypointInternalType, rows: int, @@ -462,17 +504,19 @@ def keypoint_mosaic4( ) -> KeypointInternalType: """Adjust keypoint coordinates based on mosaic grid position. - This function adjusts the keypoint coordinates by placing them in one of the 2x2 - mosaic grid cells, with shifts relative to the mosaic center. + This function adjusts the keypoint coordinates by placing them in + one of the 2x2 mosaic grid cells, with shifts relative to the mosaic + center. - @param keypoint: Keypoint coordinates and attributes (x, y, angle, scale). + @param keypoint: Keypoint coordinates and attributes (x, y, angle, + scale). @type keypoint: KeypointInternalType @param rows: Height of the original image. @type rows: int @param cols: Width of the original image. @type cols: int - @param position_index: Position of the image in the 2x2 grid. (0 = top-left, 1 = - top-right, 2 = bottom-left, 3 = bottom-right). + @param position_index: Position of the image in the 2x2 grid. (0 = + top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right). @type position_index: int @param height: Height of the final output mosaic image. @type height: int From 62b64c6add61a2ca85411df2c1ae9fc48b55dd3b Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 20:29:27 +0200 Subject: [PATCH 4/7] fixing some tests --- tests/test_augmentations/test_mosaic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_augmentations/test_mosaic.py b/tests/test_augmentations/test_mosaic.py index 328e6727..516d1b59 100644 --- a/tests/test_augmentations/test_mosaic.py +++ b/tests/test_augmentations/test_mosaic.py @@ -16,7 +16,7 @@ def test_mosaic4(): img = (np.random.rand(HEIGHT, WIDTH, 3) * 255).astype(np.uint8) - mosaic = mosaic4([img, img, img, img], HEIGHT, WIDTH) + mosaic = mosaic4([img, img, img, img], HEIGHT, WIDTH, 0, 0, 0) assert mosaic.shape == (HEIGHT, WIDTH, 3) @@ -24,7 +24,7 @@ def test_bbox_mosaic4(): bbox = (0, 0, WIDTH, HEIGHT) for i in range(4): mosaic_bbox = bbox_mosaic4( - bbox, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH + bbox, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH, 0, 0 ) assert pytest.approx(mosaic_bbox, abs=0.5) == ( 0, @@ -45,7 +45,7 @@ def test_keypoint_mosaic4(): ] ): mosaic_keypoint = keypoint_mosaic4( - keypoint, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH + keypoint, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH, 0, 0 ) assert pytest.approx(mosaic_keypoint, abs=0.25) == (w, h, 0, 0) From 1417ae9d9eb58b5eaddcefa88850a241c2dc4060 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 20:46:47 +0200 Subject: [PATCH 5/7] fix: Correct segmentation shape and values issue --- luxonis_ml/data/augmentations/custom/mosaic.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/luxonis_ml/data/augmentations/custom/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py index 1e9333a0..58dd0e24 100644 --- a/luxonis_ml/data/augmentations/custom/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -371,10 +371,16 @@ def mosaic4( "All images should have the same number of channels." ) + if len(image_batch[0].shape) == 2: + out_shape = [height * 2, width * 2] + else: + out_shape = [height * 2, width * 2, image_batch[0].shape[2]] + dtype = image_batch[0].dtype + img4 = np.full( - (height * 2, width * 2, image_batch[0].shape[2]), - value if value is not None else 114, + out_shape, + value if value is not None else 0, dtype=dtype, ) From 6aefd050554693f9bfe113b223aa822d182056fd Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 21:40:21 +0200 Subject: [PATCH 6/7] Fixed misaligned cropping, preserving more image data --- .../data/augmentations/custom/mosaic.py | 77 ++++++++++--------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/luxonis_ml/data/augmentations/custom/mosaic.py b/luxonis_ml/data/augmentations/custom/mosaic.py index 58dd0e24..a0c912fc 100644 --- a/luxonis_ml/data/augmentations/custom/mosaic.py +++ b/luxonis_ml/data/augmentations/custom/mosaic.py @@ -384,32 +384,39 @@ def mosaic4( dtype=dtype, ) - xc = width // 2 - yc = height // 2 - for i, img in enumerate(image_batch): (h, w) = img.shape[:2] if i == 0: # top left - x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc + x1a, y1a, x2a, y2a = ( + max(width - w, 0), + max(height - h, 0), + width, + height, + ) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h elif i == 1: # top right - x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, width * 2), yc + x1a, y1a, x2a, y2a = ( + width, + max(height - h, 0), + min(width + w, width * 2), + height, + ) x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = ( - max(xc - w, 0), - yc, - xc, - min(height * 2, yc + h), + max(width - w, 0), + height, + width, + min(height * 2, height + h), ) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = ( - xc, - yc, - min(xc + w, width * 2), - min(height * 2, yc + h), + width, + height, + min(width + w, width * 2), + min(height * 2, height + h), ) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) @@ -461,30 +468,28 @@ def bbox_mosaic4( @type height: int @param width: Width of the final output mosaic image. @type width: int - @param xc: x-coordinate of the center of the mosaic image. - @type xc: int - @param yc: y-coordinate of the center of the mosaic image. - @type yc: int + @param x_crop: x-coordinate of the croping start point + @type x_crop: int + @param y_crop: y-coordinate of the croping start point + @type y_crop: int @return: Transformed bounding box coordinates. @rtype: BoxInternalType """ bbox = denormalize_bbox(bbox, rows, cols) - xc = width // 2 - yc = height // 2 if position_index == 0: - shift_x = xc - cols - shift_y = yc - rows + shift_x = width - cols + shift_y = height - rows elif position_index == 1: - shift_x = xc - shift_y = yc - rows + shift_x = width + shift_y = height - rows elif position_index == 2: - shift_x = xc - cols - shift_y = yc + shift_x = width - cols + shift_y = height elif position_index == 3: - shift_x = xc - shift_y = yc + shift_x = width + shift_y = height bbox = ( bbox[0] + shift_x - x_crop, @@ -536,20 +541,18 @@ def keypoint_mosaic4( @rtype: KeypointInternalType """ x, y, angle, scale = keypoint - xc = width // 2 - yc = height // 2 if position_index == 0: - shift_x = xc - cols - shift_y = yc - rows + shift_x = width - cols + shift_y = height - rows elif position_index == 1: - shift_x = xc - shift_y = yc - rows + shift_x = width + shift_y = height - rows elif position_index == 2: - shift_x = xc - cols - shift_y = yc + shift_x = width - cols + shift_y = height elif position_index == 3: - shift_x = xc - shift_y = yc + shift_x = width + shift_y = height return x + shift_x - x_crop, y + shift_y - y_crop, angle, scale From 8198eefc862876c2ac6ba44bd3c8c8e23adb006c Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 4 Oct 2024 23:02:21 +0200 Subject: [PATCH 7/7] Fix tests that bypassed errors when filtering keypoints --- tests/test_augmentations/test_mosaic.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/test_augmentations/test_mosaic.py b/tests/test_augmentations/test_mosaic.py index 516d1b59..183e1e97 100644 --- a/tests/test_augmentations/test_mosaic.py +++ b/tests/test_augmentations/test_mosaic.py @@ -26,7 +26,7 @@ def test_bbox_mosaic4(): mosaic_bbox = bbox_mosaic4( bbox, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH, 0, 0 ) - assert pytest.approx(mosaic_bbox, abs=0.5) == ( + assert pytest.approx(mosaic_bbox, abs=1) == ( 0, 0, WIDTH // 2, @@ -35,7 +35,6 @@ def test_bbox_mosaic4(): def test_keypoint_mosaic4(): - keypoint = (WIDTH // 2, HEIGHT // 2, 0, 0) for i, (w, h) in enumerate( [ (WIDTH // 2, HEIGHT // 2), @@ -45,9 +44,9 @@ def test_keypoint_mosaic4(): ] ): mosaic_keypoint = keypoint_mosaic4( - keypoint, HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH, 0, 0 + (w, h, 0, 0), HEIGHT // 2, WIDTH // 2, i, HEIGHT, WIDTH, 0, 0 ) - assert pytest.approx(mosaic_keypoint, abs=0.25) == (w, h, 0, 0) + assert pytest.approx(mosaic_keypoint, abs=0.25) == (w * 2, h * 2, 0, 0) def test_Mosaic4():