From ad82a3e6e233492b2084ece9fa5f33bb6f903746 Mon Sep 17 00:00:00 2001 From: sidijju Date: Thu, 30 May 2024 13:03:10 -0600 Subject: [PATCH 1/7] [draft] add size = None option to Resize --- test/test_transforms_v2.py | 61 ++++++++++++++++--- torchvision/transforms/v2/_geometry.py | 29 ++++++--- .../transforms/v2/functional/_geometry.py | 36 +++++++---- 3 files changed, 97 insertions(+), 29 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index b0c1659f253..d4323e63c3d 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -99,7 +99,7 @@ def _script(obj): return torch.jit.script(obj) except Exception as error: name = getattr(obj, "__name__", obj.__class__.__name__) - raise AssertionError(f"Trying to `torch.jit.script` '{name}' raised the error above.") from error + raise AssertionError(f"Trying to 'torch.jit.script' {name} raised the error above.") from error def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs): @@ -233,8 +233,8 @@ def check_functional_kernel_signature_match(functional, *, kernel, input_type): functional_param = next(functional_params) except StopIteration: raise AssertionError( - f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` " - f"has no corresponding parameter on the functional `{functional.__name__}`." + f"Parameter {repr(kernel_param.name)} of kernel {repr(kernel.__name__)}" + f"has no corresponding parameter on the functional {repr(functional.__name__)}." ) from None if issubclass(input_type, PIL.Image.Image): @@ -551,10 +551,12 @@ def affine_bounding_boxes(bounding_boxes): class TestResize: INPUT_SIZE = (17, 11) - OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)] + OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)] def _make_max_size_kwarg(self, *, use_max_size, size): - if use_max_size: + if size is None: + max_size = min(list(self.INPUT_SIZE)) + elif use_max_size: if not (isinstance(size, int) or len(size) == 1): # This would result in an `ValueError` return None @@ -566,10 +568,13 @@ def _make_max_size_kwarg(self, *, use_max_size, size): return dict(max_size=max_size) def _compute_output_size(self, *, input_size, size, max_size): - if not (isinstance(size, int) or len(size) == 1): + if size is None: + size = max_size + + elif not (isinstance(size, int) or len(size) == 1): return tuple(size) - if not isinstance(size, int): + elif not isinstance(size, int): size = size[0] old_height, old_width = input_size @@ -656,10 +661,14 @@ def test_kernel_video(self): [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, size, make_input): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=True, size=size)): + return + check_functional( F.resize, make_input(self.INPUT_SIZE), size=size, + **max_size_kwarg, antialias=True, check_scripted_smoke=not isinstance(size, int), ) @@ -693,8 +702,11 @@ def test_functional_signature(self, kernel, input_type): ], ) def test_transform(self, size, device, make_input): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=True, size=size)): + return + check_transform( - transforms.Resize(size=size, antialias=True), + transforms.Resize(size=size, **max_size_kwarg, antialias=True), make_input(self.INPUT_SIZE, device=device), # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes check_v1_compatibility=dict(rtol=0, atol=1), @@ -799,7 +811,11 @@ def test_functional_pil_antialias_warning(self): 
], ) def test_max_size_error(self, size, make_input): - if isinstance(size, int) or len(size) == 1: + if size is None: + # value can be anything other than an integer + max_size = None + match = "max_size must be an integer when size is None" + elif isinstance(size, int) or len(size) == 1: max_size = (size if isinstance(size, int) else size[0]) - 1 match = "must be strictly greater than the requested size" else: @@ -810,6 +826,31 @@ def test_max_size_error(self, size, make_input): with pytest.raises(ValueError, match=match): F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + @pytest.mark.parametrize( + "max_size", + [min(list(INPUT_SIZE)) // 2, min(list(INPUT_SIZE)), max(list(INPUT_SIZE)), max(list(INPUT_SIZE)) * 2], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_boxes, + make_segmentation_mask, + make_detection_masks, + make_video, + ], + ) + def test_size_none(self, max_size, device, make_input): + check_transform( + transforms.Resize(size=None, max_size=max_size, antialias=True), + make_input(self.INPUT_SIZE, device=device), + # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes + check_v1_compatibility=dict(rtol=0, atol=1), + ) + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( "make_input", @@ -832,7 +873,7 @@ def test_interpolation_int(self, interpolation, make_input): assert_equal(actual, expected) def test_transform_unknown_size_error(self): - with pytest.raises(ValueError, match="size can either be an integer or a sequence of one or two integers"): + with pytest.raises(ValueError, match="size can be an integer, a sequence of one or two integers, or None"): transforms.Resize(size=object()) @pytest.mark.parametrize( diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index c670ca5e523..42a3d28dd7b 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -75,11 +75,14 @@ class Resize(Transform): the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. Args: - size (sequence or int): Desired output size. If size is a sequence like + size (sequence, int, or None): Desired output size. If size is a sequence like (h, w), output size will be matched to this. If size is an int, smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to - (size * height / width, size). + (size * height / width, size). If size is None, + the longer edge of the image will be matched to max_size. + i.e, if height > width, then image will be rescaled to + (max_size, max_size * width / height). .. note:: In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. @@ -95,7 +98,8 @@ class Resize(Transform): ``max_size``. As a result, the smaller edge may be shorter than ``size``. This is only supported if ``size`` is an int (or a sequence of length - 1 in torchscript mode). + 1 in torchscript mode) or None. + antialias (bool, optional): Whether to apply antialiasing. 
It only affects **tensors** with bilinear or bicubic modes and it is ignored otherwise: on PIL images, antialiasing is always applied on @@ -120,7 +124,7 @@ class Resize(Transform): def __init__( self, - size: Union[int, Sequence[int]], + size: Union[int, Sequence[int], None], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -131,9 +135,12 @@ def __init__( size = [size] elif isinstance(size, Sequence) and len(size) in {1, 2}: size = list(size) + elif size is None: + if not isinstance(max_size, int): + raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") else: raise ValueError( - f"size can either be an integer or a sequence of one or two integers, but got {size} instead." + f"size can be an integer, a sequence of one or two integers, or None, but got {size} instead." ) self.size = size @@ -141,6 +148,14 @@ def __init__( self.max_size = max_size self.antialias = antialias + def _extract_params_for_v1_transform(self) -> Dict[str, Any]: + params = super()._extract_params_for_v1_transform() + + if params["size"] is None: + params["size"] = [params["max_size"] - 1] + + return params + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: return self._call_kernel( F.resize, @@ -445,7 +460,7 @@ def _extract_params_for_v1_transform(self) -> Dict[str, Any]: params = super()._extract_params_for_v1_transform() if not (params["fill"] is None or isinstance(params["fill"], (int, float))): - raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar `fill`, but got {self.fill}.") + raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar 'fill', but got {self.fill}.") return params @@ -791,7 +806,7 @@ def _extract_params_for_v1_transform(self) -> Dict[str, Any]: params = super()._extract_params_for_v1_transform() if not (params["fill"] is None or isinstance(params["fill"], (int, float))): - raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar `fill`, but got {self.fill}.") + raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar 'fill', but got {self.fill}.") padding = self.padding if padding is not None: diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 67338d1a839..c68d2d84a9c 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -32,7 +32,7 @@ def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> Interp interpolation = _interpolation_modes_from_int(interpolation) elif not isinstance(interpolation, InterpolationMode): raise ValueError( - f"Argument interpolation should be an `InterpolationMode` or a corresponding Pillow integer constant, " + f"Argument interpolation should be an 'InterpolationMode' or a corresponding Pillow integer constant, " f"but got {interpolation}." 
) return interpolation @@ -159,13 +159,22 @@ def vertical_flip_video(video: torch.Tensor) -> torch.Tensor: def _compute_resized_output_size( - canvas_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None + canvas_size: Tuple[int, int], size: Optional[List[int]], max_size: Optional[int] = None ) -> List[int]: if isinstance(size, int): size = [size] + elif size is None: + if isinstance(max_size, int): + h, w = canvas_size + short, long = (w, h) if w <= h else (h, w) + new_short, new_long = int(max_size * short / long), max_size + new_w, new_h = (new_long, new_short) if w <= h else (new_short, new_long) + size = [new_w, new_h] + else: + raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") elif max_size is not None and len(size) != 1: raise ValueError( - "max_size should only be passed if size specifies the length of the smaller edge, " + "max_size should only be passed if size is None or specifies the length of the smaller edge, " "i.e. size should be an int or a sequence of length 1 in torchscript mode." ) return __compute_resized_output_size(canvas_size, size=size, max_size=max_size) @@ -173,7 +182,7 @@ def _compute_resized_output_size( def resize( inpt: torch.Tensor, - size: List[int], + size: Optional[List[int]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -206,7 +215,7 @@ def _do_native_uint8_resize_on_cpu(interpolation: InterpolationMode) -> bool: @_register_kernel_internal(resize, tv_tensors.Image) def resize_image( image: torch.Tensor, - size: List[int], + size: Optional[List[int]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, antialias: Optional[bool] = True, @@ -310,7 +319,7 @@ def __resize_image_pil_dispatch( return _resize_image_pil(image, size=size, interpolation=interpolation, max_size=max_size) -def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = None) -> torch.Tensor: +def resize_mask(mask: torch.Tensor, size: Optional[List[int]], max_size: Optional[int] = None) -> torch.Tensor: if mask.ndim < 3: mask = mask.unsqueeze(0) needs_squeeze = True @@ -334,7 +343,10 @@ def _resize_mask_dispatch( def resize_bounding_boxes( - bounding_boxes: torch.Tensor, canvas_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None + bounding_boxes: torch.Tensor, + canvas_size: Tuple[int, int], + size: Optional[List[int]], + max_size: Optional[int] = None, ) -> Tuple[torch.Tensor, Tuple[int, int]]: old_height, old_width = canvas_size new_height, new_width = _compute_resized_output_size(canvas_size, size=size, max_size=max_size) @@ -353,7 +365,7 @@ def resize_bounding_boxes( @_register_kernel_internal(resize, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False) def _resize_bounding_boxes_dispatch( - inpt: tv_tensors.BoundingBoxes, size: List[int], max_size: Optional[int] = None, **kwargs: Any + inpt: tv_tensors.BoundingBoxes, size: Optional[List[int]], max_size: Optional[int] = None, **kwargs: Any ) -> tv_tensors.BoundingBoxes: output, canvas_size = resize_bounding_boxes( inpt.as_subclass(torch.Tensor), inpt.canvas_size, size, max_size=max_size @@ -364,7 +376,7 @@ def _resize_bounding_boxes_dispatch( @_register_kernel_internal(resize, tv_tensors.Video) def resize_video( video: torch.Tensor, - size: List[int], + size: Optional[List[int]], interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR, max_size: Optional[int] = None, 
antialias: Optional[bool] = True, @@ -1176,7 +1188,7 @@ def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple" ) else: - raise TypeError(f"`padding` should be an integer or tuple or list of integers, but got {padding}") + raise TypeError(f"'padding' should be an integer or tuple or list of integers, but got {padding}") return [pad_left, pad_right, pad_top, pad_bottom] @@ -1196,8 +1208,8 @@ def pad_image( if padding_mode not in ("constant", "edge", "reflect", "symmetric"): raise ValueError( - f"`padding_mode` should be either `'constant'`, `'edge'`, `'reflect'` or `'symmetric'`, " - f"but got `'{padding_mode}'`." + f"'padding_mode' should be either 'constant', 'edge', 'reflect' or 'symmetric', " + f"but got '{padding_mode}'." ) if fill is None: From 9c5193580389bd0b81340cef96efc509d5a42512 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 31 May 2024 11:02:19 +0100 Subject: [PATCH 2/7] Remove _extract_params_for_v1_transform --- test/test_transforms_v2.py | 7 +++---- torchvision/transforms/v2/_geometry.py | 8 -------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index d4323e63c3d..ffbd8993210 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -551,7 +551,7 @@ def affine_bounding_boxes(bounding_boxes): class TestResize: INPUT_SIZE = (17, 11) - OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)] + OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13), None] def _make_max_size_kwarg(self, *, use_max_size, size): if size is None: @@ -709,7 +709,7 @@ def test_transform(self, size, device, make_input): transforms.Resize(size=size, **max_size_kwarg, antialias=True), make_input(self.INPUT_SIZE, device=device), # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes - check_v1_compatibility=dict(rtol=0, atol=1), + check_v1_compatibility=dict(rtol=0, atol=1) if size is not None else False, ) def _check_output_size(self, input, output, *, size, max_size): @@ -847,8 +847,7 @@ def test_size_none(self, max_size, device, make_input): check_transform( transforms.Resize(size=None, max_size=max_size, antialias=True), make_input(self.INPUT_SIZE, device=device), - # atol=1 due to Resize v2 is using native uint8 interpolate path for bilinear and nearest modes - check_v1_compatibility=dict(rtol=0, atol=1), + check_v1_compatibility=False, ) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index 42a3d28dd7b..a9b1680886d 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -148,14 +148,6 @@ def __init__( self.max_size = max_size self.antialias = antialias - def _extract_params_for_v1_transform(self) -> Dict[str, Any]: - params = super()._extract_params_for_v1_transform() - - if params["size"] is None: - params["size"] = [params["max_size"] - 1] - - return params - def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: return self._call_kernel( F.resize, From 4cd4cbc64e239a4f12c0c0050cdf0f45c5bb9ab6 Mon Sep 17 00:00:00 2001 From: sidijju Date: Tue, 4 Jun 2024 12:01:17 -0600 Subject: [PATCH 3/7] move max_size logic into v1 functional --- test/test_transforms_v2.py | 29 ++++++++------- torchvision/transforms/functional.py | 37 ++++++++++++------- torchvision/transforms/v2/_geometry.py | 4 +- 
.../transforms/v2/functional/_geometry.py | 19 +++------- 4 files changed, 47 insertions(+), 42 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index ffbd8993210..3e9d29f5bea 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -99,7 +99,7 @@ def _script(obj): return torch.jit.script(obj) except Exception as error: name = getattr(obj, "__name__", obj.__class__.__name__) - raise AssertionError(f"Trying to 'torch.jit.script' {name} raised the error above.") from error + raise AssertionError(f"Trying to `torch.jit.script` `{name}` raised the error above.") from error def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs): @@ -233,8 +233,8 @@ def check_functional_kernel_signature_match(functional, *, kernel, input_type): functional_param = next(functional_params) except StopIteration: raise AssertionError( - f"Parameter {repr(kernel_param.name)} of kernel {repr(kernel.__name__)}" - f"has no corresponding parameter on the functional {repr(functional.__name__)}." + f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` " + f"has no corresponding parameter on the functional `{functional.__name__}`." ) from None if issubclass(input_type, PIL.Image.Image): @@ -551,7 +551,7 @@ def affine_bounding_boxes(bounding_boxes): class TestResize: INPUT_SIZE = (17, 11) - OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13), None] + OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)] def _make_max_size_kwarg(self, *, use_max_size, size): if size is None: @@ -827,10 +827,15 @@ def test_max_size_error(self, size, make_input): F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) @pytest.mark.parametrize( - "max_size", - [min(list(INPUT_SIZE)) // 2, min(list(INPUT_SIZE)), max(list(INPUT_SIZE)), max(list(INPUT_SIZE)) * 2], + "input_size, max_size, expected_size", + [ + ((10, 10), 10, (10, 10)), + ((10, 20), 40, (20, 40)), + ((20, 10), 40, (40, 20)), + ((10, 20), 10, (5, 10)), + ((20, 10), 10, (10, 5)), + ], ) - @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "make_input", [ @@ -843,12 +848,10 @@ def test_max_size_error(self, size, make_input): make_video, ], ) - def test_size_none(self, max_size, device, make_input): - check_transform( - transforms.Resize(size=None, max_size=max_size, antialias=True), - make_input(self.INPUT_SIZE, device=device), - check_v1_compatibility=False, - ) + def test_resize_size_none(self, input_size, max_size, expected_size, make_input): + img = make_input(input_size) + out = F.resize(img, size=None, max_size=max_size) + assert F.get_size(out)[-2:] == list(expected_size) @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) @pytest.mark.parametrize( diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 124d1da5f4f..68a7aa30253 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -351,23 +351,32 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool def _compute_resized_output_size( - image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None + image_size: Tuple[int, int], + size: List[int], + max_size: Optional[int] = None, + allow_size_none: bool = False, ) -> List[int]: - if len(size) == 1: # specified size only for the smallest edge + if len(size) <= 1: # specified size only for the smallest edge h, w = image_size short, long = (w, h) if w <= h else (h, w) - requested_new_short = size if 
isinstance(size, int) else size[0] - - new_short, new_long = requested_new_short, int(requested_new_short * long / short) - - if max_size is not None: - if max_size <= requested_new_short: - raise ValueError( - f"max_size = {max_size} must be strictly greater than the requested " - f"size for the smaller edge size = {size}" - ) - if new_long > max_size: - new_short, new_long = int(max_size * new_short / new_long), max_size + + if len(size) == 0 and allow_size_none: + if isinstance(max_size, int): + new_short, new_long = int(max_size * short / long), max_size + else: + raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") + else: + requested_new_short = size if isinstance(size, int) else size[0] + new_short, new_long = requested_new_short, int(requested_new_short * long / short) + + if max_size is not None: + if max_size <= requested_new_short: + raise ValueError( + f"max_size = {max_size} must be strictly greater than the requested " + f"size for the smaller edge size = {size}" + ) + if new_long > max_size: + new_short, new_long = int(max_size * new_short / new_long), max_size new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) else: # specified both h and w diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index a9b1680886d..a076f05441f 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -452,7 +452,7 @@ def _extract_params_for_v1_transform(self) -> Dict[str, Any]: params = super()._extract_params_for_v1_transform() if not (params["fill"] is None or isinstance(params["fill"], (int, float))): - raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar 'fill', but got {self.fill}.") + raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar `fill`, but got {self.fill}.") return params @@ -798,7 +798,7 @@ def _extract_params_for_v1_transform(self) -> Dict[str, Any]: params = super()._extract_params_for_v1_transform() if not (params["fill"] is None or isinstance(params["fill"], (int, float))): - raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar 'fill', but got {self.fill}.") + raise ValueError(f"{type(self).__name__}() can only be scripted for a scalar `fill`, but got {self.fill}.") padding = self.padding if padding is not None: diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index c68d2d84a9c..3dc6245354d 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -32,7 +32,7 @@ def _check_interpolation(interpolation: Union[InterpolationMode, int]) -> Interp interpolation = _interpolation_modes_from_int(interpolation) elif not isinstance(interpolation, InterpolationMode): raise ValueError( - f"Argument interpolation should be an 'InterpolationMode' or a corresponding Pillow integer constant, " + f"Argument interpolation should be an `InterpolationMode` or a corresponding Pillow integer constant, " f"but got {interpolation}." 
) return interpolation @@ -164,20 +164,13 @@ def _compute_resized_output_size( if isinstance(size, int): size = [size] elif size is None: - if isinstance(max_size, int): - h, w = canvas_size - short, long = (w, h) if w <= h else (h, w) - new_short, new_long = int(max_size * short / long), max_size - new_w, new_h = (new_long, new_short) if w <= h else (new_short, new_long) - size = [new_w, new_h] - else: - raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") + size = [] # pass empty list to match function signature for v1 elif max_size is not None and len(size) != 1: raise ValueError( "max_size should only be passed if size is None or specifies the length of the smaller edge, " "i.e. size should be an int or a sequence of length 1 in torchscript mode." ) - return __compute_resized_output_size(canvas_size, size=size, max_size=max_size) + return __compute_resized_output_size(canvas_size, size=size, max_size=max_size, allow_size_none=True) def resize( @@ -1188,7 +1181,7 @@ def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple" ) else: - raise TypeError(f"'padding' should be an integer or tuple or list of integers, but got {padding}") + raise TypeError(f"`padding` should be an integer or tuple or list of integers, but got {padding}") return [pad_left, pad_right, pad_top, pad_bottom] @@ -1208,8 +1201,8 @@ def pad_image( if padding_mode not in ("constant", "edge", "reflect", "symmetric"): raise ValueError( - f"'padding_mode' should be either 'constant', 'edge', 'reflect' or 'symmetric', " - f"but got '{padding_mode}'." + f"`padding_mode` should be either `'constant'`, `'edge'`, `'reflect'` or `'symmetric'`, " + f"but got `'{padding_mode}'`." ) if fill is None: From 176c12587dff35907656c3efddf2c05e347a449a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 5 Jun 2024 11:27:53 +0100 Subject: [PATCH 4/7] Docs --- torchvision/transforms/v2/_geometry.py | 38 ++++++++++++++++---------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py index a076f05441f..5d6b1841d7f 100644 --- a/torchvision/transforms/v2/_geometry.py +++ b/torchvision/transforms/v2/_geometry.py @@ -75,14 +75,15 @@ class Resize(Transform): the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape. Args: - size (sequence, int, or None): Desired output size. If size is a sequence like - (h, w), output size will be matched to this. If size is an int, - smaller edge of the image will be matched to this number. - i.e, if height > width, then image will be rescaled to - (size * height / width, size). If size is None, - the longer edge of the image will be matched to max_size. - i.e, if height > width, then image will be rescaled to - (max_size, max_size * width / height). + size (sequence, int, or None): Desired + output size. + + - If size is a sequence like (h, w), output size will be matched to this. + - If size is an int, smaller edge of the image will be matched to this + number. i.e, if height > width, then image will be rescaled to + (size * height / width, size). + - If size is None, the output shape is determined by the ``max_size`` + parameter. .. note:: In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``. 
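The ``size=None`` behaviour documented above boils down to one computation: match the longer edge to ``max_size`` and scale the shorter edge to preserve the aspect ratio. A minimal standalone sketch of that rule (the helper name is hypothetical and not part of the torchvision API; the expected outputs follow the ``test_resize_size_none`` cases added in patch 3):

    def output_size_for_size_none(canvas_size, max_size):
        # canvas_size is (height, width), as elsewhere in torchvision
        h, w = canvas_size
        short, long = (w, h) if w <= h else (h, w)
        new_short, new_long = int(max_size * short / long), max_size
        # returned as (new_height, new_width)
        return (new_long, new_short) if h >= w else (new_short, new_long)

    output_size_for_size_none((10, 20), max_size=40)  # (20, 40): width is the longer edge, upscaled to 40
    output_size_for_size_none((20, 10), max_size=10)  # (10, 5): longer edge shrunk to max_size
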
@@ -92,13 +93,20 @@ class Resize(Transform): ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported. The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well. max_size (int, optional): The maximum allowed for the longer edge of - the resized image. If the longer edge of the image is greater - than ``max_size`` after being resized according to ``size``, - ``size`` will be overruled so that the longer edge is equal to - ``max_size``. - As a result, the smaller edge may be shorter than ``size``. This - is only supported if ``size`` is an int (or a sequence of length - 1 in torchscript mode) or None. + the resized image. + + - If ``size`` is an int: if the longer edge of the image is greater + than ``max_size`` after being resized according to ``size``, + ``size`` will be overruled so that the longer edge is equal to + ``max_size``. As a result, the smaller edge may be shorter than + ``size``. This is only supported if ``size`` is an int (or a + sequence of length 1 in torchscript mode). + - If ``size`` is None: the longer edge of the image will be matched + to max_size. i.e, if height > width, then image will be rescaled + to (max_size, max_size * width / height). + + This should be left to ``None`` (default) when ``size`` is a + sequence. antialias (bool, optional): Whether to apply antialiasing. It only affects **tensors** with bilinear or bicubic modes and it is From c1f1a9434c2c0191e265e929b15beaecef533063 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 5 Jun 2024 11:36:48 +0100 Subject: [PATCH 5/7] Add test --- test/test_transforms_v2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 308f3a1ab68..628fdaf6727 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -828,6 +828,10 @@ def test_max_size_error(self, size, make_input): with pytest.raises(ValueError, match=match): F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + if isinstance(size, list) and len(size) != 1: + with pytest.raises(ValueError, match="max_size should only be passed if size is None or specifies"): + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=500) + @pytest.mark.parametrize( "input_size, max_size, expected_size", [ From b7b15764da92013fe8bd003f4f1786ff59951f46 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 5 Jun 2024 11:57:46 +0100 Subject: [PATCH 6/7] Avoid empty list hack and pass None (hopefully mypy is still happy) --- torchvision/transforms/functional.py | 46 +++++++++---------- .../transforms/v2/functional/_geometry.py | 4 +- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 68a7aa30253..8efe2a8878a 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -352,31 +352,31 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool def _compute_resized_output_size( image_size: Tuple[int, int], - size: List[int], + size: Optional[List[int]], max_size: Optional[int] = None, - allow_size_none: bool = False, + allow_size_none: bool = False, # only True in v2 ) -> List[int]: - if len(size) <= 1: # specified size only for the smallest edge - h, w = image_size - short, long = (w, h) if w <= h else (h, w) - - if len(size) == 0 and allow_size_none: - if isinstance(max_size, int): - new_short, new_long = int(max_size * short / long), max_size - else: - raise 
ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") - else: - requested_new_short = size if isinstance(size, int) else size[0] - new_short, new_long = requested_new_short, int(requested_new_short * long / short) - - if max_size is not None: - if max_size <= requested_new_short: - raise ValueError( - f"max_size = {max_size} must be strictly greater than the requested " - f"size for the smaller edge size = {size}" - ) - if new_long > max_size: - new_short, new_long = int(max_size * new_short / new_long), max_size + h, w = image_size + short, long = (w, h) if w <= h else (h, w) + if size is None: + if not allow_size_none: + raise ValueError("This should never happen!!") + if not isinstance(max_size, int): + raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.") + new_short, new_long = int(max_size * short / long), max_size + new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) + elif len(size) == 1: # specified size only for the smallest edge + requested_new_short = size if isinstance(size, int) else size[0] + new_short, new_long = requested_new_short, int(requested_new_short * long / short) + + if max_size is not None: + if max_size <= requested_new_short: + raise ValueError( + f"max_size = {max_size} must be strictly greater than the requested " + f"size for the smaller edge size = {size}" + ) + if new_long > max_size: + new_short, new_long = int(max_size * new_short / new_long), max_size new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short) else: # specified both h and w diff --git a/torchvision/transforms/v2/functional/_geometry.py b/torchvision/transforms/v2/functional/_geometry.py index 3dc6245354d..da080e437c9 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -163,9 +163,7 @@ def _compute_resized_output_size( ) -> List[int]: if isinstance(size, int): size = [size] - elif size is None: - size = [] # pass empty list to match function signature for v1 - elif max_size is not None and len(size) != 1: + elif max_size is not None and size is not None and len(size) != 1: raise ValueError( "max_size should only be passed if size is None or specifies the length of the smaller edge, " "i.e. size should be an int or a sequence of length 1 in torchscript mode." 
From a9b8daf54bf98cb84b39d08d87770fd18d96100c Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Wed, 5 Jun 2024 12:07:11 +0100
Subject: [PATCH 7/7] Avoids skipping some tests

---
 test/test_transforms_v2.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index 628fdaf6727..07e3d75df6d 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -663,8 +663,7 @@ def test_kernel_video(self):
         [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
     )
     def test_functional(self, size, make_input):
-        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=True, size=size)):
-            return
+        max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)
 
         check_functional(
             F.resize,
@@ -704,8 +703,7 @@ def test_functional_signature(self, kernel, input_type):
         ],
     )
     def test_transform(self, size, device, make_input):
-        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=True, size=size)):
-            return
+        max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)
 
         check_transform(
             transforms.Resize(size=size, **max_size_kwarg, antialias=True),
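Taken together, the series pins down three ``ValueError`` paths for the ``size``/``max_size`` combination, mirrored by ``test_max_size_error`` and the extra check from patch 5. A condensed reproduction (a sketch assuming the full series is applied; the match fragments are the ones asserted in the tests):

    import pytest
    import torch
    from torchvision.transforms.v2 import functional as F

    img = torch.rand(3, 17, 11)

    # size=None requires an integer max_size
    with pytest.raises(ValueError, match="max_size must be an integer when size is None"):
        F.resize(img, size=None, max_size=None, antialias=True)

    # a full (h, w) size is incompatible with max_size
    with pytest.raises(ValueError, match="max_size should only be passed if size is None or specifies"):
        F.resize(img, size=[12, 13], max_size=500, antialias=True)

    # when size sets the smaller edge, max_size must be strictly larger than it
    with pytest.raises(ValueError, match="must be strictly greater than the requested size"):
        F.resize(img, size=17, max_size=16, antialias=True)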