Allow v2 Resize to resize longer edge exactly to max_size #8459

Merged · 8 commits · Jun 5, 2024
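In short, this PR lets v2 Resize (and F.resize) accept size=None together with an integer max_size, scaling the input so that its longer edge lands exactly on max_size. A minimal sketch of the new behavior (the shapes are illustrative):

    import torch
    from torchvision.transforms import v2
    from torchvision.transforms.v2 import functional as F

    img = torch.randint(0, 256, (3, 400, 600), dtype=torch.uint8)

    # New: size=None means "scale so the longer edge equals max_size".
    out = F.resize(img, size=None, max_size=300)
    print(out.shape)  # torch.Size([3, 200, 300])

    # The transform class accepts the same combination.
    print(v2.Resize(size=None, max_size=300)(img).shape)  # torch.Size([3, 200, 300])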
63 changes: 54 additions & 9 deletions test/test_transforms_v2.py
@@ -99,7 +99,7 @@ def _script(obj):
return torch.jit.script(obj)
except Exception as error:
name = getattr(obj, "__name__", obj.__class__.__name__)
raise AssertionError(f"Trying to `torch.jit.script` '{name}' raised the error above.") from error
raise AssertionError(f"Trying to `torch.jit.script` `{name}` raised the error above.") from error


def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
@@ -553,10 +553,12 @@ def affine_bounding_boxes(bounding_boxes):

class TestResize:
INPUT_SIZE = (17, 11)
-    OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)]
+    OUTPUT_SIZES = [17, [17], (17,), None, [12, 13], (12, 13)]

def _make_max_size_kwarg(self, *, use_max_size, size):
-        if use_max_size:
+        if size is None:
+            max_size = min(list(self.INPUT_SIZE))
+        elif use_max_size:
if not (isinstance(size, int) or len(size) == 1):
                # This would result in a `ValueError`
return None
@@ -568,10 +570,13 @@ def _make_max_size_kwarg(self, *, use_max_size, size):
return dict(max_size=max_size)

def _compute_output_size(self, *, input_size, size, max_size):
-        if not (isinstance(size, int) or len(size) == 1):
+        if size is None:
+            size = max_size
+
+        elif not (isinstance(size, int) or len(size) == 1):
return tuple(size)

-        if not isinstance(size, int):
+        elif not isinstance(size, int):
size = size[0]

old_height, old_width = input_size
@@ -658,10 +663,13 @@ def test_kernel_video(self):
[make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video],
)
def test_functional(self, size, make_input):
+        max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)
+
check_functional(
F.resize,
make_input(self.INPUT_SIZE),
size=size,
+            **max_size_kwarg,
antialias=True,
check_scripted_smoke=not isinstance(size, int),
)
@@ -695,11 +703,13 @@ def test_functional_signature(self, kernel, input_type):
],
)
def test_transform(self, size, device, make_input):
+        max_size_kwarg = self._make_max_size_kwarg(use_max_size=size is None, size=size)
+
check_transform(
-            transforms.Resize(size=size, antialias=True),
+            transforms.Resize(size=size, **max_size_kwarg, antialias=True),
make_input(self.INPUT_SIZE, device=device),
            # atol=1 because Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
-            check_v1_compatibility=dict(rtol=0, atol=1),
+            check_v1_compatibility=dict(rtol=0, atol=1) if size is not None else False,
)

def _check_output_size(self, input, output, *, size, max_size):
@@ -801,7 +811,11 @@ def test_functional_pil_antialias_warning(self):
],
)
def test_max_size_error(self, size, make_input):
-        if isinstance(size, int) or len(size) == 1:
+        if size is None:
+            # value can be anything other than an integer
+            max_size = None
+            match = "max_size must be an integer when size is None"
+        elif isinstance(size, int) or len(size) == 1:
max_size = (size if isinstance(size, int) else size[0]) - 1
match = "must be strictly greater than the requested size"
else:
@@ -812,6 +826,37 @@ def test_max_size_error(self, size, make_input):
with pytest.raises(ValueError, match=match):
F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True)

+        if isinstance(size, list) and len(size) != 1:
+            with pytest.raises(ValueError, match="max_size should only be passed if size is None or specifies"):
+                F.resize(make_input(self.INPUT_SIZE), size=size, max_size=500)
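For illustration, the two failure modes exercised above surface like this (a sketch reusing the error messages asserted in the test):

    # size=None requires an integer max_size:
    F.resize(img, size=None, max_size=None)
    # ValueError: max_size must be an integer when size is None, but got None instead.

    # max_size is rejected when size already fixes both edges:
    F.resize(img, size=[12, 13], max_size=500)
    # ValueError: max_size should only be passed if size is None or specifies the length of the smaller edge, ...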

+    @pytest.mark.parametrize(
+        "input_size, max_size, expected_size",
+        [
+            ((10, 10), 10, (10, 10)),
+            ((10, 20), 40, (20, 40)),
+            ((20, 10), 40, (40, 20)),
+            ((10, 20), 10, (5, 10)),
+            ((20, 10), 10, (10, 5)),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "make_input",
+        [
+            make_image_tensor,
+            make_image_pil,
+            make_image,
+            make_bounding_boxes,
+            make_segmentation_mask,
+            make_detection_masks,
+            make_video,
+        ],
+    )
+    def test_resize_size_none(self, input_size, max_size, expected_size, make_input):
+        img = make_input(input_size)
+        out = F.resize(img, size=None, max_size=max_size)
+        assert F.get_size(out)[-2:] == list(expected_size)
+
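The expected sizes above follow from the longer-edge rule alone; a standalone sketch of the arithmetic (mirroring, not calling, the library helper):

    def longer_edge_resize(h, w, max_size):
        # Pin the longer edge to max_size, keeping the aspect ratio; returns (h, w).
        short, long = (w, h) if w <= h else (h, w)
        new_short, new_long = int(max_size * short / long), max_size
        return (new_long, new_short) if w <= h else (new_short, new_long)

    assert longer_edge_resize(10, 20, 40) == (20, 40)
    assert longer_edge_resize(20, 10, 10) == (10, 5)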
@pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
@pytest.mark.parametrize(
"make_input",
@@ -834,7 +879,7 @@ def test_interpolation_int(self, interpolation, make_input):
assert_equal(actual, expected)

def test_transform_unknown_size_error(self):
with pytest.raises(ValueError, match="size can either be an integer or a sequence of one or two integers"):
with pytest.raises(ValueError, match="size can be an integer, a sequence of one or two integers, or None"):
transforms.Resize(size=object())

@pytest.mark.parametrize(
19 changes: 14 additions & 5 deletions torchvision/transforms/functional.py
@@ -351,13 +351,22 @@ def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool


def _compute_resized_output_size(
-    image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
+    image_size: Tuple[int, int],
+    size: Optional[List[int]],
+    max_size: Optional[int] = None,
+    allow_size_none: bool = False,  # only True in v2
) -> List[int]:
-    if len(size) == 1:  # specified size only for the smallest edge
-        h, w = image_size
-        short, long = (w, h) if w <= h else (h, w)
+    h, w = image_size
+    short, long = (w, h) if w <= h else (h, w)
+    if size is None:
+        if not allow_size_none:
+            raise ValueError("This should never happen!!")
+        if not isinstance(max_size, int):
+            raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.")
+        new_short, new_long = int(max_size * short / long), max_size
+        new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
+    elif len(size) == 1:  # specified size only for the smallest edge
requested_new_short = size if isinstance(size, int) else size[0]

new_short, new_long = requested_new_short, int(requested_new_short * long / short)

if max_size is not None:
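With this helper in place, the longer-edge cap that previously required an intermediate smaller-edge size can be requested directly; a sketch comparing the two paths (output sizes are illustrative):

    import torch
    from torchvision.transforms.v2 import functional as F

    img = torch.rand(3, 400, 600)

    # Existing path: smaller edge -> 256, then the longer edge is capped at 300.
    print(F.get_size(F.resize(img, size=256, max_size=300)))  # [200, 300]

    # New path: the longer edge is pinned to 300 in a single step.
    print(F.get_size(F.resize(img, size=None, max_size=300)))  # [200, 300]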
43 changes: 29 additions & 14 deletions torchvision/transforms/v2/_geometry.py
@@ -75,11 +75,15 @@ class Resize(Transform):
the image can have ``[..., C, H, W]`` shape. A bounding box can have ``[..., 4]`` shape.

Args:
size (sequence or int): Desired output size. If size is a sequence like
(h, w), output size will be matched to this. If size is an int,
smaller edge of the image will be matched to this number.
i.e, if height > width, then image will be rescaled to
(size * height / width, size).
size (sequence, int, or None): Desired
output size.

- If size is a sequence like (h, w), output size will be matched to this.
- If size is an int, smaller edge of the image will be matched to this
number. i.e, if height > width, then image will be rescaled to
(size * height / width, size).
- If size is None, the output shape is determined by the ``max_size``
parameter.

.. note::
In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
@@ -89,13 +93,21 @@ class Resize(Transform):
``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are supported.
The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
max_size (int, optional): The maximum allowed for the longer edge of
-            the resized image. If the longer edge of the image is greater
-            than ``max_size`` after being resized according to ``size``,
-            ``size`` will be overruled so that the longer edge is equal to
-            ``max_size``.
-            As a result, the smaller edge may be shorter than ``size``. This
-            is only supported if ``size`` is an int (or a sequence of length
-            1 in torchscript mode).
+            the resized image.
+
+            - If ``size`` is an int: if the longer edge of the image is greater
+              than ``max_size`` after being resized according to ``size``,
+              ``size`` will be overruled so that the longer edge is equal to
+              ``max_size``. As a result, the smaller edge may be shorter than
+              ``size``. This is only supported if ``size`` is an int (or a
+              sequence of length 1 in torchscript mode).
+            - If ``size`` is None: the longer edge of the image will be matched
+              to max_size. i.e, if height > width, then image will be rescaled
+              to (max_size, max_size * width / height).
+
+            This should be left to ``None`` (default) when ``size`` is a
+            sequence.

antialias (bool, optional): Whether to apply antialiasing.
It only affects **tensors** with bilinear or bicubic modes and it is
ignored otherwise: on PIL images, antialiasing is always applied on
@@ -120,7 +132,7 @@ class Resize(Transform):

def __init__(
self,
-        size: Union[int, Sequence[int]],
+        size: Union[int, Sequence[int], None],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None,
antialias: Optional[bool] = True,
@@ -131,9 +143,12 @@ def __init__(
size = [size]
elif isinstance(size, Sequence) and len(size) in {1, 2}:
size = list(size)
+        elif size is None:
+            if not isinstance(max_size, int):
+                raise ValueError(f"max_size must be an integer when size is None, but got {max_size} instead.")
else:
raise ValueError(
f"size can either be an integer or a sequence of one or two integers, but got {size} instead."
f"size can be an integer, a sequence of one or two integers, or None, but got {size} instead."
)
self.size = size

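A typical use of the new combination in a v2 pipeline, e.g. to bound image size for detection-style preprocessing without fixing the smaller edge (the pipeline itself is illustrative):

    import torch
    from torchvision.transforms import v2

    transform = v2.Compose([
        v2.Resize(size=None, max_size=1333),  # longer edge becomes exactly 1333 px
        v2.ToDtype(torch.float32, scale=True),
    ])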
23 changes: 13 additions & 10 deletions torchvision/transforms/v2/functional/_geometry.py
@@ -159,21 +159,21 @@ def vertical_flip_video(video: torch.Tensor) -> torch.Tensor:


def _compute_resized_output_size(
-    canvas_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
+    canvas_size: Tuple[int, int], size: Optional[List[int]], max_size: Optional[int] = None
) -> List[int]:
if isinstance(size, int):
size = [size]
-    elif max_size is not None and len(size) != 1:
+    elif max_size is not None and size is not None and len(size) != 1:
raise ValueError(
"max_size should only be passed if size specifies the length of the smaller edge, "
"max_size should only be passed if size is None or specifies the length of the smaller edge, "
"i.e. size should be an int or a sequence of length 1 in torchscript mode."
)
-    return __compute_resized_output_size(canvas_size, size=size, max_size=max_size)
+    return __compute_resized_output_size(canvas_size, size=size, max_size=max_size, allow_size_none=True)


def resize(
inpt: torch.Tensor,
-    size: List[int],
+    size: Optional[List[int]],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None,
antialias: Optional[bool] = True,
@@ -206,7 +206,7 @@ def _do_native_uint8_resize_on_cpu(interpolation: InterpolationMode) -> bool:
@_register_kernel_internal(resize, tv_tensors.Image)
def resize_image(
image: torch.Tensor,
-    size: List[int],
+    size: Optional[List[int]],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None,
antialias: Optional[bool] = True,
@@ -310,7 +310,7 @@ def __resize_image_pil_dispatch(
return _resize_image_pil(image, size=size, interpolation=interpolation, max_size=max_size)


-def resize_mask(mask: torch.Tensor, size: List[int], max_size: Optional[int] = None) -> torch.Tensor:
+def resize_mask(mask: torch.Tensor, size: Optional[List[int]], max_size: Optional[int] = None) -> torch.Tensor:
if mask.ndim < 3:
mask = mask.unsqueeze(0)
needs_squeeze = True
@@ -334,7 +334,10 @@ def _resize_mask_dispatch(


def resize_bounding_boxes(
-    bounding_boxes: torch.Tensor, canvas_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
+    bounding_boxes: torch.Tensor,
+    canvas_size: Tuple[int, int],
+    size: Optional[List[int]],
+    max_size: Optional[int] = None,
) -> Tuple[torch.Tensor, Tuple[int, int]]:
old_height, old_width = canvas_size
new_height, new_width = _compute_resized_output_size(canvas_size, size=size, max_size=max_size)
@@ -353,7 +356,7 @@ def resize_bounding_boxes(

@_register_kernel_internal(resize, tv_tensors.BoundingBoxes, tv_tensor_wrapper=False)
def _resize_bounding_boxes_dispatch(
-    inpt: tv_tensors.BoundingBoxes, size: List[int], max_size: Optional[int] = None, **kwargs: Any
+    inpt: tv_tensors.BoundingBoxes, size: Optional[List[int]], max_size: Optional[int] = None, **kwargs: Any
) -> tv_tensors.BoundingBoxes:
output, canvas_size = resize_bounding_boxes(
inpt.as_subclass(torch.Tensor), inpt.canvas_size, size, max_size=max_size
@@ -364,7 +367,7 @@ def _resize_bounding_boxes_dispatch(
@_register_kernel_internal(resize, tv_tensors.Video)
def resize_video(
video: torch.Tensor,
-    size: List[int],
+    size: Optional[List[int]],
interpolation: Union[InterpolationMode, int] = InterpolationMode.BILINEAR,
max_size: Optional[int] = None,
antialias: Optional[bool] = True,
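For reference, the box kernel above only needs the output canvas size; the coordinate update then reduces to two per-axis ratios. A simplified sketch of that idea (not the exact kernel):

    import torch

    def scale_boxes_xyxy(boxes, old_size, new_size):
        # boxes: (N, 4) in xyxy order; sizes are (h, w).
        (old_h, old_w), (new_h, new_w) = old_size, new_size
        ratios = torch.tensor([new_w / old_w, new_h / old_h] * 2)
        return boxes * ratios

    boxes = torch.tensor([[10.0, 10.0, 20.0, 20.0]])
    print(scale_boxes_xyxy(boxes, (10, 20), (20, 40)))  # tensor([[20., 20., 40., 40.]])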