Skip to content

Commit

Permalink
Add rotated bounding box formats to transforms
Browse files Browse the repository at this point in the history
Test Plan:
`pytest test/test_transforms_v2.py -vvv -k "TestConvertBoundingBoxFormat"`
  • Loading branch information
AntoineSimoulin committed Jan 23, 2025
1 parent 7dea49d commit c6df4e0
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 28 deletions.
7 changes: 7 additions & 0 deletions test/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ def sample_position(values, max_value):
dtype = dtype or torch.float32

h, w = [torch.randint(1, s, (num_boxes,)) for s in canvas_size]
r = -360 * torch.rand((num_boxes,)) + 180
y = sample_position(h, canvas_size[0])
x = sample_position(w, canvas_size[1])

Expand All @@ -435,6 +436,12 @@ def sample_position(values, max_value):
cx = x + w / 2
cy = y + h / 2
parts = (cx, cy, w, h)
elif format is tv_tensors.BoundingBoxFormat.XYWHR:
parts = (x, y, w, h, r)
elif format is tv_tensors.BoundingBoxFormat.CXCYWHR:
cx = x + w / 2
cy = y + h / 2
parts = (cx, cy, w, h, r)
else:
raise ValueError(f"Format {format} is not supported")

Expand Down
67 changes: 39 additions & 28 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@
from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal


# While we are working on adapting the transform functions
# to rotated and oriented bounding box formats,
# we limit the scope of these tests to the formats
# for which transform functions are already implemented.
# In the future, this global variable will be replaced with `list(tv_tensors.BoundingBoxFormat)`
# to support all available formats.
# Formats exercised by the parametrized transform tests in this module.
SUPPORTED_BOX_FORMATS = [
    tv_tensors.BoundingBoxFormat.XYXY,
    tv_tensors.BoundingBoxFormat.XYWH,
    tv_tensors.BoundingBoxFormat.CXCYWH,
]
# Rotated formats, listed separately from the formats above.
# NOTE: "XYXYR" is not included in this list.
NEW_BOX_FORMATS = [
    tv_tensors.BoundingBoxFormat.XYWHR,
    tv_tensors.BoundingBoxFormat.CXCYWHR,
]

# turns all warnings into errors for this module
pytestmark = [pytest.mark.filterwarnings("error")]

Expand Down Expand Up @@ -626,7 +635,7 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
check_scripted_vs_eager=not isinstance(size, int),
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("size", OUTPUT_SIZES)
@pytest.mark.parametrize("use_max_size", [True, False])
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
Expand Down Expand Up @@ -757,7 +766,7 @@ def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=Non
new_canvas_size=(new_height, new_width),
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("size", OUTPUT_SIZES)
@pytest.mark.parametrize("use_max_size", [True, False])
@pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
Expand Down Expand Up @@ -1003,7 +1012,7 @@ class TestHorizontalFlip:
def test_kernel_image(self, dtype, device):
check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device))

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
Expand Down Expand Up @@ -1072,7 +1081,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes):

return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize(
"fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
)
Expand Down Expand Up @@ -1169,7 +1178,7 @@ def test_kernel_image(self, param, value, dtype, device):
shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"],
center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
Expand Down Expand Up @@ -1318,7 +1327,7 @@ def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate,
),
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
@pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
@pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
Expand Down Expand Up @@ -1346,7 +1355,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s

torch.testing.assert_close(actual, expected)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@pytest.mark.parametrize("seed", list(range(5)))
def test_transform_bounding_boxes_correctness(self, format, center, seed):
Expand Down Expand Up @@ -1453,7 +1462,7 @@ class TestVerticalFlip:
def test_kernel_image(self, dtype, device):
check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device))

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
Expand Down Expand Up @@ -1520,7 +1529,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes):

return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
def test_bounding_boxes_correctness(self, format, fn):
bounding_boxes = make_bounding_boxes(format=format)
Expand Down Expand Up @@ -1589,7 +1598,7 @@ def test_kernel_image(self, param, value, dtype, device):
expand=[False, True],
center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
Expand Down Expand Up @@ -1760,7 +1769,7 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen
bounding_boxes
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
@pytest.mark.parametrize("expand", [False, True])
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
Expand All @@ -1773,7 +1782,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, expand, cent
torch.testing.assert_close(actual, expected)
torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("expand", [False, True])
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@pytest.mark.parametrize("seed", list(range(5)))
Expand Down Expand Up @@ -2694,7 +2703,7 @@ def test_kernel_image(self, param, value, dtype, device):
check_cuda_vs_cpu=dtype is not torch.float16,
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_boxes(self, format, dtype, device):
Expand Down Expand Up @@ -2821,7 +2830,7 @@ def test_kernel_image(self, kwargs, dtype, device):
check_kernel(F.crop_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **kwargs)

@pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel_bounding_box(self, kwargs, format, dtype, device):
Expand Down Expand Up @@ -2971,7 +2980,7 @@ def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, w
)

@pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
Expand All @@ -2984,7 +2993,7 @@ def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device
assert_equal(F.get_size(actual), F.get_size(expected))

@pytest.mark.parametrize("output_size", [(17, 11), (11, 17), (11, 11)])
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("seed", list(range(5)))
Expand Down Expand Up @@ -3507,7 +3516,9 @@ def test_aug_mix_severity_error(self, severity):


class TestConvertBoundingBoxFormat:
old_new_formats = list(itertools.permutations(iter(tv_tensors.BoundingBoxFormat), 2))
old_new_formats = list(itertools.permutations(SUPPORTED_BOX_FORMATS, 2))
old_new_formats += list(itertools.permutations(NEW_BOX_FORMATS, 2))
# old_new_formats = list(itertools.permutations(NEW_BOX_FORMATS, 2))

@pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
def test_kernel(self, old_format, new_format):
Expand All @@ -3518,7 +3529,7 @@ def test_kernel(self, old_format, new_format):
old_format=old_format,
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("inplace", [False, True])
def test_kernel_noop(self, format, inplace):
input = make_bounding_boxes(format=format).as_subclass(torch.Tensor)
Expand Down Expand Up @@ -3563,7 +3574,7 @@ def test_transform(self, old_format, new_format, format_type):
@pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
def test_strings(self, old_format, new_format):
# Non-regression test for https://github.com/pytorch/vision/issues/8258
input = tv_tensors.BoundingBoxes(torch.tensor([[10, 10, 20, 20]]), format=old_format, canvas_size=(50, 50))
input = make_bounding_boxes(format=old_format, canvas_size=(50, 50))
expected = self._reference_convert_bounding_box_format(input, new_format)

old_format = old_format.name
Expand Down Expand Up @@ -3728,7 +3739,7 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h
new_canvas_size=size,
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
def test_functional_bounding_boxes_correctness(self, format):
bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)

Expand Down Expand Up @@ -3796,7 +3807,7 @@ def test_kernel_image(self, param, value, dtype, device):
),
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
def test_kernel_bounding_boxes(self, format):
bounding_boxes = make_bounding_boxes(format=format)
check_kernel(
Expand Down Expand Up @@ -3915,7 +3926,7 @@ def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
)

@pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
Expand Down Expand Up @@ -3944,7 +3955,7 @@ def test_kernel_image(self, output_size, dtype, device):
)

@pytest.mark.parametrize("output_size", OUTPUT_SIZES)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
def test_kernel_bounding_boxes(self, output_size, format):
bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
check_kernel(
Expand Down Expand Up @@ -4023,7 +4034,7 @@ def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
)

@pytest.mark.parametrize("output_size", OUTPUT_SIZES)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
@pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
Expand Down Expand Up @@ -4090,7 +4101,7 @@ def test_kernel_image_error(self):
coefficients=COEFFICIENTS,
start_end_points=START_END_POINTS,
)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
def test_kernel_bounding_boxes(self, param, value, format):
if param == "start_end_points":
kwargs = dict(zip(["startpoints", "endpoints"], value))
Expand Down Expand Up @@ -4266,7 +4277,7 @@ def perspective_bounding_boxes(bounding_boxes):
)

@pytest.mark.parametrize(("startpoints", "endpoints"), START_END_POINTS)
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
Expand Down Expand Up @@ -4473,7 +4484,7 @@ def test_correctness_image(self, mean, std, dtype, fn):


class TestClampBoundingBoxes:
@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
@pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
@pytest.mark.parametrize("device", cpu_and_cuda())
def test_kernel(self, format, dtype, device):
Expand All @@ -4485,7 +4496,7 @@ def test_kernel(self, format, dtype, device):
canvas_size=bounding_boxes.canvas_size,
)

@pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
@pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
def test_functional(self, format):
check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format))

Expand Down
31 changes: 31 additions & 0 deletions torchvision/transforms/v2/functional/_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,33 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
return xyxy


def _cxcywhr_to_xywhr(cxcywhr: torch.Tensor, inplace: bool) -> torch.Tensor:
    """Convert rotated boxes from (cx, cy, w, h, r) to (x, y, w, h, r).

    The last dimension is indexed as ``[cx, cy, w, h, r]`` (assumes it has
    exactly five entries). ``r`` is multiplied by ``pi / 180`` before the
    trigonometric functions are applied, i.e. it is treated as degrees.
    Only the first two entries are modified; ``w``, ``h`` and ``r`` are left
    untouched.

    Args:
        cxcywhr: Tensor of boxes to convert.
        inplace: If ``False`` the input is cloned first; otherwise the input
            tensor itself is modified and returned.

    Returns:
        The tensor holding the converted boxes.
    """
    boxes = cxcywhr if inplace else cxcywhr.clone()

    # Integer tensors need an explicit rounding mode to stay integral.
    rounding_mode = None if boxes.is_floating_point() else "floor"
    half_wh = boxes[..., 2:-1].div(-2, rounding_mode=rounding_mode).abs_()
    half_w = half_wh[..., 0]
    half_h = half_wh[..., 1]

    angle_rad = boxes[..., 4].mul(torch.pi).div(180.0)
    cos = angle_rad.cos()
    sin = angle_rad.sin()

    # x1 = cx - (width / 2 * cos + height / 2 * sin)
    x_offset = half_w.mul(cos).add(half_h.mul(sin))
    boxes[..., 0].sub_(x_offset.to(boxes.dtype))

    # y1 = cy + (width / 2 * sin - height / 2 * cos)
    y_offset = half_w.mul(sin).sub(half_h.mul(cos))
    boxes[..., 1].add_(y_offset.to(boxes.dtype))

    return boxes


def _xywhr_to_cxcywhr(xywhr: torch.Tensor, inplace: bool) -> torch.Tensor:
    """Convert rotated boxes from (x, y, w, h, r) to (cx, cy, w, h, r).

    The last dimension is indexed as ``[x, y, w, h, r]`` (assumes it has
    exactly five entries). ``r`` is multiplied by ``pi / 180`` before the
    trigonometric functions are applied, i.e. it is treated as degrees.
    Only the first two entries are modified; ``w``, ``h`` and ``r`` are left
    untouched.

    Args:
        xywhr: Tensor of boxes to convert.
        inplace: If ``False`` the input is cloned first; otherwise the input
            tensor itself is modified and returned.

    Returns:
        The tensor holding the converted boxes.
    """
    boxes = xywhr if inplace else xywhr.clone()

    # Integer tensors need an explicit rounding mode to stay integral.
    rounding_mode = None if boxes.is_floating_point() else "floor"
    half_wh = boxes[..., 2:-1].div(-2, rounding_mode=rounding_mode).abs_()
    half_w = half_wh[..., 0]
    half_h = half_wh[..., 1]

    angle_rad = boxes[..., 4].mul(torch.pi).div(180.0)
    cos = angle_rad.cos()
    sin = angle_rad.sin()

    # cx = x1 + (width / 2 * cos + height / 2 * sin)
    x_offset = half_w.mul(cos).add(half_h.mul(sin))
    boxes[..., 0].add_(x_offset.to(boxes.dtype))

    # cy = y1 + (height / 2 * cos - width / 2 * sin)
    y_offset = half_h.mul(cos).sub(half_w.mul(sin))
    boxes[..., 1].add_(y_offset.to(boxes.dtype))

    return boxes

def _convert_bounding_box_format(
bounding_boxes: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, inplace: bool = False
) -> torch.Tensor:
Expand All @@ -188,11 +215,15 @@ def _convert_bounding_box_format(
bounding_boxes = _xywh_to_xyxy(bounding_boxes, inplace)
elif old_format == BoundingBoxFormat.CXCYWH:
bounding_boxes = _cxcywh_to_xyxy(bounding_boxes, inplace)
elif old_format == BoundingBoxFormat.CXCYWHR:
bounding_boxes = _cxcywhr_to_xywhr(bounding_boxes, inplace)

if new_format == BoundingBoxFormat.XYWH:
bounding_boxes = _xyxy_to_xywh(bounding_boxes, inplace)
elif new_format == BoundingBoxFormat.CXCYWH:
bounding_boxes = _xyxy_to_cxcywh(bounding_boxes, inplace)
elif new_format == BoundingBoxFormat.CXCYWHR:
bounding_boxes = _xywhr_to_cxcywhr(bounding_boxes, inplace)

return bounding_boxes

Expand Down

0 comments on commit c6df4e0

Please sign in to comment.