Add rotated bounding box formats to transforms

Test Plan: `pytest test/test_transforms_v2.py -vvv -k "TestConvertBoundingBoxFormat"`
pytorch · Jan 23, 2025 · c6df4e0 · c6df4e0
1 parent 7dea49d
commit c6df4e0
Show file tree

Hide file tree

Showing 3 changed files with 77 additions and 28 deletions.
diff --git a/test/common_utils.py b/test/common_utils.py
@@ -421,6 +421,7 @@ def sample_position(values, max_value):
     dtype = dtype or torch.float32
 
     h, w = [torch.randint(1, s, (num_boxes,)) for s in canvas_size]
+    r = -360 * torch.rand((num_boxes,)) + 180
     y = sample_position(h, canvas_size[0])
     x = sample_position(w, canvas_size[1])
 
@@ -435,6 +436,12 @@ def sample_position(values, max_value):
         cx = x + w / 2
         cy = y + h / 2
         parts = (cx, cy, w, h)
+    elif format is tv_tensors.BoundingBoxFormat.XYWHR:
+        parts = (x, y, w, h, r)
+    elif format is tv_tensors.BoundingBoxFormat.CXCYWHR:
+        cx = x + w / 2
+        cy = y + h / 2
+        parts = (cx, cy, w, h, r)
     else:
         raise ValueError(f"Format {format} is not supported")
 

diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
@@ -53,6 +53,15 @@
 from torchvision.transforms.v2.functional._utils import _get_kernel, _register_kernel_internal
 
 
+# While we are working on adjusting transform functions 
+# for rotated and oriented bounding boxes formats, 
+# we limit the perimeter of tests to formats
+#  for which transform functions are already implemented.
+# In the future, this global variable will be replaced with `list(tv_tensors.BoundingBoxFormat)`
+# to support all available formats.
+SUPPORTED_BOX_FORMATS = [tv_tensors.BoundingBoxFormat[x] for x in ["XYXY", "XYWH", "CXCYWH"]]
+NEW_BOX_FORMATS = [tv_tensors.BoundingBoxFormat[x] for x in ["XYWHR", "CXCYWHR"]]  # XYXYR
+
 # turns all warnings into errors for this module
 pytestmark = [pytest.mark.filterwarnings("error")]
 
@@ -626,7 +635,7 @@ def test_kernel_image(self, size, interpolation, use_max_size, antialias, dtype,
             check_scripted_vs_eager=not isinstance(size, int),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
@@ -757,7 +766,7 @@ def _reference_resize_bounding_boxes(self, bounding_boxes, *, size, max_size=Non
             new_canvas_size=(new_height, new_width),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("size", OUTPUT_SIZES)
     @pytest.mark.parametrize("use_max_size", [True, False])
     @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)])
@@ -1003,7 +1012,7 @@ class TestHorizontalFlip:
     def test_kernel_image(self, dtype, device):
         check_kernel(F.horizontal_flip_image, make_image(dtype=dtype, device=device))
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
@@ -1072,7 +1081,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes):
 
         return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize(
         "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)]
     )
@@ -1169,7 +1178,7 @@ def test_kernel_image(self, param, value, dtype, device):
         shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"],
         center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
     )
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
@@ -1318,7 +1327,7 @@ def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate,
             ),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
     @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
@@ -1346,7 +1355,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s
 
         torch.testing.assert_close(actual, expected)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     @pytest.mark.parametrize("seed", list(range(5)))
     def test_transform_bounding_boxes_correctness(self, format, center, seed):
@@ -1453,7 +1462,7 @@ class TestVerticalFlip:
     def test_kernel_image(self, dtype, device):
         check_kernel(F.vertical_flip_image, make_image(dtype=dtype, device=device))
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
@@ -1520,7 +1529,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes):
 
         return reference_affine_bounding_boxes_helper(bounding_boxes, affine_matrix=affine_matrix)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)])
     def test_bounding_boxes_correctness(self, format, fn):
         bounding_boxes = make_bounding_boxes(format=format)
@@ -1589,7 +1598,7 @@ def test_kernel_image(self, param, value, dtype, device):
         expand=[False, True],
         center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"],
     )
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, param, value, format, dtype, device):
@@ -1760,7 +1769,7 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen
             bounding_boxes
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
@@ -1773,7 +1782,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, expand, cent
         torch.testing.assert_close(actual, expected)
         torch.testing.assert_close(F.get_size(actual), F.get_size(expected), atol=2 if expand else 0, rtol=0)
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("expand", [False, True])
     @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
     @pytest.mark.parametrize("seed", list(range(5)))
@@ -2694,7 +2703,7 @@ def test_kernel_image(self, param, value, dtype, device):
             check_cuda_vs_cpu=dtype is not torch.float16,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_boxes(self, format, dtype, device):
@@ -2821,7 +2830,7 @@ def test_kernel_image(self, kwargs, dtype, device):
         check_kernel(F.crop_image, make_image(self.INPUT_SIZE, dtype=dtype, device=device), **kwargs)
 
     @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel_bounding_box(self, kwargs, format, dtype, device):
@@ -2971,7 +2980,7 @@ def _reference_crop_bounding_boxes(self, bounding_boxes, *, top, left, height, w
         )
 
     @pytest.mark.parametrize("kwargs", CORRECTNESS_CROP_KWARGS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device):
@@ -2984,7 +2993,7 @@ def test_functional_bounding_box_correctness(self, kwargs, format, dtype, device
         assert_equal(F.get_size(actual), F.get_size(expected))
 
     @pytest.mark.parametrize("output_size", [(17, 11), (11, 17), (11, 11)])
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("seed", list(range(5)))
@@ -3507,7 +3516,9 @@ def test_aug_mix_severity_error(self, severity):
 
 
 class TestConvertBoundingBoxFormat:
-    old_new_formats = list(itertools.permutations(iter(tv_tensors.BoundingBoxFormat), 2))
+    old_new_formats = list(itertools.permutations(SUPPORTED_BOX_FORMATS, 2))
+    old_new_formats += list(itertools.permutations(NEW_BOX_FORMATS, 2))
+    # old_new_formats = list(itertools.permutations(NEW_BOX_FORMATS, 2))
 
     @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
     def test_kernel(self, old_format, new_format):
@@ -3518,7 +3529,7 @@ def test_kernel(self, old_format, new_format):
             old_format=old_format,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("inplace", [False, True])
     def test_kernel_noop(self, format, inplace):
         input = make_bounding_boxes(format=format).as_subclass(torch.Tensor)
@@ -3563,7 +3574,7 @@ def test_transform(self, old_format, new_format, format_type):
     @pytest.mark.parametrize(("old_format", "new_format"), old_new_formats)
     def test_strings(self, old_format, new_format):
         # Non-regression test for https://github.com/pytorch/vision/issues/8258
-        input = tv_tensors.BoundingBoxes(torch.tensor([[10, 10, 20, 20]]), format=old_format, canvas_size=(50, 50))
+        input = make_bounding_boxes(format=old_format, canvas_size=(50, 50))
         expected = self._reference_convert_bounding_box_format(input, new_format)
 
         old_format = old_format.name
@@ -3728,7 +3739,7 @@ def _reference_resized_crop_bounding_boxes(self, bounding_boxes, *, top, left, h
             new_canvas_size=size,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_functional_bounding_boxes_correctness(self, format):
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
 
@@ -3796,7 +3807,7 @@ def test_kernel_image(self, param, value, dtype, device):
             ),
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_kernel_bounding_boxes(self, format):
         bounding_boxes = make_bounding_boxes(format=format)
         check_kernel(
@@ -3915,7 +3926,7 @@ def _reference_pad_bounding_boxes(self, bounding_boxes, *, padding):
         )
 
     @pytest.mark.parametrize("padding", CORRECTNESS_PADDINGS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.pad, transform_cls_to_functional(transforms.Pad)])
@@ -3944,7 +3955,7 @@ def test_kernel_image(self, output_size, dtype, device):
         )
 
     @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_kernel_bounding_boxes(self, output_size, format):
         bounding_boxes = make_bounding_boxes(self.INPUT_SIZE, format=format)
         check_kernel(
@@ -4023,7 +4034,7 @@ def _reference_center_crop_bounding_boxes(self, bounding_boxes, output_size):
         )
 
     @pytest.mark.parametrize("output_size", OUTPUT_SIZES)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("fn", [F.center_crop, transform_cls_to_functional(transforms.CenterCrop)])
@@ -4090,7 +4101,7 @@ def test_kernel_image_error(self):
         coefficients=COEFFICIENTS,
         start_end_points=START_END_POINTS,
     )
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_kernel_bounding_boxes(self, param, value, format):
         if param == "start_end_points":
             kwargs = dict(zip(["startpoints", "endpoints"], value))
@@ -4266,7 +4277,7 @@ def perspective_bounding_boxes(bounding_boxes):
         )
 
     @pytest.mark.parametrize(("startpoints", "endpoints"), START_END_POINTS)
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_correctness_perspective_bounding_boxes(self, startpoints, endpoints, format, dtype, device):
@@ -4473,7 +4484,7 @@ def test_correctness_image(self, mean, std, dtype, fn):
 
 
 class TestClampBoundingBoxes:
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     @pytest.mark.parametrize("dtype", [torch.int64, torch.float32])
     @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_kernel(self, format, dtype, device):
@@ -4485,7 +4496,7 @@ def test_kernel(self, format, dtype, device):
             canvas_size=bounding_boxes.canvas_size,
         )
 
-    @pytest.mark.parametrize("format", list(tv_tensors.BoundingBoxFormat))
+    @pytest.mark.parametrize("format", SUPPORTED_BOX_FORMATS)
     def test_functional(self, format):
         check_functional(F.clamp_bounding_boxes, make_bounding_boxes(format=format))
 

diff --git a/torchvision/transforms/v2/functional/_meta.py b/torchvision/transforms/v2/functional/_meta.py
@@ -176,6 +176,33 @@ def _xyxy_to_cxcywh(xyxy: torch.Tensor, inplace: bool) -> torch.Tensor:
     return xyxy
 
 
+def _cxcywhr_to_xywhr(cxcywhr: torch.Tensor, inplace: bool) -> torch.Tensor:
+    if not inplace:
+        cxcywhr = cxcywhr.clone()
+
+    half_wh = cxcywhr[..., 2:-1].div(-2, rounding_mode=None if cxcywhr.is_floating_point() else "floor").abs_()
+    r_rad = cxcywhr[..., 4].mul(torch.pi).div(180.0)
+    # (cx - width / 2 * cos - height / 2 * sin) = x1
+    cxcywhr[..., 0].sub_(half_wh[..., 0].mul(r_rad.cos()).add(half_wh[..., 1].mul(r_rad.sin())).to(cxcywhr.dtype))
+    # (cy + width / 2 * sin - height / 2 * cos) = y1
+    cxcywhr[..., 1].add_(half_wh[..., 0].mul(r_rad.sin()).sub(half_wh[..., 1].mul(r_rad.cos())).to(cxcywhr.dtype))
+
+    return cxcywhr
+
+
+def _xywhr_to_cxcywhr(xywhr: torch.Tensor, inplace: bool) -> torch.Tensor:
+    if not inplace:
+        xywhr = xywhr.clone()
+
+    half_wh = xywhr[..., 2:-1].div(-2, rounding_mode=None if xywhr.is_floating_point() else "floor").abs_()
+    r_rad = xywhr[..., 4].mul(torch.pi).div(180.0)
+    # (x1 + width / 2 * cos + height / 2 * sin) = cx
+    xywhr[..., 0].add_(half_wh[..., 0].mul(r_rad.cos()).add(half_wh[..., 1].mul(r_rad.sin())).to(xywhr.dtype))
+    # (y1 - width / 2 * sin + height / 2 * cos) = cy 
+    xywhr[..., 1].add_(half_wh[..., 1].mul(r_rad.cos()).sub(half_wh[..., 0].mul(r_rad.sin())).to(xywhr.dtype))
+
+    return xywhr
+
 def _convert_bounding_box_format(
     bounding_boxes: torch.Tensor, old_format: BoundingBoxFormat, new_format: BoundingBoxFormat, inplace: bool = False
 ) -> torch.Tensor:
@@ -188,11 +215,15 @@ def _convert_bounding_box_format(
         bounding_boxes = _xywh_to_xyxy(bounding_boxes, inplace)
     elif old_format == BoundingBoxFormat.CXCYWH:
         bounding_boxes = _cxcywh_to_xyxy(bounding_boxes, inplace)
+    elif old_format == BoundingBoxFormat.CXCYWHR:
+        bounding_boxes = _cxcywhr_to_xywhr(bounding_boxes, inplace)
 
     if new_format == BoundingBoxFormat.XYWH:
         bounding_boxes = _xyxy_to_xywh(bounding_boxes, inplace)
     elif new_format == BoundingBoxFormat.CXCYWH:
         bounding_boxes = _xyxy_to_cxcywh(bounding_boxes, inplace)
+    elif new_format == BoundingBoxFormat.CXCYWHR:
+        bounding_boxes = _xywhr_to_cxcywhr(bounding_boxes, inplace)
 
     return bounding_boxes