# _meta.py — meta transforms for bounding boxes (torchvision.transforms.v2)
from typing import Any, Dict, Union

from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F, Transform
  4. class ConvertBoundingBoxFormat(Transform):
  5. """[BETA] Convert bounding box coordinates to the given ``format``, eg from "CXCYWH" to "XYXY".
  6. .. v2betastatus:: ConvertBoundingBoxFormat transform
  7. Args:
  8. format (str or tv_tensors.BoundingBoxFormat): output bounding box format.
  9. Possible values are defined by :class:`~torchvision.tv_tensors.BoundingBoxFormat` and
  10. string values match the enums, e.g. "XYXY" or "XYWH" etc.
  11. """
  12. _transformed_types = (tv_tensors.BoundingBoxes,)
  13. def __init__(self, format: Union[str, tv_tensors.BoundingBoxFormat]) -> None:
  14. super().__init__()
  15. if isinstance(format, str):
  16. format = tv_tensors.BoundingBoxFormat[format]
  17. self.format = format
  18. def _transform(self, inpt: tv_tensors.BoundingBoxes, params: Dict[str, Any]) -> tv_tensors.BoundingBoxes:
  19. return F.convert_bounding_box_format(inpt, new_format=self.format) # type: ignore[return-value]
  20. class ClampBoundingBoxes(Transform):
  21. """[BETA] Clamp bounding boxes to their corresponding image dimensions.
  22. The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.
  23. .. v2betastatus:: ClampBoundingBoxes transform
  24. """
  25. _transformed_types = (tv_tensors.BoundingBoxes,)
  26. def _transform(self, inpt: tv_tensors.BoundingBoxes, params: Dict[str, Any]) -> tv_tensors.BoundingBoxes:
  27. return F.clamp_bounding_boxes(inpt) # type: ignore[return-value]