instance.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. # Ultralytics YOLO 🚀, AGPL-3.0 license
  2. from collections import abc
  3. from itertools import repeat
  4. from numbers import Number
  5. from typing import List
  6. import numpy as np
  7. from .ops import ltwh2xywh, ltwh2xyxy, resample_segments, xywh2ltwh, xywh2xyxy, xyxy2ltwh, xyxy2xywh
  8. def _ntuple(n):
  9. """From PyTorch internals."""
  10. def parse(x):
  11. """Parse bounding boxes format between XYWH and LTWH."""
  12. return x if isinstance(x, abc.Iterable) else tuple(repeat(x, n))
  13. return parse
  14. to_2tuple = _ntuple(2)
  15. to_4tuple = _ntuple(4)
  16. # `xyxy` means left top and right bottom
  17. # `xywh` means center x, center y and width, height(YOLO format)
  18. # `ltwh` means left top and width, height(COCO format)
  19. _formats = ['xyxy', 'xywh', 'ltwh']
  20. __all__ = 'Bboxes', # tuple or list
  21. class Bboxes:
  22. """Bounding Boxes class. Only numpy variables are supported."""
  23. def __init__(self, bboxes, format='xyxy') -> None:
  24. assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
  25. bboxes = bboxes[None, :] if bboxes.ndim == 1 else bboxes
  26. assert bboxes.ndim == 2
  27. assert bboxes.shape[1] == 4
  28. self.bboxes = bboxes
  29. self.format = format
  30. # self.normalized = normalized
  31. def convert(self, format):
  32. """Converts bounding box format from one type to another."""
  33. assert format in _formats, f'Invalid bounding box format: {format}, format must be one of {_formats}'
  34. if self.format == format:
  35. return
  36. elif self.format == 'xyxy':
  37. func = xyxy2xywh if format == 'xywh' else xyxy2ltwh
  38. elif self.format == 'xywh':
  39. func = xywh2xyxy if format == 'xyxy' else xywh2ltwh
  40. else:
  41. func = ltwh2xyxy if format == 'xyxy' else ltwh2xywh
  42. self.bboxes = func(self.bboxes)
  43. self.format = format
  44. def areas(self):
  45. """Return box areas."""
  46. self.convert('xyxy')
  47. return (self.bboxes[:, 2] - self.bboxes[:, 0]) * (self.bboxes[:, 3] - self.bboxes[:, 1])
  48. # def denormalize(self, w, h):
  49. # if not self.normalized:
  50. # return
  51. # assert (self.bboxes <= 1.0).all()
  52. # self.bboxes[:, 0::2] *= w
  53. # self.bboxes[:, 1::2] *= h
  54. # self.normalized = False
  55. #
  56. # def normalize(self, w, h):
  57. # if self.normalized:
  58. # return
  59. # assert (self.bboxes > 1.0).any()
  60. # self.bboxes[:, 0::2] /= w
  61. # self.bboxes[:, 1::2] /= h
  62. # self.normalized = True
  63. def mul(self, scale):
  64. """
  65. Args:
  66. scale (tuple | list | int): the scale for four coords.
  67. """
  68. if isinstance(scale, Number):
  69. scale = to_4tuple(scale)
  70. assert isinstance(scale, (tuple, list))
  71. assert len(scale) == 4
  72. self.bboxes[:, 0] *= scale[0]
  73. self.bboxes[:, 1] *= scale[1]
  74. self.bboxes[:, 2] *= scale[2]
  75. self.bboxes[:, 3] *= scale[3]
  76. def add(self, offset):
  77. """
  78. Args:
  79. offset (tuple | list | int): the offset for four coords.
  80. """
  81. if isinstance(offset, Number):
  82. offset = to_4tuple(offset)
  83. assert isinstance(offset, (tuple, list))
  84. assert len(offset) == 4
  85. self.bboxes[:, 0] += offset[0]
  86. self.bboxes[:, 1] += offset[1]
  87. self.bboxes[:, 2] += offset[2]
  88. self.bboxes[:, 3] += offset[3]
  89. def __len__(self):
  90. """Return the number of boxes."""
  91. return len(self.bboxes)
  92. @classmethod
  93. def concatenate(cls, boxes_list: List['Bboxes'], axis=0) -> 'Bboxes':
  94. """
  95. Concatenate a list of Bboxes objects into a single Bboxes object.
  96. Args:
  97. boxes_list (List[Bboxes]): A list of Bboxes objects to concatenate.
  98. axis (int, optional): The axis along which to concatenate the bounding boxes.
  99. Defaults to 0.
  100. Returns:
  101. Bboxes: A new Bboxes object containing the concatenated bounding boxes.
  102. Note:
  103. The input should be a list or tuple of Bboxes objects.
  104. """
  105. assert isinstance(boxes_list, (list, tuple))
  106. if not boxes_list:
  107. return cls(np.empty(0))
  108. assert all(isinstance(box, Bboxes) for box in boxes_list)
  109. if len(boxes_list) == 1:
  110. return boxes_list[0]
  111. return cls(np.concatenate([b.bboxes for b in boxes_list], axis=axis))
  112. def __getitem__(self, index) -> 'Bboxes':
  113. """
  114. Retrieve a specific bounding box or a set of bounding boxes using indexing.
  115. Args:
  116. index (int, slice, or np.ndarray): The index, slice, or boolean array to select
  117. the desired bounding boxes.
  118. Returns:
  119. Bboxes: A new Bboxes object containing the selected bounding boxes.
  120. Raises:
  121. AssertionError: If the indexed bounding boxes do not form a 2-dimensional matrix.
  122. Note:
  123. When using boolean indexing, make sure to provide a boolean array with the same
  124. length as the number of bounding boxes.
  125. """
  126. if isinstance(index, int):
  127. return Bboxes(self.bboxes[index].view(1, -1))
  128. b = self.bboxes[index]
  129. assert b.ndim == 2, f'Indexing on Bboxes with {index} failed to return a matrix!'
  130. return Bboxes(b)
  131. class Instances:
  132. def __init__(self, bboxes, segments=None, keypoints=None, bbox_format='xywh', normalized=True) -> None:
  133. """
  134. Args:
  135. bboxes (ndarray): bboxes with shape [N, 4].
  136. segments (list | ndarray): segments.
  137. keypoints (ndarray): keypoints(x, y, visible) with shape [N, 17, 3].
  138. """
  139. if segments is None:
  140. segments = []
  141. self._bboxes = Bboxes(bboxes=bboxes, format=bbox_format)
  142. self.keypoints = keypoints
  143. self.normalized = normalized
  144. if len(segments) > 0:
  145. # list[np.array(1000, 2)] * num_samples
  146. segments = resample_segments(segments)
  147. # (N, 1000, 2)
  148. segments = np.stack(segments, axis=0)
  149. else:
  150. segments = np.zeros((0, 1000, 2), dtype=np.float32)
  151. self.segments = segments
  152. def convert_bbox(self, format):
  153. """Convert bounding box format."""
  154. self._bboxes.convert(format=format)
  155. @property
  156. def bbox_areas(self):
  157. """Calculate the area of bounding boxes."""
  158. return self._bboxes.areas()
  159. def scale(self, scale_w, scale_h, bbox_only=False):
  160. """this might be similar with denormalize func but without normalized sign."""
  161. self._bboxes.mul(scale=(scale_w, scale_h, scale_w, scale_h))
  162. if bbox_only:
  163. return
  164. self.segments[..., 0] *= scale_w
  165. self.segments[..., 1] *= scale_h
  166. if self.keypoints is not None:
  167. self.keypoints[..., 0] *= scale_w
  168. self.keypoints[..., 1] *= scale_h
  169. def denormalize(self, w, h):
  170. """Denormalizes boxes, segments, and keypoints from normalized coordinates."""
  171. if not self.normalized:
  172. return
  173. self._bboxes.mul(scale=(w, h, w, h))
  174. self.segments[..., 0] *= w
  175. self.segments[..., 1] *= h
  176. if self.keypoints is not None:
  177. self.keypoints[..., 0] *= w
  178. self.keypoints[..., 1] *= h
  179. self.normalized = False
  180. def normalize(self, w, h):
  181. """Normalize bounding boxes, segments, and keypoints to image dimensions."""
  182. if self.normalized:
  183. return
  184. self._bboxes.mul(scale=(1 / w, 1 / h, 1 / w, 1 / h))
  185. self.segments[..., 0] /= w
  186. self.segments[..., 1] /= h
  187. if self.keypoints is not None:
  188. self.keypoints[..., 0] /= w
  189. self.keypoints[..., 1] /= h
  190. self.normalized = True
  191. def add_padding(self, padw, padh):
  192. """Handle rect and mosaic situation."""
  193. assert not self.normalized, 'you should add padding with absolute coordinates.'
  194. self._bboxes.add(offset=(padw, padh, padw, padh))
  195. self.segments[..., 0] += padw
  196. self.segments[..., 1] += padh
  197. if self.keypoints is not None:
  198. self.keypoints[..., 0] += padw
  199. self.keypoints[..., 1] += padh
  200. def __getitem__(self, index) -> 'Instances':
  201. """
  202. Retrieve a specific instance or a set of instances using indexing.
  203. Args:
  204. index (int, slice, or np.ndarray): The index, slice, or boolean array to select
  205. the desired instances.
  206. Returns:
  207. Instances: A new Instances object containing the selected bounding boxes,
  208. segments, and keypoints if present.
  209. Note:
  210. When using boolean indexing, make sure to provide a boolean array with the same
  211. length as the number of instances.
  212. """
  213. segments = self.segments[index] if len(self.segments) else self.segments
  214. keypoints = self.keypoints[index] if self.keypoints is not None else None
  215. bboxes = self.bboxes[index]
  216. bbox_format = self._bboxes.format
  217. return Instances(
  218. bboxes=bboxes,
  219. segments=segments,
  220. keypoints=keypoints,
  221. bbox_format=bbox_format,
  222. normalized=self.normalized,
  223. )
  224. def flipud(self, h):
  225. """Flips the coordinates of bounding boxes, segments, and keypoints vertically."""
  226. if self._bboxes.format == 'xyxy':
  227. y1 = self.bboxes[:, 1].copy()
  228. y2 = self.bboxes[:, 3].copy()
  229. self.bboxes[:, 1] = h - y2
  230. self.bboxes[:, 3] = h - y1
  231. else:
  232. self.bboxes[:, 1] = h - self.bboxes[:, 1]
  233. self.segments[..., 1] = h - self.segments[..., 1]
  234. if self.keypoints is not None:
  235. self.keypoints[..., 1] = h - self.keypoints[..., 1]
  236. def fliplr(self, w):
  237. """Reverses the order of the bounding boxes and segments horizontally."""
  238. if self._bboxes.format == 'xyxy':
  239. x1 = self.bboxes[:, 0].copy()
  240. x2 = self.bboxes[:, 2].copy()
  241. self.bboxes[:, 0] = w - x2
  242. self.bboxes[:, 2] = w - x1
  243. else:
  244. self.bboxes[:, 0] = w - self.bboxes[:, 0]
  245. self.segments[..., 0] = w - self.segments[..., 0]
  246. if self.keypoints is not None:
  247. self.keypoints[..., 0] = w - self.keypoints[..., 0]
  248. def clip(self, w, h):
  249. """Clips bounding boxes, segments, and keypoints values to stay within image boundaries."""
  250. ori_format = self._bboxes.format
  251. self.convert_bbox(format='xyxy')
  252. self.bboxes[:, [0, 2]] = self.bboxes[:, [0, 2]].clip(0, w)
  253. self.bboxes[:, [1, 3]] = self.bboxes[:, [1, 3]].clip(0, h)
  254. if ori_format != 'xyxy':
  255. self.convert_bbox(format=ori_format)
  256. self.segments[..., 0] = self.segments[..., 0].clip(0, w)
  257. self.segments[..., 1] = self.segments[..., 1].clip(0, h)
  258. if self.keypoints is not None:
  259. self.keypoints[..., 0] = self.keypoints[..., 0].clip(0, w)
  260. self.keypoints[..., 1] = self.keypoints[..., 1].clip(0, h)
  261. def remove_zero_area_boxes(self):
  262. """Remove zero-area boxes, i.e. after clipping some boxes may have zero width or height. This removes them."""
  263. good = self.bbox_areas > 0
  264. if not all(good):
  265. self._bboxes = self._bboxes[good]
  266. if len(self.segments):
  267. self.segments = self.segments[good]
  268. if self.keypoints is not None:
  269. self.keypoints = self.keypoints[good]
  270. return good
  271. def update(self, bboxes, segments=None, keypoints=None):
  272. """Updates instance variables."""
  273. self._bboxes = Bboxes(bboxes, format=self._bboxes.format)
  274. if segments is not None:
  275. self.segments = segments
  276. if keypoints is not None:
  277. self.keypoints = keypoints
  278. def __len__(self):
  279. """Return the length of the instance list."""
  280. return len(self.bboxes)
  281. @classmethod
  282. def concatenate(cls, instances_list: List['Instances'], axis=0) -> 'Instances':
  283. """
  284. Concatenates a list of Instances objects into a single Instances object.
  285. Args:
  286. instances_list (List[Instances]): A list of Instances objects to concatenate.
  287. axis (int, optional): The axis along which the arrays will be concatenated. Defaults to 0.
  288. Returns:
  289. Instances: A new Instances object containing the concatenated bounding boxes,
  290. segments, and keypoints if present.
  291. Note:
  292. The `Instances` objects in the list should have the same properties, such as
  293. the format of the bounding boxes, whether keypoints are present, and if the
  294. coordinates are normalized.
  295. """
  296. assert isinstance(instances_list, (list, tuple))
  297. if not instances_list:
  298. return cls(np.empty(0))
  299. assert all(isinstance(instance, Instances) for instance in instances_list)
  300. if len(instances_list) == 1:
  301. return instances_list[0]
  302. use_keypoint = instances_list[0].keypoints is not None
  303. bbox_format = instances_list[0]._bboxes.format
  304. normalized = instances_list[0].normalized
  305. cat_boxes = np.concatenate([ins.bboxes for ins in instances_list], axis=axis)
  306. cat_segments = np.concatenate([b.segments for b in instances_list], axis=axis)
  307. cat_keypoints = np.concatenate([b.keypoints for b in instances_list], axis=axis) if use_keypoint else None
  308. return cls(cat_boxes, cat_segments, cat_keypoints, bbox_format, normalized)
  309. @property
  310. def bboxes(self):
  311. """Return bounding boxes."""
  312. return self._bboxes.bboxes