import warnings

import torch

warnings.warn(
    "The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. "
    "Please use the 'torchvision.transforms.functional' module instead."
)


def _is_tensor_video_clip(clip):
    if not torch.is_tensor(clip):
        raise TypeError("clip should be Tensor. Got %s" % type(clip))

    if not clip.ndimension() == 4:
        raise ValueError("clip should be 4D. Got %dD" % clip.dim())

    return True


def crop(clip, i, j, h, w):
    """
    Crop an (h, w) region from every frame, with (i, j) the row/column of the upper-left corner.

    Args:
        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
    """
    if len(clip.size()) != 4:
        raise ValueError("clip should be a 4D tensor")
    return clip[..., i : i + h, j : j + w]


def resize(clip, target_size, interpolation_mode):
    if len(target_size) != 2:
        raise ValueError(f"target size should be tuple (height, width), instead got {target_size}")
    return torch.nn.functional.interpolate(clip, size=target_size, mode=interpolation_mode, align_corners=False)
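
# Usage sketch (shapes assumed for illustration): for a float clip of shape
# (3, 16, 240, 320), resize(clip, (112, 112), "bilinear") returns a
# (3, 16, 112, 112) clip. torch.nn.functional.interpolate treats the first two
# dimensions as batch and channel, so C and T are left untouched and only the
# spatial dimensions H, W are resized.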


def resized_crop(clip, i, j, h, w, size, interpolation_mode="bilinear"):
    """
    Do spatial cropping and resizing of the video clip.

    Args:
        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        i (int): Row index of the upper-left corner of the crop.
        j (int): Column index of the upper-left corner of the crop.
        h (int): Height of the cropped region.
        w (int): Width of the cropped region.
        size (tuple(int, int)): Height and width of the resized clip.
    Returns:
        clip (torch.tensor): Resized and cropped clip. Size is (C, T, size[0], size[1])
    """
    if not _is_tensor_video_clip(clip):
        raise ValueError("clip should be a 4D torch.tensor")
    clip = crop(clip, i, j, h, w)
    clip = resize(clip, size, interpolation_mode)
    return clip


def center_crop(clip, crop_size):
    """
    Crop a region of size crop_size = (th, tw) from the spatial center of every frame.

    Args:
        clip (torch.tensor): Video clip to be cropped. Size is (C, T, H, W)
        crop_size (tuple(int, int)): Height and width of the crop.
    """
    if not _is_tensor_video_clip(clip):
        raise ValueError("clip should be a 4D torch.tensor")
    h, w = clip.size(-2), clip.size(-1)
    th, tw = crop_size
    if h < th or w < tw:
        raise ValueError("height and width must be no smaller than crop_size")

    i = int(round((h - th) / 2.0))
    j = int(round((w - tw) / 2.0))
    return crop(clip, i, j, th, tw)


def to_tensor(clip):
    """
    Convert the clip from uint8 to float, scale values to [0, 1] by dividing by 255.0,
    and permute the dimensions from (T, H, W, C) to (C, T, H, W).

    Args:
        clip (torch.tensor, dtype=torch.uint8): Size is (T, H, W, C)
    Return:
        clip (torch.tensor, dtype=torch.float): Size is (C, T, H, W)
    """
    _is_tensor_video_clip(clip)
    if not clip.dtype == torch.uint8:
        raise TypeError("clip tensor should have data type uint8. Got %s" % str(clip.dtype))
    return clip.float().permute(3, 0, 1, 2) / 255.0
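
# Example (values assumed): a uint8 clip of shape (16, 240, 320, 3), i.e.
# (T, H, W, C), becomes a float clip of shape (3, 16, 240, 320) with values
# in [0.0, 1.0].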


def normalize(clip, mean, std, inplace=False):
    """
    Normalize each channel of the clip: (clip - mean) / std.

    Args:
        clip (torch.tensor): Video clip to be normalized. Size is (C, T, H, W)
        mean (tuple): pixel RGB mean. Size is (3)
        std (tuple): pixel standard deviation. Size is (3)
    Returns:
        normalized clip (torch.tensor): Size is (C, T, H, W)
    """
    if not _is_tensor_video_clip(clip):
        raise ValueError("clip should be a 4D torch.tensor")
    if not inplace:
        clip = clip.clone()
    mean = torch.as_tensor(mean, dtype=clip.dtype, device=clip.device)
    std = torch.as_tensor(std, dtype=clip.dtype, device=clip.device)
    # Reshape mean/std from (C,) to (C, 1, 1, 1) so they broadcast over (C, T, H, W).
    clip.sub_(mean[:, None, None, None]).div_(std[:, None, None, None])
    return clip


def hflip(clip):
    """
    Flip the clip horizontally, i.e. along the width dimension.

    Args:
        clip (torch.tensor): Video clip to be flipped. Size is (C, T, H, W)
    Returns:
        flipped clip (torch.tensor): Size is (C, T, H, W)
    """
    if not _is_tensor_video_clip(clip):
        raise ValueError("clip should be a 4D torch.tensor")
    return clip.flip(-1)
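

# Minimal end-to-end sketch (the shapes and parameter values below are
# illustrative assumptions, not part of the library API), showing how the
# helpers above compose on a fake clip of random data.
if __name__ == "__main__":
    # Fake uint8 video: 8 frames of 128x160 RGB, layout (T, H, W, C).
    raw = torch.randint(0, 256, (8, 128, 160, 3), dtype=torch.uint8)

    clip = to_tensor(raw)  # (3, 8, 128, 160), float in [0, 1]
    clip = resized_crop(clip, 10, 20, 100, 120, (112, 112), "bilinear")
    clip = normalize(clip, mean=(0.45, 0.45, 0.45), std=(0.225, 0.225, 0.225))
    clip = hflip(clip)

    print(clip.shape)  # torch.Size([3, 8, 112, 112])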