- import math
- import numbers
- import warnings
- from enum import Enum
- from typing import Any, List, Optional, Tuple, Union
- import numpy as np
- import torch
- from PIL import Image
- from torch import Tensor
- try:
- import accimage
- except ImportError:
- accimage = None
- from ..utils import _log_api_usage_once
- from . import _functional_pil as F_pil, _functional_tensor as F_t
- class InterpolationMode(Enum):
- """Interpolation modes
- Available interpolation methods are ``nearest``, ``nearest-exact``, ``bilinear``, ``bicubic``, ``box``, ``hamming``,
- and ``lanczos``.
- """
- NEAREST = "nearest"
- NEAREST_EXACT = "nearest-exact"
- BILINEAR = "bilinear"
- BICUBIC = "bicubic"
- # For PIL compatibility
- BOX = "box"
- HAMMING = "hamming"
- LANCZOS = "lanczos"
- # TODO: Once torchscript supports Enums with staticmethod
- # this can be put into InterpolationMode as staticmethod
- def _interpolation_modes_from_int(i: int) -> InterpolationMode:
- inverse_modes_mapping = {
- 0: InterpolationMode.NEAREST,
- 2: InterpolationMode.BILINEAR,
- 3: InterpolationMode.BICUBIC,
- 4: InterpolationMode.BOX,
- 5: InterpolationMode.HAMMING,
- 1: InterpolationMode.LANCZOS,
- }
- return inverse_modes_mapping[i]
- pil_modes_mapping = {
- InterpolationMode.NEAREST: 0,
- InterpolationMode.BILINEAR: 2,
- InterpolationMode.BICUBIC: 3,
- InterpolationMode.NEAREST_EXACT: 0,
- InterpolationMode.BOX: 4,
- InterpolationMode.HAMMING: 5,
- InterpolationMode.LANCZOS: 1,
- }
- _is_pil_image = F_pil._is_pil_image
- def get_dimensions(img: Tensor) -> List[int]:
- """Returns the dimensions of an image as [channels, height, width].
- Args:
- img (PIL Image or Tensor): The image to be checked.
- Returns:
- List[int]: The image dimensions.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(get_dimensions)
- if isinstance(img, torch.Tensor):
- return F_t.get_dimensions(img)
- return F_pil.get_dimensions(img)
- def get_image_size(img: Tensor) -> List[int]:
- """Returns the size of an image as [width, height].
- Args:
- img (PIL Image or Tensor): The image to be checked.
- Returns:
- List[int]: The image size.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(get_image_size)
- if isinstance(img, torch.Tensor):
- return F_t.get_image_size(img)
- return F_pil.get_image_size(img)
- def get_image_num_channels(img: Tensor) -> int:
- """Returns the number of channels of an image.
- Args:
- img (PIL Image or Tensor): The image to be checked.
- Returns:
- int: The number of channels.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(get_image_num_channels)
- if isinstance(img, torch.Tensor):
- return F_t.get_image_num_channels(img)
- return F_pil.get_image_num_channels(img)
- @torch.jit.unused
- def _is_numpy(img: Any) -> bool:
- return isinstance(img, np.ndarray)
- @torch.jit.unused
- def _is_numpy_image(img: Any) -> bool:
- return img.ndim in {2, 3}
- def to_tensor(pic) -> Tensor:
- """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
- This function does not support torchscript.
- See :class:`~torchvision.transforms.ToTensor` for more details.
- Args:
- pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
- Returns:
- Tensor: Converted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(to_tensor)
- if not (F_pil._is_pil_image(pic) or _is_numpy(pic)):
- raise TypeError(f"pic should be PIL Image or ndarray. Got {type(pic)}")
- if _is_numpy(pic) and not _is_numpy_image(pic):
- raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
- default_float_dtype = torch.get_default_dtype()
- if isinstance(pic, np.ndarray):
- # handle numpy array
- if pic.ndim == 2:
- pic = pic[:, :, None]
- img = torch.from_numpy(pic.transpose((2, 0, 1))).contiguous()
- # backward compatibility
- if isinstance(img, torch.ByteTensor):
- return img.to(dtype=default_float_dtype).div(255)
- else:
- return img
- if accimage is not None and isinstance(pic, accimage.Image):
- nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
- pic.copyto(nppic)
- return torch.from_numpy(nppic).to(dtype=default_float_dtype)
- # handle PIL Image
- mode_to_nptype = {"I": np.int32, "I;16": np.int16, "F": np.float32}
- img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))
- if pic.mode == "1":
- img = 255 * img
- img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic))
- # put it from HWC to CHW format
- img = img.permute((2, 0, 1)).contiguous()
- if isinstance(img, torch.ByteTensor):
- return img.to(dtype=default_float_dtype).div(255)
- else:
- return img
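- # Usage sketch (illustrative, not part of the module): to_tensor converts an HWC uint8
- # array or PIL image into a float CHW tensor scaled to [0, 1] (assuming the default dtype
- # is float32).
- #
- #   >>> arr = np.random.randint(0, 256, size=(32, 48, 3), dtype=np.uint8)  # H, W, C
- #   >>> t = to_tensor(arr)
- #   >>> t.shape, t.dtype, float(t.max()) <= 1.0
- #   (torch.Size([3, 32, 48]), torch.float32, True)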
- def pil_to_tensor(pic: Any) -> Tensor:
- """Convert a ``PIL Image`` to a tensor of the same type.
- This function does not support torchscript.
- See :class:`~torchvision.transforms.PILToTensor` for more details.
- .. note::
- A deep copy of the underlying array is performed.
- Args:
- pic (PIL Image): Image to be converted to tensor.
- Returns:
- Tensor: Converted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(pil_to_tensor)
- if not F_pil._is_pil_image(pic):
- raise TypeError(f"pic should be PIL Image. Got {type(pic)}")
- if accimage is not None and isinstance(pic, accimage.Image):
- # accimage format is always uint8 internally, so always return uint8 here
- nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.uint8)
- pic.copyto(nppic)
- return torch.as_tensor(nppic)
- # handle PIL Image
- img = torch.as_tensor(np.array(pic, copy=True))
- img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic))
- # put it from HWC to CHW format
- img = img.permute((2, 0, 1))
- return img
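- # Usage sketch (illustrative): unlike to_tensor above, pil_to_tensor keeps the original
- # dtype and value range, so an 8-bit RGB image becomes a uint8 CHW tensor in [0, 255].
- #
- #   >>> pil_img = Image.new("RGB", (48, 32))   # PIL size is (width, height)
- #   >>> pil_to_tensor(pil_img).shape, pil_to_tensor(pil_img).dtype
- #   (torch.Size([3, 32, 48]), torch.uint8)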
- def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) -> torch.Tensor:
- """Convert a tensor image to the given ``dtype`` and scale the values accordingly
- This function does not support PIL Image.
- Args:
- image (torch.Tensor): Image to be converted
- dtype (torch.dtype): Desired data type of the output
- Returns:
- Tensor: Converted image
- .. note::
- When converting from a smaller to a larger integer ``dtype`` the maximum values are **not** mapped exactly.
- If converted back and forth, this mismatch has no effect.
- Raises:
- RuntimeError: When trying to cast :class:`torch.float32` to :class:`torch.int32` or :class:`torch.int64` as
- well as for trying to cast :class:`torch.float64` to :class:`torch.int64`. These conversions might lead to
- overflow errors since the floating point ``dtype`` cannot store consecutive integers over the whole range
- of the integer ``dtype``.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(convert_image_dtype)
- if not isinstance(image, torch.Tensor):
- raise TypeError("Input img should be Tensor Image")
- return F_t.convert_image_dtype(image, dtype)
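- # Usage sketch (illustrative): uint8 -> float conversion rescales values into [0, 1] by
- # dividing by 255; float -> uint8 maps [0, 1] back onto [0, 255], so 0, 128 and 255
- # round-trip unchanged.
- #
- #   >>> x = torch.tensor([0, 128, 255], dtype=torch.uint8)
- #   >>> f = convert_image_dtype(x, torch.float32)   # values approximately [0.0, 0.502, 1.0]
- #   >>> convert_image_dtype(f, torch.uint8)         # back to [0, 128, 255]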
- def to_pil_image(pic, mode=None):
- """Convert a tensor or an ndarray to PIL Image. This function does not support torchscript.
- See :class:`~torchvision.transforms.ToPILImage` for more details.
- Args:
- pic (Tensor or numpy.ndarray): Image to be converted to PIL Image.
- mode (`PIL.Image mode`_): color space and pixel depth of input data (optional).
- .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes
- Returns:
- PIL Image: Image converted to PIL Image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(to_pil_image)
- if not (isinstance(pic, torch.Tensor) or isinstance(pic, np.ndarray)):
- raise TypeError(f"pic should be Tensor or ndarray. Got {type(pic)}.")
- elif isinstance(pic, torch.Tensor):
- if pic.ndimension() not in {2, 3}:
- raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndimension()} dimensions.")
- elif pic.ndimension() == 2:
- # if 2D image, add channel dimension (CHW)
- pic = pic.unsqueeze(0)
- # check number of channels
- if pic.shape[-3] > 4:
- raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-3]} channels.")
- elif isinstance(pic, np.ndarray):
- if pic.ndim not in {2, 3}:
- raise ValueError(f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions.")
- elif pic.ndim == 2:
- # if 2D image, add channel dimension (HWC)
- pic = np.expand_dims(pic, 2)
- # check number of channels
- if pic.shape[-1] > 4:
- raise ValueError(f"pic should not have > 4 channels. Got {pic.shape[-1]} channels.")
- npimg = pic
- if isinstance(pic, torch.Tensor):
- if pic.is_floating_point() and mode != "F":
- pic = pic.mul(255).byte()
- npimg = np.transpose(pic.cpu().numpy(), (1, 2, 0))
- if not isinstance(npimg, np.ndarray):
- raise TypeError("Input pic must be a torch.Tensor or NumPy ndarray, not {type(npimg)}")
- if npimg.shape[2] == 1:
- expected_mode = None
- npimg = npimg[:, :, 0]
- if npimg.dtype == np.uint8:
- expected_mode = "L"
- elif npimg.dtype == np.int16:
- expected_mode = "I;16"
- elif npimg.dtype == np.int32:
- expected_mode = "I"
- elif npimg.dtype == np.float32:
- expected_mode = "F"
- if mode is not None and mode != expected_mode:
- raise ValueError(f"Incorrect mode ({mode}) supplied for input type {np.dtype}. Should be {expected_mode}")
- mode = expected_mode
- elif npimg.shape[2] == 2:
- permitted_2_channel_modes = ["LA"]
- if mode is not None and mode not in permitted_2_channel_modes:
- raise ValueError(f"Only modes {permitted_2_channel_modes} are supported for 2D inputs")
- if mode is None and npimg.dtype == np.uint8:
- mode = "LA"
- elif npimg.shape[2] == 4:
- permitted_4_channel_modes = ["RGBA", "CMYK", "RGBX"]
- if mode is not None and mode not in permitted_4_channel_modes:
- raise ValueError(f"Only modes {permitted_4_channel_modes} are supported for 4D inputs")
- if mode is None and npimg.dtype == np.uint8:
- mode = "RGBA"
- else:
- permitted_3_channel_modes = ["RGB", "YCbCr", "HSV"]
- if mode is not None and mode not in permitted_3_channel_modes:
- raise ValueError(f"Only modes {permitted_3_channel_modes} are supported for 3D inputs")
- if mode is None and npimg.dtype == np.uint8:
- mode = "RGB"
- if mode is None:
- raise TypeError(f"Input type {npimg.dtype} is not supported")
- return Image.fromarray(npimg, mode=mode)
- def normalize(tensor: Tensor, mean: List[float], std: List[float], inplace: bool = False) -> Tensor:
- """Normalize a float tensor image with mean and standard deviation.
- This transform does not support PIL Image.
- .. note::
- This transform acts out of place by default, i.e., it does not mutate the input tensor.
- See :class:`~torchvision.transforms.Normalize` for more details.
- Args:
- tensor (Tensor): Float tensor image of size (C, H, W) or (B, C, H, W) to be normalized.
- mean (sequence): Sequence of means for each channel.
- std (sequence): Sequence of standard deviations for each channel.
- inplace(bool,optional): Bool to make this operation inplace.
- Returns:
- Tensor: Normalized Tensor image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(normalize)
- if not isinstance(tensor, torch.Tensor):
- raise TypeError(f"img should be Tensor Image. Got {type(tensor)}")
- return F_t.normalize(tensor, mean=mean, std=std, inplace=inplace)
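- # Usage sketch (illustrative): per-channel standardization of a float CHW tensor, here with
- # the commonly used ImageNet statistics.
- #
- #   >>> img = torch.rand(3, 224, 224)   # float image in [0, 1]
- #   >>> out = normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
- #   >>> out.shape
- #   torch.Size([3, 224, 224])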
- def _compute_resized_output_size(
- image_size: Tuple[int, int], size: List[int], max_size: Optional[int] = None
- ) -> List[int]:
- if len(size) == 1: # specified size only for the smallest edge
- h, w = image_size
- short, long = (w, h) if w <= h else (h, w)
- requested_new_short = size if isinstance(size, int) else size[0]
- new_short, new_long = requested_new_short, int(requested_new_short * long / short)
- if max_size is not None:
- if max_size <= requested_new_short:
- raise ValueError(
- f"max_size = {max_size} must be strictly greater than the requested "
- f"size for the smaller edge size = {size}"
- )
- if new_long > max_size:
- new_short, new_long = int(max_size * new_short / new_long), max_size
- new_w, new_h = (new_short, new_long) if w <= h else (new_long, new_short)
- else: # specified both h and w
- new_w, new_h = size[1], size[0]
- return [new_h, new_w]
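- # Worked example (illustrative) of the logic above: for image_size = (480, 640) as (h, w)
- # and size = [256], the short edge 480 maps to 256 and the long edge scales by the same
- # factor, int(256 * 640 / 480) = 341, giving [256, 341]. With max_size = 300 the long edge
- # is clamped to 300 and the short edge rescaled to int(300 * 256 / 341) = 225, i.e. [225, 300].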
- def resize(
- img: Tensor,
- size: List[int],
- interpolation: InterpolationMode = InterpolationMode.BILINEAR,
- max_size: Optional[int] = None,
- antialias: Optional[Union[str, bool]] = "warn",
- ) -> Tensor:
- r"""Resize the input image to the given size.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
- .. warning::
- The output image might be different depending on its type: when downsampling, the interpolation of PIL images
- and tensors is slightly different, because PIL applies antialiasing. This may lead to significant differences
- in the performance of a network. Therefore, it is preferable to train and serve a model with the same input
- types. See also the ``antialias`` parameter below, which can help make the output of PIL images and tensors
- closer.
- Args:
- img (PIL Image or Tensor): Image to be resized.
- size (sequence or int): Desired output size. If size is a sequence like
- (h, w), the output size will be matched to this. If size is an int,
- the smaller edge of the image will be matched to this number maintaining
- the aspect ratio, i.e., if height > width, then the image will be rescaled to
- :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`.
- .. note::
- In torchscript mode size as single int is not supported, use a sequence of length 1: ``[size, ]``.
- interpolation (InterpolationMode): Desired interpolation enum defined by
- :class:`torchvision.transforms.InterpolationMode`.
- Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
- ``InterpolationMode.NEAREST_EXACT``, ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are
- supported.
- The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
- max_size (int, optional): The maximum allowed for the longer edge of
- the resized image. If the longer edge of the image is greater
- than ``max_size`` after being resized according to ``size``,
- ``size`` will be overruled so that the longer edge is equal to
- ``max_size``.
- As a result, the smaller edge may be shorter than ``size``. This
- is only supported if ``size`` is an int (or a sequence of length
- 1 in torchscript mode).
- antialias (bool, optional): Whether to apply antialiasing.
- It only affects **tensors** with bilinear or bicubic modes and it is
- ignored otherwise: on PIL images, antialiasing is always applied on
- bilinear or bicubic modes; on other modes (for PIL images and
- tensors), antialiasing makes no sense and this parameter is ignored.
- Possible values are:
- - ``True``: will apply antialiasing for bilinear or bicubic modes.
- Other modes aren't affected. This is probably what you want to use.
- - ``False``: will not apply antialiasing for tensors on any mode. PIL
- images are still antialiased on bilinear or bicubic modes, because
- PIL does not support disabling antialiasing.
- - ``None``: equivalent to ``False`` for tensors and ``True`` for
- PIL images. This value exists for legacy reasons and you probably
- don't want to use it unless you really know what you are doing.
- The current default is ``None`` **but will change to** ``True`` **in
- v0.17** for the PIL and Tensor backends to be consistent.
- Returns:
- PIL Image or Tensor: Resized image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(resize)
- if isinstance(interpolation, int):
- interpolation = _interpolation_modes_from_int(interpolation)
- elif not isinstance(interpolation, InterpolationMode):
- raise TypeError(
- "Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant"
- )
- if isinstance(size, (list, tuple)):
- if len(size) not in [1, 2]:
- raise ValueError(
- f"Size must be an int or a 1 or 2 element tuple/list, not a {len(size)} element tuple/list"
- )
- if max_size is not None and len(size) != 1:
- raise ValueError(
- "max_size should only be passed if size specifies the length of the smaller edge, "
- "i.e. size should be an int or a sequence of length 1 in torchscript mode."
- )
- _, image_height, image_width = get_dimensions(img)
- if isinstance(size, int):
- size = [size]
- output_size = _compute_resized_output_size((image_height, image_width), size, max_size)
- if [image_height, image_width] == output_size:
- return img
- antialias = _check_antialias(img, antialias, interpolation)
- if not isinstance(img, torch.Tensor):
- if antialias is False:
- warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
- pil_interpolation = pil_modes_mapping[interpolation]
- return F_pil.resize(img, size=output_size, interpolation=pil_interpolation)
- return F_t.resize(img, size=output_size, interpolation=interpolation.value, antialias=antialias)
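- # Usage sketch (illustrative): resize by the smallest edge with antialiasing, which keeps
- # tensor output close to the PIL result for bilinear/bicubic downsampling.
- #
- #   >>> img = torch.rand(3, 480, 640)
- #   >>> resize(img, [256], antialias=True).shape        # smallest edge -> 256
- #   torch.Size([3, 256, 341])
- #   >>> resize(img, [224, 224], antialias=True).shape   # explicit (h, w)
- #   torch.Size([3, 224, 224])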
- def pad(img: Tensor, padding: List[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor:
- r"""Pad the given image on all sides with the given "pad" value.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means at most 2 leading dimensions for mode reflect and symmetric,
- at most 3 leading dimensions for mode edge,
- and an arbitrary number of leading dimensions for mode constant
- Args:
- img (PIL Image or Tensor): Image to be padded.
- padding (int or sequence): Padding on each border. If a single int is provided this
- is used to pad all borders. If sequence of length 2 is provided this is the padding
- on left/right and top/bottom respectively. If a sequence of length 4 is provided
- this is the padding for the left, top, right and bottom borders respectively.
- .. note::
- In torchscript mode padding as single int is not supported, use a sequence of
- length 1: ``[padding, ]``.
- fill (number or tuple): Pixel fill value for constant fill. Default is 0.
- If a tuple of length 3, it is used to fill R, G, B channels respectively.
- This value is only used when the padding_mode is constant.
- Only number is supported for torch Tensor.
- Only int or tuple value is supported for PIL Image.
- padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric.
- Default is constant.
- - constant: pads with a constant value, this value is specified with fill
- - edge: pads with the last value at the edge of the image.
- If input a 5D torch Tensor, the last 3 dimensions will be padded instead of the last 2
- - reflect: pads with reflection of image without repeating the last value on the edge.
- For example, padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode
- will result in [3, 2, 1, 2, 3, 4, 3, 2]
- - symmetric: pads with reflection of image repeating the last value on the edge.
- For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode
- will result in [2, 1, 1, 2, 3, 4, 4, 3]
- Returns:
- PIL Image or Tensor: Padded image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(pad)
- if not isinstance(img, torch.Tensor):
- return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode)
- return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode)
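- # Usage sketch (illustrative): asymmetric constant padding. padding=[1, 2, 3, 4] pads the
- # left/top/right/bottom borders, so the width grows by 1 + 3 and the height by 2 + 4.
- #
- #   >>> img = torch.zeros(3, 10, 10)
- #   >>> pad(img, [1, 2, 3, 4], fill=0).shape
- #   torch.Size([3, 16, 14])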
- def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
- """Crop the given image at specified location and output size.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
- If image size is smaller than output size along any edge, image is padded with 0 and then cropped.
- Args:
- img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
- top (int): Vertical component of the top left corner of the crop box.
- left (int): Horizontal component of the top left corner of the crop box.
- height (int): Height of the crop box.
- width (int): Width of the crop box.
- Returns:
- PIL Image or Tensor: Cropped image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(crop)
- if not isinstance(img, torch.Tensor):
- return F_pil.crop(img, top, left, height, width)
- return F_t.crop(img, top, left, height, width)
- def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
- """Crops the given image at the center.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
- If image size is smaller than output size along any edge, image is padded with 0 and then center cropped.
- Args:
- img (PIL Image or Tensor): Image to be cropped.
- output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int,
- it is used for both directions.
- Returns:
- PIL Image or Tensor: Cropped image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(center_crop)
- if isinstance(output_size, numbers.Number):
- output_size = (int(output_size), int(output_size))
- elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
- output_size = (output_size[0], output_size[0])
- _, image_height, image_width = get_dimensions(img)
- crop_height, crop_width = output_size
- if crop_width > image_width or crop_height > image_height:
- padding_ltrb = [
- (crop_width - image_width) // 2 if crop_width > image_width else 0,
- (crop_height - image_height) // 2 if crop_height > image_height else 0,
- (crop_width - image_width + 1) // 2 if crop_width > image_width else 0,
- (crop_height - image_height + 1) // 2 if crop_height > image_height else 0,
- ]
- img = pad(img, padding_ltrb, fill=0) # PIL uses fill value 0
- _, image_height, image_width = get_dimensions(img)
- if crop_width == image_width and crop_height == image_height:
- return img
- crop_top = int(round((image_height - crop_height) / 2.0))
- crop_left = int(round((image_width - crop_width) / 2.0))
- return crop(img, crop_top, crop_left, crop_height, crop_width)
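- # Usage sketch (illustrative): center_crop pads with zeros first when the requested crop is
- # larger than the image, so the output always has the requested size.
- #
- #   >>> img = torch.rand(3, 100, 60)
- #   >>> center_crop(img, [50, 50]).shape     # plain center crop
- #   torch.Size([3, 50, 50])
- #   >>> center_crop(img, [120, 120]).shape   # larger than the input -> zero-padded first
- #   torch.Size([3, 120, 120])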
- def resized_crop(
- img: Tensor,
- top: int,
- left: int,
- height: int,
- width: int,
- size: List[int],
- interpolation: InterpolationMode = InterpolationMode.BILINEAR,
- antialias: Optional[Union[str, bool]] = "warn",
- ) -> Tensor:
- """Crop the given image and resize it to desired size.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
- Notably used in :class:`~torchvision.transforms.RandomResizedCrop`.
- Args:
- img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
- top (int): Vertical component of the top left corner of the crop box.
- left (int): Horizontal component of the top left corner of the crop box.
- height (int): Height of the crop box.
- width (int): Width of the crop box.
- size (sequence or int): Desired output size. Same semantics as ``resize``.
- interpolation (InterpolationMode): Desired interpolation enum defined by
- :class:`torchvision.transforms.InterpolationMode`.
- Default is ``InterpolationMode.BILINEAR``. If input is Tensor, only ``InterpolationMode.NEAREST``,
- ``InterpolationMode.NEAREST_EXACT``, ``InterpolationMode.BILINEAR`` and ``InterpolationMode.BICUBIC`` are
- supported.
- The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
- antialias (bool, optional): Whether to apply antialiasing.
- It only affects **tensors** with bilinear or bicubic modes and it is
- ignored otherwise: on PIL images, antialiasing is always applied on
- bilinear or bicubic modes; on other modes (for PIL images and
- tensors), antialiasing makes no sense and this parameter is ignored.
- Possible values are:
- - ``True``: will apply antialiasing for bilinear or bicubic modes.
- Other modes aren't affected. This is probably what you want to use.
- - ``False``: will not apply antialiasing for tensors on any mode. PIL
- images are still antialiased on bilinear or bicubic modes, because
- PIL does not support disabling antialiasing.
- - ``None``: equivalent to ``False`` for tensors and ``True`` for
- PIL images. This value exists for legacy reasons and you probably
- don't want to use it unless you really know what you are doing.
- The current default is ``None`` **but will change to** ``True`` **in
- v0.17** for the PIL and Tensor backends to be consistent.
- Returns:
- PIL Image or Tensor: Cropped image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(resized_crop)
- img = crop(img, top, left, height, width)
- img = resize(img, size, interpolation, antialias=antialias)
- return img
- def hflip(img: Tensor) -> Tensor:
- """Horizontally flip the given image.
- Args:
- img (PIL Image or Tensor): Image to be flipped. If img
- is a Tensor, it is expected to be in [..., H, W] format,
- where ... means it can have an arbitrary number of leading
- dimensions.
- Returns:
- PIL Image or Tensor: Horizontally flipped image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(hflip)
- if not isinstance(img, torch.Tensor):
- return F_pil.hflip(img)
- return F_t.hflip(img)
- def _get_perspective_coeffs(startpoints: List[List[int]], endpoints: List[List[int]]) -> List[float]:
- """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms.
- In a perspective transform, each pixel (x, y) in the original image is transformed as
- (x, y) -> ( (ax + by + c) / (gx + hy + 1), (dx + ey + f) / (gx + hy + 1) )
- Args:
- startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
- ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
- endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
- ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
- Returns:
- octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
- """
- a_matrix = torch.zeros(2 * len(startpoints), 8, dtype=torch.float)
- for i, (p1, p2) in enumerate(zip(endpoints, startpoints)):
- a_matrix[2 * i, :] = torch.tensor([p1[0], p1[1], 1, 0, 0, 0, -p2[0] * p1[0], -p2[0] * p1[1]])
- a_matrix[2 * i + 1, :] = torch.tensor([0, 0, 0, p1[0], p1[1], 1, -p2[1] * p1[0], -p2[1] * p1[1]])
- b_matrix = torch.tensor(startpoints, dtype=torch.float).view(8)
- res = torch.linalg.lstsq(a_matrix, b_matrix, driver="gels").solution
- output: List[float] = res.tolist()
- return output
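- # Worked sketch (illustrative): the eight coefficients are the least-squares solution of
- # a_matrix @ coeffs = b_matrix built from the four point pairs. For an identity mapping the
- # solution is approximately (a, b, c, d, e, f, g, h) = (1, 0, 0, 0, 1, 0, 0, 0):
- #
- #   >>> pts = [[0, 0], [9, 0], [9, 9], [0, 9]]
- #   >>> coeffs = _get_perspective_coeffs(pts, pts)   # ~[1, 0, 0, 0, 1, 0, 0, 0]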
- def perspective(
- img: Tensor,
- startpoints: List[List[int]],
- endpoints: List[List[int]],
- interpolation: InterpolationMode = InterpolationMode.BILINEAR,
- fill: Optional[List[float]] = None,
- ) -> Tensor:
- """Perform perspective transform of the given image.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
- Args:
- img (PIL Image or Tensor): Image to be transformed.
- startpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
- ``[top-left, top-right, bottom-right, bottom-left]`` of the original image.
- endpoints (list of list of ints): List containing four lists of two integers corresponding to four corners
- ``[top-left, top-right, bottom-right, bottom-left]`` of the transformed image.
- interpolation (InterpolationMode): Desired interpolation enum defined by
- :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.BILINEAR``.
- If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
- The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
- fill (sequence or number, optional): Pixel fill value for the area outside the transformed
- image. If given a number, the value is used for all bands.
- .. note::
- In torchscript mode single int/float value is not supported, please use a sequence
- of length 1: ``[value, ]``.
- Returns:
- PIL Image or Tensor: transformed Image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(perspective)
- coeffs = _get_perspective_coeffs(startpoints, endpoints)
- if isinstance(interpolation, int):
- interpolation = _interpolation_modes_from_int(interpolation)
- elif not isinstance(interpolation, InterpolationMode):
- raise TypeError(
- "Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant"
- )
- if not isinstance(img, torch.Tensor):
- pil_interpolation = pil_modes_mapping[interpolation]
- return F_pil.perspective(img, coeffs, interpolation=pil_interpolation, fill=fill)
- return F_t.perspective(img, coeffs, interpolation=interpolation.value, fill=fill)
- def vflip(img: Tensor) -> Tensor:
- """Vertically flip the given image.
- Args:
- img (PIL Image or Tensor): Image to be flipped. If img
- is a Tensor, it is expected to be in [..., H, W] format,
- where ... means it can have an arbitrary number of leading
- dimensions.
- Returns:
- PIL Image or Tensor: Vertically flipped image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(vflip)
- if not isinstance(img, torch.Tensor):
- return F_pil.vflip(img)
- return F_t.vflip(img)
- def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
- """Crop the given image into four corners and the central crop.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
- .. Note::
- This transform returns a tuple of images and there may be a
- mismatch in the number of inputs and targets your ``Dataset`` returns.
- Args:
- img (PIL Image or Tensor): Image to be cropped.
- size (sequence or int): Desired output size of the crop. If size is an
- int instead of sequence like (h, w), a square crop (size, size) is
- made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
- Returns:
- tuple: tuple (tl, tr, bl, br, center)
- Corresponding top left, top right, bottom left, bottom right and center crop.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(five_crop)
- if isinstance(size, numbers.Number):
- size = (int(size), int(size))
- elif isinstance(size, (tuple, list)) and len(size) == 1:
- size = (size[0], size[0])
- if len(size) != 2:
- raise ValueError("Please provide only two dimensions (h, w) for size.")
- _, image_height, image_width = get_dimensions(img)
- crop_height, crop_width = size
- if crop_width > image_width or crop_height > image_height:
- msg = "Requested crop size {} is bigger than input size {}"
- raise ValueError(msg.format(size, (image_height, image_width)))
- tl = crop(img, 0, 0, crop_height, crop_width)
- tr = crop(img, 0, image_width - crop_width, crop_height, crop_width)
- bl = crop(img, image_height - crop_height, 0, crop_height, crop_width)
- br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width)
- center = center_crop(img, [crop_height, crop_width])
- return tl, tr, bl, br, center
- def ten_crop(
- img: Tensor, size: List[int], vertical_flip: bool = False
- ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
- """Generate ten cropped images from the given image.
- Crop the given image into four corners and the central crop plus the
- flipped version of these (horizontal flipping is used by default).
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
- .. Note::
- This transform returns a tuple of images and there may be a
- mismatch in the number of inputs and targets your ``Dataset`` returns.
- Args:
- img (PIL Image or Tensor): Image to be cropped.
- size (sequence or int): Desired output size of the crop. If size is an
- int instead of sequence like (h, w), a square crop (size, size) is
- made. If provided a sequence of length 1, it will be interpreted as (size[0], size[0]).
- vertical_flip (bool): Use vertical flipping instead of horizontal
- Returns:
- tuple: tuple (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)
- Corresponding top left, top right, bottom left, bottom right and
- center crop and same for the flipped image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(ten_crop)
- if isinstance(size, numbers.Number):
- size = (int(size), int(size))
- elif isinstance(size, (tuple, list)) and len(size) == 1:
- size = (size[0], size[0])
- if len(size) != 2:
- raise ValueError("Please provide only two dimensions (h, w) for size.")
- first_five = five_crop(img, size)
- if vertical_flip:
- img = vflip(img)
- else:
- img = hflip(img)
- second_five = five_crop(img, size)
- return first_five + second_five
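- # Usage sketch (illustrative): a common test-time-augmentation pattern is to stack the ten
- # crops into a batch, run the model, and average the predictions.
- #
- #   >>> img = torch.rand(3, 256, 256)
- #   >>> crops = ten_crop(img, [224, 224])   # tuple of 10 tensors of shape (3, 224, 224)
- #   >>> batch = torch.stack(crops)          # shape (10, 3, 224, 224)
- #   >>> # preds = model(batch).mean(dim=0)  # "model" is a hypothetical classifier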
- def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
- """Adjust brightness of an image.
- Args:
- img (PIL Image or Tensor): Image to be adjusted.
- If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
- where ... means it can have an arbitrary number of leading dimensions.
- brightness_factor (float): How much to adjust the brightness. Can be
- any non-negative number. 0 gives a black image, 1 gives the
- original image while 2 increases the brightness by a factor of 2.
- Returns:
- PIL Image or Tensor: Brightness adjusted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(adjust_brightness)
- if not isinstance(img, torch.Tensor):
- return F_pil.adjust_brightness(img, brightness_factor)
- return F_t.adjust_brightness(img, brightness_factor)
- def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
- """Adjust contrast of an image.
- Args:
- img (PIL Image or Tensor): Image to be adjusted.
- If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
- where ... means it can have an arbitrary number of leading dimensions.
- contrast_factor (float): How much to adjust the contrast. Can be any
- non-negative number. 0 gives a solid gray image, 1 gives the
- original image while 2 increases the contrast by a factor of 2.
- Returns:
- PIL Image or Tensor: Contrast adjusted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(adjust_contrast)
- if not isinstance(img, torch.Tensor):
- return F_pil.adjust_contrast(img, contrast_factor)
- return F_t.adjust_contrast(img, contrast_factor)
- def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
- """Adjust color saturation of an image.
- Args:
- img (PIL Image or Tensor): Image to be adjusted.
- If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
- where ... means it can have an arbitrary number of leading dimensions.
- saturation_factor (float): How much to adjust the saturation. 0 will
- give a black and white image, 1 will give the original image while
- 2 will enhance the saturation by a factor of 2.
- Returns:
- PIL Image or Tensor: Saturation adjusted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(adjust_saturation)
- if not isinstance(img, torch.Tensor):
- return F_pil.adjust_saturation(img, saturation_factor)
- return F_t.adjust_saturation(img, saturation_factor)
- def adjust_hue(img: Tensor, hue_factor: float) -> Tensor:
- """Adjust hue of an image.
- The image hue is adjusted by converting the image to HSV and
- cyclically shifting the intensities in the hue channel (H).
- The image is then converted back to original image mode.
- `hue_factor` is the amount of shift in H channel and must be in the
- interval `[-0.5, 0.5]`.
- See `Hue`_ for more details.
- .. _Hue: https://en.wikipedia.org/wiki/Hue
- Args:
- img (PIL Image or Tensor): Image to be adjusted.
- If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
- where ... means it can have an arbitrary number of leading dimensions.
- If img is PIL Image mode "1", "I", "F" and modes with transparency (alpha channel) are not supported.
- Note: the pixel values of the input image have to be non-negative for conversion to HSV space;
- thus it does not work if you normalize your image to an interval with negative values,
- or use an interpolation that generates negative values before using this function.
- hue_factor (float): How much to shift the hue channel. Should be in
- [-0.5, 0.5]. 0.5 and -0.5 give complete reversal of hue channel in
- HSV space in positive and negative direction respectively.
- 0 means no shift. Therefore, both -0.5 and 0.5 will give an image
- with complementary colors while 0 gives the original image.
- Returns:
- PIL Image or Tensor: Hue adjusted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(adjust_hue)
- if not isinstance(img, torch.Tensor):
- return F_pil.adjust_hue(img, hue_factor)
- return F_t.adjust_hue(img, hue_factor)
- def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor:
- r"""Perform gamma correction on an image.
- Also known as Power Law Transform. Intensities in RGB mode are adjusted
- based on the following equation:
- .. math::
- I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
- See `Gamma Correction`_ for more details.
- .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
- Args:
- img (PIL Image or Tensor): PIL Image to be adjusted.
- If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
- where ... means it can have an arbitrary number of leading dimensions.
- If img is PIL Image, modes with transparency (alpha channel) are not supported.
- gamma (float): Non negative real number, same as :math:`\gamma` in the equation.
- A gamma larger than 1 makes the shadows darker,
- while a gamma smaller than 1 makes dark regions lighter.
- gain (float): The constant multiplier.
- Returns:
- PIL Image or Tensor: Gamma correction adjusted image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(adjust_gamma)
- if not isinstance(img, torch.Tensor):
- return F_pil.adjust_gamma(img, gamma, gain)
- return F_t.adjust_gamma(img, gamma, gain)
- def _get_inverse_affine_matrix(
- center: List[float], angle: float, translate: List[float], scale: float, shear: List[float], inverted: bool = True
- ) -> List[float]:
- # Helper method to compute inverse matrix for affine transformation
- # Pillow requires inverse affine transformation matrix:
- # Affine matrix is : M = T * C * RotateScaleShear * C^-1
- #
- # where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
- # C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
- # RotateScaleShear is rotation with scale and shear matrix
- #
- # RotateScaleShear(a, s, (sx, sy)) =
- # = R(a) * S(s) * SHy(sy) * SHx(sx)
- # = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ]
- # [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ]
- # [ 0 , 0 , 1 ]
- # where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
- # SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0]
- # [0, 1 ] [-tan(s), 1]
- #
- # Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1
- rot = math.radians(angle)
- sx = math.radians(shear[0])
- sy = math.radians(shear[1])
- cx, cy = center
- tx, ty = translate
- # RSS without scaling
- a = math.cos(rot - sy) / math.cos(sy)
- b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot)
- c = math.sin(rot - sy) / math.cos(sy)
- d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot)
- if inverted:
- # Inverted rotation matrix with scale and shear
- # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
- matrix = [d, -b, 0.0, -c, a, 0.0]
- matrix = [x / scale for x in matrix]
- # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
- matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty)
- matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty)
- # Apply center translation: C * RSS^-1 * C^-1 * T^-1
- matrix[2] += cx
- matrix[5] += cy
- else:
- matrix = [a, b, 0.0, c, d, 0.0]
- matrix = [x * scale for x in matrix]
- # Apply inverse of center translation: RSS * C^-1
- matrix[2] += matrix[0] * (-cx) + matrix[1] * (-cy)
- matrix[5] += matrix[3] * (-cx) + matrix[4] * (-cy)
- # Apply translation and center : T * C * RSS * C^-1
- matrix[2] += cx + tx
- matrix[5] += cy + ty
- return matrix
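- # Worked sketch (illustrative): with the origin as center and no translation, shear or
- # scaling, a 90-degree rotation has forward 2x2 block [[cos, -sin], [sin, cos]] = [[0, -1], [1, 0]],
- # and the inverse returned here (inverted=True) is its transpose, so
- #   _get_inverse_affine_matrix([0.0, 0.0], 90.0, [0.0, 0.0], 1.0, [0.0, 0.0])
- # is approximately [0.0, 1.0, 0.0, -1.0, 0.0, 0.0].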
- def rotate(
- img: Tensor,
- angle: float,
- interpolation: InterpolationMode = InterpolationMode.NEAREST,
- expand: bool = False,
- center: Optional[List[int]] = None,
- fill: Optional[List[float]] = None,
- ) -> Tensor:
- """Rotate the image by angle.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
- Args:
- img (PIL Image or Tensor): image to be rotated.
- angle (number): rotation angle value in degrees, counter-clockwise.
- interpolation (InterpolationMode): Desired interpolation enum defined by
- :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
- If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
- The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
- expand (bool, optional): Optional expansion flag.
- If true, expands the output image to make it large enough to hold the entire rotated image.
- If false or omitted, the output image is made the same size as the input image.
- Note that the expand flag assumes rotation around the center and no translation.
- center (sequence, optional): Optional center of rotation. Origin is the upper left corner.
- Default is the center of the image.
- fill (sequence or number, optional): Pixel fill value for the area outside the transformed
- image. If given a number, the value is used for all bands.
- .. note::
- In torchscript mode single int/float value is not supported, please use a sequence
- of length 1: ``[value, ]``.
- Returns:
- PIL Image or Tensor: Rotated image.
- .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(rotate)
- if isinstance(interpolation, int):
- interpolation = _interpolation_modes_from_int(interpolation)
- elif not isinstance(interpolation, InterpolationMode):
- raise TypeError(
- "Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant"
- )
- if not isinstance(angle, (int, float)):
- raise TypeError("Argument angle should be int or float")
- if center is not None and not isinstance(center, (list, tuple)):
- raise TypeError("Argument center should be a sequence")
- if not isinstance(img, torch.Tensor):
- pil_interpolation = pil_modes_mapping[interpolation]
- return F_pil.rotate(img, angle=angle, interpolation=pil_interpolation, expand=expand, center=center, fill=fill)
- center_f = [0.0, 0.0]
- if center is not None:
- _, height, width = get_dimensions(img)
- # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
- center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
- # due to current incoherence of rotation angle direction between affine and rotate implementations
- # we need to set -angle.
- matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
- return F_t.rotate(img, matrix=matrix, interpolation=interpolation.value, expand=expand, fill=fill)
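- # Usage sketch (illustrative): rotate with expansion so the whole rotated image fits, or
- # without expansion to keep the input size, filling exposed regions with a constant value.
- #
- #   >>> img = torch.rand(3, 100, 200)
- #   >>> out = rotate(img, angle=45.0, expand=True, fill=[0.0])   # output is enlarged to fit
- #   >>> rotate(img, angle=45.0, expand=False).shape              # same size as the input
- #   torch.Size([3, 100, 200])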
- def affine(
- img: Tensor,
- angle: float,
- translate: List[int],
- scale: float,
- shear: List[float],
- interpolation: InterpolationMode = InterpolationMode.NEAREST,
- fill: Optional[List[float]] = None,
- center: Optional[List[int]] = None,
- ) -> Tensor:
- """Apply affine transformation on the image keeping image center invariant.
- If the image is torch Tensor, it is expected
- to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
- Args:
- img (PIL Image or Tensor): image to transform.
- angle (number): rotation angle in degrees between -180 and 180, clockwise direction.
- translate (sequence of integers): horizontal and vertical translations (post-rotation translation)
- scale (float): overall scale
- shear (float or sequence): shear angle value in degrees between -180 to 180, clockwise direction.
- If a sequence is specified, the first value corresponds to a shear parallel to the x-axis, while
- the second value corresponds to a shear parallel to the y-axis.
- interpolation (InterpolationMode): Desired interpolation enum defined by
- :class:`torchvision.transforms.InterpolationMode`. Default is ``InterpolationMode.NEAREST``.
- If input is Tensor, only ``InterpolationMode.NEAREST``, ``InterpolationMode.BILINEAR`` are supported.
- The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
- fill (sequence or number, optional): Pixel fill value for the area outside the transformed
- image. If given a number, the value is used for all bands.
- .. note::
- In torchscript mode single int/float value is not supported, please use a sequence
- of length 1: ``[value, ]``.
- center (sequence, optional): Optional center of rotation. Origin is the upper left corner.
- Default is the center of the image.
- Returns:
- PIL Image or Tensor: Transformed image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(affine)
- if isinstance(interpolation, int):
- interpolation = _interpolation_modes_from_int(interpolation)
- elif not isinstance(interpolation, InterpolationMode):
- raise TypeError(
- "Argument interpolation should be a InterpolationMode or a corresponding Pillow integer constant"
- )
- if not isinstance(angle, (int, float)):
- raise TypeError("Argument angle should be int or float")
- if not isinstance(translate, (list, tuple)):
- raise TypeError("Argument translate should be a sequence")
- if len(translate) != 2:
- raise ValueError("Argument translate should be a sequence of length 2")
- if scale <= 0.0:
- raise ValueError("Argument scale should be positive")
- if not isinstance(shear, (numbers.Number, (list, tuple))):
- raise TypeError("Shear should be either a single value or a sequence of two values")
- if isinstance(angle, int):
- angle = float(angle)
- if isinstance(translate, tuple):
- translate = list(translate)
- if isinstance(shear, numbers.Number):
- shear = [shear, 0.0]
- if isinstance(shear, tuple):
- shear = list(shear)
- if len(shear) == 1:
- shear = [shear[0], shear[0]]
- if len(shear) != 2:
- raise ValueError(f"Shear should be a sequence containing two values. Got {shear}")
- if center is not None and not isinstance(center, (list, tuple)):
- raise TypeError("Argument center should be a sequence")
- _, height, width = get_dimensions(img)
- if not isinstance(img, torch.Tensor):
- # center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
- # it is visually better to estimate the center without 0.5 offset
- # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine
- if center is None:
- center = [width * 0.5, height * 0.5]
- matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)
- pil_interpolation = pil_modes_mapping[interpolation]
- return F_pil.affine(img, matrix=matrix, interpolation=pil_interpolation, fill=fill)
- center_f = [0.0, 0.0]
- if center is not None:
- _, height, width = get_dimensions(img)
- # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
- center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
- translate_f = [1.0 * t for t in translate]
- matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear)
- return F_t.affine(img, matrix=matrix, interpolation=interpolation.value, fill=fill)
- # to_grayscale() is a stand-alone functional that is never called from the
- # transform classes; it appears to be kept only for backward compatibility.
- @torch.jit.unused
- def to_grayscale(img, num_output_channels=1):
- """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image.
- This transform does not support torch Tensor.
- Args:
- img (PIL Image): PIL Image to be converted to grayscale.
- num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default is 1.
- Returns:
- PIL Image: Grayscale version of the image.
- - if num_output_channels = 1 : returned image is single channel
- - if num_output_channels = 3 : returned image is 3 channel with r = g = b
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(to_grayscale)
- if isinstance(img, Image.Image):
- return F_pil.to_grayscale(img, num_output_channels)
- raise TypeError("Input should be PIL Image")
- def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor:
- """Convert RGB image to grayscale version of image.
- If the image is torch Tensor, it is expected
- to have [..., 3, H, W] shape, where ... means an arbitrary number of leading dimensions
- Note:
- Please note that this method supports only RGB images as input. For inputs in other color spaces,
- consider using :meth:`~torchvision.transforms.functional.to_grayscale` with a PIL Image.
- Args:
- img (PIL Image or Tensor): RGB Image to be converted to grayscale.
- num_output_channels (int): number of channels of the output image. Value can be 1 or 3. Default, 1.
- Returns:
- PIL Image or Tensor: Grayscale version of the image.
- - if num_output_channels = 1 : returned image is single channel
- - if num_output_channels = 3 : returned image is 3 channel with r = g = b
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(rgb_to_grayscale)
- if not isinstance(img, torch.Tensor):
- return F_pil.to_grayscale(img, num_output_channels)
- return F_t.rgb_to_grayscale(img, num_output_channels)
- def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor:
- """Erase the input Tensor Image with given value.
- This transform does not support PIL Image.
- Args:
- img (Tensor Image): Tensor image of size (C, H, W) to be erased
- i (int): i in (i, j), i.e. the vertical (row) coordinate of the upper left corner.
- j (int): j in (i, j), i.e. the horizontal (column) coordinate of the upper left corner.
- h (int): Height of the erased region.
- w (int): Width of the erased region.
- v: Erasing value.
- inplace(bool, optional): For in-place operations. By default, it is set to False.
- Returns:
- Tensor Image: Erased image.
- """
- if not torch.jit.is_scripting() and not torch.jit.is_tracing():
- _log_api_usage_once(erase)
- if not isinstance(img, torch.Tensor):
- raise TypeError(f"img should be Tensor Image. Got {type(img)}")
- return F_t.erase(img, i, j, h, w, v, inplace=inplace)


def gaussian_blur(img: Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Tensor:
    """Performs Gaussian blurring on the image by given kernel.
    If the image is torch Tensor, it is expected
    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.

    Args:
        img (PIL Image or Tensor): Image to be blurred
        kernel_size (sequence of ints or int): Gaussian kernel size. Can be a sequence of integers
            like ``(kx, ky)`` or a single integer for square kernels.

            .. note::
                In torchscript mode kernel_size as single int is not supported, use a sequence of
                length 1: ``[ksize, ]``.
        sigma (sequence of floats or float, optional): Gaussian kernel standard deviation. Can be a
            sequence of floats like ``(sigma_x, sigma_y)`` or a single float to define the
            same sigma in both X/Y directions. If None, then it is computed using
            ``kernel_size`` as ``sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8``.
            Default is None.

            .. note::
                In torchscript mode sigma as single float is
                not supported, use a sequence of length 1: ``[sigma, ]``.

    Returns:
        PIL Image or Tensor: Gaussian Blurred version of the image.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(gaussian_blur)
    if not isinstance(kernel_size, (int, list, tuple)):
        raise TypeError(f"kernel_size should be int or a sequence of integers. Got {type(kernel_size)}")
    if isinstance(kernel_size, int):
        kernel_size = [kernel_size, kernel_size]
    if len(kernel_size) != 2:
        raise ValueError(f"If kernel_size is a sequence its length should be 2. Got {len(kernel_size)}")
    for ksize in kernel_size:
        if ksize % 2 == 0 or ksize < 0:
            raise ValueError(f"kernel_size should have odd and positive integers. Got {kernel_size}")

    if sigma is None:
        sigma = [ksize * 0.15 + 0.35 for ksize in kernel_size]

    if sigma is not None and not isinstance(sigma, (int, float, list, tuple)):
        raise TypeError(f"sigma should be either float or sequence of floats. Got {type(sigma)}")
    if isinstance(sigma, (int, float)):
        sigma = [float(sigma), float(sigma)]
    if isinstance(sigma, (list, tuple)) and len(sigma) == 1:
        sigma = [sigma[0], sigma[0]]
    if len(sigma) != 2:
        raise ValueError(f"If sigma is a sequence, its length should be 2. Got {len(sigma)}")
    for s in sigma:
        if s <= 0.0:
            raise ValueError(f"sigma should have positive values. Got {sigma}")

    t_img = img
    if not isinstance(img, torch.Tensor):
        if not F_pil._is_pil_image(img):
            raise TypeError(f"img should be PIL Image or Tensor. Got {type(img)}")
        t_img = pil_to_tensor(img)

    output = F_t.gaussian_blur(t_img, kernel_size, sigma)

    if not isinstance(img, torch.Tensor):
        output = to_pil_image(output, mode=img.mode)
    return output
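
# Usage sketch (illustrative, not part of the module). Note that the default sigma
# computed above, ksize * 0.15 + 0.35, is the same closed form as the docstring's
# 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8; e.g. ksize = 5 gives sigma = 1.1 either way.
#   img = torch.rand(3, 64, 64)
#   blurred = gaussian_blur(img, kernel_size=[5, 5])                   # sigma defaults to [1.1, 1.1]
#   blurred = gaussian_blur(img, kernel_size=[5, 9], sigma=[1.0, 2.0])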


def invert(img: Tensor) -> Tensor:
    """Invert the colors of an RGB/grayscale image.

    Args:
        img (PIL Image or Tensor): Image to have its colors inverted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".

    Returns:
        PIL Image or Tensor: Color inverted image.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(invert)
    if not isinstance(img, torch.Tensor):
        return F_pil.invert(img)

    return F_t.invert(img)
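
# Usage sketch (illustrative, not part of the module): for uint8 tensors the
# inversion is 255 - img, and for float tensors in [0, 1] it is 1.0 - img.
#   img = torch.tensor([[[200]]], dtype=torch.uint8)   # shape [1, 1, 1]
#   invert(img)                                        # -> value 55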


def posterize(img: Tensor, bits: int) -> Tensor:
    """Posterize an image by reducing the number of bits for each color channel.

    Args:
        img (PIL Image or Tensor): Image to have its colors posterized.
            If img is torch Tensor, it should be of type torch.uint8, and
            it is expected to be in [..., 1 or 3, H, W] format, where ... means
            it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".
        bits (int): The number of bits to keep for each channel (0-8).

    Returns:
        PIL Image or Tensor: Posterized image.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(posterize)
    if not (0 <= bits <= 8):
        raise ValueError(f"The number of bits should be between 0 and 8. Got {bits}")

    if not isinstance(img, torch.Tensor):
        return F_pil.posterize(img, bits)

    return F_t.posterize(img, bits)
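
# Worked example (illustrative, not part of the module): bits=2 keeps only the two
# most significant bits of each uint8 channel, so 200 (0b11001000) becomes 192 (0b11000000).
#   img = torch.tensor([[[200]]], dtype=torch.uint8)
#   posterize(img, bits=2)   # -> value 192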


def solarize(img: Tensor, threshold: float) -> Tensor:
    """Solarize an RGB/grayscale image by inverting all pixel values above a threshold.

    Args:
        img (PIL Image or Tensor): Image to be solarized.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".
        threshold (float): All pixels equal to or above this value are inverted.

    Returns:
        PIL Image or Tensor: Solarized image.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(solarize)
    if not isinstance(img, torch.Tensor):
        return F_pil.solarize(img, threshold)

    return F_t.solarize(img, threshold)
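
# Worked example (illustrative, not part of the module): with threshold=128, a uint8
# pixel of 200 is at or above the threshold and is inverted to 255 - 200 = 55, while
# a pixel of 100 stays untouched.
#   img = torch.tensor([[[200, 100]]], dtype=torch.uint8)
#   solarize(img, threshold=128)   # -> values [55, 100]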


def adjust_sharpness(img: Tensor, sharpness_factor: float) -> Tensor:
    """Adjust the sharpness of an image.

    Args:
        img (PIL Image or Tensor): Image to be adjusted.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
        sharpness_factor (float): How much to adjust the sharpness. Can be
            any non-negative number. 0 gives a blurred image, 1 gives the
            original image while 2 increases the sharpness by a factor of 2.

    Returns:
        PIL Image or Tensor: Sharpness adjusted image.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(adjust_sharpness)
    if not isinstance(img, torch.Tensor):
        return F_pil.adjust_sharpness(img, sharpness_factor)

    return F_t.adjust_sharpness(img, sharpness_factor)
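
# Usage sketch (illustrative, not part of the module): 0.0 blurs, 1.0 returns the
# input unchanged, and values above 1.0 sharpen by blending away from the blurred version.
#   img = torch.rand(3, 64, 64)
#   sharper = adjust_sharpness(img, sharpness_factor=2.0)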


def autocontrast(img: Tensor) -> Tensor:
    """Maximize contrast of an image by remapping its
    pixels per channel so that the lowest becomes black and the lightest
    becomes white.

    Args:
        img (PIL Image or Tensor): Image on which autocontrast is applied.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "L" or "RGB".

    Returns:
        PIL Image or Tensor: An image that was autocontrasted.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(autocontrast)
    if not isinstance(img, torch.Tensor):
        return F_pil.autocontrast(img)

    return F_t.autocontrast(img)


def equalize(img: Tensor) -> Tensor:
    """Equalize the histogram of an image by applying
    a non-linear mapping to the input in order to create a uniform
    distribution of grayscale values in the output.

    Args:
        img (PIL Image or Tensor): Image on which equalize is applied.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            The tensor dtype must be ``torch.uint8`` and values are expected to be in ``[0, 255]``.
            If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".

    Returns:
        PIL Image or Tensor: An image that was equalized.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(equalize)
    if not isinstance(img, torch.Tensor):
        return F_pil.equalize(img)

    return F_t.equalize(img)


def elastic_transform(
    img: Tensor,
    displacement: Tensor,
    interpolation: InterpolationMode = InterpolationMode.BILINEAR,
    fill: Optional[List[float]] = None,
) -> Tensor:
    """Transform a tensor image with elastic transformations.
    Given alpha and sigma, it will generate displacement
    vectors for all pixels based on random offsets. Alpha controls the strength
    and sigma controls the smoothness of the displacements.
    The displacements are added to an identity grid and the resulting grid is
    used to grid_sample from the image.

    Applications:
        Randomly transforms the morphology of objects in images and produces a
        see-through-water-like effect.

    Args:
        img (PIL Image or Tensor): Image on which elastic_transform is applied.
            If img is torch Tensor, it is expected to be in [..., 1 or 3, H, W] format,
            where ... means it can have an arbitrary number of leading dimensions.
            If img is PIL Image, it is expected to be in mode "P", "L" or "RGB".
        displacement (Tensor): The displacement field. Expected shape is [1, H, W, 2].
        interpolation (InterpolationMode): Desired interpolation enum defined by
            :class:`torchvision.transforms.InterpolationMode`.
            Default is ``InterpolationMode.BILINEAR``.
            The corresponding Pillow integer constants, e.g. ``PIL.Image.BILINEAR`` are accepted as well.
        fill (sequence of numbers, optional): Pixel fill value for the area outside the transformed image.
            Default is ``None``, which fills with zeros. If a tuple of length 3, it is used to fill
            R, G, B channels respectively. This value is only used for constant fill.

    Returns:
        PIL Image or Tensor: Transformed image.
    """
    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
        _log_api_usage_once(elastic_transform)

    # Backward compatibility with integer value
    if isinstance(interpolation, int):
        warnings.warn(
            "Argument interpolation should be of type InterpolationMode instead of int. "
            "Please, use InterpolationMode enum."
        )
        interpolation = _interpolation_modes_from_int(interpolation)

    if not isinstance(displacement, torch.Tensor):
        raise TypeError("Argument displacement should be a Tensor")

    t_img = img
    if not isinstance(img, torch.Tensor):
        if not F_pil._is_pil_image(img):
            raise TypeError(f"img should be PIL Image or Tensor. Got {type(img)}")
        t_img = pil_to_tensor(img)

    shape = t_img.shape
    shape = (1,) + shape[-2:] + (2,)
    if shape != displacement.shape:
        raise ValueError(f"Argument displacement shape should be {shape}, but given {displacement.shape}")

    # TODO: if the image shape is [N1, N2, ..., C, H, W] and displacement is [1, H, W, 2],
    # we need to reshape the input image so that grid_sample takes the internal 4D code path.
    output = F_t.elastic_transform(
        t_img,
        displacement,
        interpolation=interpolation.value,
        fill=fill,
    )

    if not isinstance(img, torch.Tensor):
        output = to_pil_image(output, mode=img.mode)
    return output
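
# Usage sketch (illustrative, not part of the module): a displacement field of zeros
# is the identity transform; random offsets warp the image. The field shape must be
# [1, H, W, 2].
#   img = torch.rand(3, 64, 64)
#   displacement = torch.zeros(1, 64, 64, 2)     # zero offsets -> identity grid
#   out = elastic_transform(img, displacement)   # approximately equal to img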


# TODO in v0.17: remove this helper and change default of antialias to True everywhere
def _check_antialias(
    img: Tensor, antialias: Optional[Union[str, bool]], interpolation: InterpolationMode
) -> Optional[bool]:
    if isinstance(antialias, str):  # it should be "warn", but we don't bother checking against that
        if isinstance(img, Tensor) and (
            interpolation == InterpolationMode.BILINEAR or interpolation == InterpolationMode.BICUBIC
        ):
            warnings.warn(
                "The default value of the antialias parameter of all the resizing transforms "
                "(Resize(), RandomResizedCrop(), etc.) "
                "will change from None to True in v0.17, "
                "in order to be consistent across the PIL and Tensor backends. "
                "To suppress this warning, directly pass "
                "antialias=True (recommended, future default), antialias=None (current default, "
                "which means False for Tensors and True for PIL), "
                "or antialias=False (only works on Tensors - PIL will still use antialiasing). "
                "This also applies if you are using the inference transforms from the models weights: "
                "update the call to weights.transforms(antialias=True)."
            )
        antialias = None

    return antialias
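
# Behaviour sketch (illustrative, not part of the module): with the "warn" sentinel
# used as the current default, the helper only emits the deprecation warning for tensor
# inputs with bilinear/bicubic interpolation, then falls back to antialias=None so the
# old behaviour (no antialiasing on tensors) is preserved for now.
#   img = torch.rand(3, 64, 64)
#   _check_antialias(img, "warn", InterpolationMode.BILINEAR)   # warns, returns None
#   _check_antialias(img, True, InterpolationMode.BILINEAR)     # returns True unchanged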