import torch
import numbers
from torch.nn.parameter import Parameter
from .module import Module
from ._functions import CrossMapLRN2d as _cross_map_lrn2d
from .. import functional as F
from .. import init

from torch import Tensor, Size
from typing import Union, List, Tuple

__all__ = ['LocalResponseNorm', 'CrossMapLRN2d', 'LayerNorm', 'GroupNorm']


class LocalResponseNorm(Module):
    r"""Applies local response normalization over an input signal composed
    of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: number of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)
    """

    __constants__ = ['size', 'alpha', 'beta', 'k']
    size: int
    alpha: float
    beta: float
    k: float

    def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1.) -> None:
        super().__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input: Tensor) -> Tensor:
        return F.local_response_norm(input, self.size, self.alpha, self.beta,
                                     self.k)

    def extra_repr(self) -> str:
        return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)
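

# Editorial sketch (not part of the upstream module): a direct check that
# F.local_response_norm matches the docstring formula for an odd window size,
# where the implementation's zero-padded channel window agrees with the
# clipped sum in the formula. The helper name `_demo_lrn_manual` is
# hypothetical, added here for illustration only.
def _demo_lrn_manual(size: int = 3) -> None:
    x = torch.randn(4, 5, 8, 8)
    lrn = LocalResponseNorm(size)
    n_ch = x.size(1)
    sq_sum = torch.empty_like(x)
    for c in range(n_ch):
        # sum of squares over the neighbouring channels, clipped at the edges
        lo, hi = max(0, c - size // 2), min(n_ch - 1, c + size // 2)
        sq_sum[:, c] = x[:, lo:hi + 1].pow(2).sum(dim=1)
    manual = x * (lrn.k + lrn.alpha / size * sq_sum).pow(-lrn.beta)
    assert torch.allclose(lrn(x), manual, atol=1e-6)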


class CrossMapLRN2d(Module):
    size: int
    alpha: float
    beta: float
    k: float

    def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1) -> None:
        super().__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input: Tensor) -> Tensor:
        return _cross_map_lrn2d.apply(input, self.size, self.alpha, self.beta,
                                      self.k)

    def extra_repr(self) -> str:
        return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)
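

# Editorial note (an assumption, not upstream documentation): unlike
# LocalResponseNorm, which accepts any (N, C, *) input, this module routes
# through the _cross_map_lrn2d autograd Function, which is written for 4D
# (N, C, H, W) input. `_demo_cross_map_lrn2d` is a hypothetical usage sketch
# under that assumption.
def _demo_cross_map_lrn2d() -> None:
    m = CrossMapLRN2d(size=3)
    out = m(torch.randn(2, 6, 8, 8))  # 4D input: (N, C, H, W)
    assert out.shape == (2, 6, 8, 8)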


_shape_t = Union[int, List[int], Size]


class LayerNorm(Module):
    r"""Applies Layer Normalization over a mini-batch of inputs as described in
    the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
    is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
    is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
    the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
    :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
    :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    .. note::
        Unlike Batch Normalization and Instance Normalization, which apply
        scalar scale and bias for each entire channel/plane with the
        :attr:`affine` option, Layer Normalization applies per-element scale and
        bias with :attr:`elementwise_affine`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): input shape from an expected input
            of size

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            If a single integer is used, it is treated as a singleton list, and this module will
            normalize over the last dimension which is expected to be of that specific size.
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: a boolean value that when set to ``True``, this module
            has learnable per-element affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 1.
        bias: the learnable bias of the module of shape
            :math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
            The values are initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %
    """

    __constants__ = ['normalized_shape', 'eps', 'elementwise_affine']
    normalized_shape: Tuple[int, ...]
    eps: float
    elementwise_affine: bool

    def __init__(self, normalized_shape: _shape_t, eps: float = 1e-5, elementwise_affine: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        if isinstance(normalized_shape, numbers.Integral):
            # mypy error: incompatible types in assignment
            normalized_shape = (normalized_shape,)  # type: ignore[assignment]
        self.normalized_shape = tuple(normalized_shape)  # type: ignore[arg-type]
        self.eps = eps
        self.elementwise_affine = elementwise_affine
        if self.elementwise_affine:
            self.weight = Parameter(torch.empty(self.normalized_shape, **factory_kwargs))
            self.bias = Parameter(torch.empty(self.normalized_shape, **factory_kwargs))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        if self.elementwise_affine:
            init.ones_(self.weight)
            init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:
        return F.layer_norm(
            input, self.normalized_shape, self.weight, self.bias, self.eps)

    def extra_repr(self) -> str:
        return '{normalized_shape}, eps={eps}, ' \
            'elementwise_affine={elementwise_affine}'.format(**self.__dict__)
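

# Editorial sketch (not part of the upstream module): F.layer_norm, as used in
# forward() above, matches a manual computation over the normalized dimensions
# using the biased variance estimator (unbiased=False), as the docstring
# states. The helper name `_demo_layer_norm_manual` is hypothetical.
def _demo_layer_norm_manual() -> None:
    x = torch.randn(4, 3, 5)
    ln = LayerNorm([3, 5])  # normalize over the last two dimensions
    mean = x.mean(dim=(-2, -1), keepdim=True)
    var = x.var(dim=(-2, -1), unbiased=False, keepdim=True)
    manual = (x - mean) / torch.sqrt(var + ln.eps) * ln.weight + ln.bias
    assert torch.allclose(ln(x), manual, atol=1e-6)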


class GroupNorm(Module):
    r"""Applies Group Normalization over a mini-batch of inputs as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent to InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent to LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    """

    __constants__ = ['num_groups', 'num_channels', 'eps', 'affine']
    num_groups: int
    num_channels: int
    eps: float
    affine: bool

    def __init__(self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        if num_channels % num_groups != 0:
            raise ValueError('num_channels must be divisible by num_groups')

        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if self.affine:
            self.weight = Parameter(torch.empty(num_channels, **factory_kwargs))
            self.bias = Parameter(torch.empty(num_channels, **factory_kwargs))
        else:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        if self.affine:
            init.ones_(self.weight)
            init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:
        return F.group_norm(
            input, self.num_groups, self.weight, self.bias, self.eps)

    def extra_repr(self) -> str:
        return '{num_groups}, {num_channels}, eps={eps}, ' \
            'affine={affine}'.format(**self.__dict__)
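

# Editorial sketch (not part of the upstream module): F.group_norm, as used in
# forward() above, matches normalizing each group of channels (together with
# its spatial positions) with the biased variance estimator, then applying the
# per-channel affine parameters. `_demo_group_norm_manual` is hypothetical.
def _demo_group_norm_manual() -> None:
    N, C, H, W, G = 2, 6, 4, 4, 3
    x = torch.randn(N, C, H, W)
    gn = GroupNorm(G, C)
    xg = x.view(N, G, -1)  # flatten each group of C // G channels
    mean = xg.mean(dim=-1, keepdim=True)
    var = xg.var(dim=-1, unbiased=False, keepdim=True)
    manual = ((xg - mean) / torch.sqrt(var + gn.eps)).view(N, C, H, W)
    manual = manual * gn.weight.view(1, C, 1, 1) + gn.bias.view(1, C, 1, 1)
    assert torch.allclose(gn(x), manual, atol=1e-5)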


# TODO: ContrastiveNorm2d
# TODO: DivisiveNorm2d
# TODO: SubtractiveNorm2d