import copy
import math
from dataclasses import dataclass
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

import torch
from torch import nn, Tensor
from torchvision.ops import StochasticDepth

from ..ops.misc import Conv2dNormActivation, SqueezeExcitation
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import register_model, Weights, WeightsEnum
from ._meta import _IMAGENET_CATEGORIES
from ._utils import _make_divisible, _ovewrite_named_param, handle_legacy_interface


__all__ = [
    "EfficientNet",
    "EfficientNet_B0_Weights",
    "EfficientNet_B1_Weights",
    "EfficientNet_B2_Weights",
    "EfficientNet_B3_Weights",
    "EfficientNet_B4_Weights",
    "EfficientNet_B5_Weights",
    "EfficientNet_B6_Weights",
    "EfficientNet_B7_Weights",
    "EfficientNet_V2_S_Weights",
    "EfficientNet_V2_M_Weights",
    "EfficientNet_V2_L_Weights",
    "efficientnet_b0",
    "efficientnet_b1",
    "efficientnet_b2",
    "efficientnet_b3",
    "efficientnet_b4",
    "efficientnet_b5",
    "efficientnet_b6",
    "efficientnet_b7",
    "efficientnet_v2_s",
    "efficientnet_v2_m",
    "efficientnet_v2_l",
]

@dataclass
class _MBConvConfig:
    expand_ratio: float
    kernel: int
    stride: int
    input_channels: int
    out_channels: int
    num_layers: int
    block: Callable[..., nn.Module]

    @staticmethod
    def adjust_channels(channels: int, width_mult: float, min_value: Optional[int] = None) -> int:
        return _make_divisible(channels * width_mult, 8, min_value)


class MBConvConfig(_MBConvConfig):
    # Stores the information listed in Table 1 of the EfficientNet paper & Table 4 of the EfficientNetV2 paper
    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        input_channels: int,
        out_channels: int,
        num_layers: int,
        width_mult: float = 1.0,
        depth_mult: float = 1.0,
        block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        input_channels = self.adjust_channels(input_channels, width_mult)
        out_channels = self.adjust_channels(out_channels, width_mult)
        num_layers = self.adjust_depth(num_layers, depth_mult)
        if block is None:
            block = MBConv
        super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block)

    @staticmethod
    def adjust_depth(num_layers: int, depth_mult: float):
        return int(math.ceil(num_layers * depth_mult))


class FusedMBConvConfig(_MBConvConfig):
    # Stores the information listed in Table 4 of the EfficientNetV2 paper
    def __init__(
        self,
        expand_ratio: float,
        kernel: int,
        stride: int,
        input_channels: int,
        out_channels: int,
        num_layers: int,
        block: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        if block is None:
            block = FusedMBConv
        super().__init__(expand_ratio, kernel, stride, input_channels, out_channels, num_layers, block)
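
# Illustrative sketch (not part of the library API): how the compound-scaling knobs of
# MBConvConfig rescale a stage. Channel counts are rounded to a multiple of 8 via
# _make_divisible and depth is rounded up. E.g. with the efficientnet_b2 multipliers
# used later in this file (width_mult=1.1, depth_mult=1.2):
#
#     cnf = MBConvConfig(6, 3, 2, 16, 24, 2, width_mult=1.1, depth_mult=1.2)
#     cnf.input_channels  # 16 * 1.1 = 17.6 -> rounded back to 16
#     cnf.out_channels    # 24 * 1.1 = 26.4 -> rounded back to 24
#     cnf.num_layers      # ceil(2 * 1.2) = 3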

class MBConv(nn.Module):
    def __init__(
        self,
        cnf: MBConvConfig,
        stochastic_depth_prob: float,
        norm_layer: Callable[..., nn.Module],
        se_layer: Callable[..., nn.Module] = SqueezeExcitation,
    ) -> None:
        super().__init__()

        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels

        layers: List[nn.Module] = []
        activation_layer = nn.SiLU

        # expand
        expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
        if expanded_channels != cnf.input_channels:
            layers.append(
                Conv2dNormActivation(
                    cnf.input_channels,
                    expanded_channels,
                    kernel_size=1,
                    norm_layer=norm_layer,
                    activation_layer=activation_layer,
                )
            )

        # depthwise
        layers.append(
            Conv2dNormActivation(
                expanded_channels,
                expanded_channels,
                kernel_size=cnf.kernel,
                stride=cnf.stride,
                groups=expanded_channels,
                norm_layer=norm_layer,
                activation_layer=activation_layer,
            )
        )

        # squeeze and excitation
        squeeze_channels = max(1, cnf.input_channels // 4)
        layers.append(se_layer(expanded_channels, squeeze_channels, activation=partial(nn.SiLU, inplace=True)))

        # project
        layers.append(
            Conv2dNormActivation(
                expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None
            )
        )

        self.block = nn.Sequential(*layers)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
        self.out_channels = cnf.out_channels

    def forward(self, input: Tensor) -> Tensor:
        result = self.block(input)
        if self.use_res_connect:
            result = self.stochastic_depth(result)
            result += input
        return result
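
# Minimal usage sketch for a single MBConv block (illustrative only, assuming this
# module's imports). With stride 1 and matching channel counts the residual connection
# is active, so the output shape equals the input shape:
#
#     cnf = MBConvConfig(expand_ratio=6, kernel=3, stride=1, input_channels=32, out_channels=32, num_layers=1)
#     block = MBConv(cnf, stochastic_depth_prob=0.1, norm_layer=nn.BatchNorm2d)
#     out = block(torch.rand(1, 32, 56, 56))  # -> torch.Size([1, 32, 56, 56])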

class FusedMBConv(nn.Module):
    def __init__(
        self,
        cnf: FusedMBConvConfig,
        stochastic_depth_prob: float,
        norm_layer: Callable[..., nn.Module],
    ) -> None:
        super().__init__()

        if not (1 <= cnf.stride <= 2):
            raise ValueError("illegal stride value")

        self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels

        layers: List[nn.Module] = []
        activation_layer = nn.SiLU

        expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
        if expanded_channels != cnf.input_channels:
            # fused expand
            layers.append(
                Conv2dNormActivation(
                    cnf.input_channels,
                    expanded_channels,
                    kernel_size=cnf.kernel,
                    stride=cnf.stride,
                    norm_layer=norm_layer,
                    activation_layer=activation_layer,
                )
            )

            # project
            layers.append(
                Conv2dNormActivation(
                    expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, activation_layer=None
                )
            )
        else:
            layers.append(
                Conv2dNormActivation(
                    cnf.input_channels,
                    cnf.out_channels,
                    kernel_size=cnf.kernel,
                    stride=cnf.stride,
                    norm_layer=norm_layer,
                    activation_layer=activation_layer,
                )
            )

        self.block = nn.Sequential(*layers)
        self.stochastic_depth = StochasticDepth(stochastic_depth_prob, "row")
        self.out_channels = cnf.out_channels

    def forward(self, input: Tensor) -> Tensor:
        result = self.block(input)
        if self.use_res_connect:
            result = self.stochastic_depth(result)
            result += input
        return result
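
# FusedMBConv replaces MBConv's 1x1 expansion + depthwise convolution with a single
# regular convolution and drops squeeze-and-excitation; it is used in the early stages
# of EfficientNetV2. An illustrative sketch (not part of the library API):
#
#     cnf = FusedMBConvConfig(expand_ratio=4, kernel=3, stride=2, input_channels=24, out_channels=48, num_layers=1)
#     block = FusedMBConv(cnf, stochastic_depth_prob=0.0, norm_layer=nn.BatchNorm2d)
#     out = block(torch.rand(1, 24, 64, 64))  # -> torch.Size([1, 48, 32, 32])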

class EfficientNet(nn.Module):
    def __init__(
        self,
        inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
        dropout: float,
        stochastic_depth_prob: float = 0.2,
        num_classes: int = 1000,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        last_channel: Optional[int] = None,
    ) -> None:
        """
        EfficientNet V1 and V2 main class

        Args:
            inverted_residual_setting (Sequence[Union[MBConvConfig, FusedMBConvConfig]]): Network structure
            dropout (float): The dropout probability
            stochastic_depth_prob (float): The stochastic depth probability
            num_classes (int): Number of classes
            norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use
            last_channel (Optional[int]): The number of channels on the penultimate layer
        """
        super().__init__()
        _log_api_usage_once(self)

        if not inverted_residual_setting:
            raise ValueError("The inverted_residual_setting should not be empty")
        elif not (
            isinstance(inverted_residual_setting, Sequence)
            and all([isinstance(s, _MBConvConfig) for s in inverted_residual_setting])
        ):
            raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")

        if norm_layer is None:
            norm_layer = nn.BatchNorm2d

        layers: List[nn.Module] = []

        # building first layer
        firstconv_output_channels = inverted_residual_setting[0].input_channels
        layers.append(
            Conv2dNormActivation(
                3, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer, activation_layer=nn.SiLU
            )
        )

        # building inverted residual blocks
        total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
        stage_block_id = 0
        for cnf in inverted_residual_setting:
            stage: List[nn.Module] = []
            for _ in range(cnf.num_layers):
                # copy to avoid modifications. shallow copy is enough
                block_cnf = copy.copy(cnf)

                # overwrite info if not the first conv in the stage
                if stage:
                    block_cnf.input_channels = block_cnf.out_channels
                    block_cnf.stride = 1

                # adjust stochastic depth probability based on the depth of the stage block
                sd_prob = stochastic_depth_prob * float(stage_block_id) / total_stage_blocks

                stage.append(block_cnf.block(block_cnf, sd_prob, norm_layer))
                stage_block_id += 1

            layers.append(nn.Sequential(*stage))

        # building last several layers
        lastconv_input_channels = inverted_residual_setting[-1].out_channels
        lastconv_output_channels = last_channel if last_channel is not None else 4 * lastconv_input_channels
        layers.append(
            Conv2dNormActivation(
                lastconv_input_channels,
                lastconv_output_channels,
                kernel_size=1,
                norm_layer=norm_layer,
                activation_layer=nn.SiLU,
            )
        )

        self.features = nn.Sequential(*layers)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(lastconv_output_channels, num_classes),
        )

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                init_range = 1.0 / math.sqrt(m.out_features)
                nn.init.uniform_(m.weight, -init_range, init_range)
                nn.init.zeros_(m.bias)

    def _forward_impl(self, x: Tensor) -> Tensor:
        x = self.features(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.classifier(x)

        return x

    def forward(self, x: Tensor) -> Tensor:
        return self._forward_impl(x)
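
# Sketch: assembling an EfficientNet directly from block configs (the builder functions
# below wrap exactly this pattern). Illustrative only; it relies on _efficientnet_conf,
# which is defined later in this file:
#
#     inverted_residual_setting, last_channel = _efficientnet_conf(
#         "efficientnet_b0", width_mult=1.0, depth_mult=1.0
#     )
#     net = EfficientNet(inverted_residual_setting, dropout=0.2, last_channel=last_channel)
#     logits = net(torch.rand(1, 3, 224, 224))  # -> torch.Size([1, 1000])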

def _efficientnet(
    inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]],
    dropout: float,
    last_channel: Optional[int],
    weights: Optional[WeightsEnum],
    progress: bool,
    **kwargs: Any,
) -> EfficientNet:
    if weights is not None:
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))

    model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

    return model

def _efficientnet_conf(
    arch: str,
    **kwargs: Any,
) -> Tuple[Sequence[Union[MBConvConfig, FusedMBConvConfig]], Optional[int]]:
    inverted_residual_setting: Sequence[Union[MBConvConfig, FusedMBConvConfig]]
    if arch.startswith("efficientnet_b"):
        bneck_conf = partial(MBConvConfig, width_mult=kwargs.pop("width_mult"), depth_mult=kwargs.pop("depth_mult"))
        # each row: expand_ratio, kernel, stride, input_channels, out_channels, num_layers
        inverted_residual_setting = [
            bneck_conf(1, 3, 1, 32, 16, 1),
            bneck_conf(6, 3, 2, 16, 24, 2),
            bneck_conf(6, 5, 2, 24, 40, 2),
            bneck_conf(6, 3, 2, 40, 80, 3),
            bneck_conf(6, 5, 1, 80, 112, 3),
            bneck_conf(6, 5, 2, 112, 192, 4),
            bneck_conf(6, 3, 1, 192, 320, 1),
        ]
        last_channel = None
    elif arch.startswith("efficientnet_v2_s"):
        inverted_residual_setting = [
            FusedMBConvConfig(1, 3, 1, 24, 24, 2),
            FusedMBConvConfig(4, 3, 2, 24, 48, 4),
            FusedMBConvConfig(4, 3, 2, 48, 64, 4),
            MBConvConfig(4, 3, 2, 64, 128, 6),
            MBConvConfig(6, 3, 1, 128, 160, 9),
            MBConvConfig(6, 3, 2, 160, 256, 15),
        ]
        last_channel = 1280
    elif arch.startswith("efficientnet_v2_m"):
        inverted_residual_setting = [
            FusedMBConvConfig(1, 3, 1, 24, 24, 3),
            FusedMBConvConfig(4, 3, 2, 24, 48, 5),
            FusedMBConvConfig(4, 3, 2, 48, 80, 5),
            MBConvConfig(4, 3, 2, 80, 160, 7),
            MBConvConfig(6, 3, 1, 160, 176, 14),
            MBConvConfig(6, 3, 2, 176, 304, 18),
            MBConvConfig(6, 3, 1, 304, 512, 5),
        ]
        last_channel = 1280
    elif arch.startswith("efficientnet_v2_l"):
        inverted_residual_setting = [
            FusedMBConvConfig(1, 3, 1, 32, 32, 4),
            FusedMBConvConfig(4, 3, 2, 32, 64, 7),
            FusedMBConvConfig(4, 3, 2, 64, 96, 7),
            MBConvConfig(4, 3, 2, 96, 192, 10),
            MBConvConfig(6, 3, 1, 192, 224, 19),
            MBConvConfig(6, 3, 2, 224, 384, 25),
            MBConvConfig(6, 3, 1, 384, 640, 7),
        ]
        last_channel = 1280
    else:
        raise ValueError(f"Unsupported model type {arch}")

    return inverted_residual_setting, last_channel
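
# Worked example of the depth scaling above (illustrative): for efficientnet_b1
# (width_mult=1.0, depth_mult=1.1) the per-stage layer counts [1, 2, 2, 3, 3, 4, 1]
# become [ceil(1 * 1.1), ceil(2 * 1.1), ...] = [2, 3, 3, 4, 4, 5, 2], while the channel
# counts are unchanged because width_mult is 1.0.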

_COMMON_META: Dict[str, Any] = {
    "categories": _IMAGENET_CATEGORIES,
}


_COMMON_META_V1 = {
    **_COMMON_META,
    "min_size": (1, 1),
    "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v1",
}


_COMMON_META_V2 = {
    **_COMMON_META,
    "min_size": (33, 33),
    "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#efficientnet-v2",
}

class EfficientNet_B0_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/rwightman/pytorch-image-models/
        url="https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth",
        transforms=partial(
            ImageClassification, crop_size=224, resize_size=256, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 5288548,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 77.692,
                    "acc@5": 93.532,
                }
            },
            "_ops": 0.386,
            "_file_size": 20.451,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_B1_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/rwightman/pytorch-image-models/
        url="https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth",
        transforms=partial(
            ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 7794184,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 78.642,
                    "acc@5": 94.186,
                }
            },
            "_ops": 0.687,
            "_file_size": 30.134,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    IMAGENET1K_V2 = Weights(
        url="https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth",
        transforms=partial(
            ImageClassification, crop_size=240, resize_size=255, interpolation=InterpolationMode.BILINEAR
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 7794184,
            "recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-lr-wd-crop-tuning",
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 79.838,
                    "acc@5": 94.934,
                }
            },
            "_ops": 0.687,
            "_file_size": 30.136,
            "_docs": """
                These weights improve upon the results of the original paper by using a modified version of TorchVision's
                `new training recipe
                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V2


class EfficientNet_B2_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/rwightman/pytorch-image-models/
        url="https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth",
        transforms=partial(
            ImageClassification, crop_size=288, resize_size=288, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 9109994,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 80.608,
                    "acc@5": 95.310,
                }
            },
            "_ops": 1.088,
            "_file_size": 35.174,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_B3_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/rwightman/pytorch-image-models/
        url="https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth",
        transforms=partial(
            ImageClassification, crop_size=300, resize_size=320, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 12233232,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 82.008,
                    "acc@5": 96.054,
                }
            },
            "_ops": 1.827,
            "_file_size": 47.184,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_B4_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/rwightman/pytorch-image-models/
        url="https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth",
        transforms=partial(
            ImageClassification, crop_size=380, resize_size=384, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 19341616,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 83.384,
                    "acc@5": 96.594,
                }
            },
            "_ops": 4.394,
            "_file_size": 74.489,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_B5_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
        url="https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
        transforms=partial(
            ImageClassification, crop_size=456, resize_size=456, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 30389784,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 83.444,
                    "acc@5": 96.628,
                }
            },
            "_ops": 10.266,
            "_file_size": 116.864,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_B6_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
        url="https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
        transforms=partial(
            ImageClassification, crop_size=528, resize_size=528, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 43040704,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 84.008,
                    "acc@5": 96.916,
                }
            },
            "_ops": 19.068,
            "_file_size": 165.362,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_B7_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        # Weights ported from https://github.com/lukemelas/EfficientNet-PyTorch/
        url="https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
        transforms=partial(
            ImageClassification, crop_size=600, resize_size=600, interpolation=InterpolationMode.BICUBIC
        ),
        meta={
            **_COMMON_META_V1,
            "num_params": 66347960,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 84.122,
                    "acc@5": 96.908,
                }
            },
            "_ops": 37.746,
            "_file_size": 254.675,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_V2_S_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/efficientnet_v2_s-dd5fe13b.pth",
        transforms=partial(
            ImageClassification,
            crop_size=384,
            resize_size=384,
            interpolation=InterpolationMode.BILINEAR,
        ),
        meta={
            **_COMMON_META_V2,
            "num_params": 21458488,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 84.228,
                    "acc@5": 96.878,
                }
            },
            "_ops": 8.366,
            "_file_size": 82.704,
            "_docs": """
                These weights improve upon the results of the original paper by using a modified version of TorchVision's
                `new training recipe
                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_V2_M_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth",
        transforms=partial(
            ImageClassification,
            crop_size=480,
            resize_size=480,
            interpolation=InterpolationMode.BILINEAR,
        ),
        meta={
            **_COMMON_META_V2,
            "num_params": 54139356,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 85.112,
                    "acc@5": 97.156,
                }
            },
            "_ops": 24.582,
            "_file_size": 208.01,
            "_docs": """
                These weights improve upon the results of the original paper by using a modified version of TorchVision's
                `new training recipe
                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V1


class EfficientNet_V2_L_Weights(WeightsEnum):
    # Weights ported from https://github.com/google/automl/tree/master/efficientnetv2
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/efficientnet_v2_l-59c71312.pth",
        transforms=partial(
            ImageClassification,
            crop_size=480,
            resize_size=480,
            interpolation=InterpolationMode.BICUBIC,
            mean=(0.5, 0.5, 0.5),
            std=(0.5, 0.5, 0.5),
        ),
        meta={
            **_COMMON_META_V2,
            "num_params": 118515272,
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 85.808,
                    "acc@5": 97.788,
                }
            },
            "_ops": 56.08,
            "_file_size": 454.573,
            "_docs": """These weights are ported from the original paper.""",
        },
    )
    DEFAULT = IMAGENET1K_V1
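
# The weight enums above carry their preprocessing presets and metadata. A sketch of
# inspecting them (illustrative, not executed here):
#
#     w = EfficientNet_B0_Weights.IMAGENET1K_V1
#     w.meta["_metrics"]["ImageNet-1K"]["acc@1"]  # 77.692
#     preprocess = w.transforms()  # resize to 256, center-crop to 224, bicubic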

@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B0_Weights.IMAGENET1K_V1))
def efficientnet_b0(
    *, weights: Optional[EfficientNet_B0_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B0 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B0_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B0_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B0_Weights
        :members:
    """
    weights = EfficientNet_B0_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b0", width_mult=1.0, depth_mult=1.0)
    return _efficientnet(
        inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs
    )
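
# End-to-end usage sketch (illustrative; `img` is assumed to be a PIL image or a CHW
# tensor accepted by the preset transforms):
#
#     weights = EfficientNet_B0_Weights.DEFAULT
#     model = efficientnet_b0(weights=weights).eval()
#     batch = weights.transforms()(img).unsqueeze(0)
#     with torch.no_grad():
#         class_id = model(batch).squeeze(0).argmax().item()
#     print(weights.meta["categories"][class_id])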

@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B1_Weights.IMAGENET1K_V1))
def efficientnet_b1(
    *, weights: Optional[EfficientNet_B1_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B1 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B1_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B1_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B1_Weights
        :members:
    """
    weights = EfficientNet_B1_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b1", width_mult=1.0, depth_mult=1.1)
    return _efficientnet(
        inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B2_Weights.IMAGENET1K_V1))
def efficientnet_b2(
    *, weights: Optional[EfficientNet_B2_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B2 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B2_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B2_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B2_Weights
        :members:
    """
    weights = EfficientNet_B2_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b2", width_mult=1.1, depth_mult=1.2)
    return _efficientnet(
        inverted_residual_setting, kwargs.pop("dropout", 0.3), last_channel, weights, progress, **kwargs
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B3_Weights.IMAGENET1K_V1))
def efficientnet_b3(
    *, weights: Optional[EfficientNet_B3_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B3 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B3_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B3_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B3_Weights
        :members:
    """
    weights = EfficientNet_B3_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b3", width_mult=1.2, depth_mult=1.4)
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.3),
        last_channel,
        weights,
        progress,
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B4_Weights.IMAGENET1K_V1))
def efficientnet_b4(
    *, weights: Optional[EfficientNet_B4_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B4 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B4_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B4_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B4_Weights
        :members:
    """
    weights = EfficientNet_B4_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b4", width_mult=1.4, depth_mult=1.8)
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.4),
        last_channel,
        weights,
        progress,
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B5_Weights.IMAGENET1K_V1))
def efficientnet_b5(
    *, weights: Optional[EfficientNet_B5_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B5 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B5_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B5_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B5_Weights
        :members:
    """
    weights = EfficientNet_B5_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b5", width_mult=1.6, depth_mult=2.2)
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.4),
        last_channel,
        weights,
        progress,
        norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B6_Weights.IMAGENET1K_V1))
def efficientnet_b6(
    *, weights: Optional[EfficientNet_B6_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B6 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B6_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B6_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B6_Weights
        :members:
    """
    weights = EfficientNet_B6_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b6", width_mult=1.8, depth_mult=2.6)
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.5),
        last_channel,
        weights,
        progress,
        norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_B7_Weights.IMAGENET1K_V1))
def efficientnet_b7(
    *, weights: Optional[EfficientNet_B7_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """EfficientNet B7 model architecture from the `EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks <https://arxiv.org/abs/1905.11946>`_ paper.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_B7_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_B7_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_B7_Weights
        :members:
    """
    weights = EfficientNet_B7_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b7", width_mult=2.0, depth_mult=3.1)
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.5),
        last_channel,
        weights,
        progress,
        norm_layer=partial(nn.BatchNorm2d, eps=0.001, momentum=0.01),
        **kwargs,
    )

@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_S_Weights.IMAGENET1K_V1))
def efficientnet_v2_s(
    *, weights: Optional[EfficientNet_V2_S_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """
    Constructs an EfficientNetV2-S architecture from
    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_V2_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_V2_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_V2_S_Weights
        :members:
    """
    weights = EfficientNet_V2_S_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.2),
        last_channel,
        weights,
        progress,
        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
        **kwargs,
    )
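
# The V2 builders differ from the V1 builders above only in their block configs,
# dropout, and BatchNorm eps; usage is identical. Illustrative sketch:
#
#     model = efficientnet_v2_s(weights=EfficientNet_V2_S_Weights.IMAGENET1K_V1).eval()
#     with torch.no_grad():
#         out = model(torch.rand(1, 3, 384, 384))  # 384 is the crop_size of the V2-S preset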

@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_M_Weights.IMAGENET1K_V1))
def efficientnet_v2_m(
    *, weights: Optional[EfficientNet_V2_M_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """
    Constructs an EfficientNetV2-M architecture from
    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_V2_M_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_V2_M_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_V2_M_Weights
        :members:
    """
    weights = EfficientNet_V2_M_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m")
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.3),
        last_channel,
        weights,
        progress,
        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", EfficientNet_V2_L_Weights.IMAGENET1K_V1))
def efficientnet_v2_l(
    *, weights: Optional[EfficientNet_V2_L_Weights] = None, progress: bool = True, **kwargs: Any
) -> EfficientNet:
    """
    Constructs an EfficientNetV2-L architecture from
    `EfficientNetV2: Smaller Models and Faster Training <https://arxiv.org/abs/2104.00298>`_.

    Args:
        weights (:class:`~torchvision.models.EfficientNet_V2_L_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.EfficientNet_V2_L_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.efficientnet.EfficientNet``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/efficientnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.EfficientNet_V2_L_Weights
        :members:
    """
    weights = EfficientNet_V2_L_Weights.verify(weights)

    inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l")
    return _efficientnet(
        inverted_residual_setting,
        kwargs.pop("dropout", 0.4),
        last_channel,
        weights,
        progress,
        norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
        **kwargs,
    )