import contextlib
import functools
import operator
import os
import pkgutil
import platform
import sys
import warnings
from collections import OrderedDict
from tempfile import TemporaryDirectory
from typing import Any

import pytest
import torch
import torch.fx
import torch.nn as nn
from _utils_internal import get_relative_path
from common_utils import cpu_and_cuda, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed
from PIL import Image
from torchvision import models, transforms
from torchvision.models import get_model_builder, list_models

ACCEPT = os.getenv("EXPECTTEST_ACCEPT", "0") == "1"
SKIP_BIG_MODEL = os.getenv("SKIP_BIG_MODEL", "1") == "1"


@contextlib.contextmanager
def disable_tf32():
    previous = torch.backends.cudnn.allow_tf32
    torch.backends.cudnn.allow_tf32 = False
    try:
        yield
    finally:
        torch.backends.cudnn.allow_tf32 = previous

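
# Illustrative sketch (`_demo_disable_tf32` is a hypothetical helper, not exercised by
# the suite): because `disable_tf32` is built with `contextlib.contextmanager`, it works
# both as a plain context manager and, as `test_classification_model` does below, as a
# `@disable_tf32()` decorator.
def _demo_disable_tf32(model, x):
    # TF32 cuDNN kernels are disabled only for the duration of the block.
    with disable_tf32():
        return model(x)
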

def list_model_fns(module):
    return [get_model_builder(name) for name in list_models(module)]


def _get_image(input_shape, real_image, device, dtype=None):
    """This routine loads a real or random image based on the `real_image` argument.

    Currently, the real image is used for the following models:
    - `retinanet_resnet50_fpn`,
    - `retinanet_resnet50_fpn_v2`,
    - `keypointrcnn_resnet50_fpn`,
    - `fasterrcnn_resnet50_fpn`,
    - `fasterrcnn_resnet50_fpn_v2`,
    - `fcos_resnet50_fpn`,
    - `maskrcnn_resnet50_fpn`,
    - `maskrcnn_resnet50_fpn_v2`,
    in `test_classification_model` and `test_detection_model`.

    To do so, a keyword argument `real_image` was added to the models listed above in `_model_params`.
    """
    if real_image:
        # TODO: Maybe unify file discovery logic with test_image.py
        GRACE_HOPPER = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "assets", "encode_jpeg", "grace_hopper_517x606.jpg"
        )
        img = Image.open(GRACE_HOPPER)

        original_width, original_height = img.size

        # make the image square
        img = img.crop((0, 0, original_width, original_width))
        img = img.resize(input_shape[1:3])

        convert_tensor = transforms.ToTensor()
        image = convert_tensor(img)
        assert tuple(image.size()) == input_shape
        return image.to(device=device, dtype=dtype)

    # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
    return torch.rand(input_shape).to(device=device, dtype=dtype)


@pytest.fixture
def disable_weight_loading(mocker):
    """When testing models, the two slowest operations are the downloading of the weights to a file and loading them
    into the model. Unless you want to test against specific weights, these steps can be disabled without any
    drawbacks.

    Including this fixture in the signature of your test, i.e. `test_foo(disable_weight_loading)`, will recurse
    through all models in `torchvision.models` and will patch all occurrences of the function
    `load_state_dict_from_url` as well as the method `load_state_dict` on all subclasses of `nn.Module` to be
    no-ops.

    .. warning::

        Loaded models are still executable as normal, but will always have random weights. Make sure not to use this
        fixture if you want to compare the model output against reference values.
    """
    starting_point = models
    function_name = "load_state_dict_from_url"
    method_name = "load_state_dict"

    module_names = {info.name for info in pkgutil.walk_packages(starting_point.__path__, f"{starting_point.__name__}.")}
    targets = {f"torchvision._internally_replaced_utils.{function_name}", f"torch.nn.Module.{method_name}"}
    for name in module_names:
        module = sys.modules.get(name)
        if not module:
            continue

        if function_name in module.__dict__:
            targets.add(f"{module.__name__}.{function_name}")

        targets.update(
            {
                f"{module.__name__}.{obj.__name__}.{method_name}"
                for obj in module.__dict__.values()
                if isinstance(obj, type) and issubclass(obj, nn.Module) and method_name in obj.__dict__
            }
        )

    for target in targets:
        # See https://github.com/pytorch/vision/pull/4867#discussion_r743677802 for details
        with contextlib.suppress(AttributeError):
            mocker.patch(target)

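
# Usage sketch for the fixture above (hypothetical test mirroring the docstring, not
# part of this suite): naming the fixture in the signature is enough; its value is
# never read.
#
#     def test_foo(disable_weight_loading):
#         # builds quickly: no download happens and load_state_dict is a no-op
#         model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
#         assert isinstance(model, nn.Module)
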

def _get_expected_file(name=None):
    # Determine expected file based on environment
    expected_file_base = get_relative_path(os.path.realpath(__file__), "expect")

    # Note: for legacy reasons, the reference file names all had "ModelTester.test_" in their names
    # We hardcode it here to avoid having to re-generate the reference files
    expected_file = os.path.join(expected_file_base, "ModelTester.test_" + name)
    expected_file += "_expect.pkl"

    if not ACCEPT and not os.path.exists(expected_file):
        raise RuntimeError(
            f"No expect file exists for {os.path.basename(expected_file)} in {expected_file}; "
            "to accept the current output, re-run the failing test after setting the EXPECTTEST_ACCEPT "
            "env variable. For example: EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k alexnet"
        )

    return expected_file


def _assert_expected(output, name, prec=None, atol=None, rtol=None):
    """Test that a python value matches the recorded contents of a file
    based on a "check" name. The value must be picklable with `torch.save`.
    This file is placed in the 'expect' directory in the same directory
    as the test script. You can automatically update the recorded test
    output using an EXPECTTEST_ACCEPT=1 env variable.
    """
    expected_file = _get_expected_file(name)

    if ACCEPT:
        filename = os.path.basename(expected_file)
        print(f"Accepting updated output for {filename}:\n\n{output}")
        torch.save(output, expected_file)
        MAX_PICKLE_SIZE = 50 * 1000  # 50 KB
        binary_size = os.path.getsize(expected_file)
        if binary_size > MAX_PICKLE_SIZE:
            raise RuntimeError(f"The output for {filename} is larger than 50KB - got {binary_size} bytes")
    else:
        expected = torch.load(expected_file)
        rtol = rtol or prec  # keeping prec param for legacy reason, but could be removed ideally
        atol = atol or prec
        torch.testing.assert_close(output, expected, rtol=rtol, atol=atol, check_dtype=False, check_device=False)

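
# Workflow sketch (illustrative): when a model change intentionally alters outputs,
# the reference pickle is regenerated by re-running the failing test with the env
# variable named in the error message above, e.g.
#
#     EXPECTTEST_ACCEPT=1 pytest test/test_models.py -k alexnet
#
# With ACCEPT set, `_assert_expected` saves the new output to the expect file (and
# enforces the 50KB size cap) instead of comparing against it.
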

def _check_jit_scriptable(nn_module, args, unwrapper=None, eager_out=None):
    """Check that a nn.Module's results in TorchScript match eager and that it can be exported"""

    def get_export_import_copy(m):
        """Save and load a TorchScript model"""
        with TemporaryDirectory() as dir:
            path = os.path.join(dir, "script.pt")
            m.save(path)
            imported = torch.jit.load(path)
        return imported

    sm = torch.jit.script(nn_module)
    sm.eval()

    if eager_out is None:
        with torch.no_grad(), freeze_rng_state():
            eager_out = nn_module(*args)

    with torch.no_grad(), freeze_rng_state():
        script_out = sm(*args)
        if unwrapper:
            script_out = unwrapper(script_out)

    torch.testing.assert_close(eager_out, script_out, atol=1e-4, rtol=1e-4)

    m_import = get_export_import_copy(sm)
    with torch.no_grad(), freeze_rng_state():
        imported_script_out = m_import(*args)
        if unwrapper:
            imported_script_out = unwrapper(imported_script_out)

    torch.testing.assert_close(script_out, imported_script_out, atol=3e-4, rtol=3e-4)


def _check_fx_compatible(model, inputs, eager_out=None):
    model_fx = torch.fx.symbolic_trace(model)
    if eager_out is None:
        eager_out = model(inputs)
    with torch.no_grad(), freeze_rng_state():
        fx_out = model_fx(inputs)
    torch.testing.assert_close(eager_out, fx_out)


def _check_input_backprop(model, inputs):
    if isinstance(inputs, list):
        requires_grad = list()
        for inp in inputs:
            requires_grad.append(inp.requires_grad)
            inp.requires_grad_(True)
    else:
        requires_grad = inputs.requires_grad
        inputs.requires_grad_(True)

    out = model(inputs)

    if isinstance(out, dict):
        out["out"].sum().backward()
    else:
        if isinstance(out[0], dict):
            out[0]["scores"].sum().backward()
        else:
            out[0].sum().backward()

    if isinstance(inputs, list):
        for i, inp in enumerate(inputs):
            assert inputs[i].grad is not None
            inp.requires_grad_(requires_grad[i])
    else:
        assert inputs.grad is not None
        inputs.requires_grad_(requires_grad)


# If 'unwrapper' is provided it will be called with the script model outputs
# before they are compared to the eager model outputs. This is useful if the
# model outputs are different between TorchScript / Eager mode
script_model_unwrapper = {
    "googlenet": lambda x: x.logits,
    "inception_v3": lambda x: x.logits,
    "fasterrcnn_resnet50_fpn": lambda x: x[1],
    "fasterrcnn_resnet50_fpn_v2": lambda x: x[1],
    "fasterrcnn_mobilenet_v3_large_fpn": lambda x: x[1],
    "fasterrcnn_mobilenet_v3_large_320_fpn": lambda x: x[1],
    "maskrcnn_resnet50_fpn": lambda x: x[1],
    "maskrcnn_resnet50_fpn_v2": lambda x: x[1],
    "keypointrcnn_resnet50_fpn": lambda x: x[1],
    "retinanet_resnet50_fpn": lambda x: x[1],
    "retinanet_resnet50_fpn_v2": lambda x: x[1],
    "ssd300_vgg16": lambda x: x[1],
    "ssdlite320_mobilenet_v3_large": lambda x: x[1],
    "fcos_resnet50_fpn": lambda x: x[1],
}

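
# Illustrative sketch of how these unwrappers are applied by the checks above: in
# TorchScript, detection models return a `(losses, detections)` tuple, and
# googlenet/inception_v3 return a named tuple carrying auxiliary logits, so the
# scripted output is normalized before comparison with eager mode, roughly:
#
#     script_out = scripted_model(x)
#     unwrapper = script_model_unwrapper.get(model_name)  # e.g. "googlenet"
#     if unwrapper:
#         script_out = unwrapper(script_out)  # e.g. GoogLeNetOutputs -> .logits
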

# The following models exhibit flaky numerics under autocast in _test_*_model harnesses.
# This may be caused by the harness environment (e.g. num classes, input initialization
# via torch.rand), and does not prove autocast is unsuitable when training with real data
# (autocast has been used successfully with real data for some of these models).
# TODO: investigate why autocast numerics are flaky in the harnesses.
#
# For the following models, _test_*_model harnesses skip numerical checks on outputs when
# trying autocast. However, they still try an autocasted forward pass, so they still ensure
# autocast coverage suffices to prevent dtype errors in each model.
autocast_flaky_numerics = (
    "inception_v3",
    "resnet101",
    "resnet152",
    "wide_resnet101_2",
    "deeplabv3_resnet50",
    "deeplabv3_resnet101",
    "deeplabv3_mobilenet_v3_large",
    "fcn_resnet50",
    "fcn_resnet101",
    "lraspp_mobilenet_v3_large",
    "maskrcnn_resnet50_fpn",
    "maskrcnn_resnet50_fpn_v2",
    "keypointrcnn_resnet50_fpn",
)

# The tests for the following quantized models are flaky, possibly due to inconsistent
# rounding errors on different platforms. For this reason the input/output consistency
# tests under test_quantized_classification_model will be skipped for the following models.
quantized_flaky_models = ("inception_v3", "resnet50")

# The tests for the following detection models are flaky.
# We run those tests on float64 to avoid floating point errors.
# FIXME: we shouldn't have to do that :'/
detection_flaky_models = ("keypointrcnn_resnet50_fpn", "maskrcnn_resnet50_fpn", "maskrcnn_resnet50_fpn_v2")


# The following contains configuration parameters for all models which are used by
# the _test_*_model methods.
_model_params = {
    "inception_v3": {"input_shape": (1, 3, 299, 299), "init_weights": True},
    "retinanet_resnet50_fpn": {
        "num_classes": 20,
        "score_thresh": 0.01,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "retinanet_resnet50_fpn_v2": {
        "num_classes": 20,
        "score_thresh": 0.01,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "keypointrcnn_resnet50_fpn": {
        "num_classes": 2,
        "min_size": 224,
        "max_size": 224,
        "box_score_thresh": 0.17,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "fasterrcnn_resnet50_fpn": {
        "num_classes": 20,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "fasterrcnn_resnet50_fpn_v2": {
        "num_classes": 20,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "fcos_resnet50_fpn": {
        "num_classes": 2,
        "score_thresh": 0.05,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "maskrcnn_resnet50_fpn": {
        "num_classes": 10,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "maskrcnn_resnet50_fpn_v2": {
        "num_classes": 10,
        "min_size": 224,
        "max_size": 224,
        "input_shape": (3, 224, 224),
        "real_image": True,
    },
    "fasterrcnn_mobilenet_v3_large_fpn": {
        "box_score_thresh": 0.02076,
    },
    "fasterrcnn_mobilenet_v3_large_320_fpn": {
        "box_score_thresh": 0.02076,
        "rpn_pre_nms_top_n_test": 1000,
        "rpn_post_nms_top_n_test": 1000,
    },
    "vit_h_14": {
        "image_size": 56,
        "input_shape": (1, 3, 56, 56),
    },
    "mvit_v1_b": {
        "input_shape": (1, 3, 16, 224, 224),
    },
    "mvit_v2_s": {
        "input_shape": (1, 3, 16, 224, 224),
    },
    "s3d": {
        "input_shape": (1, 3, 16, 224, 224),
    },
    "googlenet": {"init_weights": True},
}


# speeding up slow models:
slow_models = [
    "convnext_base",
    "convnext_large",
    "resnext101_32x8d",
    "resnext101_64x4d",
    "wide_resnet101_2",
    "efficientnet_b6",
    "efficientnet_b7",
    "efficientnet_v2_m",
    "efficientnet_v2_l",
    "regnet_y_16gf",
    "regnet_y_32gf",
    "regnet_y_128gf",
    "regnet_x_16gf",
    "regnet_x_32gf",
    "swin_t",
    "swin_s",
    "swin_b",
    "swin_v2_t",
    "swin_v2_s",
    "swin_v2_b",
]
for m in slow_models:
    _model_params[m] = {"input_shape": (1, 3, 64, 64)}


# skip big models to reduce memory usage in CI tests. We can exclude combinations of (platform-system, device).
skipped_big_models = {
    "vit_h_14": {("Windows", "cpu"), ("Windows", "cuda")},
    "regnet_y_128gf": {("Windows", "cpu"), ("Windows", "cuda")},
    "mvit_v1_b": {("Windows", "cuda"), ("Linux", "cuda")},
    "mvit_v2_s": {("Windows", "cuda"), ("Linux", "cuda")},
}


def is_skippable(model_name, device):
    if model_name not in skipped_big_models:
        return False

    platform_system = platform.system()
    device_name = str(device).split(":")[0]

    return (platform_system, device_name) in skipped_big_models[model_name]


# The following contains configuration and expected values to be used by tests
# that are model-specific
_model_tests_values = {
    "retinanet_resnet50_fpn": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [36, 46, 65, 78, 88, 89],
    },
    "retinanet_resnet50_fpn_v2": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [44, 74, 131, 170, 200, 203],
    },
    "keypointrcnn_resnet50_fpn": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [48, 58, 77, 90, 100, 101],
    },
    "fasterrcnn_resnet50_fpn": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [30, 40, 59, 72, 82, 83],
    },
    "fasterrcnn_resnet50_fpn_v2": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [50, 80, 137, 176, 206, 209],
    },
    "maskrcnn_resnet50_fpn": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [42, 52, 71, 84, 94, 95],
    },
    "maskrcnn_resnet50_fpn_v2": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [66, 96, 153, 192, 222, 225],
    },
    "fasterrcnn_mobilenet_v3_large_fpn": {
        "max_trainable": 6,
        "n_trn_params_per_layer": [22, 23, 44, 70, 91, 97, 100],
    },
    "fasterrcnn_mobilenet_v3_large_320_fpn": {
        "max_trainable": 6,
        "n_trn_params_per_layer": [22, 23, 44, 70, 91, 97, 100],
    },
    "ssd300_vgg16": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [45, 51, 57, 63, 67, 71],
    },
    "ssdlite320_mobilenet_v3_large": {
        "max_trainable": 6,
        "n_trn_params_per_layer": [96, 99, 138, 200, 239, 257, 266],
    },
    "fcos_resnet50_fpn": {
        "max_trainable": 5,
        "n_trn_params_per_layer": [54, 64, 83, 96, 106, 107],
    },
}


def _make_sliced_model(model, stop_layer):
    layers = OrderedDict()
    for name, layer in model.named_children():
        layers[name] = layer
        if name == stop_layer:
            break
    new_model = torch.nn.Sequential(layers)
    return new_model

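
# Illustrative sketch (`_demo_sliced_resnet` is a hypothetical helper, not used by the
# suite): slicing resnet50 at "layer2" keeps conv1/bn1/relu/maxpool/layer1/layer2 and
# drops everything after, so the forward pass returns the raw feature map rather than
# class logits.
def _demo_sliced_resnet():
    model = _make_sliced_model(models.resnet50(), stop_layer="layer2").eval()
    with torch.no_grad():
        return model(torch.rand(1, 3, 224, 224)).shape  # expected: (1, 512, 28, 28)
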

@pytest.mark.parametrize("model_fn", [models.densenet121, models.densenet169, models.densenet201, models.densenet161])
def test_memory_efficient_densenet(model_fn):
    input_shape = (1, 3, 300, 300)
    x = torch.rand(input_shape)

    model1 = model_fn(num_classes=50, memory_efficient=True)
    params = model1.state_dict()
    num_params = sum(x.numel() for x in model1.parameters())
    model1.eval()
    out1 = model1(x)
    out1.sum().backward()
    num_grad = sum(x.grad.numel() for x in model1.parameters() if x.grad is not None)

    model2 = model_fn(num_classes=50, memory_efficient=False)
    model2.load_state_dict(params)
    model2.eval()
    out2 = model2(x)

    assert num_params == num_grad
    torch.testing.assert_close(out1, out2, rtol=0.0, atol=1e-5)

    _check_input_backprop(model1, x)
    _check_input_backprop(model2, x)


@pytest.mark.parametrize("dilate_layer_2", (True, False))
@pytest.mark.parametrize("dilate_layer_3", (True, False))
@pytest.mark.parametrize("dilate_layer_4", (True, False))
def test_resnet_dilation(dilate_layer_2, dilate_layer_3, dilate_layer_4):
    # TODO improve tests to also check that each layer has the right dimensionality
    model = models.resnet50(replace_stride_with_dilation=(dilate_layer_2, dilate_layer_3, dilate_layer_4))
    model = _make_sliced_model(model, stop_layer="layer4")
    model.eval()
    x = torch.rand(1, 3, 224, 224)
    out = model(x)
    f = 2 ** sum((dilate_layer_2, dilate_layer_3, dilate_layer_4))
    assert out.shape == (1, 2048, 7 * f, 7 * f)

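
# Worked example for the assertion above: a full resnet50 downsamples by 2**5 = 32,
# so a 224x224 input yields 224 / 32 = 7x7 features at layer4. Each layer whose
# stride is replaced with dilation skips one factor-2 downsample, doubling the
# spatial size, hence a side of 7 * 2**(number of dilated layers).
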

def test_mobilenet_v2_residual_setting():
    model = models.mobilenet_v2(inverted_residual_setting=[[1, 16, 1, 1], [6, 24, 2, 2]])
    model.eval()
    x = torch.rand(1, 3, 224, 224)
    out = model(x)
    assert out.shape[-1] == 1000


@pytest.mark.parametrize("model_fn", [models.mobilenet_v2, models.mobilenet_v3_large, models.mobilenet_v3_small])
def test_mobilenet_norm_layer(model_fn):
    model = model_fn()
    assert any(isinstance(x, nn.BatchNorm2d) for x in model.modules())

    def get_gn(num_channels):
        return nn.GroupNorm(1, num_channels)

    model = model_fn(norm_layer=get_gn)
    assert not (any(isinstance(x, nn.BatchNorm2d) for x in model.modules()))
    assert any(isinstance(x, nn.GroupNorm) for x in model.modules())


def test_inception_v3_eval():
    kwargs = {}
    kwargs["transform_input"] = True
    kwargs["aux_logits"] = True
    kwargs["init_weights"] = False
    name = "inception_v3"
    model = models.Inception3(**kwargs)
    model.aux_logits = False
    model.AuxLogits = None
    model = model.eval()
    x = torch.rand(1, 3, 299, 299)
    _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
    _check_input_backprop(model, x)


def test_fasterrcnn_double():
    model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, weights=None, weights_backbone=None)
    model.double()
    model.eval()
    input_shape = (3, 300, 300)
    x = torch.rand(input_shape, dtype=torch.float64)
    model_input = [x]
    out = model(model_input)
    assert model_input[0] is x
    assert len(out) == 1
    assert "boxes" in out[0]
    assert "scores" in out[0]
    assert "labels" in out[0]
    _check_input_backprop(model, model_input)


def test_googlenet_eval():
    kwargs = {}
    kwargs["transform_input"] = True
    kwargs["aux_logits"] = True
    kwargs["init_weights"] = False
    name = "googlenet"
    model = models.GoogLeNet(**kwargs)
    model.aux_logits = False
    model.aux1 = None
    model.aux2 = None
    model = model.eval()
    x = torch.rand(1, 3, 224, 224)
    _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(name, None))
    _check_input_backprop(model, x)


@needs_cuda
def test_fasterrcnn_switch_devices():
    def checkOut(out):
        assert len(out) == 1
        assert "boxes" in out[0]
        assert "scores" in out[0]
        assert "labels" in out[0]

    model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, weights=None, weights_backbone=None)
    model.cuda()
    model.eval()
    input_shape = (3, 300, 300)
    x = torch.rand(input_shape, device="cuda")
    model_input = [x]
    out = model(model_input)
    assert model_input[0] is x

    checkOut(out)

    with torch.cuda.amp.autocast():
        out = model(model_input)

    checkOut(out)

    _check_input_backprop(model, model_input)

    # now switch to cpu and make sure it works
    model.cpu()
    x = x.cpu()
    out_cpu = model([x])

    checkOut(out_cpu)

    _check_input_backprop(model, [x])


def test_generalizedrcnn_transform_repr():

    min_size, max_size = 224, 299
    image_mean = [0.485, 0.456, 0.406]
    image_std = [0.229, 0.224, 0.225]

    t = models.detection.transform.GeneralizedRCNNTransform(
        min_size=min_size, max_size=max_size, image_mean=image_mean, image_std=image_std
    )

    # Check integrity of object __repr__ attribute
    expected_string = "GeneralizedRCNNTransform("
    _indent = "\n    "
    expected_string += f"{_indent}Normalize(mean={image_mean}, std={image_std})"
    expected_string += f"{_indent}Resize(min_size=({min_size},), max_size={max_size}, "
    expected_string += "mode='bilinear')\n)"
    assert t.__repr__() == expected_string


test_vit_conv_stem_configs = [
    models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=64),
    models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=128),
    models.vision_transformer.ConvStemConfig(kernel_size=3, stride=1, out_channels=128),
    models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=256),
    models.vision_transformer.ConvStemConfig(kernel_size=3, stride=1, out_channels=256),
    models.vision_transformer.ConvStemConfig(kernel_size=3, stride=2, out_channels=512),
]


def vitc_b_16(**kwargs: Any):
    return models.VisionTransformer(
        image_size=224,
        patch_size=16,
        num_layers=12,
        num_heads=12,
        hidden_dim=768,
        mlp_dim=3072,
        conv_stem_configs=test_vit_conv_stem_configs,
        **kwargs,
    )


@pytest.mark.parametrize("model_fn", [vitc_b_16])
@pytest.mark.parametrize("dev", cpu_and_cuda())
def test_vitc_models(model_fn, dev):
    test_classification_model(model_fn, dev)


@disable_tf32()  # see: https://github.com/pytorch/vision/issues/7618
@pytest.mark.parametrize("model_fn", list_model_fns(models))
@pytest.mark.parametrize("dev", cpu_and_cuda())
def test_classification_model(model_fn, dev):
    set_rng_seed(0)
    defaults = {
        "num_classes": 50,
        "input_shape": (1, 3, 224, 224),
    }
    model_name = model_fn.__name__
    if SKIP_BIG_MODEL and is_skippable(model_name, dev):
        pytest.skip("Skipped to reduce memory usage. Set env var SKIP_BIG_MODEL=0 to enable test for this model")
    kwargs = {**defaults, **_model_params.get(model_name, {})}
    num_classes = kwargs.get("num_classes")
    input_shape = kwargs.pop("input_shape")
    real_image = kwargs.pop("real_image", False)

    model = model_fn(**kwargs)
    model.eval().to(device=dev)
    x = _get_image(input_shape=input_shape, real_image=real_image, device=dev)
    out = model(x)
    # FIXME: this if/else is nasty and only here to please our CI prior to the
    # release. We should rethink these tests altogether.
    if model_name == "resnet101":
        prec = 0.2
    else:
        # FIXME: this is probably still way too high.
        prec = 0.1
    _assert_expected(out.cpu(), model_name, prec=prec)
    assert out.shape[-1] == num_classes
    _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)
    _check_fx_compatible(model, x, eager_out=out)

    if dev == "cuda":
        with torch.cuda.amp.autocast():
            out = model(x)
            # See autocast_flaky_numerics comment at top of file.
            if model_name not in autocast_flaky_numerics:
                _assert_expected(out.cpu(), model_name, prec=0.1)
            assert out.shape[-1] == 50

    _check_input_backprop(model, x)


@pytest.mark.parametrize("model_fn", list_model_fns(models.segmentation))
@pytest.mark.parametrize("dev", cpu_and_cuda())
def test_segmentation_model(model_fn, dev):
    set_rng_seed(0)
    defaults = {
        "num_classes": 10,
        "weights_backbone": None,
        "input_shape": (1, 3, 32, 32),
    }
    model_name = model_fn.__name__
    kwargs = {**defaults, **_model_params.get(model_name, {})}
    input_shape = kwargs.pop("input_shape")
    model = model_fn(**kwargs)
    model.eval().to(device=dev)
    # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
    x = torch.rand(input_shape).to(device=dev)
    with torch.no_grad(), freeze_rng_state():
        out = model(x)

    def check_out(out):
        prec = 0.01
        try:
            # We first try to assert the entire output if possible. This is not
            # only the best way to assert results but also handles the cases
            # where we need to create a new expected result.
            _assert_expected(out.cpu(), model_name, prec=prec)
        except AssertionError:
            # Unfortunately some segmentation models are flaky with autocast
            # so instead of validating the probability scores, check that the class
            # predictions match.
            expected_file = _get_expected_file(model_name)
            expected = torch.load(expected_file)
            torch.testing.assert_close(
                out.argmax(dim=1), expected.argmax(dim=1), rtol=prec, atol=prec, check_device=False
            )
            return False  # Partial validation performed

        return True  # Full validation performed

    full_validation = check_out(out["out"])

    _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)
    _check_fx_compatible(model, x, eager_out=out)

    if dev == "cuda":
        with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state():
            out = model(x)
            # See autocast_flaky_numerics comment at top of file.
            if model_name not in autocast_flaky_numerics:
                full_validation &= check_out(out["out"])

    if not full_validation:
        msg = (
            f"The output of {test_segmentation_model.__name__} could only be partially validated. "
            "This is likely due to unit-test flakiness, but you may "
            "want to do additional manual checks if you made "
            "significant changes to the codebase."
        )
        warnings.warn(msg, RuntimeWarning)
        pytest.skip(msg)

    _check_input_backprop(model, x)


@pytest.mark.parametrize("model_fn", list_model_fns(models.detection))
@pytest.mark.parametrize("dev", cpu_and_cuda())
def test_detection_model(model_fn, dev):
    set_rng_seed(0)
    defaults = {
        "num_classes": 50,
        "weights_backbone": None,
        "input_shape": (3, 300, 300),
    }
    model_name = model_fn.__name__
    if model_name in detection_flaky_models:
        dtype = torch.float64
    else:
        dtype = torch.get_default_dtype()
    kwargs = {**defaults, **_model_params.get(model_name, {})}
    input_shape = kwargs.pop("input_shape")
    real_image = kwargs.pop("real_image", False)

    model = model_fn(**kwargs)
    model.eval().to(device=dev, dtype=dtype)
    x = _get_image(input_shape=input_shape, real_image=real_image, device=dev, dtype=dtype)
    model_input = [x]
    with torch.no_grad(), freeze_rng_state():
        out = model(model_input)
    assert model_input[0] is x

    def check_out(out):
        assert len(out) == 1

        def compact(tensor):
            tensor = tensor.cpu()
            size = tensor.size()
            elements_per_sample = functools.reduce(operator.mul, size[1:], 1)
            if elements_per_sample > 30:
                return compute_mean_std(tensor)
            else:
                return subsample_tensor(tensor)

        def subsample_tensor(tensor):
            num_elems = tensor.size(0)
            num_samples = 20
            if num_elems <= num_samples:
                return tensor

            ith_index = num_elems // num_samples
            return tensor[ith_index - 1 :: ith_index]

        def compute_mean_std(tensor):
            # can't compute mean of integral tensor
            tensor = tensor.to(torch.double)
            mean = torch.mean(tensor)
            std = torch.std(tensor)
            return {"mean": mean, "std": std}

        output = map_nested_tensor_object(out, tensor_map_fn=compact)
        prec = 0.01
        try:
            # We first try to assert the entire output if possible. This is not
            # only the best way to assert results but also handles the cases
            # where we need to create a new expected result.
            _assert_expected(output, model_name, prec=prec)
        except AssertionError:
            # Unfortunately detection models are flaky due to the unstable sort
            # in NMS. If matching across all outputs fails, use the same approach
            # as in NMSTester.test_nms_cuda to see if this is caused by duplicate
            # scores.
            expected_file = _get_expected_file(model_name)
            expected = torch.load(expected_file)
            torch.testing.assert_close(
                output[0]["scores"], expected[0]["scores"], rtol=prec, atol=prec, check_device=False, check_dtype=False
            )

            # Note: Fmassa proposed turning off NMS by adapting the threshold
            # and then using the Hungarian algorithm as in DETR to find the
            # best match between output and expected boxes and eliminate some
            # of the flakiness. Worth exploring.
            return False  # Partial validation performed

        return True  # Full validation performed

    full_validation = check_out(out)
    _check_jit_scriptable(model, ([x],), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)

    if dev == "cuda":
        with torch.cuda.amp.autocast(), torch.no_grad(), freeze_rng_state():
            out = model(model_input)
            # See autocast_flaky_numerics comment at top of file.
            if model_name not in autocast_flaky_numerics:
                full_validation &= check_out(out)

    if not full_validation:
        msg = (
            f"The output of {test_detection_model.__name__} could only be partially validated. "
            "This is likely due to unit-test flakiness, but you may "
            "want to do additional manual checks if you made "
            "significant changes to the codebase."
        )
        warnings.warn(msg, RuntimeWarning)
        pytest.skip(msg)

    _check_input_backprop(model, model_input)


@pytest.mark.parametrize("model_fn", list_model_fns(models.detection))
def test_detection_model_validation(model_fn):
    set_rng_seed(0)
    model = model_fn(num_classes=50, weights=None, weights_backbone=None)
    input_shape = (3, 300, 300)
    x = [torch.rand(input_shape)]

    # validate that targets are present in training
    with pytest.raises(AssertionError):
        model(x)

    # validate type
    targets = [{"boxes": 0.0}]
    with pytest.raises(AssertionError):
        model(x, targets=targets)

    # validate boxes shape
    for boxes in (torch.rand((4,)), torch.rand((1, 5))):
        targets = [{"boxes": boxes}]
        with pytest.raises(AssertionError):
            model(x, targets=targets)

    # validate that no degenerate boxes are present
    boxes = torch.tensor([[1, 3, 1, 4], [2, 4, 3, 4]])
    targets = [{"boxes": boxes}]
    with pytest.raises(AssertionError):
        model(x, targets=targets)


@pytest.mark.parametrize("model_fn", list_model_fns(models.video))
@pytest.mark.parametrize("dev", cpu_and_cuda())
def test_video_model(model_fn, dev):
    set_rng_seed(0)
    # the default input shape is
    # bs * num_channels * clip_len * h * w
    defaults = {
        "input_shape": (1, 3, 4, 112, 112),
        "num_classes": 50,
    }
    model_name = model_fn.__name__
    if SKIP_BIG_MODEL and is_skippable(model_name, dev):
        pytest.skip("Skipped to reduce memory usage. Set env var SKIP_BIG_MODEL=0 to enable test for this model")
    kwargs = {**defaults, **_model_params.get(model_name, {})}
    num_classes = kwargs.get("num_classes")
    input_shape = kwargs.pop("input_shape")
    # test both BasicBlock and Bottleneck
    model = model_fn(**kwargs)
    model.eval().to(device=dev)
    # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests
    x = torch.rand(input_shape).to(device=dev)
    out = model(x)
    _assert_expected(out.cpu(), model_name, prec=0.1)
    assert out.shape[-1] == num_classes
    _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)
    _check_fx_compatible(model, x, eager_out=out)
    assert out.shape[-1] == num_classes

    if dev == "cuda":
        with torch.cuda.amp.autocast():
            out = model(x)
            # See autocast_flaky_numerics comment at top of file.
            if model_name not in autocast_flaky_numerics:
                _assert_expected(out.cpu(), model_name, prec=0.1)
            assert out.shape[-1] == num_classes

    _check_input_backprop(model, x)


@pytest.mark.skipif(
    not (
        "fbgemm" in torch.backends.quantized.supported_engines
        and "qnnpack" in torch.backends.quantized.supported_engines
    ),
    reason="This PyTorch build has not been built with fbgemm and qnnpack",
)
@pytest.mark.parametrize("model_fn", list_model_fns(models.quantization))
def test_quantized_classification_model(model_fn):
    set_rng_seed(0)
    defaults = {
        "num_classes": 5,
        "input_shape": (1, 3, 224, 224),
        "quantize": True,
    }
    model_name = model_fn.__name__
    kwargs = {**defaults, **_model_params.get(model_name, {})}
    input_shape = kwargs.pop("input_shape")

    # First check if quantize=True provides models that can run with input data
    model = model_fn(**kwargs)
    model.eval()
    x = torch.rand(input_shape)
    out = model(x)

    if model_name not in quantized_flaky_models:
        _assert_expected(out.cpu(), model_name + "_quantized", prec=2e-2)
        assert out.shape[-1] == 5
        _check_jit_scriptable(model, (x,), unwrapper=script_model_unwrapper.get(model_name, None), eager_out=out)
        _check_fx_compatible(model, x, eager_out=out)
    else:
        try:
            torch.jit.script(model)
        except Exception as e:
            raise AssertionError("model cannot be scripted.") from e

    kwargs["quantize"] = False
    for eval_mode in [True, False]:
        model = model_fn(**kwargs)
        if eval_mode:
            model.eval()
            model.qconfig = torch.ao.quantization.default_qconfig
        else:
            model.train()
            model.qconfig = torch.ao.quantization.default_qat_qconfig

        model.fuse_model(is_qat=not eval_mode)
        if eval_mode:
            torch.ao.quantization.prepare(model, inplace=True)
        else:
            torch.ao.quantization.prepare_qat(model, inplace=True)
            model.eval()

        torch.ao.quantization.convert(model, inplace=True)


@pytest.mark.parametrize("model_fn", list_model_fns(models.detection))
def test_detection_model_trainable_backbone_layers(model_fn, disable_weight_loading):
    model_name = model_fn.__name__
    max_trainable = _model_tests_values[model_name]["max_trainable"]
    n_trainable_params = []
    for trainable_layers in range(0, max_trainable + 1):
        model = model_fn(weights=None, weights_backbone="DEFAULT", trainable_backbone_layers=trainable_layers)

        n_trainable_params.append(len([p for p in model.parameters() if p.requires_grad]))
    assert n_trainable_params == _model_tests_values[model_name]["n_trn_params_per_layer"]


@needs_cuda
@pytest.mark.parametrize("model_fn", list_model_fns(models.optical_flow))
@pytest.mark.parametrize("scripted", (False, True))
def test_raft(model_fn, scripted):

    torch.manual_seed(0)

    # We need very small images, otherwise the pickle size would exceed the 50KB limit.
    # As a result we need to override the correlation pyramid to not downsample
    # too much, otherwise we would get nan values (effective H and W would be
    # reduced to 1)
    corr_block = models.optical_flow.raft.CorrBlock(num_levels=2, radius=2)

    model = model_fn(corr_block=corr_block).eval().to("cuda")
    if scripted:
        model = torch.jit.script(model)

    bs = 1
    img1 = torch.rand(bs, 3, 80, 72).cuda()
    img2 = torch.rand(bs, 3, 80, 72).cuda()

    preds = model(img1, img2)
    flow_pred = preds[-1]
    # Tolerance is fairly high, but there are 2 * H * W outputs to check
    # The .pkl were generated on the AWS cluster; on the CI it looks like the results are slightly different
    _assert_expected(flow_pred.cpu(), name=model_fn.__name__, atol=1e-2, rtol=1)


def test_presets_antialias():
    img = torch.randint(0, 256, size=(1, 3, 224, 224), dtype=torch.uint8)

    match = "The default value of the antialias parameter"
    with pytest.warns(UserWarning, match=match):
        models.ResNet18_Weights.DEFAULT.transforms()(img)
    with pytest.warns(UserWarning, match=match):
        models.segmentation.DeepLabV3_ResNet50_Weights.DEFAULT.transforms()(img)

    with warnings.catch_warnings():
        warnings.simplefilter("error")
        models.ResNet18_Weights.DEFAULT.transforms(antialias=True)(img)
        models.segmentation.DeepLabV3_ResNet50_Weights.DEFAULT.transforms(antialias=True)(img)

        models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT.transforms()(img)
        models.video.R3D_18_Weights.DEFAULT.transforms()(img)
        models.optical_flow.Raft_Small_Weights.DEFAULT.transforms()(img, img)


if __name__ == "__main__":
    pytest.main([__file__])