123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178 |
- import random
- import warnings
- import numpy as np
- import pytest
- import torch
- from common_utils import assert_equal
- from torchvision.transforms import Compose
- try:
- from scipy import stats
- except ImportError:
- stats = None
- with warnings.catch_warnings(record=True):
- warnings.simplefilter("always")
- import torchvision.transforms._transforms_video as transforms
class TestVideoTransforms:
    """Tests for the video transforms imported at module level as ``transforms``."""

    @staticmethod
    def _random_clip_and_crop_sizes():
        """Draw random clip dimensions and a smaller, even crop size.

        The draws happen in a fixed order (frames, height, width, crop height,
        crop width) from the global ``random`` module.

        Returns:
            tuple: ``(num_frames, height, width, crop_height, crop_width)``.
            ``height`` and ``width`` are even values in ``[20, 64]``; each crop
            dimension is an even value in ``[10, dim - 2]``.
        """
        num_frames = random.randint(4, 128)
        height = random.randint(10, 32) * 2
        width = random.randint(10, 32) * 2
        # Floor division keeps the upper bound an int. ``random.randint`` no
        # longer accepts float bounds (deprecated in 3.10, TypeError in 3.12).
        # The result is exact because ``height`` and ``width`` are even.
        crop_height = random.randint(5, (height - 2) // 2) * 2
        crop_width = random.randint(5, (width - 2) // 2) * 2
        return num_frames, height, width, crop_height, crop_width

    @staticmethod
    def _center_crop(clip, size):
        """Run ``ToTensorVideo`` followed by ``CenterCropVideo(size)`` on ``clip``."""
        pipeline = Compose(
            [
                transforms.ToTensorVideo(),
                transforms.CenterCropVideo(size),
            ]
        )
        return pipeline(clip)

    def test_random_crop_video(self):
        """RandomCropVideo yields the requested size in output dims 2 and 3."""
        num_frames, height, width, oheight, owidth = self._random_clip_and_crop_sizes()
        clip = torch.randint(0, 256, (num_frames, height, width, 3), dtype=torch.uint8)
        pipeline = Compose(
            [
                transforms.ToTensorVideo(),
                transforms.RandomCropVideo((oheight, owidth)),
            ]
        )
        result = pipeline(clip)
        assert result.size(2) == oheight
        assert result.size(3) == owidth

        # Smoke check: building the repr must not raise.
        repr(transforms.RandomCropVideo((oheight, owidth)))

    def test_random_resized_crop_video(self):
        """RandomResizedCropVideo yields the requested size in output dims 2 and 3."""
        num_frames, height, width, oheight, owidth = self._random_clip_and_crop_sizes()
        clip = torch.randint(0, 256, (num_frames, height, width, 3), dtype=torch.uint8)
        pipeline = Compose(
            [
                transforms.ToTensorVideo(),
                transforms.RandomResizedCropVideo((oheight, owidth)),
            ]
        )
        result = pipeline(clip)
        assert result.size(2) == oheight
        assert result.size(3) == owidth

        # Smoke check: building the repr must not raise.
        repr(transforms.RandomResizedCropVideo((oheight, owidth)))

    def test_center_crop_video(self):
        """CenterCropVideo selects the centre window of the clip.

        The clip is filled with 255 except for a zeroed centre window of the
        drawn crop size. Cropping exactly that window must sum to zero, and
        growing the crop by one and then two pixels per side must pick up a
        strictly increasing amount of the non-zero border.
        """
        num_frames, height, width, oheight, owidth = self._random_clip_and_crop_sizes()

        clip = torch.ones((num_frames, height, width, 3), dtype=torch.uint8) * 255
        top = (height - oheight) // 2
        left = (width - owidth) // 2
        # Basic slicing returns a view, so ``fill_`` zeroes the centre of ``clip`` itself.
        clip[:, top : top + oheight, left : left + owidth, :].fill_(0)

        def describe(crop_height, crop_width):
            # Failure message carrying every size involved in the check.
            return f"height: {height} width: {width} oheight: {crop_height} owidth: {crop_width}"

        # Crop matching the zeroed window exactly: nothing non-zero may leak in.
        result = self._center_crop(clip, (oheight, owidth))
        assert result.sum().item() == 0, describe(oheight, owidth)

        # One pixel larger in each dimension: part of the border is included.
        oheight += 1
        owidth += 1
        sum1 = self._center_crop(clip, (oheight, owidth)).sum()
        assert sum1.item() > 1, describe(oheight, owidth)

        # Two pixels larger: even more of the border is included.
        oheight += 1
        owidth += 1
        sum2 = self._center_crop(clip, (oheight, owidth)).sum()
        msg = describe(oheight, owidth)
        assert sum2.item() > 1, msg
        assert sum2.item() > sum1.item(), msg

    @pytest.mark.skipif(stats is None, reason="scipy.stats is not available")
    @pytest.mark.parametrize("channels", [1, 3])
    def test_normalize_video(self, channels):
        """NormalizeVideo standardises a clip and supports in-place operation.

        Args:
            channels: Size of the leading (channel) dimension of the test clip.
        """

        def samples_from_standard_normal(tensor):
            # Kolmogorov-Smirnov test against N(0, 1). Hand scipy a flat array
            # rather than a Python list holding one 0-d tensor per element.
            p_value = stats.kstest(tensor.view(-1).numpy(), "norm", args=(0, 1)).pvalue
            return p_value > 0.0001

        # Seed ``random`` for reproducible sizes and always restore the previous
        # state, so a failure here cannot leak the fixed seed into other tests.
        # NOTE: ``torch.normal`` below draws from torch's own RNG, which this
        # seed does not affect.
        random_state = random.getstate()
        random.seed(42)
        try:
            num_frames = random.randint(4, 128)
            height = random.randint(32, 256)
            width = random.randint(32, 256)
            mean = random.random()
            std = random.random()
        finally:
            random.setstate(random_state)

        clip = torch.normal(mean, std, size=(channels, num_frames, height, width))
        # Normalise with the per-channel empirical statistics of the sample.
        mean = [clip[c].mean().item() for c in range(channels)]
        std = [clip[c].std().item() for c in range(channels)]
        normalized = transforms.NormalizeVideo(mean, std)(clip)
        assert samples_from_standard_normal(normalized)

        # Checking the optional in-place behaviour
        tensor = torch.rand((3, 128, 16, 16))
        tensor_inplace = transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)(tensor)
        assert_equal(tensor, tensor_inplace)

        # Smoke check: building the repr must not raise.
        repr(transforms.NormalizeVideo((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True))

    def test_to_tensor_video(self):
        """ToTensorVideo rejects unsupported input types and shapes."""
        num_frames, height, width = 64, 4, 4
        trans = transforms.ToTensorVideo()

        # Inputs are built outside the ``pytest.raises`` blocks so that only the
        # transform call itself can satisfy the expected exception.
        np_rng = np.random.RandomState(0)
        list_clip = np_rng.rand(num_frames, height, width, 1).tolist()
        with pytest.raises(TypeError):
            trans(list_clip)

        float_clip = torch.rand((num_frames, height, width, 1), dtype=torch.float)
        with pytest.raises(TypeError):
            trans(float_clip)

        # uint8 tensors with 5, 3, 2 and 1 dimensions are all expected to fail.
        bad_shapes = (
            (3, num_frames, height, width, 3),
            (height, width, 3),
            (width, 3),
            (3,),
        )
        for shape in bad_shapes:
            bad_clip = torch.ones(shape, dtype=torch.uint8)
            with pytest.raises(ValueError):
                trans(bad_clip)

        # Smoke check: building the repr must not raise.
        repr(trans)

    @pytest.mark.parametrize("p", (0, 1))
    def test_random_horizontal_flip_video(self, p):
        """RandomHorizontalFlipVideo never flips for ``p=0`` and always flips for ``p=1``.

        Args:
            p: Flip probability passed to the transform.
        """
        clip = torch.rand((3, 4, 112, 112), dtype=torch.float)
        # Compute the reference flip (along the last dimension) before calling the transform.
        hclip = clip.flip(-1)

        out = transforms.RandomHorizontalFlipVideo(p=p)(clip)
        expected = hclip if p == 1 else clip
        torch.testing.assert_close(out, expected)

        # Smoke check: building the repr must not raise.
        repr(transforms.RandomHorizontalFlipVideo())
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file as a script
    # reports test failures to the calling shell or CI instead of exiting 0.
    raise SystemExit(pytest.main([__file__]))
|