test_io.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. import contextlib
  2. import os
  3. import sys
  4. import tempfile
  5. import pytest
  6. import torch
  7. import torchvision.io as io
  8. from common_utils import assert_equal
  9. from torchvision import get_video_backend
  10. try:
  11. import av
  12. # Do a version test too
  13. io.video._check_av_available()
  14. except ImportError:
  15. av = None
  16. VIDEO_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "videos")
  17. def _create_video_frames(num_frames, height, width):
  18. y, x = torch.meshgrid(torch.linspace(-2, 2, height), torch.linspace(-2, 2, width), indexing="ij")
  19. data = []
  20. for i in range(num_frames):
  21. xc = float(i) / num_frames
  22. yc = 1 - float(i) / (2 * num_frames)
  23. d = torch.exp(-((x - xc) ** 2 + (y - yc) ** 2) / 2) * 255
  24. data.append(d.unsqueeze(2).repeat(1, 1, 3).byte())
  25. return torch.stack(data, 0)
  26. @contextlib.contextmanager
  27. def temp_video(num_frames, height, width, fps, lossless=False, video_codec=None, options=None):
  28. if lossless:
  29. if video_codec is not None:
  30. raise ValueError("video_codec can't be specified together with lossless")
  31. if options is not None:
  32. raise ValueError("options can't be specified together with lossless")
  33. video_codec = "libx264rgb"
  34. options = {"crf": "0"}
  35. if video_codec is None:
  36. if get_video_backend() == "pyav":
  37. video_codec = "libx264"
  38. else:
  39. # when video_codec is not set, we assume it is libx264rgb which accepts
  40. # RGB pixel formats as input instead of YUV
  41. video_codec = "libx264rgb"
  42. if options is None:
  43. options = {}
  44. data = _create_video_frames(num_frames, height, width)
  45. with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
  46. f.close()
  47. io.write_video(f.name, data, fps=fps, video_codec=video_codec, options=options)
  48. yield f.name, data
  49. os.unlink(f.name)
  50. @pytest.mark.skipif(
  51. get_video_backend() != "pyav" and not io._HAS_VIDEO_OPT, reason="video_reader backend not available"
  52. )
  53. @pytest.mark.skipif(av is None, reason="PyAV unavailable")
  54. class TestVideo:
  55. # compression adds artifacts, thus we add a tolerance of
  56. # 6 in 0-255 range
  57. TOLERANCE = 6
  58. def test_write_read_video(self):
  59. with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
  60. lv, _, info = io.read_video(f_name)
  61. assert_equal(data, lv)
  62. assert info["video_fps"] == 5
  63. @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
  64. def test_probe_video_from_file(self):
  65. with temp_video(10, 300, 300, 5) as (f_name, data):
  66. video_info = io._probe_video_from_file(f_name)
  67. assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
  68. assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps
  69. @pytest.mark.skipif(not io._HAS_VIDEO_OPT, reason="video_reader backend is not chosen")
  70. def test_probe_video_from_memory(self):
  71. with temp_video(10, 300, 300, 5) as (f_name, data):
  72. with open(f_name, "rb") as fp:
  73. filebuffer = fp.read()
  74. video_info = io._probe_video_from_memory(filebuffer)
  75. assert pytest.approx(2, rel=0.0, abs=0.1) == video_info.video_duration
  76. assert pytest.approx(5, rel=0.0, abs=0.1) == video_info.video_fps
  77. def test_read_timestamps(self):
  78. with temp_video(10, 300, 300, 5) as (f_name, data):
  79. pts, _ = io.read_video_timestamps(f_name)
  80. # note: not all formats/codecs provide accurate information for computing the
  81. # timestamps. For the format that we use here, this information is available,
  82. # so we use it as a baseline
  83. with av.open(f_name) as container:
  84. stream = container.streams[0]
  85. pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
  86. num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
  87. expected_pts = [i * pts_step for i in range(num_frames)]
  88. assert pts == expected_pts
  89. @pytest.mark.parametrize("start", range(5))
  90. @pytest.mark.parametrize("offset", range(1, 4))
  91. def test_read_partial_video(self, start, offset):
  92. with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
  93. pts, _ = io.read_video_timestamps(f_name)
  94. lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
  95. s_data = data[start : (start + offset)]
  96. assert len(lv) == offset
  97. assert_equal(s_data, lv)
  98. if get_video_backend() == "pyav":
  99. # for "video_reader" backend, we don't decode the closest early frame
  100. # when the given start pts is not matching any frame pts
  101. lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
  102. assert len(lv) == 4
  103. assert_equal(data[4:8], lv)
  104. @pytest.mark.parametrize("start", range(0, 80, 20))
  105. @pytest.mark.parametrize("offset", range(1, 4))
  106. def test_read_partial_video_bframes(self, start, offset):
  107. # do not use lossless encoding, to test the presence of B-frames
  108. options = {"bframes": "16", "keyint": "10", "min-keyint": "4"}
  109. with temp_video(100, 300, 300, 5, options=options) as (f_name, data):
  110. pts, _ = io.read_video_timestamps(f_name)
  111. lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1])
  112. s_data = data[start : (start + offset)]
  113. assert len(lv) == offset
  114. assert_equal(s_data, lv, rtol=0.0, atol=self.TOLERANCE)
  115. lv, _, _ = io.read_video(f_name, pts[4] + 1, pts[7])
  116. # TODO fix this
  117. if get_video_backend() == "pyav":
  118. assert len(lv) == 4
  119. assert_equal(data[4:8], lv, rtol=0.0, atol=self.TOLERANCE)
  120. else:
  121. assert len(lv) == 3
  122. assert_equal(data[5:8], lv, rtol=0.0, atol=self.TOLERANCE)
  123. def test_read_packed_b_frames_divx_file(self):
  124. name = "hmdb51_Turnk_r_Pippi_Michel_cartwheel_f_cm_np2_le_med_6.avi"
  125. f_name = os.path.join(VIDEO_DIR, name)
  126. pts, fps = io.read_video_timestamps(f_name)
  127. assert pts == sorted(pts)
  128. assert fps == 30
  129. def test_read_timestamps_from_packet(self):
  130. with temp_video(10, 300, 300, 5, video_codec="mpeg4") as (f_name, data):
  131. pts, _ = io.read_video_timestamps(f_name)
  132. # note: not all formats/codecs provide accurate information for computing the
  133. # timestamps. For the format that we use here, this information is available,
  134. # so we use it as a baseline
  135. with av.open(f_name) as container:
  136. stream = container.streams[0]
  137. # make sure we went through the optimized codepath
  138. assert b"Lavc" in stream.codec_context.extradata
  139. pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
  140. num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
  141. expected_pts = [i * pts_step for i in range(num_frames)]
  142. assert pts == expected_pts
  143. def test_read_video_pts_unit_sec(self):
  144. with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
  145. lv, _, info = io.read_video(f_name, pts_unit="sec")
  146. assert_equal(data, lv)
  147. assert info["video_fps"] == 5
  148. assert info == {"video_fps": 5}
  149. def test_read_timestamps_pts_unit_sec(self):
  150. with temp_video(10, 300, 300, 5) as (f_name, data):
  151. pts, _ = io.read_video_timestamps(f_name, pts_unit="sec")
  152. with av.open(f_name) as container:
  153. stream = container.streams[0]
  154. pts_step = int(round(float(1 / (stream.average_rate * stream.time_base))))
  155. num_frames = int(round(float(stream.average_rate * stream.time_base * stream.duration)))
  156. expected_pts = [i * pts_step * stream.time_base for i in range(num_frames)]
  157. assert pts == expected_pts
  158. @pytest.mark.parametrize("start", range(5))
  159. @pytest.mark.parametrize("offset", range(1, 4))
  160. def test_read_partial_video_pts_unit_sec(self, start, offset):
  161. with temp_video(10, 300, 300, 5, lossless=True) as (f_name, data):
  162. pts, _ = io.read_video_timestamps(f_name, pts_unit="sec")
  163. lv, _, _ = io.read_video(f_name, pts[start], pts[start + offset - 1], pts_unit="sec")
  164. s_data = data[start : (start + offset)]
  165. assert len(lv) == offset
  166. assert_equal(s_data, lv)
  167. with av.open(f_name) as container:
  168. stream = container.streams[0]
  169. lv, _, _ = io.read_video(
  170. f_name, int(pts[4] * (1.0 / stream.time_base) + 1) * stream.time_base, pts[7], pts_unit="sec"
  171. )
  172. if get_video_backend() == "pyav":
  173. # for "video_reader" backend, we don't decode the closest early frame
  174. # when the given start pts is not matching any frame pts
  175. assert len(lv) == 4
  176. assert_equal(data[4:8], lv)
  177. def test_read_video_corrupted_file(self):
  178. with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
  179. f.write(b"This is not an mpg4 file")
  180. video, audio, info = io.read_video(f.name)
  181. assert isinstance(video, torch.Tensor)
  182. assert isinstance(audio, torch.Tensor)
  183. assert video.numel() == 0
  184. assert audio.numel() == 0
  185. assert info == {}
  186. def test_read_video_timestamps_corrupted_file(self):
  187. with tempfile.NamedTemporaryFile(suffix=".mp4") as f:
  188. f.write(b"This is not an mpg4 file")
  189. video_pts, video_fps = io.read_video_timestamps(f.name)
  190. assert video_pts == []
  191. assert video_fps is None
  192. @pytest.mark.skip(reason="Temporarily disabled due to new pyav")
  193. def test_read_video_partially_corrupted_file(self):
  194. with temp_video(5, 4, 4, 5, lossless=True) as (f_name, data):
  195. with open(f_name, "r+b") as f:
  196. size = os.path.getsize(f_name)
  197. bytes_to_overwrite = size // 10
  198. # seek to the middle of the file
  199. f.seek(5 * bytes_to_overwrite)
  200. # corrupt 10% of the file from the middle
  201. f.write(b"\xff" * bytes_to_overwrite)
  202. # this exercises the container.decode assertion check
  203. video, audio, info = io.read_video(f.name, pts_unit="sec")
  204. # check that size is not equal to 5, but 3
  205. # TODO fix this
  206. if get_video_backend() == "pyav":
  207. assert len(video) == 3
  208. else:
  209. assert len(video) == 4
  210. # but the valid decoded content is still correct
  211. assert_equal(video[:3], data[:3])
  212. # and the last few frames are wrong
  213. with pytest.raises(AssertionError):
  214. assert_equal(video, data)
  215. @pytest.mark.skipif(sys.platform == "win32", reason="temporarily disabled on Windows")
  216. def test_write_video_with_audio(self, tmpdir):
  217. f_name = os.path.join(VIDEO_DIR, "R6llTwEh07w.mp4")
  218. video_tensor, audio_tensor, info = io.read_video(f_name, pts_unit="sec")
  219. out_f_name = os.path.join(tmpdir, "testing.mp4")
  220. io.video.write_video(
  221. out_f_name,
  222. video_tensor,
  223. round(info["video_fps"]),
  224. video_codec="libx264rgb",
  225. options={"crf": "0"},
  226. audio_array=audio_tensor,
  227. audio_fps=info["audio_fps"],
  228. audio_codec="aac",
  229. )
  230. out_video_tensor, out_audio_tensor, out_info = io.read_video(out_f_name, pts_unit="sec")
  231. assert info["video_fps"] == out_info["video_fps"]
  232. assert_equal(video_tensor, out_video_tensor)
  233. audio_stream = av.open(f_name).streams.audio[0]
  234. out_audio_stream = av.open(out_f_name).streams.audio[0]
  235. assert info["audio_fps"] == out_info["audio_fps"]
  236. assert audio_stream.rate == out_audio_stream.rate
  237. assert pytest.approx(out_audio_stream.frames, rel=0.0, abs=1) == audio_stream.frames
  238. assert audio_stream.frame_size == out_audio_stream.frame_size
  239. # TODO add tests for audio
  240. if __name__ == "__main__":
  241. pytest.main(__file__)