test_wavfile.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. import os
  2. import sys
  3. from io import BytesIO
  4. import numpy as np
  5. from numpy.testing import (assert_equal, assert_, assert_array_equal,
  6. break_cycles, suppress_warnings, IS_PYPY)
  7. import pytest
  8. from pytest import raises, warns
  9. from scipy.io import wavfile
  10. def datafile(fn):
  11. return os.path.join(os.path.dirname(__file__), 'data', fn)
  12. def test_read_1():
  13. # 32-bit PCM (which uses extensible format)
  14. for mmap in [False, True]:
  15. filename = 'test-44100Hz-le-1ch-4bytes.wav'
  16. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  17. assert_equal(rate, 44100)
  18. assert_(np.issubdtype(data.dtype, np.int32))
  19. assert_equal(data.shape, (4410,))
  20. del data
  21. def test_read_2():
  22. # 8-bit unsigned PCM
  23. for mmap in [False, True]:
  24. filename = 'test-8000Hz-le-2ch-1byteu.wav'
  25. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  26. assert_equal(rate, 8000)
  27. assert_(np.issubdtype(data.dtype, np.uint8))
  28. assert_equal(data.shape, (800, 2))
  29. del data
  30. def test_read_3():
  31. # Little-endian float
  32. for mmap in [False, True]:
  33. filename = 'test-44100Hz-2ch-32bit-float-le.wav'
  34. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  35. assert_equal(rate, 44100)
  36. assert_(np.issubdtype(data.dtype, np.float32))
  37. assert_equal(data.shape, (441, 2))
  38. del data
  39. def test_read_4():
  40. # Contains unsupported 'PEAK' chunk
  41. for mmap in [False, True]:
  42. with suppress_warnings() as sup:
  43. sup.filter(wavfile.WavFileWarning,
  44. "Chunk .non-data. not understood, skipping it")
  45. filename = 'test-48000Hz-2ch-64bit-float-le-wavex.wav'
  46. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  47. assert_equal(rate, 48000)
  48. assert_(np.issubdtype(data.dtype, np.float64))
  49. assert_equal(data.shape, (480, 2))
  50. del data
  51. def test_read_5():
  52. # Big-endian float
  53. for mmap in [False, True]:
  54. filename = 'test-44100Hz-2ch-32bit-float-be.wav'
  55. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  56. assert_equal(rate, 44100)
  57. assert_(np.issubdtype(data.dtype, np.float32))
  58. assert_(data.dtype.byteorder == '>' or (sys.byteorder == 'big' and
  59. data.dtype.byteorder == '='))
  60. assert_equal(data.shape, (441, 2))
  61. del data
  62. def test_5_bit_odd_size_no_pad():
  63. # 5-bit, 1 B container, 5 channels, 9 samples, 45 B data chunk
  64. # Generated by LTspice, which incorrectly omits pad byte, but should be
  65. # readable anyway
  66. for mmap in [False, True]:
  67. filename = 'test-8000Hz-le-5ch-9S-5bit.wav'
  68. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  69. assert_equal(rate, 8000)
  70. assert_(np.issubdtype(data.dtype, np.uint8))
  71. assert_equal(data.shape, (9, 5))
  72. # 8-5 = 3 LSBits should be 0
  73. assert_equal(data & 0b00000111, 0)
  74. # Unsigned
  75. assert_equal(data.max(), 0b11111000) # Highest possible
  76. assert_equal(data[0, 0], 128) # Midpoint is 128 for <= 8-bit
  77. assert_equal(data.min(), 0) # Lowest possible
  78. del data
  79. def test_12_bit_even_size():
  80. # 12-bit, 2 B container, 4 channels, 9 samples, 72 B data chunk
  81. # Generated by LTspice from 1 Vpk sine waves
  82. for mmap in [False, True]:
  83. filename = 'test-8000Hz-le-4ch-9S-12bit.wav'
  84. rate, data = wavfile.read(datafile(filename), mmap=mmap)
  85. assert_equal(rate, 8000)
  86. assert_(np.issubdtype(data.dtype, np.int16))
  87. assert_equal(data.shape, (9, 4))
  88. # 16-12 = 4 LSBits should be 0
  89. assert_equal(data & 0b00000000_00001111, 0)
  90. # Signed
  91. assert_equal(data.max(), 0b01111111_11110000) # Highest possible
  92. assert_equal(data[0, 0], 0) # Midpoint is 0 for >= 9-bit
  93. assert_equal(data.min(), -0b10000000_00000000) # Lowest possible
  94. del data
  95. def test_24_bit_odd_size_with_pad():
  96. # 24-bit, 3 B container, 3 channels, 5 samples, 45 B data chunk
  97. # Should not raise any warnings about the data chunk pad byte
  98. filename = 'test-8000Hz-le-3ch-5S-24bit.wav'
  99. rate, data = wavfile.read(datafile(filename), mmap=False)
  100. assert_equal(rate, 8000)
  101. assert_(np.issubdtype(data.dtype, np.int32))
  102. assert_equal(data.shape, (5, 3))
  103. # All LSBytes should be 0
  104. assert_equal(data & 0xff, 0)
  105. # Hand-made max/min samples under different conventions:
  106. # 2**(N-1) 2**(N-1)-1 LSB
  107. assert_equal(data, [[-0x8000_0000, -0x7fff_ff00, -0x200],
  108. [-0x4000_0000, -0x3fff_ff00, -0x100],
  109. [+0x0000_0000, +0x0000_0000, +0x000],
  110. [+0x4000_0000, +0x3fff_ff00, +0x100],
  111. [+0x7fff_ff00, +0x7fff_ff00, +0x200]])
  112. # ^ clipped
  113. def test_20_bit_extra_data():
  114. # 20-bit, 3 B container, 1 channel, 10 samples, 30 B data chunk
  115. # with extra data filling container beyond the bit depth
  116. filename = 'test-8000Hz-le-1ch-10S-20bit-extra.wav'
  117. rate, data = wavfile.read(datafile(filename), mmap=False)
  118. assert_equal(rate, 1234)
  119. assert_(np.issubdtype(data.dtype, np.int32))
  120. assert_equal(data.shape, (10,))
  121. # All LSBytes should still be 0, because 3 B container in 4 B dtype
  122. assert_equal(data & 0xff, 0)
  123. # But it should load the data beyond 20 bits
  124. assert_((data & 0xf00).any())
  125. # Full-scale positive/negative samples, then being halved each time
  126. assert_equal(data, [+0x7ffff000, # +full-scale 20-bit
  127. -0x7ffff000, # -full-scale 20-bit
  128. +0x7ffff000 >> 1, # +1/2
  129. -0x7ffff000 >> 1, # -1/2
  130. +0x7ffff000 >> 2, # +1/4
  131. -0x7ffff000 >> 2, # -1/4
  132. +0x7ffff000 >> 3, # +1/8
  133. -0x7ffff000 >> 3, # -1/8
  134. +0x7ffff000 >> 4, # +1/16
  135. -0x7ffff000 >> 4, # -1/16
  136. ])
  137. def test_36_bit_odd_size():
  138. # 36-bit, 5 B container, 3 channels, 5 samples, 75 B data chunk + pad
  139. filename = 'test-8000Hz-le-3ch-5S-36bit.wav'
  140. rate, data = wavfile.read(datafile(filename), mmap=False)
  141. assert_equal(rate, 8000)
  142. assert_(np.issubdtype(data.dtype, np.int64))
  143. assert_equal(data.shape, (5, 3))
  144. # 28 LSBits should be 0
  145. assert_equal(data & 0xfffffff, 0)
  146. # Hand-made max/min samples under different conventions:
  147. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  148. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_f000_0000, -0x2000_0000],
  149. [-0x4000_0000_0000_0000, -0x3fff_ffff_f000_0000, -0x1000_0000],
  150. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x0000_0000],
  151. [+0x4000_0000_0000_0000, +0x3fff_ffff_f000_0000, +0x1000_0000],
  152. [+0x7fff_ffff_f000_0000, +0x7fff_ffff_f000_0000, +0x2000_0000]]
  153. # ^ clipped
  154. assert_equal(data, correct)
  155. def test_45_bit_even_size():
  156. # 45-bit, 6 B container, 3 channels, 5 samples, 90 B data chunk
  157. filename = 'test-8000Hz-le-3ch-5S-45bit.wav'
  158. rate, data = wavfile.read(datafile(filename), mmap=False)
  159. assert_equal(rate, 8000)
  160. assert_(np.issubdtype(data.dtype, np.int64))
  161. assert_equal(data.shape, (5, 3))
  162. # 19 LSBits should be 0
  163. assert_equal(data & 0x7ffff, 0)
  164. # Hand-made max/min samples under different conventions:
  165. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  166. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_fff8_0000, -0x10_0000],
  167. [-0x4000_0000_0000_0000, -0x3fff_ffff_fff8_0000, -0x08_0000],
  168. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x00_0000],
  169. [+0x4000_0000_0000_0000, +0x3fff_ffff_fff8_0000, +0x08_0000],
  170. [+0x7fff_ffff_fff8_0000, +0x7fff_ffff_fff8_0000, +0x10_0000]]
  171. # ^ clipped
  172. assert_equal(data, correct)
  173. def test_53_bit_odd_size():
  174. # 53-bit, 7 B container, 3 channels, 5 samples, 105 B data chunk + pad
  175. filename = 'test-8000Hz-le-3ch-5S-53bit.wav'
  176. rate, data = wavfile.read(datafile(filename), mmap=False)
  177. assert_equal(rate, 8000)
  178. assert_(np.issubdtype(data.dtype, np.int64))
  179. assert_equal(data.shape, (5, 3))
  180. # 11 LSBits should be 0
  181. assert_equal(data & 0x7ff, 0)
  182. # Hand-made max/min samples under different conventions:
  183. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  184. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_ffff_f800, -0x1000],
  185. [-0x4000_0000_0000_0000, -0x3fff_ffff_ffff_f800, -0x0800],
  186. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x0000],
  187. [+0x4000_0000_0000_0000, +0x3fff_ffff_ffff_f800, +0x0800],
  188. [+0x7fff_ffff_ffff_f800, +0x7fff_ffff_ffff_f800, +0x1000]]
  189. # ^ clipped
  190. assert_equal(data, correct)
  191. def test_64_bit_even_size():
  192. # 64-bit, 8 B container, 3 channels, 5 samples, 120 B data chunk
  193. for mmap in [False, True]:
  194. filename = 'test-8000Hz-le-3ch-5S-64bit.wav'
  195. rate, data = wavfile.read(datafile(filename), mmap=False)
  196. assert_equal(rate, 8000)
  197. assert_(np.issubdtype(data.dtype, np.int64))
  198. assert_equal(data.shape, (5, 3))
  199. # Hand-made max/min samples under different conventions:
  200. # Fixed-point 2**(N-1) Full-scale 2**(N-1)-1 LSB
  201. correct = [[-0x8000_0000_0000_0000, -0x7fff_ffff_ffff_ffff, -0x2],
  202. [-0x4000_0000_0000_0000, -0x3fff_ffff_ffff_ffff, -0x1],
  203. [+0x0000_0000_0000_0000, +0x0000_0000_0000_0000, +0x0],
  204. [+0x4000_0000_0000_0000, +0x3fff_ffff_ffff_ffff, +0x1],
  205. [+0x7fff_ffff_ffff_ffff, +0x7fff_ffff_ffff_ffff, +0x2]]
  206. # ^ clipped
  207. assert_equal(data, correct)
  208. del data
  209. def test_unsupported_mmap():
  210. # Test containers that cannot be mapped to numpy types
  211. for filename in {'test-8000Hz-le-3ch-5S-24bit.wav',
  212. 'test-8000Hz-le-3ch-5S-36bit.wav',
  213. 'test-8000Hz-le-3ch-5S-45bit.wav',
  214. 'test-8000Hz-le-3ch-5S-53bit.wav',
  215. 'test-8000Hz-le-1ch-10S-20bit-extra.wav'}:
  216. with raises(ValueError, match="mmap.*not compatible"):
  217. rate, data = wavfile.read(datafile(filename), mmap=True)
  218. def test_rifx():
  219. # Compare equivalent RIFX and RIFF files
  220. for rifx, riff in {('test-44100Hz-be-1ch-4bytes.wav',
  221. 'test-44100Hz-le-1ch-4bytes.wav'),
  222. ('test-8000Hz-be-3ch-5S-24bit.wav',
  223. 'test-8000Hz-le-3ch-5S-24bit.wav')}:
  224. rate1, data1 = wavfile.read(datafile(rifx), mmap=False)
  225. rate2, data2 = wavfile.read(datafile(riff), mmap=False)
  226. assert_equal(rate1, rate2)
  227. assert_equal(data1, data2)
  228. def test_read_unknown_filetype_fail():
  229. # Not an RIFF
  230. for mmap in [False, True]:
  231. filename = 'example_1.nc'
  232. with open(datafile(filename), 'rb') as fp:
  233. with raises(ValueError, match="CDF.*'RIFF' and 'RIFX' supported"):
  234. wavfile.read(fp, mmap=mmap)
  235. def test_read_unknown_riff_form_type():
  236. # RIFF, but not WAVE form
  237. for mmap in [False, True]:
  238. filename = 'Transparent Busy.ani'
  239. with open(datafile(filename), 'rb') as fp:
  240. with raises(ValueError, match='Not a WAV file.*ACON'):
  241. wavfile.read(fp, mmap=mmap)
  242. def test_read_unknown_wave_format():
  243. # RIFF and WAVE, but not supported format
  244. for mmap in [False, True]:
  245. filename = 'test-8000Hz-le-1ch-1byte-ulaw.wav'
  246. with open(datafile(filename), 'rb') as fp:
  247. with raises(ValueError, match='Unknown wave file format.*MULAW.*'
  248. 'Supported formats'):
  249. wavfile.read(fp, mmap=mmap)
  250. def test_read_early_eof_with_data():
  251. # File ends inside 'data' chunk, but we keep incomplete data
  252. for mmap in [False, True]:
  253. filename = 'test-44100Hz-le-1ch-4bytes-early-eof.wav'
  254. with open(datafile(filename), 'rb') as fp:
  255. with warns(wavfile.WavFileWarning, match='Reached EOF'):
  256. rate, data = wavfile.read(fp, mmap=mmap)
  257. assert data.size > 0
  258. assert rate == 44100
  259. # also test writing (gh-12176)
  260. data[0] = 0
  261. def test_read_early_eof():
  262. # File ends after 'fact' chunk at boundary, no data read
  263. for mmap in [False, True]:
  264. filename = 'test-44100Hz-le-1ch-4bytes-early-eof-no-data.wav'
  265. with open(datafile(filename), 'rb') as fp:
  266. with raises(ValueError, match="Unexpected end of file."):
  267. wavfile.read(fp, mmap=mmap)
  268. def test_read_incomplete_chunk():
  269. # File ends inside 'fmt ' chunk ID, no data read
  270. for mmap in [False, True]:
  271. filename = 'test-44100Hz-le-1ch-4bytes-incomplete-chunk.wav'
  272. with open(datafile(filename), 'rb') as fp:
  273. with raises(ValueError, match="Incomplete chunk ID.*b'f'"):
  274. wavfile.read(fp, mmap=mmap)
  275. def test_read_inconsistent_header():
  276. # File header's size fields contradict each other
  277. for mmap in [False, True]:
  278. filename = 'test-8000Hz-le-3ch-5S-24bit-inconsistent.wav'
  279. with open(datafile(filename), 'rb') as fp:
  280. with raises(ValueError, match="header is invalid"):
  281. wavfile.read(fp, mmap=mmap)
  282. # signed 8-bit integer PCM is not allowed
  283. # unsigned > 8-bit integer PCM is not allowed
  284. # 8- or 16-bit float PCM is not expected
  285. # g and q are platform-dependent, so not included
  286. @pytest.mark.parametrize("dt_str", ["<i2", "<i4", "<i8", "<f4", "<f8",
  287. ">i2", ">i4", ">i8", ">f4", ">f8", '|u1'])
  288. @pytest.mark.parametrize("channels", [1, 2, 5])
  289. @pytest.mark.parametrize("rate", [8000, 32000])
  290. @pytest.mark.parametrize("mmap", [False, True])
  291. @pytest.mark.parametrize("realfile", [False, True])
  292. def test_write_roundtrip(realfile, mmap, rate, channels, dt_str, tmpdir):
  293. dtype = np.dtype(dt_str)
  294. if realfile:
  295. tmpfile = str(tmpdir.join('temp.wav'))
  296. else:
  297. tmpfile = BytesIO()
  298. data = np.random.rand(100, channels)
  299. if channels == 1:
  300. data = data[:, 0]
  301. if dtype.kind == 'f':
  302. # The range of the float type should be in [-1, 1]
  303. data = data.astype(dtype)
  304. else:
  305. data = (data*128).astype(dtype)
  306. wavfile.write(tmpfile, rate, data)
  307. rate2, data2 = wavfile.read(tmpfile, mmap=mmap)
  308. assert_equal(rate, rate2)
  309. assert_(data2.dtype.byteorder in ('<', '=', '|'), msg=data2.dtype)
  310. assert_array_equal(data, data2)
  311. # also test writing (gh-12176)
  312. if realfile:
  313. data2[0] = 0
  314. else:
  315. with pytest.raises(ValueError, match='read-only'):
  316. data2[0] = 0
  317. if realfile and mmap and IS_PYPY and sys.platform == 'win32':
  318. # windows cannot remove a dead file held by a mmap but not collected
  319. # in PyPy; since the filename gets reused in this test, clean this up
  320. break_cycles()
  321. break_cycles()