# wavfile.py
"""
Module to read / write wav files using NumPy arrays

Functions
---------
`read`: Return the sample rate (in samples/sec) and data from a WAV file.

`write`: Write a NumPy array as a WAV file.

"""
import io
import struct
import sys
import warnings
from enum import IntEnum

import numpy
# Names exported by ``from <module> import *``: the warning category and the
# two entry points. Everything else in this file is prefixed with ``_`` or is
# an internal constant.
__all__ = [
    'WavFileWarning',
    'read',
    'write'
]
  19. class WavFileWarning(UserWarning):
  20. pass
class WAVE_FORMAT(IntEnum):
    """
    WAVE form wFormatTag IDs

    Complete list is in mmreg.h in Windows 10 SDK.  ALAC and OPUS are the
    newest additions, in v10.0.14393 2016-07

    Only the tags listed in ``KNOWN_WAVE_FORMATS`` are decoded by this
    module; the remaining members exist so that ``_raise_bad_format`` can
    report an unsupported format by name instead of by number.
    """
    UNKNOWN = 0x0000
    PCM = 0x0001
    ADPCM = 0x0002
    IEEE_FLOAT = 0x0003
    VSELP = 0x0004
    IBM_CVSD = 0x0005
    ALAW = 0x0006
    MULAW = 0x0007
    DTS = 0x0008
    DRM = 0x0009
    WMAVOICE9 = 0x000A
    WMAVOICE10 = 0x000B
    OKI_ADPCM = 0x0010
    DVI_ADPCM = 0x0011
    # Same value as DVI_ADPCM, so the enum treats this name as an alias of it.
    IMA_ADPCM = 0x0011  # Duplicate
    MEDIASPACE_ADPCM = 0x0012
    SIERRA_ADPCM = 0x0013
    G723_ADPCM = 0x0014
    DIGISTD = 0x0015
    DIGIFIX = 0x0016
    DIALOGIC_OKI_ADPCM = 0x0017
    MEDIAVISION_ADPCM = 0x0018
    CU_CODEC = 0x0019
    HP_DYN_VOICE = 0x001A
    YAMAHA_ADPCM = 0x0020
    SONARC = 0x0021
    DSPGROUP_TRUESPEECH = 0x0022
    ECHOSC1 = 0x0023
    AUDIOFILE_AF36 = 0x0024
    APTX = 0x0025
    AUDIOFILE_AF10 = 0x0026
    PROSODY_1612 = 0x0027
    LRC = 0x0028
    DOLBY_AC2 = 0x0030
    GSM610 = 0x0031
    MSNAUDIO = 0x0032
    ANTEX_ADPCME = 0x0033
    CONTROL_RES_VQLPC = 0x0034
    DIGIREAL = 0x0035
    DIGIADPCM = 0x0036
    CONTROL_RES_CR10 = 0x0037
    NMS_VBXADPCM = 0x0038
    CS_IMAADPCM = 0x0039
    ECHOSC3 = 0x003A
    ROCKWELL_ADPCM = 0x003B
    ROCKWELL_DIGITALK = 0x003C
    XEBEC = 0x003D
    G721_ADPCM = 0x0040
    G728_CELP = 0x0041
    MSG723 = 0x0042
    INTEL_G723_1 = 0x0043
    INTEL_G729 = 0x0044
    SHARP_G726 = 0x0045
    MPEG = 0x0050
    RT24 = 0x0052
    PAC = 0x0053
    MPEGLAYER3 = 0x0055
    LUCENT_G723 = 0x0059
    CIRRUS = 0x0060
    ESPCM = 0x0061
    VOXWARE = 0x0062
    CANOPUS_ATRAC = 0x0063
    G726_ADPCM = 0x0064
    G722_ADPCM = 0x0065
    DSAT = 0x0066
    DSAT_DISPLAY = 0x0067
    VOXWARE_BYTE_ALIGNED = 0x0069
    VOXWARE_AC8 = 0x0070
    VOXWARE_AC10 = 0x0071
    VOXWARE_AC16 = 0x0072
    VOXWARE_AC20 = 0x0073
    VOXWARE_RT24 = 0x0074
    VOXWARE_RT29 = 0x0075
    VOXWARE_RT29HW = 0x0076
    VOXWARE_VR12 = 0x0077
    VOXWARE_VR18 = 0x0078
    VOXWARE_TQ40 = 0x0079
    VOXWARE_SC3 = 0x007A
    VOXWARE_SC3_1 = 0x007B
    SOFTSOUND = 0x0080
    VOXWARE_TQ60 = 0x0081
    MSRT24 = 0x0082
    G729A = 0x0083
    MVI_MVI2 = 0x0084
    DF_G726 = 0x0085
    DF_GSM610 = 0x0086
    ISIAUDIO = 0x0088
    ONLIVE = 0x0089
    MULTITUDE_FT_SX20 = 0x008A
    INFOCOM_ITS_G721_ADPCM = 0x008B
    CONVEDIA_G729 = 0x008C
    CONGRUENCY = 0x008D
    SBC24 = 0x0091
    DOLBY_AC3_SPDIF = 0x0092
    MEDIASONIC_G723 = 0x0093
    PROSODY_8KBPS = 0x0094
    ZYXEL_ADPCM = 0x0097
    PHILIPS_LPCBB = 0x0098
    PACKED = 0x0099
    MALDEN_PHONYTALK = 0x00A0
    RACAL_RECORDER_GSM = 0x00A1
    RACAL_RECORDER_G720_A = 0x00A2
    RACAL_RECORDER_G723_1 = 0x00A3
    RACAL_RECORDER_TETRA_ACELP = 0x00A4
    NEC_AAC = 0x00B0
    RAW_AAC1 = 0x00FF
    RHETOREX_ADPCM = 0x0100
    IRAT = 0x0101
    VIVO_G723 = 0x0111
    VIVO_SIREN = 0x0112
    PHILIPS_CELP = 0x0120
    PHILIPS_GRUNDIG = 0x0121
    DIGITAL_G723 = 0x0123
    SANYO_LD_ADPCM = 0x0125
    SIPROLAB_ACEPLNET = 0x0130
    SIPROLAB_ACELP4800 = 0x0131
    SIPROLAB_ACELP8V3 = 0x0132
    SIPROLAB_G729 = 0x0133
    SIPROLAB_G729A = 0x0134
    SIPROLAB_KELVIN = 0x0135
    VOICEAGE_AMR = 0x0136
    G726ADPCM = 0x0140
    DICTAPHONE_CELP68 = 0x0141
    DICTAPHONE_CELP54 = 0x0142
    QUALCOMM_PUREVOICE = 0x0150
    QUALCOMM_HALFRATE = 0x0151
    TUBGSM = 0x0155
    MSAUDIO1 = 0x0160
    WMAUDIO2 = 0x0161
    WMAUDIO3 = 0x0162
    WMAUDIO_LOSSLESS = 0x0163
    WMASPDIF = 0x0164
    UNISYS_NAP_ADPCM = 0x0170
    UNISYS_NAP_ULAW = 0x0171
    UNISYS_NAP_ALAW = 0x0172
    UNISYS_NAP_16K = 0x0173
    SYCOM_ACM_SYC008 = 0x0174
    SYCOM_ACM_SYC701_G726L = 0x0175
    SYCOM_ACM_SYC701_CELP54 = 0x0176
    SYCOM_ACM_SYC701_CELP68 = 0x0177
    KNOWLEDGE_ADVENTURE_ADPCM = 0x0178
    FRAUNHOFER_IIS_MPEG2_AAC = 0x0180
    DTS_DS = 0x0190
    CREATIVE_ADPCM = 0x0200
    CREATIVE_FASTSPEECH8 = 0x0202
    CREATIVE_FASTSPEECH10 = 0x0203
    UHER_ADPCM = 0x0210
    ULEAD_DV_AUDIO = 0x0215
    ULEAD_DV_AUDIO_1 = 0x0216
    QUARTERDECK = 0x0220
    ILINK_VC = 0x0230
    RAW_SPORT = 0x0240
    ESST_AC3 = 0x0241
    GENERIC_PASSTHRU = 0x0249
    IPI_HSX = 0x0250
    IPI_RPELP = 0x0251
    CS2 = 0x0260
    SONY_SCX = 0x0270
    SONY_SCY = 0x0271
    SONY_ATRAC3 = 0x0272
    SONY_SPC = 0x0273
    TELUM_AUDIO = 0x0280
    TELUM_IA_AUDIO = 0x0281
    NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285
    FM_TOWNS_SND = 0x0300
    MICRONAS = 0x0350
    MICRONAS_CELP833 = 0x0351
    BTV_DIGITAL = 0x0400
    INTEL_MUSIC_CODER = 0x0401
    INDEO_AUDIO = 0x0402
    QDESIGN_MUSIC = 0x0450
    ON2_VP7_AUDIO = 0x0500
    ON2_VP6_AUDIO = 0x0501
    VME_VMPCM = 0x0680
    TPC = 0x0681
    LIGHTWAVE_LOSSLESS = 0x08AE
    OLIGSM = 0x1000
    OLIADPCM = 0x1001
    OLICELP = 0x1002
    OLISBC = 0x1003
    OLIOPR = 0x1004
    LH_CODEC = 0x1100
    LH_CODEC_CELP = 0x1101
    LH_CODEC_SBC8 = 0x1102
    LH_CODEC_SBC12 = 0x1103
    LH_CODEC_SBC16 = 0x1104
    NORRIS = 0x1400
    ISIAUDIO_2 = 0x1401
    SOUNDSPACE_MUSICOMPRESS = 0x1500
    MPEG_ADTS_AAC = 0x1600
    MPEG_RAW_AAC = 0x1601
    MPEG_LOAS = 0x1602
    NOKIA_MPEG_ADTS_AAC = 0x1608
    NOKIA_MPEG_RAW_AAC = 0x1609
    VODAFONE_MPEG_ADTS_AAC = 0x160A
    VODAFONE_MPEG_RAW_AAC = 0x160B
    MPEG_HEAAC = 0x1610
    VOXWARE_RT24_SPEECH = 0x181C
    SONICFOUNDRY_LOSSLESS = 0x1971
    INNINGS_TELECOM_ADPCM = 0x1979
    LUCENT_SX8300P = 0x1C07
    LUCENT_SX5363S = 0x1C0C
    CUSEEME = 0x1F03
    NTCSOFT_ALF2CM_ACM = 0x1FC4
    DVM = 0x2000
    DTS2 = 0x2001
    MAKEAVIS = 0x3313
    DIVIO_MPEG4_AAC = 0x4143
    NOKIA_ADAPTIVE_MULTIRATE = 0x4201
    DIVIO_G726 = 0x4243
    LEAD_SPEECH = 0x434C
    LEAD_VORBIS = 0x564C
    WAVPACK_AUDIO = 0x5756
    OGG_VORBIS_MODE_1 = 0x674F
    OGG_VORBIS_MODE_2 = 0x6750
    OGG_VORBIS_MODE_3 = 0x6751
    OGG_VORBIS_MODE_1_PLUS = 0x676F
    OGG_VORBIS_MODE_2_PLUS = 0x6770
    OGG_VORBIS_MODE_3_PLUS = 0x6771
    ALAC = 0x6C61
    # Leading underscore because a Python identifier cannot start with a digit.
    _3COM_NBX = 0x7000  # Can't have leading digit
    OPUS = 0x704F
    FAAD_AAC = 0x706D
    AMR_NB = 0x7361
    AMR_WB = 0x7362
    AMR_WP = 0x7363
    GSM_AMR_CBR = 0x7A21
    GSM_AMR_VBR_SID = 0x7A22
    COMVERSE_INFOSYS_G723_1 = 0xA100
    COMVERSE_INFOSYS_AVQSBC = 0xA101
    COMVERSE_INFOSYS_SBC = 0xA102
    SYMBOL_G729_A = 0xA103
    VOICEAGE_AMR_WB = 0xA104
    INGENIENT_G726 = 0xA105
    MPEG4_AAC = 0xA106
    ENCORE_G726 = 0xA107
    ZOLL_ASAO = 0xA108
    SPEEX_VOICE = 0xA109
    VIANIX_MASC = 0xA10A
    WM9_SPECTRUM_ANALYZER = 0xA10B
    WMF_SPECTRUM_ANAYZER = 0xA10C
    GSM_610 = 0xA10D
    GSM_620 = 0xA10E
    GSM_660 = 0xA10F
    GSM_690 = 0xA110
    GSM_ADAPTIVE_MULTIRATE_WB = 0xA111
    POLYCOM_G722 = 0xA112
    POLYCOM_G728 = 0xA113
    POLYCOM_G729_A = 0xA114
    POLYCOM_SIREN = 0xA115
    GLOBAL_IP_ILBC = 0xA116
    RADIOTIME_TIME_SHIFT_RADIO = 0xA117
    NICE_ACA = 0xA118
    NICE_ADPCM = 0xA119
    VOCORD_G721 = 0xA11A
    VOCORD_G726 = 0xA11B
    VOCORD_G722_1 = 0xA11C
    VOCORD_G728 = 0xA11D
    VOCORD_G729 = 0xA11E
    VOCORD_G729_A = 0xA11F
    VOCORD_G723_1 = 0xA120
    VOCORD_LBC = 0xA121
    NICE_G728 = 0xA122
    FRACE_TELECOM_G729 = 0xA123
    CODIAN = 0xA124
    FLAC = 0xF1AC
    # Checked in _read_fmt_chunk: for this tag the real format is taken from
    # the sub-format GUID stored in the extended part of the fmt chunk.
    EXTENSIBLE = 0xFFFE
    DEVELOPMENT = 0xFFFF
# Format tags this module can decode. _read_fmt_chunk rejects any other tag
# through _raise_bad_format, whose error message lists these names.
KNOWN_WAVE_FORMATS = {WAVE_FORMAT.PCM, WAVE_FORMAT.IEEE_FLOAT}
  296. def _raise_bad_format(format_tag):
  297. try:
  298. format_name = WAVE_FORMAT(format_tag).name
  299. except ValueError:
  300. format_name = f'{format_tag:#06x}'
  301. raise ValueError(f"Unknown wave file format: {format_name}. Supported "
  302. "formats: " +
  303. ', '.join(x.name for x in KNOWN_WAVE_FORMATS))
  304. def _read_fmt_chunk(fid, is_big_endian):
  305. """
  306. Returns
  307. -------
  308. size : int
  309. size of format subchunk in bytes (minus 8 for "fmt " and itself)
  310. format_tag : int
  311. PCM, float, or compressed format
  312. channels : int
  313. number of channels
  314. fs : int
  315. sampling frequency in samples per second
  316. bytes_per_second : int
  317. overall byte rate for the file
  318. block_align : int
  319. bytes per sample, including all channels
  320. bit_depth : int
  321. bits per sample
  322. Notes
  323. -----
  324. Assumes file pointer is immediately after the 'fmt ' id
  325. """
  326. if is_big_endian:
  327. fmt = '>'
  328. else:
  329. fmt = '<'
  330. size = struct.unpack(fmt+'I', fid.read(4))[0]
  331. if size < 16:
  332. raise ValueError("Binary structure of wave file is not compliant")
  333. res = struct.unpack(fmt+'HHIIHH', fid.read(16))
  334. bytes_read = 16
  335. format_tag, channels, fs, bytes_per_second, block_align, bit_depth = res
  336. if format_tag == WAVE_FORMAT.EXTENSIBLE and size >= (16+2):
  337. ext_chunk_size = struct.unpack(fmt+'H', fid.read(2))[0]
  338. bytes_read += 2
  339. if ext_chunk_size >= 22:
  340. extensible_chunk_data = fid.read(22)
  341. bytes_read += 22
  342. raw_guid = extensible_chunk_data[2+4:2+4+16]
  343. # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
  344. # MS GUID byte order: first three groups are native byte order,
  345. # rest is Big Endian
  346. if is_big_endian:
  347. tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
  348. else:
  349. tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
  350. if raw_guid.endswith(tail):
  351. format_tag = struct.unpack(fmt+'I', raw_guid[:4])[0]
  352. else:
  353. raise ValueError("Binary structure of wave file is not compliant")
  354. if format_tag not in KNOWN_WAVE_FORMATS:
  355. _raise_bad_format(format_tag)
  356. # move file pointer to next chunk
  357. if size > bytes_read:
  358. fid.read(size - bytes_read)
  359. # fmt should always be 16, 18 or 40, but handle it just in case
  360. _handle_pad_byte(fid, size)
  361. if format_tag == WAVE_FORMAT.PCM:
  362. if bytes_per_second != fs * block_align:
  363. raise ValueError("WAV header is invalid: nAvgBytesPerSec must"
  364. " equal product of nSamplesPerSec and"
  365. " nBlockAlign, but file has nSamplesPerSec ="
  366. f" {fs}, nBlockAlign = {block_align}, and"
  367. f" nAvgBytesPerSec = {bytes_per_second}")
  368. return (size, format_tag, channels, fs, bytes_per_second, block_align,
  369. bit_depth)
  370. def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
  371. block_align, mmap=False):
  372. """
  373. Notes
  374. -----
  375. Assumes file pointer is immediately after the 'data' id
  376. It's possible to not use all available bits in a container, or to store
  377. samples in a container bigger than necessary, so bytes_per_sample uses
  378. the actual reported container size (nBlockAlign / nChannels). Real-world
  379. examples:
  380. Adobe Audition's "24-bit packed int (type 1, 20-bit)"
  381. nChannels = 2, nBlockAlign = 6, wBitsPerSample = 20
  382. http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/AFsp/M1F1-int12-AFsp.wav
  383. is:
  384. nChannels = 2, nBlockAlign = 4, wBitsPerSample = 12
  385. http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/multichaudP.pdf
  386. gives an example of:
  387. nChannels = 2, nBlockAlign = 8, wBitsPerSample = 20
  388. """
  389. if is_big_endian:
  390. fmt = '>'
  391. else:
  392. fmt = '<'
  393. # Size of the data subchunk in bytes
  394. size = struct.unpack(fmt+'I', fid.read(4))[0]
  395. # Number of bytes per sample (sample container size)
  396. bytes_per_sample = block_align // channels
  397. n_samples = size // bytes_per_sample
  398. if format_tag == WAVE_FORMAT.PCM:
  399. if 1 <= bit_depth <= 8:
  400. dtype = 'u1' # WAV of 8-bit integer or less are unsigned
  401. elif bytes_per_sample in {3, 5, 6, 7}:
  402. # No compatible dtype. Load as raw bytes for reshaping later.
  403. dtype = 'V1'
  404. elif bit_depth <= 64:
  405. # Remaining bit depths can map directly to signed numpy dtypes
  406. dtype = f'{fmt}i{bytes_per_sample}'
  407. else:
  408. raise ValueError("Unsupported bit depth: the WAV file "
  409. f"has {bit_depth}-bit integer data.")
  410. elif format_tag == WAVE_FORMAT.IEEE_FLOAT:
  411. if bit_depth in {32, 64}:
  412. dtype = f'{fmt}f{bytes_per_sample}'
  413. else:
  414. raise ValueError("Unsupported bit depth: the WAV file "
  415. f"has {bit_depth}-bit floating-point data.")
  416. else:
  417. _raise_bad_format(format_tag)
  418. start = fid.tell()
  419. if not mmap:
  420. try:
  421. count = size if dtype == 'V1' else n_samples
  422. data = numpy.fromfile(fid, dtype=dtype, count=count)
  423. except io.UnsupportedOperation: # not a C-like file
  424. fid.seek(start, 0) # just in case it seeked, though it shouldn't
  425. data = numpy.frombuffer(fid.read(size), dtype=dtype)
  426. if dtype == 'V1':
  427. # Rearrange raw bytes into smallest compatible numpy dtype
  428. dt = f'{fmt}i4' if bytes_per_sample == 3 else f'{fmt}i8'
  429. a = numpy.zeros((len(data) // bytes_per_sample, numpy.dtype(dt).itemsize),
  430. dtype='V1')
  431. if is_big_endian:
  432. a[:, :bytes_per_sample] = data.reshape((-1, bytes_per_sample))
  433. else:
  434. a[:, -bytes_per_sample:] = data.reshape((-1, bytes_per_sample))
  435. data = a.view(dt).reshape(a.shape[:-1])
  436. else:
  437. if bytes_per_sample in {1, 2, 4, 8}:
  438. start = fid.tell()
  439. data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
  440. shape=(n_samples,))
  441. fid.seek(start + size)
  442. else:
  443. raise ValueError("mmap=True not compatible with "
  444. f"{bytes_per_sample}-byte container size.")
  445. _handle_pad_byte(fid, size)
  446. if channels > 1:
  447. data = data.reshape(-1, channels)
  448. return data
  449. def _skip_unknown_chunk(fid, is_big_endian):
  450. if is_big_endian:
  451. fmt = '>I'
  452. else:
  453. fmt = '<I'
  454. data = fid.read(4)
  455. # call unpack() and seek() only if we have really read data from file
  456. # otherwise empty read at the end of the file would trigger
  457. # unnecessary exception at unpack() call
  458. # in case data equals somehow to 0, there is no need for seek() anyway
  459. if data:
  460. size = struct.unpack(fmt, data)[0]
  461. fid.seek(size, 1)
  462. _handle_pad_byte(fid, size)
  463. def _read_riff_chunk(fid):
  464. str1 = fid.read(4) # File signature
  465. if str1 == b'RIFF':
  466. is_big_endian = False
  467. fmt = '<I'
  468. elif str1 == b'RIFX':
  469. is_big_endian = True
  470. fmt = '>I'
  471. else:
  472. # There are also .wav files with "FFIR" or "XFIR" signatures?
  473. raise ValueError(f"File format {repr(str1)} not understood. Only "
  474. "'RIFF' and 'RIFX' supported.")
  475. # Size of entire file
  476. file_size = struct.unpack(fmt, fid.read(4))[0] + 8
  477. str2 = fid.read(4)
  478. if str2 != b'WAVE':
  479. raise ValueError(f"Not a WAV file. RIFF form type is {repr(str2)}.")
  480. return file_size, is_big_endian
  481. def _handle_pad_byte(fid, size):
  482. # "If the chunk size is an odd number of bytes, a pad byte with value zero
  483. # is written after ckData." So we need to seek past this after each chunk.
  484. if size % 2:
  485. fid.seek(1, 1)
  486. def read(filename, mmap=False):
  487. """
  488. Open a WAV file.
  489. Return the sample rate (in samples/sec) and data from an LPCM WAV file.
  490. Parameters
  491. ----------
  492. filename : string or open file handle
  493. Input WAV file.
  494. mmap : bool, optional
  495. Whether to read data as memory-mapped (default: False). Not compatible
  496. with some bit depths; see Notes. Only to be used on real files.
  497. .. versionadded:: 0.12.0
  498. Returns
  499. -------
  500. rate : int
  501. Sample rate of WAV file.
  502. data : numpy array
  503. Data read from WAV file. Data-type is determined from the file;
  504. see Notes. Data is 1-D for 1-channel WAV, or 2-D of shape
  505. (Nsamples, Nchannels) otherwise. If a file-like input without a
  506. C-like file descriptor (e.g., :class:`python:io.BytesIO`) is
  507. passed, this will not be writeable.
  508. Notes
  509. -----
  510. Common data types: [1]_
  511. ===================== =========== =========== =============
  512. WAV format Min Max NumPy dtype
  513. ===================== =========== =========== =============
  514. 32-bit floating-point -1.0 +1.0 float32
  515. 32-bit integer PCM -2147483648 +2147483647 int32
  516. 24-bit integer PCM -2147483648 +2147483392 int32
  517. 16-bit integer PCM -32768 +32767 int16
  518. 8-bit integer PCM 0 255 uint8
  519. ===================== =========== =========== =============
  520. WAV files can specify arbitrary bit depth, and this function supports
  521. reading any integer PCM depth from 1 to 64 bits. Data is returned in the
  522. smallest compatible numpy int type, in left-justified format. 8-bit and
  523. lower is unsigned, while 9-bit and higher is signed.
  524. For example, 24-bit data will be stored as int32, with the MSB of the
  525. 24-bit data stored at the MSB of the int32, and typically the least
  526. significant byte is 0x00. (However, if a file actually contains data past
  527. its specified bit depth, those bits will be read and output, too. [2]_)
  528. This bit justification and sign matches WAV's native internal format, which
  529. allows memory mapping of WAV files that use 1, 2, 4, or 8 bytes per sample
  530. (so 24-bit files cannot be memory-mapped, but 32-bit can).
  531. IEEE float PCM in 32- or 64-bit format is supported, with or without mmap.
  532. Values exceeding [-1, +1] are not clipped.
  533. Non-linear PCM (mu-law, A-law) is not supported.
  534. References
  535. ----------
  536. .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
  537. Interface and Data Specifications 1.0", section "Data Format of the
  538. Samples", August 1991
  539. http://www.tactilemedia.com/info/MCI_Control_Info.html
  540. .. [2] Adobe Systems Incorporated, "Adobe Audition 3 User Guide", section
  541. "Audio file formats: 24-bit Packed Int (type 1, 20-bit)", 2007
  542. Examples
  543. --------
  544. >>> from os.path import dirname, join as pjoin
  545. >>> from scipy.io import wavfile
  546. >>> import scipy.io
  547. Get the filename for an example .wav file from the tests/data directory.
  548. >>> data_dir = pjoin(dirname(scipy.io.__file__), 'tests', 'data')
  549. >>> wav_fname = pjoin(data_dir, 'test-44100Hz-2ch-32bit-float-be.wav')
  550. Load the .wav file contents.
  551. >>> samplerate, data = wavfile.read(wav_fname)
  552. >>> print(f"number of channels = {data.shape[1]}")
  553. number of channels = 2
  554. >>> length = data.shape[0] / samplerate
  555. >>> print(f"length = {length}s")
  556. length = 0.01s
  557. Plot the waveform.
  558. >>> import matplotlib.pyplot as plt
  559. >>> import numpy as np
  560. >>> time = np.linspace(0., length, data.shape[0])
  561. >>> plt.plot(time, data[:, 0], label="Left channel")
  562. >>> plt.plot(time, data[:, 1], label="Right channel")
  563. >>> plt.legend()
  564. >>> plt.xlabel("Time [s]")
  565. >>> plt.ylabel("Amplitude")
  566. >>> plt.show()
  567. """
  568. if hasattr(filename, 'read'):
  569. fid = filename
  570. mmap = False
  571. else:
  572. fid = open(filename, 'rb')
  573. try:
  574. file_size, is_big_endian = _read_riff_chunk(fid)
  575. fmt_chunk_received = False
  576. data_chunk_received = False
  577. while fid.tell() < file_size:
  578. # read the next chunk
  579. chunk_id = fid.read(4)
  580. if not chunk_id:
  581. if data_chunk_received:
  582. # End of file but data successfully read
  583. warnings.warn(
  584. "Reached EOF prematurely; finished at {:d} bytes, "
  585. "expected {:d} bytes from header."
  586. .format(fid.tell(), file_size),
  587. WavFileWarning, stacklevel=2)
  588. break
  589. else:
  590. raise ValueError("Unexpected end of file.")
  591. elif len(chunk_id) < 4:
  592. msg = f"Incomplete chunk ID: {repr(chunk_id)}"
  593. # If we have the data, ignore the broken chunk
  594. if fmt_chunk_received and data_chunk_received:
  595. warnings.warn(msg + ", ignoring it.", WavFileWarning,
  596. stacklevel=2)
  597. else:
  598. raise ValueError(msg)
  599. if chunk_id == b'fmt ':
  600. fmt_chunk_received = True
  601. fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
  602. format_tag, channels, fs = fmt_chunk[1:4]
  603. bit_depth = fmt_chunk[6]
  604. block_align = fmt_chunk[5]
  605. elif chunk_id == b'fact':
  606. _skip_unknown_chunk(fid, is_big_endian)
  607. elif chunk_id == b'data':
  608. data_chunk_received = True
  609. if not fmt_chunk_received:
  610. raise ValueError("No fmt chunk before data")
  611. data = _read_data_chunk(fid, format_tag, channels, bit_depth,
  612. is_big_endian, block_align, mmap)
  613. elif chunk_id == b'LIST':
  614. # Someday this could be handled properly but for now skip it
  615. _skip_unknown_chunk(fid, is_big_endian)
  616. elif chunk_id in {b'JUNK', b'Fake'}:
  617. # Skip alignment chunks without warning
  618. _skip_unknown_chunk(fid, is_big_endian)
  619. else:
  620. warnings.warn("Chunk (non-data) not understood, skipping it.",
  621. WavFileWarning, stacklevel=2)
  622. _skip_unknown_chunk(fid, is_big_endian)
  623. finally:
  624. if not hasattr(filename, 'read'):
  625. fid.close()
  626. else:
  627. fid.seek(0)
  628. return fs, data
  629. def write(filename, rate, data):
  630. """
  631. Write a NumPy array as a WAV file.
  632. Parameters
  633. ----------
  634. filename : string or open file handle
  635. Output wav file.
  636. rate : int
  637. The sample rate (in samples/sec).
  638. data : ndarray
  639. A 1-D or 2-D NumPy array of either integer or float data-type.
  640. Notes
  641. -----
  642. * Writes a simple uncompressed WAV file.
  643. * To write multiple-channels, use a 2-D array of shape
  644. (Nsamples, Nchannels).
  645. * The bits-per-sample and PCM/float will be determined by the data-type.
  646. Common data types: [1]_
  647. ===================== =========== =========== =============
  648. WAV format Min Max NumPy dtype
  649. ===================== =========== =========== =============
  650. 32-bit floating-point -1.0 +1.0 float32
  651. 32-bit PCM -2147483648 +2147483647 int32
  652. 16-bit PCM -32768 +32767 int16
  653. 8-bit PCM 0 255 uint8
  654. ===================== =========== =========== =============
  655. Note that 8-bit PCM is unsigned.
  656. References
  657. ----------
  658. .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
  659. Interface and Data Specifications 1.0", section "Data Format of the
  660. Samples", August 1991
  661. http://www.tactilemedia.com/info/MCI_Control_Info.html
  662. Examples
  663. --------
  664. Create a 100Hz sine wave, sampled at 44100Hz.
  665. Write to 16-bit PCM, Mono.
  666. >>> from scipy.io.wavfile import write
  667. >>> import numpy as np
  668. >>> samplerate = 44100; fs = 100
  669. >>> t = np.linspace(0., 1., samplerate)
  670. >>> amplitude = np.iinfo(np.int16).max
  671. >>> data = amplitude * np.sin(2. * np.pi * fs * t)
  672. >>> write("example.wav", samplerate, data.astype(np.int16))
  673. """
  674. if hasattr(filename, 'write'):
  675. fid = filename
  676. else:
  677. fid = open(filename, 'wb')
  678. fs = rate
  679. try:
  680. dkind = data.dtype.kind
  681. if not (dkind == 'i' or dkind == 'f' or (dkind == 'u' and
  682. data.dtype.itemsize == 1)):
  683. raise ValueError("Unsupported data type '%s'" % data.dtype)
  684. header_data = b''
  685. header_data += b'RIFF'
  686. header_data += b'\x00\x00\x00\x00'
  687. header_data += b'WAVE'
  688. # fmt chunk
  689. header_data += b'fmt '
  690. if dkind == 'f':
  691. format_tag = WAVE_FORMAT.IEEE_FLOAT
  692. else:
  693. format_tag = WAVE_FORMAT.PCM
  694. if data.ndim == 1:
  695. channels = 1
  696. else:
  697. channels = data.shape[1]
  698. bit_depth = data.dtype.itemsize * 8
  699. bytes_per_second = fs*(bit_depth // 8)*channels
  700. block_align = channels * (bit_depth // 8)
  701. fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
  702. bytes_per_second, block_align, bit_depth)
  703. if not (dkind == 'i' or dkind == 'u'):
  704. # add cbSize field for non-PCM files
  705. fmt_chunk_data += b'\x00\x00'
  706. header_data += struct.pack('<I', len(fmt_chunk_data))
  707. header_data += fmt_chunk_data
  708. # fact chunk (non-PCM files)
  709. if not (dkind == 'i' or dkind == 'u'):
  710. header_data += b'fact'
  711. header_data += struct.pack('<II', 4, data.shape[0])
  712. # check data size (needs to be immediately before the data chunk)
  713. if ((len(header_data)-4-4) + (4+4+data.nbytes)) > 0xFFFFFFFF:
  714. raise ValueError("Data exceeds wave file size limit")
  715. fid.write(header_data)
  716. # data chunk
  717. fid.write(b'data')
  718. fid.write(struct.pack('<I', data.nbytes))
  719. if data.dtype.byteorder == '>' or (data.dtype.byteorder == '=' and
  720. sys.byteorder == 'big'):
  721. data = data.byteswap()
  722. _array_tofile(fid, data)
  723. # Determine file size and place it in correct
  724. # position at start of the file.
  725. size = fid.tell()
  726. fid.seek(4)
  727. fid.write(struct.pack('<I', size-8))
  728. finally:
  729. if not hasattr(filename, 'write'):
  730. fid.close()
  731. else:
  732. fid.seek(0)
  733. def _array_tofile(fid, data):
  734. # ravel gives a c-contiguous buffer
  735. fid.write(data.ravel().view('b').data)