12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840 |
- """
- Module to read / write wav files using NumPy arrays
- Functions
- ---------
- `read`: Return the sample rate (in samples/sec) and data from a WAV file.
- `write`: Write a NumPy array as a WAV file.
- """
- import io
- import sys
- import numpy
- import struct
- import warnings
- from enum import IntEnum
# Names exported by ``from <module> import *``: the warning category plus
# the two public entry points.
__all__ = [
    'WavFileWarning',
    'read',
    'write'
]
class WavFileWarning(UserWarning):
    """
    Warning category used by `read` for recoverable problems in a WAV file.

    It is issued when the file ends prematurely after the data was read,
    when a trailing chunk ID is incomplete, or when a chunk is not
    understood and is skipped.
    """
class WAVE_FORMAT(IntEnum):
    """
    WAVE form wFormatTag IDs

    Complete list is in mmreg.h in Windows 10 SDK.  ALAC and OPUS are the
    newest additions, in v10.0.14393 2016-07

    Members are plain integers (`IntEnum`), so a format tag unpacked from a
    file header can be compared directly against them, and
    ``WAVE_FORMAT(tag).name`` gives a readable name for a known tag.
    """
    UNKNOWN = 0x0000
    PCM = 0x0001
    ADPCM = 0x0002
    IEEE_FLOAT = 0x0003
    VSELP = 0x0004
    IBM_CVSD = 0x0005
    ALAW = 0x0006
    MULAW = 0x0007
    DTS = 0x0008
    DRM = 0x0009
    WMAVOICE9 = 0x000A
    WMAVOICE10 = 0x000B
    OKI_ADPCM = 0x0010
    DVI_ADPCM = 0x0011
    # Same value as DVI_ADPCM, so Enum treats this name as an alias of it
    IMA_ADPCM = 0x0011  # Duplicate
    MEDIASPACE_ADPCM = 0x0012
    SIERRA_ADPCM = 0x0013
    G723_ADPCM = 0x0014
    DIGISTD = 0x0015
    DIGIFIX = 0x0016
    DIALOGIC_OKI_ADPCM = 0x0017
    MEDIAVISION_ADPCM = 0x0018
    CU_CODEC = 0x0019
    HP_DYN_VOICE = 0x001A
    YAMAHA_ADPCM = 0x0020
    SONARC = 0x0021
    DSPGROUP_TRUESPEECH = 0x0022
    ECHOSC1 = 0x0023
    AUDIOFILE_AF36 = 0x0024
    APTX = 0x0025
    AUDIOFILE_AF10 = 0x0026
    PROSODY_1612 = 0x0027
    LRC = 0x0028
    DOLBY_AC2 = 0x0030
    GSM610 = 0x0031
    MSNAUDIO = 0x0032
    ANTEX_ADPCME = 0x0033
    CONTROL_RES_VQLPC = 0x0034
    DIGIREAL = 0x0035
    DIGIADPCM = 0x0036
    CONTROL_RES_CR10 = 0x0037
    NMS_VBXADPCM = 0x0038
    CS_IMAADPCM = 0x0039
    ECHOSC3 = 0x003A
    ROCKWELL_ADPCM = 0x003B
    ROCKWELL_DIGITALK = 0x003C
    XEBEC = 0x003D
    G721_ADPCM = 0x0040
    G728_CELP = 0x0041
    MSG723 = 0x0042
    INTEL_G723_1 = 0x0043
    INTEL_G729 = 0x0044
    SHARP_G726 = 0x0045
    MPEG = 0x0050
    RT24 = 0x0052
    PAC = 0x0053
    MPEGLAYER3 = 0x0055
    LUCENT_G723 = 0x0059
    CIRRUS = 0x0060
    ESPCM = 0x0061
    VOXWARE = 0x0062
    CANOPUS_ATRAC = 0x0063
    G726_ADPCM = 0x0064
    G722_ADPCM = 0x0065
    DSAT = 0x0066
    DSAT_DISPLAY = 0x0067
    VOXWARE_BYTE_ALIGNED = 0x0069
    VOXWARE_AC8 = 0x0070
    VOXWARE_AC10 = 0x0071
    VOXWARE_AC16 = 0x0072
    VOXWARE_AC20 = 0x0073
    VOXWARE_RT24 = 0x0074
    VOXWARE_RT29 = 0x0075
    VOXWARE_RT29HW = 0x0076
    VOXWARE_VR12 = 0x0077
    VOXWARE_VR18 = 0x0078
    VOXWARE_TQ40 = 0x0079
    VOXWARE_SC3 = 0x007A
    VOXWARE_SC3_1 = 0x007B
    SOFTSOUND = 0x0080
    VOXWARE_TQ60 = 0x0081
    MSRT24 = 0x0082
    G729A = 0x0083
    MVI_MVI2 = 0x0084
    DF_G726 = 0x0085
    DF_GSM610 = 0x0086
    ISIAUDIO = 0x0088
    ONLIVE = 0x0089
    MULTITUDE_FT_SX20 = 0x008A
    INFOCOM_ITS_G721_ADPCM = 0x008B
    CONVEDIA_G729 = 0x008C
    CONGRUENCY = 0x008D
    SBC24 = 0x0091
    DOLBY_AC3_SPDIF = 0x0092
    MEDIASONIC_G723 = 0x0093
    PROSODY_8KBPS = 0x0094
    ZYXEL_ADPCM = 0x0097
    PHILIPS_LPCBB = 0x0098
    PACKED = 0x0099
    MALDEN_PHONYTALK = 0x00A0
    RACAL_RECORDER_GSM = 0x00A1
    RACAL_RECORDER_G720_A = 0x00A2
    RACAL_RECORDER_G723_1 = 0x00A3
    RACAL_RECORDER_TETRA_ACELP = 0x00A4
    NEC_AAC = 0x00B0
    RAW_AAC1 = 0x00FF
    RHETOREX_ADPCM = 0x0100
    IRAT = 0x0101
    VIVO_G723 = 0x0111
    VIVO_SIREN = 0x0112
    PHILIPS_CELP = 0x0120
    PHILIPS_GRUNDIG = 0x0121
    DIGITAL_G723 = 0x0123
    SANYO_LD_ADPCM = 0x0125
    SIPROLAB_ACEPLNET = 0x0130
    SIPROLAB_ACELP4800 = 0x0131
    SIPROLAB_ACELP8V3 = 0x0132
    SIPROLAB_G729 = 0x0133
    SIPROLAB_G729A = 0x0134
    SIPROLAB_KELVIN = 0x0135
    VOICEAGE_AMR = 0x0136
    G726ADPCM = 0x0140
    DICTAPHONE_CELP68 = 0x0141
    DICTAPHONE_CELP54 = 0x0142
    QUALCOMM_PUREVOICE = 0x0150
    QUALCOMM_HALFRATE = 0x0151
    TUBGSM = 0x0155
    MSAUDIO1 = 0x0160
    WMAUDIO2 = 0x0161
    WMAUDIO3 = 0x0162
    WMAUDIO_LOSSLESS = 0x0163
    WMASPDIF = 0x0164
    UNISYS_NAP_ADPCM = 0x0170
    UNISYS_NAP_ULAW = 0x0171
    UNISYS_NAP_ALAW = 0x0172
    UNISYS_NAP_16K = 0x0173
    SYCOM_ACM_SYC008 = 0x0174
    SYCOM_ACM_SYC701_G726L = 0x0175
    SYCOM_ACM_SYC701_CELP54 = 0x0176
    SYCOM_ACM_SYC701_CELP68 = 0x0177
    KNOWLEDGE_ADVENTURE_ADPCM = 0x0178
    FRAUNHOFER_IIS_MPEG2_AAC = 0x0180
    DTS_DS = 0x0190
    CREATIVE_ADPCM = 0x0200
    CREATIVE_FASTSPEECH8 = 0x0202
    CREATIVE_FASTSPEECH10 = 0x0203
    UHER_ADPCM = 0x0210
    ULEAD_DV_AUDIO = 0x0215
    ULEAD_DV_AUDIO_1 = 0x0216
    QUARTERDECK = 0x0220
    ILINK_VC = 0x0230
    RAW_SPORT = 0x0240
    ESST_AC3 = 0x0241
    GENERIC_PASSTHRU = 0x0249
    IPI_HSX = 0x0250
    IPI_RPELP = 0x0251
    CS2 = 0x0260
    SONY_SCX = 0x0270
    SONY_SCY = 0x0271
    SONY_ATRAC3 = 0x0272
    SONY_SPC = 0x0273
    TELUM_AUDIO = 0x0280
    TELUM_IA_AUDIO = 0x0281
    NORCOM_VOICE_SYSTEMS_ADPCM = 0x0285
    FM_TOWNS_SND = 0x0300
    MICRONAS = 0x0350
    MICRONAS_CELP833 = 0x0351
    BTV_DIGITAL = 0x0400
    INTEL_MUSIC_CODER = 0x0401
    INDEO_AUDIO = 0x0402
    QDESIGN_MUSIC = 0x0450
    ON2_VP7_AUDIO = 0x0500
    ON2_VP6_AUDIO = 0x0501
    VME_VMPCM = 0x0680
    TPC = 0x0681
    LIGHTWAVE_LOSSLESS = 0x08AE
    OLIGSM = 0x1000
    OLIADPCM = 0x1001
    OLICELP = 0x1002
    OLISBC = 0x1003
    OLIOPR = 0x1004
    LH_CODEC = 0x1100
    LH_CODEC_CELP = 0x1101
    LH_CODEC_SBC8 = 0x1102
    LH_CODEC_SBC12 = 0x1103
    LH_CODEC_SBC16 = 0x1104
    NORRIS = 0x1400
    ISIAUDIO_2 = 0x1401
    SOUNDSPACE_MUSICOMPRESS = 0x1500
    MPEG_ADTS_AAC = 0x1600
    MPEG_RAW_AAC = 0x1601
    MPEG_LOAS = 0x1602
    NOKIA_MPEG_ADTS_AAC = 0x1608
    NOKIA_MPEG_RAW_AAC = 0x1609
    VODAFONE_MPEG_ADTS_AAC = 0x160A
    VODAFONE_MPEG_RAW_AAC = 0x160B
    MPEG_HEAAC = 0x1610
    VOXWARE_RT24_SPEECH = 0x181C
    SONICFOUNDRY_LOSSLESS = 0x1971
    INNINGS_TELECOM_ADPCM = 0x1979
    LUCENT_SX8300P = 0x1C07
    LUCENT_SX5363S = 0x1C0C
    CUSEEME = 0x1F03
    NTCSOFT_ALF2CM_ACM = 0x1FC4
    DVM = 0x2000
    DTS2 = 0x2001
    MAKEAVIS = 0x3313
    DIVIO_MPEG4_AAC = 0x4143
    NOKIA_ADAPTIVE_MULTIRATE = 0x4201
    DIVIO_G726 = 0x4243
    LEAD_SPEECH = 0x434C
    LEAD_VORBIS = 0x564C
    WAVPACK_AUDIO = 0x5756
    OGG_VORBIS_MODE_1 = 0x674F
    OGG_VORBIS_MODE_2 = 0x6750
    OGG_VORBIS_MODE_3 = 0x6751
    OGG_VORBIS_MODE_1_PLUS = 0x676F
    OGG_VORBIS_MODE_2_PLUS = 0x6770
    OGG_VORBIS_MODE_3_PLUS = 0x6771
    ALAC = 0x6C61
    # Leading underscore because a Python identifier cannot start with "3"
    _3COM_NBX = 0x7000  # Can't have leading digit
    OPUS = 0x704F
    FAAD_AAC = 0x706D
    AMR_NB = 0x7361
    AMR_WB = 0x7362
    AMR_WP = 0x7363
    GSM_AMR_CBR = 0x7A21
    GSM_AMR_VBR_SID = 0x7A22
    COMVERSE_INFOSYS_G723_1 = 0xA100
    COMVERSE_INFOSYS_AVQSBC = 0xA101
    COMVERSE_INFOSYS_SBC = 0xA102
    SYMBOL_G729_A = 0xA103
    VOICEAGE_AMR_WB = 0xA104
    INGENIENT_G726 = 0xA105
    MPEG4_AAC = 0xA106
    ENCORE_G726 = 0xA107
    ZOLL_ASAO = 0xA108
    SPEEX_VOICE = 0xA109
    VIANIX_MASC = 0xA10A
    WM9_SPECTRUM_ANALYZER = 0xA10B
    WMF_SPECTRUM_ANAYZER = 0xA10C
    GSM_610 = 0xA10D
    GSM_620 = 0xA10E
    GSM_660 = 0xA10F
    GSM_690 = 0xA110
    GSM_ADAPTIVE_MULTIRATE_WB = 0xA111
    POLYCOM_G722 = 0xA112
    POLYCOM_G728 = 0xA113
    POLYCOM_G729_A = 0xA114
    POLYCOM_SIREN = 0xA115
    GLOBAL_IP_ILBC = 0xA116
    RADIOTIME_TIME_SHIFT_RADIO = 0xA117
    NICE_ACA = 0xA118
    NICE_ADPCM = 0xA119
    VOCORD_G721 = 0xA11A
    VOCORD_G726 = 0xA11B
    VOCORD_G722_1 = 0xA11C
    VOCORD_G728 = 0xA11D
    VOCORD_G729 = 0xA11E
    VOCORD_G729_A = 0xA11F
    VOCORD_G723_1 = 0xA120
    VOCORD_LBC = 0xA121
    NICE_G728 = 0xA122
    FRACE_TELECOM_G729 = 0xA123
    CODIAN = 0xA124
    FLAC = 0xF1AC
    EXTENSIBLE = 0xFFFE
    DEVELOPMENT = 0xFFFF
# Format tags accepted by the fmt-chunk parser; any other tag is reported
# through _raise_bad_format.
KNOWN_WAVE_FORMATS = {WAVE_FORMAT.PCM, WAVE_FORMAT.IEEE_FLOAT}
def _raise_bad_format(format_tag):
    """
    Raise a ValueError describing an unsupported wFormatTag.

    Parameters
    ----------
    format_tag : int
        Format tag taken from the file header.

    Raises
    ------
    ValueError
        Always.  The message names the format (or shows the tag in hex when
        it is not a member of `WAVE_FORMAT`) and lists the supported formats.
    """
    try:
        label = WAVE_FORMAT(format_tag).name
    except ValueError:
        # Tag is not in the enum at all: show it as 0x-prefixed hex instead
        label = f'{format_tag:#06x}'

    supported = ', '.join(member.name for member in KNOWN_WAVE_FORMATS)
    raise ValueError(f"Unknown wave file format: {label}. Supported "
                     "formats: " + supported)
def _read_fmt_chunk(fid, is_big_endian):
    """
    Parse a 'fmt ' chunk and leave the file pointer at the next chunk.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned right after the 'fmt ' chunk ID.
    is_big_endian : bool
        True to decode header fields as big-endian, False for little-endian.

    Returns
    -------
    size : int
        size of format subchunk in bytes (minus 8 for "fmt " and itself)
    format_tag : int
        PCM, float, or compressed format
    channels : int
        number of channels
    fs : int
        sampling frequency in samples per second
    bytes_per_second : int
        overall byte rate for the file
    block_align : int
        bytes per sample, including all channels
    bit_depth : int
        bits per sample

    Raises
    ------
    ValueError
        If the chunk is shorter than 16 bytes, if an extensible header has
        an extension shorter than 22 bytes, if the format is not one of
        `KNOWN_WAVE_FORMATS`, or if a PCM header has an inconsistent
        nAvgBytesPerSec.

    Notes
    -----
    Assumes file pointer is immediately after the 'fmt ' id
    """
    endian = '>' if is_big_endian else '<'

    (size,) = struct.unpack(endian + 'I', fid.read(4))
    if size < 16:
        raise ValueError("Binary structure of wave file is not compliant")

    fields = struct.unpack(endian + 'HHIIHH', fid.read(16))
    format_tag, channels, fs, bytes_per_second, block_align, bit_depth = fields
    consumed = 16

    if format_tag == WAVE_FORMAT.EXTENSIBLE and size >= (16+2):
        (ext_chunk_size,) = struct.unpack(endian + 'H', fid.read(2))
        consumed += 2
        if ext_chunk_size < 22:
            raise ValueError("Binary structure of wave file is not compliant")

        extension = fid.read(22)
        consumed += 22
        # Skip the 2-byte and 4-byte fields preceding the 16-byte SubFormat
        raw_guid = extension[2+4:2+4+16]
        # GUID template {XXXXXXXX-0000-0010-8000-00AA00389B71} (RFC-2361)
        # MS GUID byte order: first three groups are native byte order,
        # rest is Big Endian
        if is_big_endian:
            tail = b'\x00\x00\x00\x10\x80\x00\x00\xAA\x00\x38\x9B\x71'
        else:
            tail = b'\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71'
        if raw_guid.endswith(tail):
            # The first GUID group carries the real format tag
            (format_tag,) = struct.unpack(endian + 'I', raw_guid[:4])

    if format_tag not in KNOWN_WAVE_FORMATS:
        _raise_bad_format(format_tag)

    # move file pointer to next chunk
    if size > consumed:
        fid.read(size - consumed)
    # fmt should always be 16, 18 or 40, but handle it just in case
    _handle_pad_byte(fid, size)

    if format_tag == WAVE_FORMAT.PCM and bytes_per_second != fs * block_align:
        raise ValueError("WAV header is invalid: nAvgBytesPerSec must"
                         " equal product of nSamplesPerSec and"
                         " nBlockAlign, but file has nSamplesPerSec ="
                         f" {fs}, nBlockAlign = {block_align}, and"
                         f" nAvgBytesPerSec = {bytes_per_second}")

    return (size, format_tag, channels, fs, bytes_per_second, block_align,
            bit_depth)
def _read_data_chunk(fid, format_tag, channels, bit_depth, is_big_endian,
                     block_align, mmap=False):
    """
    Read the samples of a 'data' chunk into a NumPy array.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned right after the 'data' chunk ID.
    format_tag : int
        Format tag from the fmt chunk (PCM or IEEE float).
    channels : int
        Number of channels (nChannels).
    bit_depth : int
        Bits per sample (wBitsPerSample).
    is_big_endian : bool
        True to decode as big-endian, False for little-endian.
    block_align : int
        Bytes per sample frame, including all channels (nBlockAlign).
    mmap : bool, optional
        If True, return a copy-on-write memory map instead of reading the
        samples.  Only possible for 1-, 2-, 4- and 8-byte containers.

    Returns
    -------
    data : ndarray
        1-D for a single channel, otherwise reshaped to ``(-1, channels)``.

    Raises
    ------
    ValueError
        If ``channels`` is zero, if ``block_align`` is smaller than
        ``channels`` (which would give a zero-byte sample container), if
        the bit depth or format is unsupported, or if ``mmap=True`` is
        used with an incompatible container size.

    Notes
    -----
    Assumes file pointer is immediately after the 'data' id

    It's possible to not use all available bits in a container, or to store
    samples in a container bigger than necessary, so bytes_per_sample uses
    the actual reported container size (nBlockAlign / nChannels).  Real-world
    examples:

    Adobe Audition's "24-bit packed int (type 1, 20-bit)"

        nChannels = 2, nBlockAlign = 6, wBitsPerSample = 20

    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples/AFsp/M1F1-int12-AFsp.wav
    is:

        nChannels = 2, nBlockAlign = 4, wBitsPerSample = 12

    http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Docs/multichaudP.pdf
    gives an example of:

        nChannels = 2, nBlockAlign = 8, wBitsPerSample = 20
    """
    fmt = '>' if is_big_endian else '<'

    # Size of the data subchunk in bytes
    size = struct.unpack(fmt+'I', fid.read(4))[0]

    # A malformed header must surface as ValueError like every other header
    # problem, not as ZeroDivisionError from the divisions below.
    if channels < 1:
        raise ValueError("WAV header is invalid: nChannels must be at least"
                         f" 1, but file has nChannels = {channels}")

    # Number of bytes per sample (sample container size)
    bytes_per_sample = block_align // channels
    if bytes_per_sample < 1:
        raise ValueError("WAV header is invalid: nBlockAlign must be at"
                         " least nChannels, but file has nBlockAlign ="
                         f" {block_align} and nChannels = {channels}")
    n_samples = size // bytes_per_sample

    if format_tag == WAVE_FORMAT.PCM:
        if 1 <= bit_depth <= 8:
            dtype = 'u1'  # WAV of 8-bit integer or less are unsigned
        elif bytes_per_sample in {3, 5, 6, 7}:
            # No compatible dtype.  Load as raw bytes for reshaping later.
            dtype = 'V1'
        elif bit_depth <= 64:
            # Remaining bit depths can map directly to signed numpy dtypes
            dtype = f'{fmt}i{bytes_per_sample}'
        else:
            raise ValueError("Unsupported bit depth: the WAV file "
                             f"has {bit_depth}-bit integer data.")
    elif format_tag == WAVE_FORMAT.IEEE_FLOAT:
        if bit_depth in {32, 64}:
            dtype = f'{fmt}f{bytes_per_sample}'
        else:
            raise ValueError("Unsupported bit depth: the WAV file "
                             f"has {bit_depth}-bit floating-point data.")
    else:
        _raise_bad_format(format_tag)

    start = fid.tell()
    if not mmap:
        try:
            # Raw-byte loads count bytes; typed loads count samples
            count = size if dtype == 'V1' else n_samples
            data = numpy.fromfile(fid, dtype=dtype, count=count)
        except io.UnsupportedOperation:  # not a C-like file
            fid.seek(start, 0)  # just in case it seeked, though it shouldn't
            data = numpy.frombuffer(fid.read(size), dtype=dtype)

        if dtype == 'V1':
            # Rearrange raw bytes into smallest compatible numpy dtype
            dt = f'{fmt}i4' if bytes_per_sample == 3 else f'{fmt}i8'
            a = numpy.zeros((len(data) // bytes_per_sample,
                             numpy.dtype(dt).itemsize),
                            dtype='V1')
            # Left-justify: the sample bytes occupy the most significant
            # end of the wider integer, the remaining bytes stay zero.
            if is_big_endian:
                a[:, :bytes_per_sample] = data.reshape((-1, bytes_per_sample))
            else:
                a[:, -bytes_per_sample:] = data.reshape((-1, bytes_per_sample))
            data = a.view(dt).reshape(a.shape[:-1])
    else:
        if bytes_per_sample in {1, 2, 4, 8}:
            data = numpy.memmap(fid, dtype=dtype, mode='c', offset=start,
                                shape=(n_samples,))
            # memmap does not advance the file pointer; skip the samples
            fid.seek(start + size)
        else:
            raise ValueError("mmap=True not compatible with "
                             f"{bytes_per_sample}-byte container size.")

    _handle_pad_byte(fid, size)

    if channels > 1:
        data = data.reshape(-1, channels)
    return data
def _skip_unknown_chunk(fid, is_big_endian):
    """
    Skip over the chunk whose ID has just been read.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned right after a chunk ID, i.e. at the
        chunk's 4-byte size field.
    is_big_endian : bool
        True to decode the size field as big-endian, False for little-endian.
    """
    raw_size = fid.read(4)
    # Only unpack() and seek() if something was actually read: an empty
    # read at the end of the file would otherwise make unpack() raise.
    # A size of 0 needs no special case, seeking by 0 is harmless.
    if not raw_size:
        return

    (size,) = struct.unpack('>I' if is_big_endian else '<I', raw_size)
    fid.seek(size, 1)
    _handle_pad_byte(fid, size)
def _read_riff_chunk(fid):
    """
    Read the 12-byte RIFF/RIFX header at the current file position.

    Parameters
    ----------
    fid : file-like
        Open binary file positioned at the file signature.

    Returns
    -------
    file_size : int
        Size stored in the header plus 8 (the signature and size field
        themselves are not counted by the stored value).
    is_big_endian : bool
        True for a 'RIFX' signature, False for 'RIFF'.

    Raises
    ------
    ValueError
        If the signature is neither 'RIFF' nor 'RIFX', or the form type is
        not 'WAVE'.
    """
    signature = fid.read(4)  # File signature
    if signature not in (b'RIFF', b'RIFX'):
        # There are also .wav files with "FFIR" or "XFIR" signatures?
        raise ValueError(f"File format {repr(signature)} not understood. Only "
                         "'RIFF' and 'RIFX' supported.")

    is_big_endian = signature == b'RIFX'
    size_fmt = '>I' if is_big_endian else '<I'

    # Size of entire file
    (riff_size,) = struct.unpack(size_fmt, fid.read(4))
    file_size = riff_size + 8

    form_type = fid.read(4)
    if form_type != b'WAVE':
        raise ValueError(f"Not a WAV file. RIFF form type is {repr(form_type)}.")

    return file_size, is_big_endian
def _handle_pad_byte(fid, size):
    """
    Advance `fid` by one byte when `size` is odd.

    "If the chunk size is an odd number of bytes, a pad byte with value zero
    is written after ckData."  So the pad byte has to be skipped after each
    odd-sized chunk to land on the next chunk ID.
    """
    has_pad_byte = size % 2 != 0
    if has_pad_byte:
        fid.seek(1, 1)
def read(filename, mmap=False):
    """
    Open a WAV file.

    Return the sample rate (in samples/sec) and data from an LPCM WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Input WAV file.
    mmap : bool, optional
        Whether to read data as memory-mapped (default: False).  Not compatible
        with some bit depths; see Notes.  Only to be used on real files.

        .. versionadded:: 0.12.0

    Returns
    -------
    rate : int
        Sample rate of WAV file.
    data : numpy array
        Data read from WAV file.  Data-type is determined from the file;
        see Notes.  Data is 1-D for 1-channel WAV, or 2-D of shape
        (Nsamples, Nchannels) otherwise.  If a file-like input without a
        C-like file descriptor (e.g., :class:`python:io.BytesIO`) is
        passed, this will not be writeable.

    Raises
    ------
    ValueError
        If the file is not a RIFF/RIFX WAVE file, if a 'data' chunk appears
        before any 'fmt ' chunk, if the file ends or a chunk ID is
        incomplete before the data has been read, or if the file contains
        no 'data' chunk at all.

    Notes
    -----
    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit integer PCM     -2147483648  +2147483647  int32
    24-bit integer PCM     -2147483648  +2147483392  int32
    16-bit integer PCM     -32768       +32767       int16
    8-bit integer PCM      0            255          uint8
    =====================  ===========  ===========  =============

    WAV files can specify arbitrary bit depth, and this function supports
    reading any integer PCM depth from 1 to 64 bits.  Data is returned in the
    smallest compatible numpy int type, in left-justified format.  8-bit and
    lower is unsigned, while 9-bit and higher is signed.

    For example, 24-bit data will be stored as int32, with the MSB of the
    24-bit data stored at the MSB of the int32, and typically the least
    significant byte is 0x00.  (However, if a file actually contains data past
    its specified bit depth, those bits will be read and output, too. [2]_)

    This bit justification and sign matches WAV's native internal format, which
    allows memory mapping of WAV files that use 1, 2, 4, or 8 bytes per sample
    (so 24-bit files cannot be memory-mapped, but 32-bit can).

    IEEE float PCM in 32- or 64-bit format is supported, with or without mmap.
    Values exceeding [-1, +1] are not clipped.

    Non-linear PCM (mu-law, A-law) is not supported.

    When an open file handle is passed, memory mapping is disabled and the
    handle is rewound to position 0 before returning; it is not closed.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html
    .. [2] Adobe Systems Incorporated, "Adobe Audition 3 User Guide", section
       "Audio file formats: 24-bit Packed Int (type 1, 20-bit)", 2007

    Examples
    --------
    >>> from os.path import dirname, join as pjoin
    >>> from scipy.io import wavfile
    >>> import scipy.io

    Get the filename for an example .wav file from the tests/data directory.

    >>> data_dir = pjoin(dirname(scipy.io.__file__), 'tests', 'data')
    >>> wav_fname = pjoin(data_dir, 'test-44100Hz-2ch-32bit-float-be.wav')

    Load the .wav file contents.

    >>> samplerate, data = wavfile.read(wav_fname)
    >>> print(f"number of channels = {data.shape[1]}")
    number of channels = 2
    >>> length = data.shape[0] / samplerate
    >>> print(f"length = {length}s")
    length = 0.01s

    Plot the waveform.

    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> time = np.linspace(0., length, data.shape[0])
    >>> plt.plot(time, data[:, 0], label="Left channel")
    >>> plt.plot(time, data[:, 1], label="Right channel")
    >>> plt.legend()
    >>> plt.xlabel("Time [s]")
    >>> plt.ylabel("Amplitude")
    >>> plt.show()

    """
    if hasattr(filename, 'read'):
        fid = filename
        # Memory mapping is only attempted for files opened here by name
        mmap = False
    else:
        fid = open(filename, 'rb')

    try:
        file_size, is_big_endian = _read_riff_chunk(fid)
        fmt_chunk_received = False
        data_chunk_received = False
        while fid.tell() < file_size:
            # read the next chunk
            chunk_id = fid.read(4)

            if not chunk_id:
                if data_chunk_received:
                    # End of file but data successfully read
                    warnings.warn(
                        f"Reached EOF prematurely; finished at {fid.tell():d}"
                        f" bytes, expected {file_size:d} bytes from header.",
                        WavFileWarning, stacklevel=2)
                    break
                else:
                    raise ValueError("Unexpected end of file.")
            elif len(chunk_id) < 4:
                msg = f"Incomplete chunk ID: {repr(chunk_id)}"
                # If we have the data, ignore the broken chunk
                if fmt_chunk_received and data_chunk_received:
                    warnings.warn(msg + ", ignoring it.", WavFileWarning,
                                  stacklevel=2)
                else:
                    raise ValueError(msg)

            if chunk_id == b'fmt ':
                fmt_chunk_received = True
                fmt_chunk = _read_fmt_chunk(fid, is_big_endian)
                format_tag, channels, fs = fmt_chunk[1:4]
                bit_depth = fmt_chunk[6]
                block_align = fmt_chunk[5]
            elif chunk_id == b'fact':
                _skip_unknown_chunk(fid, is_big_endian)
            elif chunk_id == b'data':
                data_chunk_received = True
                if not fmt_chunk_received:
                    raise ValueError("No fmt chunk before data")
                data = _read_data_chunk(fid, format_tag, channels, bit_depth,
                                        is_big_endian, block_align, mmap)
            elif chunk_id == b'LIST':
                # Someday this could be handled properly but for now skip it
                _skip_unknown_chunk(fid, is_big_endian)
            elif chunk_id in {b'JUNK', b'Fake'}:
                # Skip alignment chunks without warning
                _skip_unknown_chunk(fid, is_big_endian)
            else:
                warnings.warn("Chunk (non-data) not understood, skipping it.",
                              WavFileWarning, stacklevel=2)
                _skip_unknown_chunk(fid, is_big_endian)

        # Without this check a file lacking a 'data' chunk would fail on the
        # return statement with UnboundLocalError instead of a ValueError.
        if not data_chunk_received:
            raise ValueError("No data chunk found in WAV file.")
    finally:
        if not hasattr(filename, 'read'):
            fid.close()
        else:
            fid.seek(0)

    return fs, data
def write(filename, rate, data):
    """
    Write a NumPy array as a WAV file.

    Parameters
    ----------
    filename : string or open file handle
        Output wav file.
    rate : int
        The sample rate (in samples/sec).
    data : ndarray
        A 1-D or 2-D NumPy array of either integer or float data-type.

    Raises
    ------
    ValueError
        If the dtype is not a signed integer, a float, or an unsigned 8-bit
        integer, or if the resulting file would exceed the 32-bit RIFF size
        limit.

    Notes
    -----
    * Writes a simple uncompressed WAV file.
    * To write multiple-channels, use a 2-D array of shape
      (Nsamples, Nchannels).
    * The bits-per-sample and PCM/float will be determined by the data-type.
    * When an open file handle is passed it is rewound to position 0 after
      writing instead of being closed.

    Common data types: [1]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    Note that 8-bit PCM is unsigned.

    References
    ----------
    .. [1] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    Examples
    --------
    Create a 100Hz sine wave, sampled at 44100Hz.
    Write to 16-bit PCM, Mono.

    >>> from scipy.io.wavfile import write
    >>> import numpy as np
    >>> samplerate = 44100; fs = 100
    >>> t = np.linspace(0., 1., samplerate)
    >>> amplitude = np.iinfo(np.int16).max
    >>> data = amplitude * np.sin(2. * np.pi * fs * t)
    >>> write("example.wav", samplerate, data.astype(np.int16))

    """
    opened_here = not hasattr(filename, 'write')
    fid = open(filename, 'wb') if opened_here else filename

    fs = rate

    try:
        dkind = data.dtype.kind
        supported = (dkind in ('i', 'f')
                     or (dkind == 'u' and data.dtype.itemsize == 1))
        if not supported:
            raise ValueError("Unsupported data type '%s'" % data.dtype)

        # Anything that is not integer data gets the non-PCM extras below
        non_pcm = dkind not in ('i', 'u')

        format_tag = (WAVE_FORMAT.IEEE_FLOAT if dkind == 'f'
                      else WAVE_FORMAT.PCM)
        channels = 1 if data.ndim == 1 else data.shape[1]
        bit_depth = data.dtype.itemsize * 8
        bytes_per_second = fs*(bit_depth // 8)*channels
        block_align = channels * (bit_depth // 8)

        # fmt chunk
        fmt_chunk_data = struct.pack('<HHIIHH', format_tag, channels, fs,
                                     bytes_per_second, block_align, bit_depth)
        if non_pcm:
            # add cbSize field for non-PCM files
            fmt_chunk_data += b'\x00\x00'

        header_parts = [
            b'RIFF',
            b'\x00\x00\x00\x00',  # placeholder, patched once size is known
            b'WAVE',
            b'fmt ',
            struct.pack('<I', len(fmt_chunk_data)),
            fmt_chunk_data,
        ]

        # fact chunk (non-PCM files)
        if non_pcm:
            header_parts.append(b'fact')
            header_parts.append(struct.pack('<II', 4, data.shape[0]))

        header_data = b''.join(header_parts)

        # check data size (needs to be immediately before the data chunk)
        riff_payload = (len(header_data)-4-4) + (4+4+data.nbytes)
        if riff_payload > 0xFFFFFFFF:
            raise ValueError("Data exceeds wave file size limit")

        fid.write(header_data)

        # data chunk
        fid.write(b'data')
        fid.write(struct.pack('<I', data.nbytes))
        byteorder = data.dtype.byteorder
        stored_big_endian = byteorder == '>' or (byteorder == '=' and
                                                 sys.byteorder == 'big')
        if stored_big_endian:
            # The file is always little-endian
            data = data.byteswap()
        _array_tofile(fid, data)

        # Determine file size and place it in correct
        # position at start of the file.
        size = fid.tell()
        fid.seek(4)
        fid.write(struct.pack('<I', size-8))

    finally:
        if opened_here:
            fid.close()
        else:
            fid.seek(0)
def _array_tofile(fid, data):
    """Write the raw bytes of array `data` to `fid` in C order."""
    # ravel gives a c-contiguous buffer
    flat = data.ravel()
    raw = flat.view('b')
    fid.write(raw.data)
|