# Authors: Travis Oliphant, Matthew Brett

"""
Base classes for MATLAB file stream reading.

MATLAB is a registered trademark of The MathWorks, Inc.
"""
- import operator
- import functools
- import numpy as np
- from scipy._lib import doccer
- from . import _byteordercodes as boc
# Names exported by a star-import of this module.
__all__ = [
    'MatFileReader',
    'MatReadError',
    'MatReadWarning',
    'MatVarReader',
    'MatWriteError',
    'arr_dtype_number',
    'arr_to_chars',
    'convert_dtypes',
    'doc_dict',
    'docfiller',
    'get_matfile_version',
    'matdims',
    'read_dtype',
]
class MatReadError(Exception):
    """Signal a problem met while reading a MAT-file.

    Raised, for example, when the file turns out to be empty.
    """
class MatWriteError(Exception):
    """Signal a problem met while writing a MAT-file."""
class MatReadWarning(UserWarning):
    """Warning category for non-fatal issues met while reading a MAT-file."""
# Reusable numpydoc parameter descriptions, keyed by the placeholder name
# used in docstrings (e.g. ``%(load_args)s``).  The first line of each
# fragment starts at column 0; continuation lines are indented relative to
# it so that they stay nested under the parameter name after substitution.
doc_dict = {
    'file_arg':
        '''file_name : str
    Name of the mat file (do not need .mat extension if
    appendmat==True). Can also pass open file-like object.''',
    'append_arg':
        '''appendmat : bool, optional
    True to append the .mat extension to the end of the given
    filename, if not already present. Default is True.''',
    'load_args':
        '''byte_order : str or None, optional
    None by default, implying byte order guessed from mat
    file. Otherwise can be one of ('native', '=', 'little', '<',
    'BIG', '>').
mat_dtype : bool, optional
    If True, return arrays in same dtype as would be loaded into
    MATLAB (instead of the dtype with which they are saved).
squeeze_me : bool, optional
    Whether to squeeze unit matrix dimensions or not.
chars_as_strings : bool, optional
    Whether to convert char arrays to string arrays.
matlab_compatible : bool, optional
    Returns matrices as would be loaded by MATLAB (implies
    squeeze_me=False, chars_as_strings=False, mat_dtype=True,
    struct_as_record=True).''',
    'struct_arg':
        '''struct_as_record : bool, optional
    Whether to load MATLAB structs as NumPy record arrays, or as
    old-style NumPy arrays with dtype=object. Setting this flag to
    False replicates the behavior of SciPy version 0.7.x (returning
    numpy object arrays). The default setting is True, because it
    allows easier round-trip load and save of MATLAB files.''',
    'matstream_arg':
        '''mat_stream : file-like
    Object with file API, open for reading.''',
    'long_fields':
        '''long_field_names : bool, optional
    * False - maximum field name length in a structure is 31 characters
      which is the documented maximum length. This is the default.
    * True - maximum field name length in a structure is 63 characters
      which works for MATLAB 7.6''',
    'do_compression':
        '''do_compression : bool, optional
    Whether to compress matrices on write. Default is False.''',
    'oned_as':
        '''oned_as : {'row', 'column'}, optional
    If 'column', write 1-D NumPy arrays as column vectors.
    If 'row', write 1-D NumPy arrays as row vectors.''',
    'unicode_strings':
        '''unicode_strings : bool, optional
    If True, write strings as Unicode, else MATLAB usual encoding.''',
}

# Decorator that substitutes the fragments above into ``%(name)s``
# placeholders of the decorated object's docstring.
docfiller = doccer.filldoc(doc_dict)
'''
Note on architecture
====================

There are three sets of parameters relevant for reading files. The
first are *file read parameters* - containing options that are common
for reading the whole file, and therefore every variable within that
file. At the moment these are:

* mat_stream
* dtypes (derived from byte code)
* byte_order
* chars_as_strings
* squeeze_me
* struct_as_record (MATLAB 5 files)
* class_dtypes (derived from order code, MATLAB 5 files)
* codecs (MATLAB 5 files)
* uint16_codec (MATLAB 5 files)

Another set of parameters are those that apply only to the current
variable being read - the *header*:

* header related variables (different for v4 and v5 mat files)
* is_complex
* mclass
* var_stream

With the header, we need ``next_position`` to tell us where the next
variable in the stream is.

Then, for each element in a matrix, there can be *element read
parameters*. An element is, for example, one element in a MATLAB cell
array. At the moment, these are:

* mat_dtype

The file-reading object contains the *file read parameters*. The
*header* is passed around as a data object, or may be read and discarded
in a single function. The *element read parameters* - the mat_dtype in
this instance - are passed into a general post-processing function - see
``mio_utils`` for details.
'''
def convert_dtypes(dtype_template, order_code):
    """Build a copy of a dtype mapping with every entry in one byte order.

    Parameters
    ----------
    dtype_template : mapping
        Mapping whose values are accepted by ``np.dtype(val)``. It is not
        modified.
    order_code : str
        Byte-order code understood by ``dtype.newbyteorder()``.

    Returns
    -------
    dtypes : mapping
        Copy of `dtype_template` (made with its ``copy`` method) in which
        each value is ``np.dtype(val).newbyteorder(order_code)``.
    """
    converted = dtype_template.copy()
    for key, spec in dtype_template.items():
        converted[key] = np.dtype(spec).newbyteorder(order_code)
    return converted
def read_dtype(mat_stream, a_dtype):
    """
    Read a single item of a known dtype from a byte stream.

    Parameters
    ----------
    mat_stream : file_like object
        MATLAB (tm) mat file stream; ``a_dtype.itemsize`` bytes are read
        from its current position.
    a_dtype : dtype
        dtype of the item to read. `a_dtype` is assumed to already have
        the correct endianness.

    Returns
    -------
    arr : ndarray
        Zero-dimensional array of dtype `a_dtype` built directly on the
        bytes read from the stream.
    """
    raw = mat_stream.read(a_dtype.itemsize)
    return np.ndarray(shape=(), dtype=a_dtype, buffer=raw, order='F')
def matfile_version(file_name, *, appendmat=True):
    """
    Return major, minor tuple depending on apparent mat file type

    Where:

     #. 0,x -> version 4 format mat files
     #. 1,x -> version 5 format mat files
     #. 2,x -> version 7.3 format mat files (HDF format)

    Parameters
    ----------
    file_name : str
        Name of the mat file (do not need .mat extension if
        appendmat==True). Can also pass open file-like object.
    appendmat : bool, optional
        True to append the .mat extension to the end of the given
        filename, if not already present. Default is True.

    Returns
    -------
    major_version : {0, 1, 2}
        major MATLAB File format version
    minor_version : int
        minor MATLAB file format version

    Raises
    ------
    MatReadError
        If the file is empty.
    ValueError
        The matfile version is unknown.

    Notes
    -----
    Has the side effect of setting the file read pointer to 0
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import - confirm).
    from ._mio import _open_file_context

    with _open_file_context(file_name, appendmat=appendmat) as stream:
        version = _get_matfile_version(stream)
    return version


# Second public name bound to the same function.
get_matfile_version = matfile_version
- def _get_matfile_version(fileobj):
- # Mat4 files have a zero somewhere in first 4 bytes
- fileobj.seek(0)
- mopt_bytes = fileobj.read(4)
- if len(mopt_bytes) == 0:
- raise MatReadError("Mat file appears to be empty")
- mopt_ints = np.ndarray(shape=(4,), dtype=np.uint8, buffer=mopt_bytes)
- if 0 in mopt_ints:
- fileobj.seek(0)
- return (0,0)
- # For 5 format or 7.3 format we need to read an integer in the
- # header. Bytes 124 through 128 contain a version integer and an
- # endian test string
- fileobj.seek(124)
- tst_str = fileobj.read(4)
- fileobj.seek(0)
- maj_ind = int(tst_str[2] == b'I'[0])
- maj_val = int(tst_str[maj_ind])
- min_val = int(tst_str[1 - maj_ind])
- ret = (maj_val, min_val)
- if maj_val in (1, 2):
- return ret
- raise ValueError('Unknown mat file type, version %s, %s' % ret)
def matdims(arr, oned_as='column'):
    """
    Determine equivalent MATLAB dimensions for given array

    Parameters
    ----------
    arr : ndarray
        Input array
    oned_as : {'column', 'row'}, optional
        Whether 1-D arrays are returned as MATLAB row or column matrices.
        Default is 'column'. Only consulted for non-empty 1-D input.

    Returns
    -------
    dims : tuple
        Shape tuple, in the form MATLAB expects it.

    Raises
    ------
    ValueError
        If `arr` is a non-empty 1-D array and `oned_as` is neither
        'column' nor 'row'.

    Notes
    -----
    We had to decide what shape a 1 dimensional array would be by
    default. ``np.atleast_2d`` thinks it is a row vector. The
    default for a vector in MATLAB (e.g., ``>> 1:12``) is a row vector.

    Versions of scipy up to and including 0.11 resulted (accidentally)
    in 1-D arrays being read as column vectors. For the moment, we
    maintain the same tradition here.

    Arrays with two or more dimensions keep their shape as is, including
    empty ones; only an empty 1-D array is mapped to ``(0, 0)``.

    Examples
    --------
    >>> import numpy as np
    >>> matdims(np.array(1)) # NumPy scalar
    (1, 1)
    >>> matdims(np.array([1])) # 1-D array, 1 element
    (1, 1)
    >>> matdims(np.array([1,2])) # 1-D array, 2 elements
    (2, 1)
    >>> matdims(np.array([[2],[3]])) # 2-D array, column vector
    (2, 1)
    >>> matdims(np.array([[2,3]])) # 2-D array, row vector
    (1, 2)
    >>> matdims(np.array([[[2,3]]])) # 3-D array, rowish vector
    (1, 1, 2)
    >>> matdims(np.array([])) # empty 1-D array
    (0, 0)
    >>> matdims(np.array([[]])) # empty 2-D array
    (1, 0)
    >>> matdims(np.array([[[]]])) # empty 3-D array
    (1, 1, 0)

    Optional argument flips 1-D shape behavior.

    >>> matdims(np.array([1,2]), 'row') # 1-D array, 2 elements
    (1, 2)

    The argument has to make sense though

    >>> matdims(np.array([1,2]), 'bizarre')
    Traceback (most recent call last):
       ...
    ValueError: 1-D option "bizarre" is strange

    """
    shape = arr.shape
    if shape == ():  # scalar
        return (1, 1)
    if len(shape) != 1:
        # 2-D and higher shapes are already in MATLAB form
        return shape
    if shape[0] == 0:
        # Empty 1-D array; oned_as is deliberately not checked here
        return (0, 0)
    if oned_as == 'column':
        return shape + (1,)
    if oned_as == 'row':
        return (1,) + shape
    raise ValueError(f'1-D option "{oned_as}" is strange')
class MatVarReader:
    """Abstract class defining the interface required of var readers.

    Every method here is a placeholder that does nothing and returns
    None; concrete readers are expected to supply real implementations.
    """

    def __init__(self, file_reader):
        """Accept the file reader; the base class keeps no reference."""

    def read_header(self):
        """Return the header (the base implementation returns None)."""
        return None

    def array_from_header(self, header):
        """Read the array for `header` (the base returns None)."""
        return None
class MatFileReader:
    """ Base object for reading mat files

    To make this class functional, you will need to override the
    following methods:

    matrix_getter_factory - gives object to fetch next matrix from stream
    guess_byte_order - guesses file byte order from file
    """

    @docfiller
    def __init__(self, mat_stream,
                 byte_order=None,
                 mat_dtype=False,
                 squeeze_me=False,
                 chars_as_strings=True,
                 matlab_compatible=False,
                 struct_as_record=True,
                 verify_compressed_data_integrity=True,
                 simplify_cells=False):
        '''
        Initializer for mat file reader

        mat_stream : file-like
            object with file API, open for reading
        %(load_args)s
        %(struct_arg)s
        verify_compressed_data_integrity : bool, optional
            Stored on the reader unchanged. Default is True.
        simplify_cells : bool, optional
            If True, `squeeze_me` is forced to True and
            `struct_as_record` to False, whatever values were passed
            for them. Default is False.
        '''
        # Initialize stream
        self.mat_stream = mat_stream
        self.dtypes = {}
        # Any falsy byte_order means "ask guess_byte_order"
        if not byte_order:
            byte_order = self.guess_byte_order()
        else:
            byte_order = boc.to_numpy_code(byte_order)
        self.byte_order = byte_order
        self.struct_as_record = struct_as_record
        if matlab_compatible:
            # Replaces the mat_dtype / squeeze_me / chars_as_strings
            # arguments with fixed values
            self.set_matlab_compatible()
        else:
            self.squeeze_me = squeeze_me
            self.chars_as_strings = chars_as_strings
            self.mat_dtype = mat_dtype
        self.verify_compressed_data_integrity = verify_compressed_data_integrity
        self.simplify_cells = simplify_cells
        if simplify_cells:
            # Applied last, so it also wins over matlab_compatible
            self.squeeze_me = True
            self.struct_as_record = False

    def set_matlab_compatible(self):
        ''' Sets options to return arrays as MATLAB loads them '''
        self.mat_dtype = True
        self.squeeze_me = False
        self.chars_as_strings = False

    def guess_byte_order(self):
        ''' As we do not know what file type we have, assume native '''
        return boc.native_code

    def end_of_stream(self):
        ''' Return True if no more bytes can be read from the stream

        The stream position is the same after the call as before it.
        '''
        b = self.mat_stream.read(1)
        if not b:
            # Nothing was consumed, so there is nothing to undo. Seeking
            # back here would move off the end of the stream (or before
            # its start when the stream is empty).
            return True
        # Step back over the byte that was peeked at
        self.mat_stream.seek(self.mat_stream.tell() - len(b))
        return False
def arr_dtype_number(arr, num):
    """Return a dtype like that of `arr` but with `num` items per element.

    The first two characters of ``arr.dtype.str`` are kept and `num`
    replaces whatever follows them.
    """
    prefix = arr.dtype.str[:2]
    return np.dtype(f'{prefix}{num!s}')
def arr_to_chars(arr):
    """Convert string array to char array.

    The result has one extra trailing axis whose length is the number
    following the first two characters of ``arr.dtype.str``, and holds
    one-item elements laid over the buffer of `arr`. A zero-dimensional
    `arr` is treated as having shape ``(1,)``. Elements that compare
    equal to the empty string are replaced by a single space, in which
    case a copy is returned; otherwise the result is built directly on
    the buffer of `arr`.
    """
    shape = list(arr.shape) or [1]
    shape.append(int(arr.dtype.str[2:]))
    chars = np.ndarray(shape=shape,
                       dtype=arr_dtype_number(arr, 1),
                       buffer=arr)
    blank = chars == np.array('', dtype=chars.dtype)
    if not np.any(blank):
        return chars
    # Copy before writing so the caller's buffer is left alone
    chars = chars.copy()
    chars[blank] = ' '
    return chars
|