_mio.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. """
  2. Module for reading and writing matlab (TM) .mat files
  3. """
  4. # Authors: Travis Oliphant, Matthew Brett
  5. from contextlib import contextmanager
  6. from ._miobase import _get_matfile_version, docfiller
  7. from ._mio4 import MatFile4Reader, MatFile4Writer
  8. from ._mio5 import MatFile5Reader, MatFile5Writer
  9. __all__ = ['mat_reader_factory', 'loadmat', 'savemat', 'whosmat']
  10. @contextmanager
  11. def _open_file_context(file_like, appendmat, mode='rb'):
  12. f, opened = _open_file(file_like, appendmat, mode)
  13. try:
  14. yield f
  15. finally:
  16. if opened:
  17. f.close()
  18. def _open_file(file_like, appendmat, mode='rb'):
  19. """
  20. Open `file_like` and return as file-like object. First, check if object is
  21. already file-like; if so, return it as-is. Otherwise, try to pass it
  22. to open(). If that fails, and `file_like` is a string, and `appendmat` is true,
  23. append '.mat' and try again.
  24. """
  25. reqs = {'read'} if set(mode) & set('r+') else set()
  26. if set(mode) & set('wax+'):
  27. reqs.add('write')
  28. if reqs.issubset(dir(file_like)):
  29. return file_like, False
  30. try:
  31. return open(file_like, mode), True
  32. except OSError as e:
  33. # Probably "not found"
  34. if isinstance(file_like, str):
  35. if appendmat and not file_like.endswith('.mat'):
  36. file_like += '.mat'
  37. return open(file_like, mode), True
  38. else:
  39. raise OSError(
  40. 'Reader needs file name or open file-like object'
  41. ) from e
  42. @docfiller
  43. def mat_reader_factory(file_name, appendmat=True, **kwargs):
  44. """
  45. Create reader for matlab .mat format files.
  46. Parameters
  47. ----------
  48. %(file_arg)s
  49. %(append_arg)s
  50. %(load_args)s
  51. %(struct_arg)s
  52. Returns
  53. -------
  54. matreader : MatFileReader object
  55. Initialized instance of MatFileReader class matching the mat file
  56. type detected in `filename`.
  57. file_opened : bool
  58. Whether the file was opened by this routine.
  59. """
  60. byte_stream, file_opened = _open_file(file_name, appendmat)
  61. mjv, mnv = _get_matfile_version(byte_stream)
  62. if mjv == 0:
  63. return MatFile4Reader(byte_stream, **kwargs), file_opened
  64. elif mjv == 1:
  65. return MatFile5Reader(byte_stream, **kwargs), file_opened
  66. elif mjv == 2:
  67. raise NotImplementedError('Please use HDF reader for matlab v7.3 '
  68. 'files, e.g. h5py')
  69. else:
  70. raise TypeError('Did not recognize version %s' % mjv)
  71. @docfiller
  72. def loadmat(file_name, mdict=None, appendmat=True, **kwargs):
  73. """
  74. Load MATLAB file.
  75. Parameters
  76. ----------
  77. file_name : str
  78. Name of the mat file (do not need .mat extension if
  79. appendmat==True). Can also pass open file-like object.
  80. mdict : dict, optional
  81. Dictionary in which to insert matfile variables.
  82. appendmat : bool, optional
  83. True to append the .mat extension to the end of the given
  84. filename, if not already present. Default is True.
  85. byte_order : str or None, optional
  86. None by default, implying byte order guessed from mat
  87. file. Otherwise can be one of ('native', '=', 'little', '<',
  88. 'BIG', '>').
  89. mat_dtype : bool, optional
  90. If True, return arrays in same dtype as would be loaded into
  91. MATLAB (instead of the dtype with which they are saved).
  92. squeeze_me : bool, optional
  93. Whether to squeeze unit matrix dimensions or not.
  94. chars_as_strings : bool, optional
  95. Whether to convert char arrays to string arrays.
  96. matlab_compatible : bool, optional
  97. Returns matrices as would be loaded by MATLAB (implies
  98. squeeze_me=False, chars_as_strings=False, mat_dtype=True,
  99. struct_as_record=True).
  100. struct_as_record : bool, optional
  101. Whether to load MATLAB structs as NumPy record arrays, or as
  102. old-style NumPy arrays with dtype=object. Setting this flag to
  103. False replicates the behavior of scipy version 0.7.x (returning
  104. NumPy object arrays). The default setting is True, because it
  105. allows easier round-trip load and save of MATLAB files.
  106. verify_compressed_data_integrity : bool, optional
  107. Whether the length of compressed sequences in the MATLAB file
  108. should be checked, to ensure that they are not longer than we expect.
  109. It is advisable to enable this (the default) because overlong
  110. compressed sequences in MATLAB files generally indicate that the
  111. files have experienced some sort of corruption.
  112. variable_names : None or sequence
  113. If None (the default) - read all variables in file. Otherwise,
  114. `variable_names` should be a sequence of strings, giving names of the
  115. MATLAB variables to read from the file. The reader will skip any
  116. variable with a name not in this sequence, possibly saving some read
  117. processing.
  118. simplify_cells : False, optional
  119. If True, return a simplified dict structure (which is useful if the mat
  120. file contains cell arrays). Note that this only affects the structure
  121. of the result and not its contents (which is identical for both output
  122. structures). If True, this automatically sets `struct_as_record` to
  123. False and `squeeze_me` to True, which is required to simplify cells.
  124. Returns
  125. -------
  126. mat_dict : dict
  127. dictionary with variable names as keys, and loaded matrices as
  128. values.
  129. Notes
  130. -----
  131. v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.
  132. You will need an HDF5 Python library to read MATLAB 7.3 format mat
  133. files. Because SciPy does not supply one, we do not implement the
  134. HDF5 / 7.3 interface here.
  135. Examples
  136. --------
  137. >>> from os.path import dirname, join as pjoin
  138. >>> import scipy.io as sio
  139. Get the filename for an example .mat file from the tests/data directory.
  140. >>> data_dir = pjoin(dirname(sio.__file__), 'matlab', 'tests', 'data')
  141. >>> mat_fname = pjoin(data_dir, 'testdouble_7.4_GLNX86.mat')
  142. Load the .mat file contents.
  143. >>> mat_contents = sio.loadmat(mat_fname)
  144. The result is a dictionary, one key/value pair for each variable:
  145. >>> sorted(mat_contents.keys())
  146. ['__globals__', '__header__', '__version__', 'testdouble']
  147. >>> mat_contents['testdouble']
  148. array([[0. , 0.78539816, 1.57079633, 2.35619449, 3.14159265,
  149. 3.92699082, 4.71238898, 5.49778714, 6.28318531]])
  150. By default SciPy reads MATLAB structs as structured NumPy arrays where the
  151. dtype fields are of type `object` and the names correspond to the MATLAB
  152. struct field names. This can be disabled by setting the optional argument
  153. `struct_as_record=False`.
  154. Get the filename for an example .mat file that contains a MATLAB struct
  155. called `teststruct` and load the contents.
  156. >>> matstruct_fname = pjoin(data_dir, 'teststruct_7.4_GLNX86.mat')
  157. >>> matstruct_contents = sio.loadmat(matstruct_fname)
  158. >>> teststruct = matstruct_contents['teststruct']
  159. >>> teststruct.dtype
  160. dtype([('stringfield', 'O'), ('doublefield', 'O'), ('complexfield', 'O')])
  161. The size of the structured array is the size of the MATLAB struct, not the
  162. number of elements in any particular field. The shape defaults to 2-D
  163. unless the optional argument `squeeze_me=True`, in which case all length 1
  164. dimensions are removed.
  165. >>> teststruct.size
  166. 1
  167. >>> teststruct.shape
  168. (1, 1)
  169. Get the 'stringfield' of the first element in the MATLAB struct.
  170. >>> teststruct[0, 0]['stringfield']
  171. array(['Rats live on no evil star.'],
  172. dtype='<U26')
  173. Get the first element of the 'doublefield'.
  174. >>> teststruct['doublefield'][0, 0]
  175. array([[ 1.41421356, 2.71828183, 3.14159265]])
  176. Load the MATLAB struct, squeezing out length 1 dimensions, and get the item
  177. from the 'complexfield'.
  178. >>> matstruct_squeezed = sio.loadmat(matstruct_fname, squeeze_me=True)
  179. >>> matstruct_squeezed['teststruct'].shape
  180. ()
  181. >>> matstruct_squeezed['teststruct']['complexfield'].shape
  182. ()
  183. >>> matstruct_squeezed['teststruct']['complexfield'].item()
  184. array([ 1.41421356+1.41421356j, 2.71828183+2.71828183j,
  185. 3.14159265+3.14159265j])
  186. """
  187. variable_names = kwargs.pop('variable_names', None)
  188. with _open_file_context(file_name, appendmat) as f:
  189. MR, _ = mat_reader_factory(f, **kwargs)
  190. matfile_dict = MR.get_variables(variable_names)
  191. if mdict is not None:
  192. mdict.update(matfile_dict)
  193. else:
  194. mdict = matfile_dict
  195. return mdict
  196. @docfiller
  197. def savemat(file_name, mdict,
  198. appendmat=True,
  199. format='5',
  200. long_field_names=False,
  201. do_compression=False,
  202. oned_as='row'):
  203. """
  204. Save a dictionary of names and arrays into a MATLAB-style .mat file.
  205. This saves the array objects in the given dictionary to a MATLAB-
  206. style .mat file.
  207. Parameters
  208. ----------
  209. file_name : str or file-like object
  210. Name of the .mat file (.mat extension not needed if ``appendmat ==
  211. True``).
  212. Can also pass open file_like object.
  213. mdict : dict
  214. Dictionary from which to save matfile variables.
  215. appendmat : bool, optional
  216. True (the default) to append the .mat extension to the end of the
  217. given filename, if not already present.
  218. format : {'5', '4'}, string, optional
  219. '5' (the default) for MATLAB 5 and up (to 7.2),
  220. '4' for MATLAB 4 .mat files.
  221. long_field_names : bool, optional
  222. False (the default) - maximum field name length in a structure is
  223. 31 characters which is the documented maximum length.
  224. True - maximum field name length in a structure is 63 characters
  225. which works for MATLAB 7.6+.
  226. do_compression : bool, optional
  227. Whether or not to compress matrices on write. Default is False.
  228. oned_as : {'row', 'column'}, optional
  229. If 'column', write 1-D NumPy arrays as column vectors.
  230. If 'row', write 1-D NumPy arrays as row vectors.
  231. Examples
  232. --------
  233. >>> from scipy.io import savemat
  234. >>> import numpy as np
  235. >>> a = np.arange(20)
  236. >>> mdic = {"a": a, "label": "experiment"}
  237. >>> mdic
  238. {'a': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
  239. 17, 18, 19]),
  240. 'label': 'experiment'}
  241. >>> savemat("matlab_matrix.mat", mdic)
  242. """
  243. with _open_file_context(file_name, appendmat, 'wb') as file_stream:
  244. if format == '4':
  245. if long_field_names:
  246. raise ValueError("Long field names are not available for version 4 files")
  247. MW = MatFile4Writer(file_stream, oned_as)
  248. elif format == '5':
  249. MW = MatFile5Writer(file_stream,
  250. do_compression=do_compression,
  251. unicode_strings=True,
  252. long_field_names=long_field_names,
  253. oned_as=oned_as)
  254. else:
  255. raise ValueError("Format should be '4' or '5'")
  256. MW.put_variables(mdict)
  257. @docfiller
  258. def whosmat(file_name, appendmat=True, **kwargs):
  259. """
  260. List variables inside a MATLAB file.
  261. Parameters
  262. ----------
  263. %(file_arg)s
  264. %(append_arg)s
  265. %(load_args)s
  266. %(struct_arg)s
  267. Returns
  268. -------
  269. variables : list of tuples
  270. A list of tuples, where each tuple holds the matrix name (a string),
  271. its shape (tuple of ints), and its data class (a string).
  272. Possible data classes are: int8, uint8, int16, uint16, int32, uint32,
  273. int64, uint64, single, double, cell, struct, object, char, sparse,
  274. function, opaque, logical, unknown.
  275. Notes
  276. -----
  277. v4 (Level 1.0), v6 and v7 to 7.2 matfiles are supported.
  278. You will need an HDF5 python library to read matlab 7.3 format mat
  279. files (e.g. h5py). Because SciPy does not supply one, we do not implement the
  280. HDF5 / 7.3 interface here.
  281. .. versionadded:: 0.12.0
  282. Examples
  283. --------
  284. >>> from io import BytesIO
  285. >>> import numpy as np
  286. >>> from scipy.io import savemat, whosmat
  287. Create some arrays, and use `savemat` to write them to a ``BytesIO``
  288. instance.
  289. >>> a = np.array([[10, 20, 30], [11, 21, 31]], dtype=np.int32)
  290. >>> b = np.geomspace(1, 10, 5)
  291. >>> f = BytesIO()
  292. >>> savemat(f, {'a': a, 'b': b})
  293. Use `whosmat` to inspect ``f``. Each tuple in the output list gives
  294. the name, shape and data type of the array in ``f``.
  295. >>> whosmat(f)
  296. [('a', (2, 3), 'int32'), ('b', (1, 5), 'double')]
  297. """
  298. with _open_file_context(file_name, appendmat) as f:
  299. ML, file_opened = mat_reader_factory(f, **kwargs)
  300. variables = ML.list_variables()
  301. return variables