records.py 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. """
  2. Record Arrays
  3. =============
  4. Record arrays expose the fields of structured arrays as properties.
  5. Most commonly, ndarrays contain elements of a single type, e.g. floats,
  6. integers, bools etc. However, it is possible for elements to be combinations
  7. of these using structured types, such as::
  8. >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
  9. >>> a
  10. array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
  11. Here, each element consists of two fields: x (an int), and y (a float).
  12. This is known as a structured array. The different fields are analogous
  13. to columns in a spread-sheet. The different fields can be accessed as
  14. one would a dictionary::
  15. >>> a['x']
  16. array([1, 1])
  17. >>> a['y']
  18. array([2., 2.])
  19. Record arrays allow us to access fields as properties::
  20. >>> ar = np.rec.array(a)
  21. >>> ar.x
  22. array([1, 1])
  23. >>> ar.y
  24. array([2., 2.])
  25. """
  26. import warnings
  27. from collections import Counter
  28. from contextlib import nullcontext
  29. from . import numeric as sb
  30. from . import numerictypes as nt
  31. from numpy.compat import os_fspath
  32. from numpy.core.overrides import set_module
  33. from .arrayprint import _get_legacy_print_mode
  34. # All of the functions allow formats to be a dtype
  35. __all__ = [
  36. 'record', 'recarray', 'format_parser',
  37. 'fromarrays', 'fromrecords', 'fromstring', 'fromfile', 'array',
  38. ]
  39. ndarray = sb.ndarray
  40. _byteorderconv = {'b':'>',
  41. 'l':'<',
  42. 'n':'=',
  43. 'B':'>',
  44. 'L':'<',
  45. 'N':'=',
  46. 'S':'s',
  47. 's':'s',
  48. '>':'>',
  49. '<':'<',
  50. '=':'=',
  51. '|':'|',
  52. 'I':'|',
  53. 'i':'|'}
  54. # formats regular expression
  55. # allows multidimensional spec with a tuple syntax in front
  56. # of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
  57. # are equally allowed
  58. numfmt = nt.sctypeDict
  59. def find_duplicate(list):
  60. """Find duplication in a list, return a list of duplicated elements"""
  61. return [
  62. item
  63. for item, counts in Counter(list).items()
  64. if counts > 1
  65. ]
  66. @set_module('numpy')
  67. class format_parser:
  68. """
  69. Class to convert formats, names, titles description to a dtype.
  70. After constructing the format_parser object, the dtype attribute is
  71. the converted data-type:
  72. ``dtype = format_parser(formats, names, titles).dtype``
  73. Attributes
  74. ----------
  75. dtype : dtype
  76. The converted data-type.
  77. Parameters
  78. ----------
  79. formats : str or list of str
  80. The format description, either specified as a string with
  81. comma-separated format descriptions in the form ``'f8, i4, a5'``, or
  82. a list of format description strings in the form
  83. ``['f8', 'i4', 'a5']``.
  84. names : str or list/tuple of str
  85. The field names, either specified as a comma-separated string in the
  86. form ``'col1, col2, col3'``, or as a list or tuple of strings in the
  87. form ``['col1', 'col2', 'col3']``.
  88. An empty list can be used, in that case default field names
  89. ('f0', 'f1', ...) are used.
  90. titles : sequence
  91. Sequence of title strings. An empty list can be used to leave titles
  92. out.
  93. aligned : bool, optional
  94. If True, align the fields by padding as the C-compiler would.
  95. Default is False.
  96. byteorder : str, optional
  97. If specified, all the fields will be changed to the
  98. provided byte-order. Otherwise, the default byte-order is
  99. used. For all available string specifiers, see `dtype.newbyteorder`.
  100. See Also
  101. --------
  102. dtype, typename, sctype2char
  103. Examples
  104. --------
  105. >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
  106. ... ['T1', 'T2', 'T3']).dtype
  107. dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])
  108. `names` and/or `titles` can be empty lists. If `titles` is an empty list,
  109. titles will simply not appear. If `names` is empty, default field names
  110. will be used.
  111. >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
  112. ... []).dtype
  113. dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
  114. >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
  115. dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
  116. """
  117. def __init__(self, formats, names, titles, aligned=False, byteorder=None):
  118. self._parseFormats(formats, aligned)
  119. self._setfieldnames(names, titles)
  120. self._createdtype(byteorder)
  121. def _parseFormats(self, formats, aligned=False):
  122. """ Parse the field formats """
  123. if formats is None:
  124. raise ValueError("Need formats argument")
  125. if isinstance(formats, list):
  126. dtype = sb.dtype(
  127. [('f{}'.format(i), format_) for i, format_ in enumerate(formats)],
  128. aligned,
  129. )
  130. else:
  131. dtype = sb.dtype(formats, aligned)
  132. fields = dtype.fields
  133. if fields is None:
  134. dtype = sb.dtype([('f1', dtype)], aligned)
  135. fields = dtype.fields
  136. keys = dtype.names
  137. self._f_formats = [fields[key][0] for key in keys]
  138. self._offsets = [fields[key][1] for key in keys]
  139. self._nfields = len(keys)
  140. def _setfieldnames(self, names, titles):
  141. """convert input field names into a list and assign to the _names
  142. attribute """
  143. if names:
  144. if type(names) in [list, tuple]:
  145. pass
  146. elif isinstance(names, str):
  147. names = names.split(',')
  148. else:
  149. raise NameError("illegal input names %s" % repr(names))
  150. self._names = [n.strip() for n in names[:self._nfields]]
  151. else:
  152. self._names = []
  153. # if the names are not specified, they will be assigned as
  154. # "f0, f1, f2,..."
  155. # if not enough names are specified, they will be assigned as "f[n],
  156. # f[n+1],..." etc. where n is the number of specified names..."
  157. self._names += ['f%d' % i for i in range(len(self._names),
  158. self._nfields)]
  159. # check for redundant names
  160. _dup = find_duplicate(self._names)
  161. if _dup:
  162. raise ValueError("Duplicate field names: %s" % _dup)
  163. if titles:
  164. self._titles = [n.strip() for n in titles[:self._nfields]]
  165. else:
  166. self._titles = []
  167. titles = []
  168. if self._nfields > len(titles):
  169. self._titles += [None] * (self._nfields - len(titles))
  170. def _createdtype(self, byteorder):
  171. dtype = sb.dtype({
  172. 'names': self._names,
  173. 'formats': self._f_formats,
  174. 'offsets': self._offsets,
  175. 'titles': self._titles,
  176. })
  177. if byteorder is not None:
  178. byteorder = _byteorderconv[byteorder[0]]
  179. dtype = dtype.newbyteorder(byteorder)
  180. self.dtype = dtype
  181. class record(nt.void):
  182. """A data-type scalar that allows field access as attribute lookup.
  183. """
  184. # manually set name and module so that this class's type shows up
  185. # as numpy.record when printed
  186. __name__ = 'record'
  187. __module__ = 'numpy'
  188. def __repr__(self):
  189. if _get_legacy_print_mode() <= 113:
  190. return self.__str__()
  191. return super().__repr__()
  192. def __str__(self):
  193. if _get_legacy_print_mode() <= 113:
  194. return str(self.item())
  195. return super().__str__()
  196. def __getattribute__(self, attr):
  197. if attr in ('setfield', 'getfield', 'dtype'):
  198. return nt.void.__getattribute__(self, attr)
  199. try:
  200. return nt.void.__getattribute__(self, attr)
  201. except AttributeError:
  202. pass
  203. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  204. res = fielddict.get(attr, None)
  205. if res:
  206. obj = self.getfield(*res[:2])
  207. # if it has fields return a record,
  208. # otherwise return the object
  209. try:
  210. dt = obj.dtype
  211. except AttributeError:
  212. #happens if field is Object type
  213. return obj
  214. if dt.names is not None:
  215. return obj.view((self.__class__, obj.dtype))
  216. return obj
  217. else:
  218. raise AttributeError("'record' object has no "
  219. "attribute '%s'" % attr)
  220. def __setattr__(self, attr, val):
  221. if attr in ('setfield', 'getfield', 'dtype'):
  222. raise AttributeError("Cannot set '%s' attribute" % attr)
  223. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  224. res = fielddict.get(attr, None)
  225. if res:
  226. return self.setfield(val, *res[:2])
  227. else:
  228. if getattr(self, attr, None):
  229. return nt.void.__setattr__(self, attr, val)
  230. else:
  231. raise AttributeError("'record' object has no "
  232. "attribute '%s'" % attr)
  233. def __getitem__(self, indx):
  234. obj = nt.void.__getitem__(self, indx)
  235. # copy behavior of record.__getattribute__,
  236. if isinstance(obj, nt.void) and obj.dtype.names is not None:
  237. return obj.view((self.__class__, obj.dtype))
  238. else:
  239. # return a single element
  240. return obj
  241. def pprint(self):
  242. """Pretty-print all fields."""
  243. # pretty-print all fields
  244. names = self.dtype.names
  245. maxlen = max(len(name) for name in names)
  246. fmt = '%% %ds: %%s' % maxlen
  247. rows = [fmt % (name, getattr(self, name)) for name in names]
  248. return "\n".join(rows)
  249. # The recarray is almost identical to a standard array (which supports
  250. # named fields already) The biggest difference is that it can use
  251. # attribute-lookup to find the fields and it is constructed using
  252. # a record.
  253. # If byteorder is given it forces a particular byteorder on all
  254. # the fields (and any subfields)
  255. class recarray(ndarray):
  256. """Construct an ndarray that allows field access using attributes.
  257. Arrays may have a data-types containing fields, analogous
  258. to columns in a spread sheet. An example is ``[(x, int), (y, float)]``,
  259. where each entry in the array is a pair of ``(int, float)``. Normally,
  260. these attributes are accessed using dictionary lookups such as ``arr['x']``
  261. and ``arr['y']``. Record arrays allow the fields to be accessed as members
  262. of the array, using ``arr.x`` and ``arr.y``.
  263. Parameters
  264. ----------
  265. shape : tuple
  266. Shape of output array.
  267. dtype : data-type, optional
  268. The desired data-type. By default, the data-type is determined
  269. from `formats`, `names`, `titles`, `aligned` and `byteorder`.
  270. formats : list of data-types, optional
  271. A list containing the data-types for the different columns, e.g.
  272. ``['i4', 'f8', 'i4']``. `formats` does *not* support the new
  273. convention of using types directly, i.e. ``(int, float, int)``.
  274. Note that `formats` must be a list, not a tuple.
  275. Given that `formats` is somewhat limited, we recommend specifying
  276. `dtype` instead.
  277. names : tuple of str, optional
  278. The name of each column, e.g. ``('x', 'y', 'z')``.
  279. buf : buffer, optional
  280. By default, a new array is created of the given shape and data-type.
  281. If `buf` is specified and is an object exposing the buffer interface,
  282. the array will use the memory from the existing buffer. In this case,
  283. the `offset` and `strides` keywords are available.
  284. Other Parameters
  285. ----------------
  286. titles : tuple of str, optional
  287. Aliases for column names. For example, if `names` were
  288. ``('x', 'y', 'z')`` and `titles` is
  289. ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
  290. ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
  291. byteorder : {'<', '>', '='}, optional
  292. Byte-order for all fields.
  293. aligned : bool, optional
  294. Align the fields in memory as the C-compiler would.
  295. strides : tuple of ints, optional
  296. Buffer (`buf`) is interpreted according to these strides (strides
  297. define how many bytes each array element, row, column, etc.
  298. occupy in memory).
  299. offset : int, optional
  300. Start reading buffer (`buf`) from this offset onwards.
  301. order : {'C', 'F'}, optional
  302. Row-major (C-style) or column-major (Fortran-style) order.
  303. Returns
  304. -------
  305. rec : recarray
  306. Empty array of the given shape and type.
  307. See Also
  308. --------
  309. core.records.fromrecords : Construct a record array from data.
  310. record : fundamental data-type for `recarray`.
  311. format_parser : determine a data-type from formats, names, titles.
  312. Notes
  313. -----
  314. This constructor can be compared to ``empty``: it creates a new record
  315. array but does not fill it with data. To create a record array from data,
  316. use one of the following methods:
  317. 1. Create a standard ndarray and convert it to a record array,
  318. using ``arr.view(np.recarray)``
  319. 2. Use the `buf` keyword.
  320. 3. Use `np.rec.fromrecords`.
  321. Examples
  322. --------
  323. Create an array with two fields, ``x`` and ``y``:
  324. >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
  325. >>> x
  326. array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])
  327. >>> x['x']
  328. array([1., 3.])
  329. View the array as a record array:
  330. >>> x = x.view(np.recarray)
  331. >>> x.x
  332. array([1., 3.])
  333. >>> x.y
  334. array([2, 4])
  335. Create a new, empty record array:
  336. >>> np.recarray((2,),
  337. ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
  338. rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
  339. (3471280, 1.2134086255804012e-316, 0)],
  340. dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])
  341. """
  342. # manually set name and module so that this class's type shows
  343. # up as "numpy.recarray" when printed
  344. __name__ = 'recarray'
  345. __module__ = 'numpy'
  346. def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
  347. formats=None, names=None, titles=None,
  348. byteorder=None, aligned=False, order='C'):
  349. if dtype is not None:
  350. descr = sb.dtype(dtype)
  351. else:
  352. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  353. if buf is None:
  354. self = ndarray.__new__(subtype, shape, (record, descr), order=order)
  355. else:
  356. self = ndarray.__new__(subtype, shape, (record, descr),
  357. buffer=buf, offset=offset,
  358. strides=strides, order=order)
  359. return self
  360. def __array_finalize__(self, obj):
  361. if self.dtype.type is not record and self.dtype.names is not None:
  362. # if self.dtype is not np.record, invoke __setattr__ which will
  363. # convert it to a record if it is a void dtype.
  364. self.dtype = self.dtype
  365. def __getattribute__(self, attr):
  366. # See if ndarray has this attr, and return it if so. (note that this
  367. # means a field with the same name as an ndarray attr cannot be
  368. # accessed by attribute).
  369. try:
  370. return object.__getattribute__(self, attr)
  371. except AttributeError: # attr must be a fieldname
  372. pass
  373. # look for a field with this name
  374. fielddict = ndarray.__getattribute__(self, 'dtype').fields
  375. try:
  376. res = fielddict[attr][:2]
  377. except (TypeError, KeyError) as e:
  378. raise AttributeError("recarray has no attribute %s" % attr) from e
  379. obj = self.getfield(*res)
  380. # At this point obj will always be a recarray, since (see
  381. # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
  382. # non-structured, convert it to an ndarray. Then if obj is structured
  383. # with void type convert it to the same dtype.type (eg to preserve
  384. # numpy.record type if present), since nested structured fields do not
  385. # inherit type. Don't do this for non-void structures though.
  386. if obj.dtype.names is not None:
  387. if issubclass(obj.dtype.type, nt.void):
  388. return obj.view(dtype=(self.dtype.type, obj.dtype))
  389. return obj
  390. else:
  391. return obj.view(ndarray)
  392. # Save the dictionary.
  393. # If the attr is a field name and not in the saved dictionary
  394. # Undo any "setting" of the attribute and do a setfield
  395. # Thus, you can't create attributes on-the-fly that are field names.
  396. def __setattr__(self, attr, val):
  397. # Automatically convert (void) structured types to records
  398. # (but not non-void structures, subarrays, or non-structured voids)
  399. if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
  400. val = sb.dtype((record, val))
  401. newattr = attr not in self.__dict__
  402. try:
  403. ret = object.__setattr__(self, attr, val)
  404. except Exception:
  405. fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
  406. if attr not in fielddict:
  407. raise
  408. else:
  409. fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
  410. if attr not in fielddict:
  411. return ret
  412. if newattr:
  413. # We just added this one or this setattr worked on an
  414. # internal attribute.
  415. try:
  416. object.__delattr__(self, attr)
  417. except Exception:
  418. return ret
  419. try:
  420. res = fielddict[attr][:2]
  421. except (TypeError, KeyError) as e:
  422. raise AttributeError(
  423. "record array has no attribute %s" % attr
  424. ) from e
  425. return self.setfield(val, *res)
  426. def __getitem__(self, indx):
  427. obj = super().__getitem__(indx)
  428. # copy behavior of getattr, except that here
  429. # we might also be returning a single element
  430. if isinstance(obj, ndarray):
  431. if obj.dtype.names is not None:
  432. obj = obj.view(type(self))
  433. if issubclass(obj.dtype.type, nt.void):
  434. return obj.view(dtype=(self.dtype.type, obj.dtype))
  435. return obj
  436. else:
  437. return obj.view(type=ndarray)
  438. else:
  439. # return a single element
  440. return obj
  441. def __repr__(self):
  442. repr_dtype = self.dtype
  443. if self.dtype.type is record or not issubclass(self.dtype.type, nt.void):
  444. # If this is a full record array (has numpy.record dtype),
  445. # or if it has a scalar (non-void) dtype with no records,
  446. # represent it using the rec.array function. Since rec.array
  447. # converts dtype to a numpy.record for us, convert back
  448. # to non-record before printing
  449. if repr_dtype.type is record:
  450. repr_dtype = sb.dtype((nt.void, repr_dtype))
  451. prefix = "rec.array("
  452. fmt = 'rec.array(%s,%sdtype=%s)'
  453. else:
  454. # otherwise represent it using np.array plus a view
  455. # This should only happen if the user is playing
  456. # strange games with dtypes.
  457. prefix = "array("
  458. fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'
  459. # get data/shape string. logic taken from numeric.array_repr
  460. if self.size > 0 or self.shape == (0,):
  461. lst = sb.array2string(
  462. self, separator=', ', prefix=prefix, suffix=',')
  463. else:
  464. # show zero-length shape unless it is (0,)
  465. lst = "[], shape=%s" % (repr(self.shape),)
  466. lf = '\n'+' '*len(prefix)
  467. if _get_legacy_print_mode() <= 113:
  468. lf = ' ' + lf # trailing space
  469. return fmt % (lst, lf, repr_dtype)
  470. def field(self, attr, val=None):
  471. if isinstance(attr, int):
  472. names = ndarray.__getattribute__(self, 'dtype').names
  473. attr = names[attr]
  474. fielddict = ndarray.__getattribute__(self, 'dtype').fields
  475. res = fielddict[attr][:2]
  476. if val is None:
  477. obj = self.getfield(*res)
  478. if obj.dtype.names is not None:
  479. return obj
  480. return obj.view(ndarray)
  481. else:
  482. return self.setfield(val, *res)
  483. def _deprecate_shape_0_as_None(shape):
  484. if shape == 0:
  485. warnings.warn(
  486. "Passing `shape=0` to have the shape be inferred is deprecated, "
  487. "and in future will be equivalent to `shape=(0,)`. To infer "
  488. "the shape and suppress this warning, pass `shape=None` instead.",
  489. FutureWarning, stacklevel=3)
  490. return None
  491. else:
  492. return shape
  493. @set_module("numpy.rec")
  494. def fromarrays(arrayList, dtype=None, shape=None, formats=None,
  495. names=None, titles=None, aligned=False, byteorder=None):
  496. """Create a record array from a (flat) list of arrays
  497. Parameters
  498. ----------
  499. arrayList : list or tuple
  500. List of array-like objects (such as lists, tuples,
  501. and ndarrays).
  502. dtype : data-type, optional
  503. valid dtype for all arrays
  504. shape : int or tuple of ints, optional
  505. Shape of the resulting array. If not provided, inferred from
  506. ``arrayList[0]``.
  507. formats, names, titles, aligned, byteorder :
  508. If `dtype` is ``None``, these arguments are passed to
  509. `numpy.format_parser` to construct a dtype. See that function for
  510. detailed documentation.
  511. Returns
  512. -------
  513. np.recarray
  514. Record array consisting of given arrayList columns.
  515. Examples
  516. --------
  517. >>> x1=np.array([1,2,3,4])
  518. >>> x2=np.array(['a','dd','xyz','12'])
  519. >>> x3=np.array([1.1,2,3,4])
  520. >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
  521. >>> print(r[1])
  522. (2, 'dd', 2.0) # may vary
  523. >>> x1[1]=34
  524. >>> r.a
  525. array([1, 2, 3, 4])
  526. >>> x1 = np.array([1, 2, 3, 4])
  527. >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
  528. >>> x3 = np.array([1.1, 2, 3,4])
  529. >>> r = np.core.records.fromarrays(
  530. ... [x1, x2, x3],
  531. ... dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
  532. >>> r
  533. rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
  534. (4, b'12', 4. )],
  535. dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
  536. """
  537. arrayList = [sb.asarray(x) for x in arrayList]
  538. # NumPy 1.19.0, 2020-01-01
  539. shape = _deprecate_shape_0_as_None(shape)
  540. if shape is None:
  541. shape = arrayList[0].shape
  542. elif isinstance(shape, int):
  543. shape = (shape,)
  544. if formats is None and dtype is None:
  545. # go through each object in the list to see if it is an ndarray
  546. # and determine the formats.
  547. formats = [obj.dtype for obj in arrayList]
  548. if dtype is not None:
  549. descr = sb.dtype(dtype)
  550. else:
  551. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  552. _names = descr.names
  553. # Determine shape from data-type.
  554. if len(descr) != len(arrayList):
  555. raise ValueError("mismatch between the number of fields "
  556. "and the number of arrays")
  557. d0 = descr[0].shape
  558. nn = len(d0)
  559. if nn > 0:
  560. shape = shape[:-nn]
  561. _array = recarray(shape, descr)
  562. # populate the record array (makes a copy)
  563. for k, obj in enumerate(arrayList):
  564. nn = descr[k].ndim
  565. testshape = obj.shape[:obj.ndim - nn]
  566. name = _names[k]
  567. if testshape != shape:
  568. raise ValueError(f'array-shape mismatch in array {k} ("{name}")')
  569. _array[name] = obj
  570. return _array
  571. @set_module("numpy.rec")
  572. def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
  573. titles=None, aligned=False, byteorder=None):
  574. """Create a recarray from a list of records in text form.
  575. Parameters
  576. ----------
  577. recList : sequence
  578. data in the same field may be heterogeneous - they will be promoted
  579. to the highest data type.
  580. dtype : data-type, optional
  581. valid dtype for all arrays
  582. shape : int or tuple of ints, optional
  583. shape of each array.
  584. formats, names, titles, aligned, byteorder :
  585. If `dtype` is ``None``, these arguments are passed to
  586. `numpy.format_parser` to construct a dtype. See that function for
  587. detailed documentation.
  588. If both `formats` and `dtype` are None, then this will auto-detect
  589. formats. Use list of tuples rather than list of lists for faster
  590. processing.
  591. Returns
  592. -------
  593. np.recarray
  594. record array consisting of given recList rows.
  595. Examples
  596. --------
  597. >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
  598. ... names='col1,col2,col3')
  599. >>> print(r[0])
  600. (456, 'dbe', 1.2)
  601. >>> r.col1
  602. array([456, 2])
  603. >>> r.col2
  604. array(['dbe', 'de'], dtype='<U3')
  605. >>> import pickle
  606. >>> pickle.loads(pickle.dumps(r))
  607. rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
  608. dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
  609. """
  610. if formats is None and dtype is None: # slower
  611. obj = sb.array(recList, dtype=object)
  612. arrlist = [sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])]
  613. return fromarrays(arrlist, formats=formats, shape=shape, names=names,
  614. titles=titles, aligned=aligned, byteorder=byteorder)
  615. if dtype is not None:
  616. descr = sb.dtype((record, dtype))
  617. else:
  618. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  619. try:
  620. retval = sb.array(recList, dtype=descr)
  621. except (TypeError, ValueError):
  622. # NumPy 1.19.0, 2020-01-01
  623. shape = _deprecate_shape_0_as_None(shape)
  624. if shape is None:
  625. shape = len(recList)
  626. if isinstance(shape, int):
  627. shape = (shape,)
  628. if len(shape) > 1:
  629. raise ValueError("Can only deal with 1-d array.")
  630. _array = recarray(shape, descr)
  631. for k in range(_array.size):
  632. _array[k] = tuple(recList[k])
  633. # list of lists instead of list of tuples ?
  634. # 2018-02-07, 1.14.1
  635. warnings.warn(
  636. "fromrecords expected a list of tuples, may have received a list "
  637. "of lists instead. In the future that will raise an error",
  638. FutureWarning, stacklevel=2)
  639. return _array
  640. else:
  641. if shape is not None and retval.shape != shape:
  642. retval.shape = shape
  643. res = retval.view(recarray)
  644. return res
  645. @set_module("numpy.rec")
  646. def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
  647. names=None, titles=None, aligned=False, byteorder=None):
  648. r"""Create a record array from binary data
  649. Note that despite the name of this function it does not accept `str`
  650. instances.
  651. Parameters
  652. ----------
  653. datastring : bytes-like
  654. Buffer of binary data
  655. dtype : data-type, optional
  656. Valid dtype for all arrays
  657. shape : int or tuple of ints, optional
  658. Shape of each array.
  659. offset : int, optional
  660. Position in the buffer to start reading from.
  661. formats, names, titles, aligned, byteorder :
  662. If `dtype` is ``None``, these arguments are passed to
  663. `numpy.format_parser` to construct a dtype. See that function for
  664. detailed documentation.
  665. Returns
  666. -------
  667. np.recarray
  668. Record array view into the data in datastring. This will be readonly
  669. if `datastring` is readonly.
  670. See Also
  671. --------
  672. numpy.frombuffer
  673. Examples
  674. --------
  675. >>> a = b'\x01\x02\x03abc'
  676. >>> np.core.records.fromstring(a, dtype='u1,u1,u1,S3')
  677. rec.array([(1, 2, 3, b'abc')],
  678. dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
  679. >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
  680. ... ('GradeLevel', np.int32)]
  681. >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
  682. ... ('Aadi', 66.6, 6)], dtype=grades_dtype)
  683. >>> np.core.records.fromstring(grades_array.tobytes(), dtype=grades_dtype)
  684. rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
  685. dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])
  686. >>> s = '\x01\x02\x03abc'
  687. >>> np.core.records.fromstring(s, dtype='u1,u1,u1,S3')
  688. Traceback (most recent call last)
  689. ...
  690. TypeError: a bytes-like object is required, not 'str'
  691. """
  692. if dtype is None and formats is None:
  693. raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
  694. if dtype is not None:
  695. descr = sb.dtype(dtype)
  696. else:
  697. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  698. itemsize = descr.itemsize
  699. # NumPy 1.19.0, 2020-01-01
  700. shape = _deprecate_shape_0_as_None(shape)
  701. if shape in (None, -1):
  702. shape = (len(datastring) - offset) // itemsize
  703. _array = recarray(shape, descr, buf=datastring, offset=offset)
  704. return _array
  def get_remaining_size(fd):
      """Return how many bytes lie between the current position and the end.

      The position of `fd` is put back to where it was on entry before this
      function returns, including when measuring the end position raises.

      Parameters
      ----------
      fd : file object
          Object providing ``tell`` and ``seek``.

      Returns
      -------
      int
          End-of-file position minus the position `fd` had on entry.
      """
      start = fd.tell()
      try:
          # whence=2: move relative to the end of the file to find its length
          fd.seek(0, 2)
          end = fd.tell()
      finally:
          # whence=0: absolute seek back to the caller's original position
          fd.seek(start, 0)
      return end - start
  @set_module("numpy.rec")
  def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
      """Create a record array from binary file data.

      Parameters
      ----------
      fd : str, path-like or file object
          An object with a ``readinto`` attribute is used as an already
          open file and must also support ``tell`` and ``seek``. Anything
          else is treated as a path and opened in binary read mode; that
          file is closed again before returning.
      dtype : data-type, optional
          Valid dtype for all arrays.
      shape : int or tuple of ints, optional
          Shape of each array. When omitted, ``(-1,)`` is used; a ``-1``
          entry is replaced by the number of records that fit in the bytes
          remaining in the file.
      offset : int, optional
          If positive, number of bytes to skip forward from the current
          file position before reading.
      formats, names, titles, aligned, byteorder :
          If `dtype` is ``None``, these arguments are passed to
          `numpy.format_parser` to construct a dtype. See that function for
          detailed documentation.

      Returns
      -------
      np.recarray
          Record array consisting of data enclosed in file.

      Raises
      ------
      TypeError
          If neither `dtype` nor `formats` is given.
      ValueError
          If the requested shape and dtype need more bytes than remain in
          the file.
      OSError
          If the number of bytes read differs from the number expected.

      Examples
      --------
      >>> from tempfile import TemporaryFile
      >>> a = np.empty(10,dtype='f8,i4,a5')
      >>> a[5] = (0.5,10,'abcde')
      >>>
      >>> fd=TemporaryFile()
      >>> a = a.newbyteorder('<')
      >>> a.tofile(fd)
      >>>
      >>> _ = fd.seek(0)
      >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
      ... byteorder='<')
      >>> print(r[5])
      (0.5, 10, 'abcde')
      >>> r.shape
      (10,)
      """
      if formats is None and dtype is None:
          raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")

      # NumPy 1.19.0, 2020-01-01
      shape = _deprecate_shape_0_as_None(shape)

      # Normalise the shape to a sequence; (-1,) means "as many as fit".
      if shape is None:
          shape = (-1,)
      elif isinstance(shape, int):
          shape = (shape,)

      # GH issue 2504. An object with ``readinto`` supports the
      # io.RawIOBase or io.BufferedIOBase interface (e.g. gzip, BytesIO,
      # BufferedReader) and is used as-is; anything else is opened here.
      already_open = hasattr(fd, 'readinto')
      source = nullcontext(fd) if already_open else open(os_fspath(fd), 'rb')

      with source as stream:
          if offset > 0:
              # whence=1: skip forward relative to the current position
              stream.seek(offset, 1)
          available = get_remaining_size(stream)

          if dtype is None:
              descr = format_parser(
                  formats, names, titles, aligned, byteorder).dtype
          else:
              descr = sb.dtype(dtype)
          record_bytes = descr.itemsize

          count = sb.array(shape).prod(dtype=nt.intp)
          signed_bytes = count * record_bytes
          if signed_bytes < 0:
              # A negative product means the shape holds a -1 placeholder:
              # fill it with however many records fit in the remaining bytes.
              dims = list(shape)
              dims[dims.index(-1)] = available // -signed_bytes
              shape = tuple(dims)
              count = sb.array(shape).prod(dtype=nt.intp)

          expected = count * record_bytes
          if expected > available:
              raise ValueError(
                  "Not enough bytes left in file for specified shape and type")

          # Allocate the result and let the file fill its buffer directly.
          result = recarray(shape, descr)
          received = stream.readinto(result.data)
          if received != expected:
              raise OSError("Didn't read as many bytes as expected")

      return result
  @set_module("numpy.rec")
  def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
            names=None, titles=None, aligned=False, byteorder=None, copy=True):
      """
      Construct a record array from a wide-variety of objects.

      A general-purpose record array constructor that dispatches to the
      appropriate `recarray` creation function based on the inputs (see Notes).

      Parameters
      ----------
      obj : any
          Input object. See Notes for details on how various input types are
          treated.
      dtype : data-type, optional
          Valid dtype for array.
      shape : int or tuple of ints, optional
          Shape of each array.
      offset : int, optional
          Position in the file or buffer to start reading from.
      strides : tuple of ints, optional
          Buffer (`buf`) is interpreted according to these strides (strides
          define how many bytes each array element, row, column, etc.
          occupy in memory).
      formats, names, titles, aligned, byteorder :
          If `dtype` is ``None``, these arguments are passed to
          `numpy.format_parser` to construct a dtype. See that function for
          detailed documentation.
      copy : bool, optional
          Whether to copy the input object (True), or to use a reference instead.
          This option only applies when the input is an ndarray or recarray.
          Defaults to True.

      Returns
      -------
      np.recarray
          Record array created from the specified object.

      Raises
      ------
      ValueError
          If `obj` is ``None``, a ``str`` or an open file and neither `dtype`
          nor `formats` is given; if `obj` is ``None`` and `shape` is not
          given; or if `obj` is of an unsupported type (it matches none of
          the cases in Notes and has no dict ``__array_interface__``).

      Notes
      -----
      If `obj` is ``None``, then call the `~numpy.recarray` constructor. If
      `obj` is a ``bytes`` object, then call the `fromstring` constructor. If
      `obj` is a list or a tuple, then if it is empty or its first item is a
      tuple or a list, call `fromrecords`, otherwise call `fromarrays`. If
      `obj` is a `~numpy.recarray`, then make a copy of the data in the
      recarray (if ``copy=True``) and use the new formats, names, and titles.
      If `obj` is a file (it has a ``readinto`` attribute), then call
      `fromfile`. If `obj` is an `ndarray`, then return
      ``obj.view(recarray)``, making a copy of the data if ``copy=True``.
      Finally, an object exposing a dict ``__array_interface__`` is converted
      to an array first and then viewed as a `recarray`.

      Examples
      --------
      >>> a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
      >>> a
      array([[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9]])

      >>> np.core.records.array(a)
      rec.array([[1, 2, 3],
                 [4, 5, 6],
                 [7, 8, 9]],
          dtype=int32)

      >>> b = [(1, 1), (2, 4), (3, 9)]
      >>> c = np.core.records.array(b, formats = ['i2', 'f2'], names = ('x', 'y'))
      >>> c
      rec.array([(1, 1.0), (2, 4.0), (3, 9.0)],
                dtype=[('x', '<i2'), ('y', '<f2')])

      >>> c.x
      rec.array([1, 2, 3], dtype=int16)

      >>> c.y
      rec.array([ 1.0,  4.0,  9.0], dtype=float16)

      >>> r = np.rec.array(['abc','def'], names=['col1','col2'])
      >>> print(r.col1)
      abc

      >>> r.col1
      array('abc', dtype='<U3')

      >>> r.col2
      array('def', dtype='<U3')
      """
      # These inputs carry no field layout of their own, so one must be given.
      if ((isinstance(obj, (type(None), str)) or hasattr(obj, 'readinto')) and
              formats is None and dtype is None):
          raise ValueError("Must define formats (or dtype) if object is "
                           "None, string, or an open file")

      # Resolve the dtype once up front. Only when neither `dtype` nor
      # `formats` was supplied are the raw format arguments forwarded, so the
      # delegated constructor can work out the layout from the data itself.
      kwds = {}
      if dtype is not None:
          dtype = sb.dtype(dtype)
      elif formats is not None:
          dtype = format_parser(formats, names, titles,
                                aligned, byteorder).dtype
      else:
          kwds = {'formats': formats,
                  'names': names,
                  'titles': titles,
                  'aligned': aligned,
                  'byteorder': byteorder
                  }

      if obj is None:
          if shape is None:
              raise ValueError("Must define a shape if obj is None")
          return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)

      elif isinstance(obj, bytes):
          return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)

      elif isinstance(obj, (list, tuple)):
          # An empty sequence has no first item to inspect; indexing it used
          # to raise IndexError here before any constructor was reached.
          # Treat it as an empty sequence of records instead.
          # NOTE(review): relies on `fromrecords` accepting an empty sequence
          # when a dtype is known -- confirm against its implementation.
          if not obj or isinstance(obj[0], (tuple, list)):
              return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
          else:
              return fromarrays(obj, dtype=dtype, shape=shape, **kwds)

      elif isinstance(obj, recarray):
          # Reinterpret under the requested dtype only when it differs.
          if dtype is not None and (obj.dtype != dtype):
              new = obj.view(dtype)
          else:
              new = obj
          if copy:
              new = new.copy()
          return new

      elif hasattr(obj, 'readinto'):
          # The dtype is always resolved here: the check at the top rejects
          # file objects given without `dtype` or `formats`.
          return fromfile(obj, dtype=dtype, shape=shape, offset=offset)

      elif isinstance(obj, ndarray):
          if dtype is not None and (obj.dtype != dtype):
              new = obj.view(dtype)
          else:
              new = obj
          if copy:
              new = new.copy()
          return new.view(recarray)

      else:
          # Last resort: objects that expose the array interface protocol.
          interface = getattr(obj, "__array_interface__", None)
          if interface is None or not isinstance(interface, dict):
              raise ValueError("Unknown input type")
          obj = sb.array(obj)
          if dtype is not None and (obj.dtype != dtype):
              obj = obj.view(dtype)
          return obj.view(recarray)