nanfunctions.py 64 KB


  1. """
  2. Functions that ignore NaN.
  3. Functions
  4. ---------
  5. - `nanmin` -- minimum non-NaN value
  6. - `nanmax` -- maximum non-NaN value
  7. - `nanargmin` -- index of minimum non-NaN value
  8. - `nanargmax` -- index of maximum non-NaN value
  9. - `nansum` -- sum of non-NaN values
  10. - `nanprod` -- product of non-NaN values
  11. - `nancumsum` -- cumulative sum of non-NaN values
  12. - `nancumprod` -- cumulative product of non-NaN values
  13. - `nanmean` -- mean of non-NaN values
  14. - `nanvar` -- variance of non-NaN values
  15. - `nanstd` -- standard deviation of non-NaN values
  16. - `nanmedian` -- median of non-NaN values
  17. - `nanquantile` -- qth quantile of non-NaN values
  18. - `nanpercentile` -- qth percentile of non-NaN values
  19. """
  20. import functools
  21. import warnings
  22. import numpy as np
  23. from numpy.lib import function_base
  24. from numpy.core import overrides
# Bind ``module='numpy'`` once so that the ``@array_function_dispatch(...)``
# decorators used throughout this file only have to supply the dispatcher.
array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


# Public names exported by this module; the underscore-prefixed helpers
# defined below are not listed here.
__all__ = [
    'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean',
    'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod',
    'nancumsum', 'nancumprod', 'nanquantile'
]
  32. def _nan_mask(a, out=None):
  33. """
  34. Parameters
  35. ----------
  36. a : array-like
  37. Input array with at least 1 dimension.
  38. out : ndarray, optional
  39. Alternate output array in which to place the result. The default
  40. is ``None``; if provided, it must have the same shape as the
  41. expected output and will prevent the allocation of a new array.
  42. Returns
  43. -------
  44. y : bool ndarray or True
  45. A bool array where ``np.nan`` positions are marked with ``False``
  46. and other positions are marked with ``True``. If the type of ``a``
  47. is such that it can't possibly contain ``np.nan``, returns ``True``.
  48. """
  49. # we assume that a is an array for this private function
  50. if a.dtype.kind not in 'fc':
  51. return True
  52. y = np.isnan(a, out=out)
  53. y = np.invert(y, out=y)
  54. return y
  55. def _replace_nan(a, val):
  56. """
  57. If `a` is of inexact type, make a copy of `a`, replace NaNs with
  58. the `val` value, and return the copy together with a boolean mask
  59. marking the locations where NaNs were present. If `a` is not of
  60. inexact type, do nothing and return `a` together with a mask of None.
  61. Note that scalars will end up as array scalars, which is important
  62. for using the result as the value of the out argument in some
  63. operations.
  64. Parameters
  65. ----------
  66. a : array-like
  67. Input array.
  68. val : float
  69. NaN values are set to val before doing the operation.
  70. Returns
  71. -------
  72. y : ndarray
  73. If `a` is of inexact type, return a copy of `a` with the NaNs
  74. replaced by the fill value, otherwise return `a`.
  75. mask: {bool, None}
  76. If `a` is of inexact type, return a boolean mask marking locations of
  77. NaNs, otherwise return None.
  78. """
  79. a = np.asanyarray(a)
  80. if a.dtype == np.object_:
  81. # object arrays do not support `isnan` (gh-9009), so make a guess
  82. mask = np.not_equal(a, a, dtype=bool)
  83. elif issubclass(a.dtype.type, np.inexact):
  84. mask = np.isnan(a)
  85. else:
  86. mask = None
  87. if mask is not None:
  88. a = np.array(a, subok=True, copy=True)
  89. np.copyto(a, val, where=mask)
  90. return a, mask
  91. def _copyto(a, val, mask):
  92. """
  93. Replace values in `a` with NaN where `mask` is True. This differs from
  94. copyto in that it will deal with the case where `a` is a numpy scalar.
  95. Parameters
  96. ----------
  97. a : ndarray or numpy scalar
  98. Array or numpy scalar some of whose values are to be replaced
  99. by val.
  100. val : numpy scalar
  101. Value used a replacement.
  102. mask : ndarray, scalar
  103. Boolean array. Where True the corresponding element of `a` is
  104. replaced by `val`. Broadcasts.
  105. Returns
  106. -------
  107. res : ndarray, scalar
  108. Array with elements replaced or scalar `val`.
  109. """
  110. if isinstance(a, np.ndarray):
  111. np.copyto(a, val, where=mask, casting='unsafe')
  112. else:
  113. a = a.dtype.type(val)
  114. return a
  115. def _remove_nan_1d(arr1d, overwrite_input=False):
  116. """
  117. Equivalent to arr1d[~arr1d.isnan()], but in a different order
  118. Presumably faster as it incurs fewer copies
  119. Parameters
  120. ----------
  121. arr1d : ndarray
  122. Array to remove nans from
  123. overwrite_input : bool
  124. True if `arr1d` can be modified in place
  125. Returns
  126. -------
  127. res : ndarray
  128. Array with nan elements removed
  129. overwrite_input : bool
  130. True if `res` can be modified in place, given the constraint on the
  131. input
  132. """
  133. if arr1d.dtype == object:
  134. # object arrays do not support `isnan` (gh-9009), so make a guess
  135. c = np.not_equal(arr1d, arr1d, dtype=bool)
  136. else:
  137. c = np.isnan(arr1d)
  138. s = np.nonzero(c)[0]
  139. if s.size == arr1d.size:
  140. warnings.warn("All-NaN slice encountered", RuntimeWarning,
  141. stacklevel=5)
  142. return arr1d[:0], True
  143. elif s.size == 0:
  144. return arr1d, overwrite_input
  145. else:
  146. if not overwrite_input:
  147. arr1d = arr1d.copy()
  148. # select non-nans at end of array
  149. enonan = arr1d[-s.size:][~c[-s.size:]]
  150. # fill nans in beginning of array with non-nans of end
  151. arr1d[s[:enonan.size]] = enonan
  152. return arr1d[:-s.size], True
  153. def _divide_by_count(a, b, out=None):
  154. """
  155. Compute a/b ignoring invalid results. If `a` is an array the division
  156. is done in place. If `a` is a scalar, then its type is preserved in the
  157. output. If out is None, then a is used instead so that the division
  158. is in place. Note that this is only called with `a` an inexact type.
  159. Parameters
  160. ----------
  161. a : {ndarray, numpy scalar}
  162. Numerator. Expected to be of inexact type but not checked.
  163. b : {ndarray, numpy scalar}
  164. Denominator.
  165. out : ndarray, optional
  166. Alternate output array in which to place the result. The default
  167. is ``None``; if provided, it must have the same shape as the
  168. expected output, but the type will be cast if necessary.
  169. Returns
  170. -------
  171. ret : {ndarray, numpy scalar}
  172. The return value is a/b. If `a` was an ndarray the division is done
  173. in place. If `a` is a numpy scalar, the division preserves its type.
  174. """
  175. with np.errstate(invalid='ignore', divide='ignore'):
  176. if isinstance(a, np.ndarray):
  177. if out is None:
  178. return np.divide(a, b, out=a, casting='unsafe')
  179. else:
  180. return np.divide(a, b, out=out, casting='unsafe')
  181. else:
  182. if out is None:
  183. # Precaution against reduced object arrays
  184. try:
  185. return a.dtype.type(a / b)
  186. except AttributeError:
  187. return a / b
  188. else:
  189. # This is questionable, but currently a numpy scalar can
  190. # be output to a zero dimensional array.
  191. return np.divide(a, b, out=out, casting='unsafe')
  192. def _nanmin_dispatcher(a, axis=None, out=None, keepdims=None,
  193. initial=None, where=None):
  194. return (a, out)
  195. @array_function_dispatch(_nanmin_dispatcher)
  196. def nanmin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
  197. where=np._NoValue):
  198. """
  199. Return minimum of an array or minimum along an axis, ignoring any NaNs.
  200. When all-NaN slices are encountered a ``RuntimeWarning`` is raised and
  201. Nan is returned for that slice.
  202. Parameters
  203. ----------
  204. a : array_like
  205. Array containing numbers whose minimum is desired. If `a` is not an
  206. array, a conversion is attempted.
  207. axis : {int, tuple of int, None}, optional
  208. Axis or axes along which the minimum is computed. The default is to compute
  209. the minimum of the flattened array.
  210. out : ndarray, optional
  211. Alternate output array in which to place the result. The default
  212. is ``None``; if provided, it must have the same shape as the
  213. expected output, but the type will be cast if necessary. See
  214. :ref:`ufuncs-output-type` for more details.
  215. .. versionadded:: 1.8.0
  216. keepdims : bool, optional
  217. If this is set to True, the axes which are reduced are left
  218. in the result as dimensions with size one. With this option,
  219. the result will broadcast correctly against the original `a`.
  220. If the value is anything but the default, then
  221. `keepdims` will be passed through to the `min` method
  222. of sub-classes of `ndarray`. If the sub-classes methods
  223. does not implement `keepdims` any exceptions will be raised.
  224. .. versionadded:: 1.8.0
  225. initial : scalar, optional
  226. The maximum value of an output element. Must be present to allow
  227. computation on empty slice. See `~numpy.ufunc.reduce` for details.
  228. .. versionadded:: 1.22.0
  229. where : array_like of bool, optional
  230. Elements to compare for the minimum. See `~numpy.ufunc.reduce`
  231. for details.
  232. .. versionadded:: 1.22.0
  233. Returns
  234. -------
  235. nanmin : ndarray
  236. An array with the same shape as `a`, with the specified axis
  237. removed. If `a` is a 0-d array, or if axis is None, an ndarray
  238. scalar is returned. The same dtype as `a` is returned.
  239. See Also
  240. --------
  241. nanmax :
  242. The maximum value of an array along a given axis, ignoring any NaNs.
  243. amin :
  244. The minimum value of an array along a given axis, propagating any NaNs.
  245. fmin :
  246. Element-wise minimum of two arrays, ignoring any NaNs.
  247. minimum :
  248. Element-wise minimum of two arrays, propagating any NaNs.
  249. isnan :
  250. Shows which elements are Not a Number (NaN).
  251. isfinite:
  252. Shows which elements are neither NaN nor infinity.
  253. amax, fmax, maximum
  254. Notes
  255. -----
  256. NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
  257. (IEEE 754). This means that Not a Number is not equivalent to infinity.
  258. Positive infinity is treated as a very large number and negative
  259. infinity is treated as a very small (i.e. negative) number.
  260. If the input has a integer type the function is equivalent to np.min.
  261. Examples
  262. --------
  263. >>> a = np.array([[1, 2], [3, np.nan]])
  264. >>> np.nanmin(a)
  265. 1.0
  266. >>> np.nanmin(a, axis=0)
  267. array([1., 2.])
  268. >>> np.nanmin(a, axis=1)
  269. array([1., 3.])
  270. When positive infinity and negative infinity are present:
  271. >>> np.nanmin([1, 2, np.nan, np.inf])
  272. 1.0
  273. >>> np.nanmin([1, 2, np.nan, np.NINF])
  274. -inf
  275. """
  276. kwargs = {}
  277. if keepdims is not np._NoValue:
  278. kwargs['keepdims'] = keepdims
  279. if initial is not np._NoValue:
  280. kwargs['initial'] = initial
  281. if where is not np._NoValue:
  282. kwargs['where'] = where
  283. if type(a) is np.ndarray and a.dtype != np.object_:
  284. # Fast, but not safe for subclasses of ndarray, or object arrays,
  285. # which do not implement isnan (gh-9009), or fmin correctly (gh-8975)
  286. res = np.fmin.reduce(a, axis=axis, out=out, **kwargs)
  287. if np.isnan(res).any():
  288. warnings.warn("All-NaN slice encountered", RuntimeWarning,
  289. stacklevel=3)
  290. else:
  291. # Slow, but safe for subclasses of ndarray
  292. a, mask = _replace_nan(a, +np.inf)
  293. res = np.amin(a, axis=axis, out=out, **kwargs)
  294. if mask is None:
  295. return res
  296. # Check for all-NaN axis
  297. kwargs.pop("initial", None)
  298. mask = np.all(mask, axis=axis, **kwargs)
  299. if np.any(mask):
  300. res = _copyto(res, np.nan, mask)
  301. warnings.warn("All-NaN axis encountered", RuntimeWarning,
  302. stacklevel=3)
  303. return res
  304. def _nanmax_dispatcher(a, axis=None, out=None, keepdims=None,
  305. initial=None, where=None):
  306. return (a, out)
  307. @array_function_dispatch(_nanmax_dispatcher)
  308. def nanmax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue,
  309. where=np._NoValue):
  310. """
  311. Return the maximum of an array or maximum along an axis, ignoring any
  312. NaNs. When all-NaN slices are encountered a ``RuntimeWarning`` is
  313. raised and NaN is returned for that slice.
  314. Parameters
  315. ----------
  316. a : array_like
  317. Array containing numbers whose maximum is desired. If `a` is not an
  318. array, a conversion is attempted.
  319. axis : {int, tuple of int, None}, optional
  320. Axis or axes along which the maximum is computed. The default is to compute
  321. the maximum of the flattened array.
  322. out : ndarray, optional
  323. Alternate output array in which to place the result. The default
  324. is ``None``; if provided, it must have the same shape as the
  325. expected output, but the type will be cast if necessary. See
  326. :ref:`ufuncs-output-type` for more details.
  327. .. versionadded:: 1.8.0
  328. keepdims : bool, optional
  329. If this is set to True, the axes which are reduced are left
  330. in the result as dimensions with size one. With this option,
  331. the result will broadcast correctly against the original `a`.
  332. If the value is anything but the default, then
  333. `keepdims` will be passed through to the `max` method
  334. of sub-classes of `ndarray`. If the sub-classes methods
  335. does not implement `keepdims` any exceptions will be raised.
  336. .. versionadded:: 1.8.0
  337. initial : scalar, optional
  338. The minimum value of an output element. Must be present to allow
  339. computation on empty slice. See `~numpy.ufunc.reduce` for details.
  340. .. versionadded:: 1.22.0
  341. where : array_like of bool, optional
  342. Elements to compare for the maximum. See `~numpy.ufunc.reduce`
  343. for details.
  344. .. versionadded:: 1.22.0
  345. Returns
  346. -------
  347. nanmax : ndarray
  348. An array with the same shape as `a`, with the specified axis removed.
  349. If `a` is a 0-d array, or if axis is None, an ndarray scalar is
  350. returned. The same dtype as `a` is returned.
  351. See Also
  352. --------
  353. nanmin :
  354. The minimum value of an array along a given axis, ignoring any NaNs.
  355. amax :
  356. The maximum value of an array along a given axis, propagating any NaNs.
  357. fmax :
  358. Element-wise maximum of two arrays, ignoring any NaNs.
  359. maximum :
  360. Element-wise maximum of two arrays, propagating any NaNs.
  361. isnan :
  362. Shows which elements are Not a Number (NaN).
  363. isfinite:
  364. Shows which elements are neither NaN nor infinity.
  365. amin, fmin, minimum
  366. Notes
  367. -----
  368. NumPy uses the IEEE Standard for Binary Floating-Point for Arithmetic
  369. (IEEE 754). This means that Not a Number is not equivalent to infinity.
  370. Positive infinity is treated as a very large number and negative
  371. infinity is treated as a very small (i.e. negative) number.
  372. If the input has a integer type the function is equivalent to np.max.
  373. Examples
  374. --------
  375. >>> a = np.array([[1, 2], [3, np.nan]])
  376. >>> np.nanmax(a)
  377. 3.0
  378. >>> np.nanmax(a, axis=0)
  379. array([3., 2.])
  380. >>> np.nanmax(a, axis=1)
  381. array([2., 3.])
  382. When positive infinity and negative infinity are present:
  383. >>> np.nanmax([1, 2, np.nan, np.NINF])
  384. 2.0
  385. >>> np.nanmax([1, 2, np.nan, np.inf])
  386. inf
  387. """
  388. kwargs = {}
  389. if keepdims is not np._NoValue:
  390. kwargs['keepdims'] = keepdims
  391. if initial is not np._NoValue:
  392. kwargs['initial'] = initial
  393. if where is not np._NoValue:
  394. kwargs['where'] = where
  395. if type(a) is np.ndarray and a.dtype != np.object_:
  396. # Fast, but not safe for subclasses of ndarray, or object arrays,
  397. # which do not implement isnan (gh-9009), or fmax correctly (gh-8975)
  398. res = np.fmax.reduce(a, axis=axis, out=out, **kwargs)
  399. if np.isnan(res).any():
  400. warnings.warn("All-NaN slice encountered", RuntimeWarning,
  401. stacklevel=3)
  402. else:
  403. # Slow, but safe for subclasses of ndarray
  404. a, mask = _replace_nan(a, -np.inf)
  405. res = np.amax(a, axis=axis, out=out, **kwargs)
  406. if mask is None:
  407. return res
  408. # Check for all-NaN axis
  409. kwargs.pop("initial", None)
  410. mask = np.all(mask, axis=axis, **kwargs)
  411. if np.any(mask):
  412. res = _copyto(res, np.nan, mask)
  413. warnings.warn("All-NaN axis encountered", RuntimeWarning,
  414. stacklevel=3)
  415. return res
  416. def _nanargmin_dispatcher(a, axis=None, out=None, *, keepdims=None):
  417. return (a,)
  418. @array_function_dispatch(_nanargmin_dispatcher)
  419. def nanargmin(a, axis=None, out=None, *, keepdims=np._NoValue):
  420. """
  421. Return the indices of the minimum values in the specified axis ignoring
  422. NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the results
  423. cannot be trusted if a slice contains only NaNs and Infs.
  424. Parameters
  425. ----------
  426. a : array_like
  427. Input data.
  428. axis : int, optional
  429. Axis along which to operate. By default flattened input is used.
  430. out : array, optional
  431. If provided, the result will be inserted into this array. It should
  432. be of the appropriate shape and dtype.
  433. .. versionadded:: 1.22.0
  434. keepdims : bool, optional
  435. If this is set to True, the axes which are reduced are left
  436. in the result as dimensions with size one. With this option,
  437. the result will broadcast correctly against the array.
  438. .. versionadded:: 1.22.0
  439. Returns
  440. -------
  441. index_array : ndarray
  442. An array of indices or a single index value.
  443. See Also
  444. --------
  445. argmin, nanargmax
  446. Examples
  447. --------
  448. >>> a = np.array([[np.nan, 4], [2, 3]])
  449. >>> np.argmin(a)
  450. 0
  451. >>> np.nanargmin(a)
  452. 2
  453. >>> np.nanargmin(a, axis=0)
  454. array([1, 1])
  455. >>> np.nanargmin(a, axis=1)
  456. array([1, 0])
  457. """
  458. a, mask = _replace_nan(a, np.inf)
  459. if mask is not None:
  460. mask = np.all(mask, axis=axis)
  461. if np.any(mask):
  462. raise ValueError("All-NaN slice encountered")
  463. res = np.argmin(a, axis=axis, out=out, keepdims=keepdims)
  464. return res
  465. def _nanargmax_dispatcher(a, axis=None, out=None, *, keepdims=None):
  466. return (a,)
  467. @array_function_dispatch(_nanargmax_dispatcher)
  468. def nanargmax(a, axis=None, out=None, *, keepdims=np._NoValue):
  469. """
  470. Return the indices of the maximum values in the specified axis ignoring
  471. NaNs. For all-NaN slices ``ValueError`` is raised. Warning: the
  472. results cannot be trusted if a slice contains only NaNs and -Infs.
  473. Parameters
  474. ----------
  475. a : array_like
  476. Input data.
  477. axis : int, optional
  478. Axis along which to operate. By default flattened input is used.
  479. out : array, optional
  480. If provided, the result will be inserted into this array. It should
  481. be of the appropriate shape and dtype.
  482. .. versionadded:: 1.22.0
  483. keepdims : bool, optional
  484. If this is set to True, the axes which are reduced are left
  485. in the result as dimensions with size one. With this option,
  486. the result will broadcast correctly against the array.
  487. .. versionadded:: 1.22.0
  488. Returns
  489. -------
  490. index_array : ndarray
  491. An array of indices or a single index value.
  492. See Also
  493. --------
  494. argmax, nanargmin
  495. Examples
  496. --------
  497. >>> a = np.array([[np.nan, 4], [2, 3]])
  498. >>> np.argmax(a)
  499. 0
  500. >>> np.nanargmax(a)
  501. 1
  502. >>> np.nanargmax(a, axis=0)
  503. array([1, 0])
  504. >>> np.nanargmax(a, axis=1)
  505. array([1, 1])
  506. """
  507. a, mask = _replace_nan(a, -np.inf)
  508. if mask is not None:
  509. mask = np.all(mask, axis=axis)
  510. if np.any(mask):
  511. raise ValueError("All-NaN slice encountered")
  512. res = np.argmax(a, axis=axis, out=out, keepdims=keepdims)
  513. return res
  514. def _nansum_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
  515. initial=None, where=None):
  516. return (a, out)
  517. @array_function_dispatch(_nansum_dispatcher)
  518. def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
  519. initial=np._NoValue, where=np._NoValue):
  520. """
  521. Return the sum of array elements over a given axis treating Not a
  522. Numbers (NaNs) as zero.
  523. In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN or
  524. empty. In later versions zero is returned.
  525. Parameters
  526. ----------
  527. a : array_like
  528. Array containing numbers whose sum is desired. If `a` is not an
  529. array, a conversion is attempted.
  530. axis : {int, tuple of int, None}, optional
  531. Axis or axes along which the sum is computed. The default is to compute the
  532. sum of the flattened array.
  533. dtype : data-type, optional
  534. The type of the returned array and of the accumulator in which the
  535. elements are summed. By default, the dtype of `a` is used. An
  536. exception is when `a` has an integer type with less precision than
  537. the platform (u)intp. In that case, the default will be either
  538. (u)int32 or (u)int64 depending on whether the platform is 32 or 64
  539. bits. For inexact inputs, dtype must be inexact.
  540. .. versionadded:: 1.8.0
  541. out : ndarray, optional
  542. Alternate output array in which to place the result. The default
  543. is ``None``. If provided, it must have the same shape as the
  544. expected output, but the type will be cast if necessary. See
  545. :ref:`ufuncs-output-type` for more details. The casting of NaN to integer
  546. can yield unexpected results.
  547. .. versionadded:: 1.8.0
  548. keepdims : bool, optional
  549. If this is set to True, the axes which are reduced are left
  550. in the result as dimensions with size one. With this option,
  551. the result will broadcast correctly against the original `a`.
  552. If the value is anything but the default, then
  553. `keepdims` will be passed through to the `mean` or `sum` methods
  554. of sub-classes of `ndarray`. If the sub-classes methods
  555. does not implement `keepdims` any exceptions will be raised.
  556. .. versionadded:: 1.8.0
  557. initial : scalar, optional
  558. Starting value for the sum. See `~numpy.ufunc.reduce` for details.
  559. .. versionadded:: 1.22.0
  560. where : array_like of bool, optional
  561. Elements to include in the sum. See `~numpy.ufunc.reduce` for details.
  562. .. versionadded:: 1.22.0
  563. Returns
  564. -------
  565. nansum : ndarray.
  566. A new array holding the result is returned unless `out` is
  567. specified, in which it is returned. The result has the same
  568. size as `a`, and the same shape as `a` if `axis` is not None
  569. or `a` is a 1-d array.
  570. See Also
  571. --------
  572. numpy.sum : Sum across array propagating NaNs.
  573. isnan : Show which elements are NaN.
  574. isfinite : Show which elements are not NaN or +/-inf.
  575. Notes
  576. -----
  577. If both positive and negative infinity are present, the sum will be Not
  578. A Number (NaN).
  579. Examples
  580. --------
  581. >>> np.nansum(1)
  582. 1
  583. >>> np.nansum([1])
  584. 1
  585. >>> np.nansum([1, np.nan])
  586. 1.0
  587. >>> a = np.array([[1, 1], [1, np.nan]])
  588. >>> np.nansum(a)
  589. 3.0
  590. >>> np.nansum(a, axis=0)
  591. array([2., 1.])
  592. >>> np.nansum([1, np.nan, np.inf])
  593. inf
  594. >>> np.nansum([1, np.nan, np.NINF])
  595. -inf
  596. >>> from numpy.testing import suppress_warnings
  597. >>> with suppress_warnings() as sup:
  598. ... sup.filter(RuntimeWarning)
  599. ... np.nansum([1, np.nan, np.inf, -np.inf]) # both +/- infinity present
  600. nan
  601. """
  602. a, mask = _replace_nan(a, 0)
  603. return np.sum(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
  604. initial=initial, where=where)
  605. def _nanprod_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
  606. initial=None, where=None):
  607. return (a, out)
  608. @array_function_dispatch(_nanprod_dispatcher)
  609. def nanprod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
  610. initial=np._NoValue, where=np._NoValue):
  611. """
  612. Return the product of array elements over a given axis treating Not a
  613. Numbers (NaNs) as ones.
  614. One is returned for slices that are all-NaN or empty.
  615. .. versionadded:: 1.10.0
  616. Parameters
  617. ----------
  618. a : array_like
  619. Array containing numbers whose product is desired. If `a` is not an
  620. array, a conversion is attempted.
  621. axis : {int, tuple of int, None}, optional
  622. Axis or axes along which the product is computed. The default is to compute
  623. the product of the flattened array.
  624. dtype : data-type, optional
  625. The type of the returned array and of the accumulator in which the
  626. elements are summed. By default, the dtype of `a` is used. An
  627. exception is when `a` has an integer type with less precision than
  628. the platform (u)intp. In that case, the default will be either
  629. (u)int32 or (u)int64 depending on whether the platform is 32 or 64
  630. bits. For inexact inputs, dtype must be inexact.
  631. out : ndarray, optional
  632. Alternate output array in which to place the result. The default
  633. is ``None``. If provided, it must have the same shape as the
  634. expected output, but the type will be cast if necessary. See
  635. :ref:`ufuncs-output-type` for more details. The casting of NaN to integer
  636. can yield unexpected results.
  637. keepdims : bool, optional
  638. If True, the axes which are reduced are left in the result as
  639. dimensions with size one. With this option, the result will
  640. broadcast correctly against the original `arr`.
  641. initial : scalar, optional
  642. The starting value for this product. See `~numpy.ufunc.reduce`
  643. for details.
  644. .. versionadded:: 1.22.0
  645. where : array_like of bool, optional
  646. Elements to include in the product. See `~numpy.ufunc.reduce`
  647. for details.
  648. .. versionadded:: 1.22.0
  649. Returns
  650. -------
  651. nanprod : ndarray
  652. A new array holding the result is returned unless `out` is
  653. specified, in which case it is returned.
  654. See Also
  655. --------
  656. numpy.prod : Product across array propagating NaNs.
  657. isnan : Show which elements are NaN.
  658. Examples
  659. --------
  660. >>> np.nanprod(1)
  661. 1
  662. >>> np.nanprod([1])
  663. 1
  664. >>> np.nanprod([1, np.nan])
  665. 1.0
  666. >>> a = np.array([[1, 2], [3, np.nan]])
  667. >>> np.nanprod(a)
  668. 6.0
  669. >>> np.nanprod(a, axis=0)
  670. array([3., 2.])
  671. """
  672. a, mask = _replace_nan(a, 1)
  673. return np.prod(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
  674. initial=initial, where=where)
  675. def _nancumsum_dispatcher(a, axis=None, dtype=None, out=None):
  676. return (a, out)
  677. @array_function_dispatch(_nancumsum_dispatcher)
  678. def nancumsum(a, axis=None, dtype=None, out=None):
  679. """
  680. Return the cumulative sum of array elements over a given axis treating Not a
  681. Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are
  682. encountered and leading NaNs are replaced by zeros.
  683. Zeros are returned for slices that are all-NaN or empty.
  684. .. versionadded:: 1.12.0
  685. Parameters
  686. ----------
  687. a : array_like
  688. Input array.
  689. axis : int, optional
  690. Axis along which the cumulative sum is computed. The default
  691. (None) is to compute the cumsum over the flattened array.
  692. dtype : dtype, optional
  693. Type of the returned array and of the accumulator in which the
  694. elements are summed. If `dtype` is not specified, it defaults
  695. to the dtype of `a`, unless `a` has an integer dtype with a
  696. precision less than that of the default platform integer. In
  697. that case, the default platform integer is used.
  698. out : ndarray, optional
  699. Alternative output array in which to place the result. It must
  700. have the same shape and buffer length as the expected output
  701. but the type will be cast if necessary. See :ref:`ufuncs-output-type` for
  702. more details.
  703. Returns
  704. -------
  705. nancumsum : ndarray.
  706. A new array holding the result is returned unless `out` is
  707. specified, in which it is returned. The result has the same
  708. size as `a`, and the same shape as `a` if `axis` is not None
  709. or `a` is a 1-d array.
  710. See Also
  711. --------
  712. numpy.cumsum : Cumulative sum across array propagating NaNs.
  713. isnan : Show which elements are NaN.
  714. Examples
  715. --------
  716. >>> np.nancumsum(1)
  717. array([1])
  718. >>> np.nancumsum([1])
  719. array([1])
  720. >>> np.nancumsum([1, np.nan])
  721. array([1., 1.])
  722. >>> a = np.array([[1, 2], [3, np.nan]])
  723. >>> np.nancumsum(a)
  724. array([1., 3., 6., 6.])
  725. >>> np.nancumsum(a, axis=0)
  726. array([[1., 2.],
  727. [4., 2.]])
  728. >>> np.nancumsum(a, axis=1)
  729. array([[1., 3.],
  730. [3., 3.]])
  731. """
  732. a, mask = _replace_nan(a, 0)
  733. return np.cumsum(a, axis=axis, dtype=dtype, out=out)
  734. def _nancumprod_dispatcher(a, axis=None, dtype=None, out=None):
  735. return (a, out)
  736. @array_function_dispatch(_nancumprod_dispatcher)
  737. def nancumprod(a, axis=None, dtype=None, out=None):
  738. """
  739. Return the cumulative product of array elements over a given axis treating Not a
  740. Numbers (NaNs) as one. The cumulative product does not change when NaNs are
  741. encountered and leading NaNs are replaced by ones.
  742. Ones are returned for slices that are all-NaN or empty.
  743. .. versionadded:: 1.12.0
  744. Parameters
  745. ----------
  746. a : array_like
  747. Input array.
  748. axis : int, optional
  749. Axis along which the cumulative product is computed. By default
  750. the input is flattened.
  751. dtype : dtype, optional
  752. Type of the returned array, as well as of the accumulator in which
  753. the elements are multiplied. If *dtype* is not specified, it
  754. defaults to the dtype of `a`, unless `a` has an integer dtype with
  755. a precision less than that of the default platform integer. In
  756. that case, the default platform integer is used instead.
  757. out : ndarray, optional
  758. Alternative output array in which to place the result. It must
  759. have the same shape and buffer length as the expected output
  760. but the type of the resulting values will be cast if necessary.
  761. Returns
  762. -------
  763. nancumprod : ndarray
  764. A new array holding the result is returned unless `out` is
  765. specified, in which case it is returned.
  766. See Also
  767. --------
  768. numpy.cumprod : Cumulative product across array propagating NaNs.
  769. isnan : Show which elements are NaN.
  770. Examples
  771. --------
  772. >>> np.nancumprod(1)
  773. array([1])
  774. >>> np.nancumprod([1])
  775. array([1])
  776. >>> np.nancumprod([1, np.nan])
  777. array([1., 1.])
  778. >>> a = np.array([[1, 2], [3, np.nan]])
  779. >>> np.nancumprod(a)
  780. array([1., 2., 6., 6.])
  781. >>> np.nancumprod(a, axis=0)
  782. array([[1., 2.],
  783. [3., 2.]])
  784. >>> np.nancumprod(a, axis=1)
  785. array([[1., 2.],
  786. [3., 3.]])
  787. """
  788. a, mask = _replace_nan(a, 1)
  789. return np.cumprod(a, axis=axis, dtype=dtype, out=out)
  790. def _nanmean_dispatcher(a, axis=None, dtype=None, out=None, keepdims=None,
  791. *, where=None):
  792. return (a, out)
  793. @array_function_dispatch(_nanmean_dispatcher)
  794. def nanmean(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
  795. *, where=np._NoValue):
  796. """
  797. Compute the arithmetic mean along the specified axis, ignoring NaNs.
  798. Returns the average of the array elements. The average is taken over
  799. the flattened array by default, otherwise over the specified axis.
  800. `float64` intermediate and return values are used for integer inputs.
  801. For all-NaN slices, NaN is returned and a `RuntimeWarning` is raised.
  802. .. versionadded:: 1.8.0
  803. Parameters
  804. ----------
  805. a : array_like
  806. Array containing numbers whose mean is desired. If `a` is not an
  807. array, a conversion is attempted.
  808. axis : {int, tuple of int, None}, optional
  809. Axis or axes along which the means are computed. The default is to compute
  810. the mean of the flattened array.
  811. dtype : data-type, optional
  812. Type to use in computing the mean. For integer inputs, the default
  813. is `float64`; for inexact inputs, it is the same as the input
  814. dtype.
  815. out : ndarray, optional
  816. Alternate output array in which to place the result. The default
  817. is ``None``; if provided, it must have the same shape as the
  818. expected output, but the type will be cast if necessary. See
  819. :ref:`ufuncs-output-type` for more details.
  820. keepdims : bool, optional
  821. If this is set to True, the axes which are reduced are left
  822. in the result as dimensions with size one. With this option,
  823. the result will broadcast correctly against the original `a`.
  824. If the value is anything but the default, then
  825. `keepdims` will be passed through to the `mean` or `sum` methods
  826. of sub-classes of `ndarray`. If the sub-classes methods
  827. does not implement `keepdims` any exceptions will be raised.
  828. where : array_like of bool, optional
  829. Elements to include in the mean. See `~numpy.ufunc.reduce` for details.
  830. .. versionadded:: 1.22.0
  831. Returns
  832. -------
  833. m : ndarray, see dtype parameter above
  834. If `out=None`, returns a new array containing the mean values,
  835. otherwise a reference to the output array is returned. Nan is
  836. returned for slices that contain only NaNs.
  837. See Also
  838. --------
  839. average : Weighted average
  840. mean : Arithmetic mean taken while not ignoring NaNs
  841. var, nanvar
  842. Notes
  843. -----
  844. The arithmetic mean is the sum of the non-NaN elements along the axis
  845. divided by the number of non-NaN elements.
  846. Note that for floating-point input, the mean is computed using the same
  847. precision the input has. Depending on the input data, this can cause
  848. the results to be inaccurate, especially for `float32`. Specifying a
  849. higher-precision accumulator using the `dtype` keyword can alleviate
  850. this issue.
  851. Examples
  852. --------
  853. >>> a = np.array([[1, np.nan], [3, 4]])
  854. >>> np.nanmean(a)
  855. 2.6666666666666665
  856. >>> np.nanmean(a, axis=0)
  857. array([2., 4.])
  858. >>> np.nanmean(a, axis=1)
  859. array([1., 3.5]) # may vary
  860. """
  861. arr, mask = _replace_nan(a, 0)
  862. if mask is None:
  863. return np.mean(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
  864. where=where)
  865. if dtype is not None:
  866. dtype = np.dtype(dtype)
  867. if dtype is not None and not issubclass(dtype.type, np.inexact):
  868. raise TypeError("If a is inexact, then dtype must be inexact")
  869. if out is not None and not issubclass(out.dtype.type, np.inexact):
  870. raise TypeError("If a is inexact, then out must be inexact")
  871. cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=keepdims,
  872. where=where)
  873. tot = np.sum(arr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
  874. where=where)
  875. avg = _divide_by_count(tot, cnt, out=out)
  876. isbad = (cnt == 0)
  877. if isbad.any():
  878. warnings.warn("Mean of empty slice", RuntimeWarning, stacklevel=3)
  879. # NaN is the only possible bad value, so no further
  880. # action is needed to handle bad results.
  881. return avg
  882. def _nanmedian1d(arr1d, overwrite_input=False):
  883. """
  884. Private function for rank 1 arrays. Compute the median ignoring NaNs.
  885. See nanmedian for parameter usage
  886. """
  887. arr1d_parsed, overwrite_input = _remove_nan_1d(
  888. arr1d, overwrite_input=overwrite_input,
  889. )
  890. if arr1d_parsed.size == 0:
  891. # Ensure that a nan-esque scalar of the appropriate type (and unit)
  892. # is returned for `timedelta64` and `complexfloating`
  893. return arr1d[-1]
  894. return np.median(arr1d_parsed, overwrite_input=overwrite_input)
  895. def _nanmedian(a, axis=None, out=None, overwrite_input=False):
  896. """
  897. Private function that doesn't support extended axis or keepdims.
  898. These methods are extended to this function using _ureduce
  899. See nanmedian for parameter usage
  900. """
  901. if axis is None or a.ndim == 1:
  902. part = a.ravel()
  903. if out is None:
  904. return _nanmedian1d(part, overwrite_input)
  905. else:
  906. out[...] = _nanmedian1d(part, overwrite_input)
  907. return out
  908. else:
  909. # for small medians use sort + indexing which is still faster than
  910. # apply_along_axis
  911. # benchmarked with shuffled (50, 50, x) containing a few NaN
  912. if a.shape[axis] < 600:
  913. return _nanmedian_small(a, axis, out, overwrite_input)
  914. result = np.apply_along_axis(_nanmedian1d, axis, a, overwrite_input)
  915. if out is not None:
  916. out[...] = result
  917. return result
  918. def _nanmedian_small(a, axis=None, out=None, overwrite_input=False):
  919. """
  920. sort + indexing median, faster for small medians along multiple
  921. dimensions due to the high overhead of apply_along_axis
  922. see nanmedian for parameter usage
  923. """
  924. a = np.ma.masked_array(a, np.isnan(a))
  925. m = np.ma.median(a, axis=axis, overwrite_input=overwrite_input)
  926. for i in range(np.count_nonzero(m.mask.ravel())):
  927. warnings.warn("All-NaN slice encountered", RuntimeWarning,
  928. stacklevel=4)
  929. fill_value = np.timedelta64("NaT") if m.dtype.kind == "m" else np.nan
  930. if out is not None:
  931. out[...] = m.filled(fill_value)
  932. return out
  933. return m.filled(fill_value)
  934. def _nanmedian_dispatcher(
  935. a, axis=None, out=None, overwrite_input=None, keepdims=None):
  936. return (a, out)
  937. @array_function_dispatch(_nanmedian_dispatcher)
  938. def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=np._NoValue):
  939. """
  940. Compute the median along the specified axis, while ignoring NaNs.
  941. Returns the median of the array elements.
  942. .. versionadded:: 1.9.0
  943. Parameters
  944. ----------
  945. a : array_like
  946. Input array or object that can be converted to an array.
  947. axis : {int, sequence of int, None}, optional
  948. Axis or axes along which the medians are computed. The default
  949. is to compute the median along a flattened version of the array.
  950. A sequence of axes is supported since version 1.9.0.
  951. out : ndarray, optional
  952. Alternative output array in which to place the result. It must
  953. have the same shape and buffer length as the expected output,
  954. but the type (of the output) will be cast if necessary.
  955. overwrite_input : bool, optional
  956. If True, then allow use of memory of input array `a` for
  957. calculations. The input array will be modified by the call to
  958. `median`. This will save memory when you do not need to preserve
  959. the contents of the input array. Treat the input as undefined,
  960. but it will probably be fully or partially sorted. Default is
  961. False. If `overwrite_input` is ``True`` and `a` is not already an
  962. `ndarray`, an error will be raised.
  963. keepdims : bool, optional
  964. If this is set to True, the axes which are reduced are left
  965. in the result as dimensions with size one. With this option,
  966. the result will broadcast correctly against the original `a`.
  967. If this is anything but the default value it will be passed
  968. through (in the special case of an empty array) to the
  969. `mean` function of the underlying array. If the array is
  970. a sub-class and `mean` does not have the kwarg `keepdims` this
  971. will raise a RuntimeError.
  972. Returns
  973. -------
  974. median : ndarray
  975. A new array holding the result. If the input contains integers
  976. or floats smaller than ``float64``, then the output data-type is
  977. ``np.float64``. Otherwise, the data-type of the output is the
  978. same as that of the input. If `out` is specified, that array is
  979. returned instead.
  980. See Also
  981. --------
  982. mean, median, percentile
  983. Notes
  984. -----
  985. Given a vector ``V`` of length ``N``, the median of ``V`` is the
  986. middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
  987. ``V_sorted[(N-1)/2]``, when ``N`` is odd and the average of the two
  988. middle values of ``V_sorted`` when ``N`` is even.
  989. Examples
  990. --------
  991. >>> a = np.array([[10.0, 7, 4], [3, 2, 1]])
  992. >>> a[0, 1] = np.nan
  993. >>> a
  994. array([[10., nan, 4.],
  995. [ 3., 2., 1.]])
  996. >>> np.median(a)
  997. nan
  998. >>> np.nanmedian(a)
  999. 3.0
  1000. >>> np.nanmedian(a, axis=0)
  1001. array([6.5, 2. , 2.5])
  1002. >>> np.median(a, axis=1)
  1003. array([nan, 2.])
  1004. >>> b = a.copy()
  1005. >>> np.nanmedian(b, axis=1, overwrite_input=True)
  1006. array([7., 2.])
  1007. >>> assert not np.all(a==b)
  1008. >>> b = a.copy()
  1009. >>> np.nanmedian(b, axis=None, overwrite_input=True)
  1010. 3.0
  1011. >>> assert not np.all(a==b)
  1012. """
  1013. a = np.asanyarray(a)
  1014. # apply_along_axis in _nanmedian doesn't handle empty arrays well,
  1015. # so deal them upfront
  1016. if a.size == 0:
  1017. return np.nanmean(a, axis, out=out, keepdims=keepdims)
  1018. return function_base._ureduce(a, func=_nanmedian, keepdims=keepdims,
  1019. axis=axis, out=out,
  1020. overwrite_input=overwrite_input)
  1021. def _nanpercentile_dispatcher(
  1022. a, q, axis=None, out=None, overwrite_input=None,
  1023. method=None, keepdims=None, *, interpolation=None):
  1024. return (a, q, out)
  1025. @array_function_dispatch(_nanpercentile_dispatcher)
  1026. def nanpercentile(
  1027. a,
  1028. q,
  1029. axis=None,
  1030. out=None,
  1031. overwrite_input=False,
  1032. method="linear",
  1033. keepdims=np._NoValue,
  1034. *,
  1035. interpolation=None,
  1036. ):
  1037. """
  1038. Compute the qth percentile of the data along the specified axis,
  1039. while ignoring nan values.
  1040. Returns the qth percentile(s) of the array elements.
  1041. .. versionadded:: 1.9.0
  1042. Parameters
  1043. ----------
  1044. a : array_like
  1045. Input array or object that can be converted to an array, containing
  1046. nan values to be ignored.
  1047. q : array_like of float
  1048. Percentile or sequence of percentiles to compute, which must be
  1049. between 0 and 100 inclusive.
  1050. axis : {int, tuple of int, None}, optional
  1051. Axis or axes along which the percentiles are computed. The default
  1052. is to compute the percentile(s) along a flattened version of the
  1053. array.
  1054. out : ndarray, optional
  1055. Alternative output array in which to place the result. It must have
  1056. the same shape and buffer length as the expected output, but the
  1057. type (of the output) will be cast if necessary.
  1058. overwrite_input : bool, optional
  1059. If True, then allow the input array `a` to be modified by
  1060. intermediate calculations, to save memory. In this case, the
  1061. contents of the input `a` after this function completes is
  1062. undefined.
  1063. method : str, optional
  1064. This parameter specifies the method to use for estimating the
  1065. percentile. There are many different methods, some unique to NumPy.
  1066. See the notes for explanation. The options sorted by their R type
  1067. as summarized in the H&F paper [1]_ are:
  1068. 1. 'inverted_cdf'
  1069. 2. 'averaged_inverted_cdf'
  1070. 3. 'closest_observation'
  1071. 4. 'interpolated_inverted_cdf'
  1072. 5. 'hazen'
  1073. 6. 'weibull'
  1074. 7. 'linear' (default)
  1075. 8. 'median_unbiased'
  1076. 9. 'normal_unbiased'
  1077. The first three methods are discontinuous. NumPy further defines the
  1078. following discontinuous variations of the default 'linear' (7.) option:
  1079. * 'lower'
  1080. * 'higher',
  1081. * 'midpoint'
  1082. * 'nearest'
  1083. .. versionchanged:: 1.22.0
  1084. This argument was previously called "interpolation" and only
  1085. offered the "linear" default and last four options.
  1086. keepdims : bool, optional
  1087. If this is set to True, the axes which are reduced are left in
  1088. the result as dimensions with size one. With this option, the
  1089. result will broadcast correctly against the original array `a`.
  1090. If this is anything but the default value it will be passed
  1091. through (in the special case of an empty array) to the
  1092. `mean` function of the underlying array. If the array is
  1093. a sub-class and `mean` does not have the kwarg `keepdims` this
  1094. will raise a RuntimeError.
  1095. interpolation : str, optional
  1096. Deprecated name for the method keyword argument.
  1097. .. deprecated:: 1.22.0
  1098. Returns
  1099. -------
  1100. percentile : scalar or ndarray
  1101. If `q` is a single percentile and `axis=None`, then the result
  1102. is a scalar. If multiple percentiles are given, first axis of
  1103. the result corresponds to the percentiles. The other axes are
  1104. the axes that remain after the reduction of `a`. If the input
  1105. contains integers or floats smaller than ``float64``, the output
  1106. data-type is ``float64``. Otherwise, the output data-type is the
  1107. same as that of the input. If `out` is specified, that array is
  1108. returned instead.
  1109. See Also
  1110. --------
  1111. nanmean
  1112. nanmedian : equivalent to ``nanpercentile(..., 50)``
  1113. percentile, median, mean
  1114. nanquantile : equivalent to nanpercentile, except q in range [0, 1].
  1115. Notes
  1116. -----
  1117. For more information please see `numpy.percentile`
  1118. Examples
  1119. --------
  1120. >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
  1121. >>> a[0][1] = np.nan
  1122. >>> a
  1123. array([[10., nan, 4.],
  1124. [ 3., 2., 1.]])
  1125. >>> np.percentile(a, 50)
  1126. nan
  1127. >>> np.nanpercentile(a, 50)
  1128. 3.0
  1129. >>> np.nanpercentile(a, 50, axis=0)
  1130. array([6.5, 2. , 2.5])
  1131. >>> np.nanpercentile(a, 50, axis=1, keepdims=True)
  1132. array([[7.],
  1133. [2.]])
  1134. >>> m = np.nanpercentile(a, 50, axis=0)
  1135. >>> out = np.zeros_like(m)
  1136. >>> np.nanpercentile(a, 50, axis=0, out=out)
  1137. array([6.5, 2. , 2.5])
  1138. >>> m
  1139. array([6.5, 2. , 2.5])
  1140. >>> b = a.copy()
  1141. >>> np.nanpercentile(b, 50, axis=1, overwrite_input=True)
  1142. array([7., 2.])
  1143. >>> assert not np.all(a==b)
  1144. References
  1145. ----------
  1146. .. [1] R. J. Hyndman and Y. Fan,
  1147. "Sample quantiles in statistical packages,"
  1148. The American Statistician, 50(4), pp. 361-365, 1996
  1149. """
  1150. if interpolation is not None:
  1151. method = function_base._check_interpolation_as_method(
  1152. method, interpolation, "nanpercentile")
  1153. a = np.asanyarray(a)
  1154. q = np.true_divide(q, 100.0)
  1155. # undo any decay that the ufunc performed (see gh-13105)
  1156. q = np.asanyarray(q)
  1157. if not function_base._quantile_is_valid(q):
  1158. raise ValueError("Percentiles must be in the range [0, 100]")
  1159. return _nanquantile_unchecked(
  1160. a, q, axis, out, overwrite_input, method, keepdims)
  1161. def _nanquantile_dispatcher(a, q, axis=None, out=None, overwrite_input=None,
  1162. method=None, keepdims=None, *, interpolation=None):
  1163. return (a, q, out)
  1164. @array_function_dispatch(_nanquantile_dispatcher)
  1165. def nanquantile(
  1166. a,
  1167. q,
  1168. axis=None,
  1169. out=None,
  1170. overwrite_input=False,
  1171. method="linear",
  1172. keepdims=np._NoValue,
  1173. *,
  1174. interpolation=None,
  1175. ):
  1176. """
  1177. Compute the qth quantile of the data along the specified axis,
  1178. while ignoring nan values.
  1179. Returns the qth quantile(s) of the array elements.
  1180. .. versionadded:: 1.15.0
  1181. Parameters
  1182. ----------
  1183. a : array_like
  1184. Input array or object that can be converted to an array, containing
  1185. nan values to be ignored
  1186. q : array_like of float
  1187. Quantile or sequence of quantiles to compute, which must be between
  1188. 0 and 1 inclusive.
  1189. axis : {int, tuple of int, None}, optional
  1190. Axis or axes along which the quantiles are computed. The
  1191. default is to compute the quantile(s) along a flattened
  1192. version of the array.
  1193. out : ndarray, optional
  1194. Alternative output array in which to place the result. It must
  1195. have the same shape and buffer length as the expected output,
  1196. but the type (of the output) will be cast if necessary.
  1197. overwrite_input : bool, optional
  1198. If True, then allow the input array `a` to be modified by intermediate
  1199. calculations, to save memory. In this case, the contents of the input
  1200. `a` after this function completes is undefined.
  1201. method : str, optional
  1202. This parameter specifies the method to use for estimating the
  1203. quantile. There are many different methods, some unique to NumPy.
  1204. See the notes for explanation. The options sorted by their R type
  1205. as summarized in the H&F paper [1]_ are:
  1206. 1. 'inverted_cdf'
  1207. 2. 'averaged_inverted_cdf'
  1208. 3. 'closest_observation'
  1209. 4. 'interpolated_inverted_cdf'
  1210. 5. 'hazen'
  1211. 6. 'weibull'
  1212. 7. 'linear' (default)
  1213. 8. 'median_unbiased'
  1214. 9. 'normal_unbiased'
  1215. The first three methods are discontinuous. NumPy further defines the
  1216. following discontinuous variations of the default 'linear' (7.) option:
  1217. * 'lower'
  1218. * 'higher',
  1219. * 'midpoint'
  1220. * 'nearest'
  1221. .. versionchanged:: 1.22.0
  1222. This argument was previously called "interpolation" and only
  1223. offered the "linear" default and last four options.
  1224. keepdims : bool, optional
  1225. If this is set to True, the axes which are reduced are left in
  1226. the result as dimensions with size one. With this option, the
  1227. result will broadcast correctly against the original array `a`.
  1228. If this is anything but the default value it will be passed
  1229. through (in the special case of an empty array) to the
  1230. `mean` function of the underlying array. If the array is
  1231. a sub-class and `mean` does not have the kwarg `keepdims` this
  1232. will raise a RuntimeError.
  1233. interpolation : str, optional
  1234. Deprecated name for the method keyword argument.
  1235. .. deprecated:: 1.22.0
  1236. Returns
  1237. -------
  1238. quantile : scalar or ndarray
  1239. If `q` is a single percentile and `axis=None`, then the result
  1240. is a scalar. If multiple quantiles are given, first axis of
  1241. the result corresponds to the quantiles. The other axes are
  1242. the axes that remain after the reduction of `a`. If the input
  1243. contains integers or floats smaller than ``float64``, the output
  1244. data-type is ``float64``. Otherwise, the output data-type is the
  1245. same as that of the input. If `out` is specified, that array is
  1246. returned instead.
  1247. See Also
  1248. --------
  1249. quantile
  1250. nanmean, nanmedian
  1251. nanmedian : equivalent to ``nanquantile(..., 0.5)``
  1252. nanpercentile : same as nanquantile, but with q in the range [0, 100].
  1253. Notes
  1254. -----
  1255. For more information please see `numpy.quantile`
  1256. Examples
  1257. --------
  1258. >>> a = np.array([[10., 7., 4.], [3., 2., 1.]])
  1259. >>> a[0][1] = np.nan
  1260. >>> a
  1261. array([[10., nan, 4.],
  1262. [ 3., 2., 1.]])
  1263. >>> np.quantile(a, 0.5)
  1264. nan
  1265. >>> np.nanquantile(a, 0.5)
  1266. 3.0
  1267. >>> np.nanquantile(a, 0.5, axis=0)
  1268. array([6.5, 2. , 2.5])
  1269. >>> np.nanquantile(a, 0.5, axis=1, keepdims=True)
  1270. array([[7.],
  1271. [2.]])
  1272. >>> m = np.nanquantile(a, 0.5, axis=0)
  1273. >>> out = np.zeros_like(m)
  1274. >>> np.nanquantile(a, 0.5, axis=0, out=out)
  1275. array([6.5, 2. , 2.5])
  1276. >>> m
  1277. array([6.5, 2. , 2.5])
  1278. >>> b = a.copy()
  1279. >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True)
  1280. array([7., 2.])
  1281. >>> assert not np.all(a==b)
  1282. References
  1283. ----------
  1284. .. [1] R. J. Hyndman and Y. Fan,
  1285. "Sample quantiles in statistical packages,"
  1286. The American Statistician, 50(4), pp. 361-365, 1996
  1287. """
  1288. if interpolation is not None:
  1289. method = function_base._check_interpolation_as_method(
  1290. method, interpolation, "nanquantile")
  1291. a = np.asanyarray(a)
  1292. q = np.asanyarray(q)
  1293. if not function_base._quantile_is_valid(q):
  1294. raise ValueError("Quantiles must be in the range [0, 1]")
  1295. return _nanquantile_unchecked(
  1296. a, q, axis, out, overwrite_input, method, keepdims)
  1297. def _nanquantile_unchecked(
  1298. a,
  1299. q,
  1300. axis=None,
  1301. out=None,
  1302. overwrite_input=False,
  1303. method="linear",
  1304. keepdims=np._NoValue,
  1305. ):
  1306. """Assumes that q is in [0, 1], and is an ndarray"""
  1307. # apply_along_axis in _nanpercentile doesn't handle empty arrays well,
  1308. # so deal them upfront
  1309. if a.size == 0:
  1310. return np.nanmean(a, axis, out=out, keepdims=keepdims)
  1311. return function_base._ureduce(a,
  1312. func=_nanquantile_ureduce_func,
  1313. q=q,
  1314. keepdims=keepdims,
  1315. axis=axis,
  1316. out=out,
  1317. overwrite_input=overwrite_input,
  1318. method=method)
  1319. def _nanquantile_ureduce_func(a, q, axis=None, out=None, overwrite_input=False,
  1320. method="linear"):
  1321. """
  1322. Private function that doesn't support extended axis or keepdims.
  1323. These methods are extended to this function using _ureduce
  1324. See nanpercentile for parameter usage
  1325. """
  1326. if axis is None or a.ndim == 1:
  1327. part = a.ravel()
  1328. result = _nanquantile_1d(part, q, overwrite_input, method)
  1329. else:
  1330. result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
  1331. overwrite_input, method)
  1332. # apply_along_axis fills in collapsed axis with results.
  1333. # Move that axis to the beginning to match percentile's
  1334. # convention.
  1335. if q.ndim != 0:
  1336. result = np.moveaxis(result, axis, 0)
  1337. if out is not None:
  1338. out[...] = result
  1339. return result
  1340. def _nanquantile_1d(arr1d, q, overwrite_input=False, method="linear"):
  1341. """
  1342. Private function for rank 1 arrays. Compute quantile ignoring NaNs.
  1343. See nanpercentile for parameter usage
  1344. """
  1345. arr1d, overwrite_input = _remove_nan_1d(arr1d,
  1346. overwrite_input=overwrite_input)
  1347. if arr1d.size == 0:
  1348. # convert to scalar
  1349. return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()]
  1350. return function_base._quantile_unchecked(
  1351. arr1d, q, overwrite_input=overwrite_input, method=method)
  1352. def _nanvar_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
  1353. keepdims=None, *, where=None):
  1354. return (a, out)
  1355. @array_function_dispatch(_nanvar_dispatcher)
  1356. def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
  1357. *, where=np._NoValue):
  1358. """
  1359. Compute the variance along the specified axis, while ignoring NaNs.
  1360. Returns the variance of the array elements, a measure of the spread of
  1361. a distribution. The variance is computed for the flattened array by
  1362. default, otherwise over the specified axis.
  1363. For all-NaN slices or slices with zero degrees of freedom, NaN is
  1364. returned and a `RuntimeWarning` is raised.
  1365. .. versionadded:: 1.8.0
  1366. Parameters
  1367. ----------
  1368. a : array_like
  1369. Array containing numbers whose variance is desired. If `a` is not an
  1370. array, a conversion is attempted.
  1371. axis : {int, tuple of int, None}, optional
  1372. Axis or axes along which the variance is computed. The default is to compute
  1373. the variance of the flattened array.
  1374. dtype : data-type, optional
  1375. Type to use in computing the variance. For arrays of integer type
  1376. the default is `float64`; for arrays of float types it is the same as
  1377. the array type.
  1378. out : ndarray, optional
  1379. Alternate output array in which to place the result. It must have
  1380. the same shape as the expected output, but the type is cast if
  1381. necessary.
  1382. ddof : int, optional
  1383. "Delta Degrees of Freedom": the divisor used in the calculation is
  1384. ``N - ddof``, where ``N`` represents the number of non-NaN
  1385. elements. By default `ddof` is zero.
  1386. keepdims : bool, optional
  1387. If this is set to True, the axes which are reduced are left
  1388. in the result as dimensions with size one. With this option,
  1389. the result will broadcast correctly against the original `a`.
  1390. where : array_like of bool, optional
  1391. Elements to include in the variance. See `~numpy.ufunc.reduce` for
  1392. details.
  1393. .. versionadded:: 1.22.0
  1394. Returns
  1395. -------
  1396. variance : ndarray, see dtype parameter above
  1397. If `out` is None, return a new array containing the variance,
  1398. otherwise return a reference to the output array. If ddof is >= the
  1399. number of non-NaN elements in a slice or the slice contains only
  1400. NaNs, then the result for that slice is NaN.
  1401. See Also
  1402. --------
  1403. std : Standard deviation
  1404. mean : Average
  1405. var : Variance while not ignoring NaNs
  1406. nanstd, nanmean
  1407. :ref:`ufuncs-output-type`
  1408. Notes
  1409. -----
  1410. The variance is the average of the squared deviations from the mean,
  1411. i.e., ``var = mean(abs(x - x.mean())**2)``.
  1412. The mean is normally calculated as ``x.sum() / N``, where ``N = len(x)``.
  1413. If, however, `ddof` is specified, the divisor ``N - ddof`` is used
  1414. instead. In standard statistical practice, ``ddof=1`` provides an
  1415. unbiased estimator of the variance of a hypothetical infinite
  1416. population. ``ddof=0`` provides a maximum likelihood estimate of the
  1417. variance for normally distributed variables.
  1418. Note that for complex numbers, the absolute value is taken before
  1419. squaring, so that the result is always real and nonnegative.
  1420. For floating-point input, the variance is computed using the same
  1421. precision the input has. Depending on the input data, this can cause
  1422. the results to be inaccurate, especially for `float32` (see example
  1423. below). Specifying a higher-accuracy accumulator using the ``dtype``
  1424. keyword can alleviate this issue.
  1425. For this function to work on sub-classes of ndarray, they must define
  1426. `sum` with the kwarg `keepdims`
  1427. Examples
  1428. --------
  1429. >>> a = np.array([[1, np.nan], [3, 4]])
  1430. >>> np.nanvar(a)
  1431. 1.5555555555555554
  1432. >>> np.nanvar(a, axis=0)
  1433. array([1., 0.])
  1434. >>> np.nanvar(a, axis=1)
  1435. array([0., 0.25]) # may vary
  1436. """
  1437. arr, mask = _replace_nan(a, 0)
  1438. if mask is None:
  1439. return np.var(arr, axis=axis, dtype=dtype, out=out, ddof=ddof,
  1440. keepdims=keepdims, where=where)
  1441. if dtype is not None:
  1442. dtype = np.dtype(dtype)
  1443. if dtype is not None and not issubclass(dtype.type, np.inexact):
  1444. raise TypeError("If a is inexact, then dtype must be inexact")
  1445. if out is not None and not issubclass(out.dtype.type, np.inexact):
  1446. raise TypeError("If a is inexact, then out must be inexact")
  1447. # Compute mean
  1448. if type(arr) is np.matrix:
  1449. _keepdims = np._NoValue
  1450. else:
  1451. _keepdims = True
  1452. # we need to special case matrix for reverse compatibility
  1453. # in order for this to work, these sums need to be called with
  1454. # keepdims=True, however matrix now raises an error in this case, but
  1455. # the reason that it drops the keepdims kwarg is to force keepdims=True
  1456. # so this used to work by serendipity.
  1457. cnt = np.sum(~mask, axis=axis, dtype=np.intp, keepdims=_keepdims,
  1458. where=where)
  1459. avg = np.sum(arr, axis=axis, dtype=dtype, keepdims=_keepdims, where=where)
  1460. avg = _divide_by_count(avg, cnt)
  1461. # Compute squared deviation from mean.
  1462. np.subtract(arr, avg, out=arr, casting='unsafe', where=where)
  1463. arr = _copyto(arr, 0, mask)
  1464. if issubclass(arr.dtype.type, np.complexfloating):
  1465. sqr = np.multiply(arr, arr.conj(), out=arr, where=where).real
  1466. else:
  1467. sqr = np.multiply(arr, arr, out=arr, where=where)
  1468. # Compute variance.
  1469. var = np.sum(sqr, axis=axis, dtype=dtype, out=out, keepdims=keepdims,
  1470. where=where)
  1471. # Precaution against reduced object arrays
  1472. try:
  1473. var_ndim = var.ndim
  1474. except AttributeError:
  1475. var_ndim = np.ndim(var)
  1476. if var_ndim < cnt.ndim:
  1477. # Subclasses of ndarray may ignore keepdims, so check here.
  1478. cnt = cnt.squeeze(axis)
  1479. dof = cnt - ddof
  1480. var = _divide_by_count(var, dof)
  1481. isbad = (dof <= 0)
  1482. if np.any(isbad):
  1483. warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning,
  1484. stacklevel=3)
  1485. # NaN, inf, or negative numbers are all possible bad
  1486. # values, so explicitly replace them with NaN.
  1487. var = _copyto(var, np.nan, isbad)
  1488. return var
  1489. def _nanstd_dispatcher(a, axis=None, dtype=None, out=None, ddof=None,
  1490. keepdims=None, *, where=None):
  1491. return (a, out)
  1492. @array_function_dispatch(_nanstd_dispatcher)
  1493. def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue,
  1494. *, where=np._NoValue):
  1495. """
  1496. Compute the standard deviation along the specified axis, while
  1497. ignoring NaNs.
  1498. Returns the standard deviation, a measure of the spread of a
  1499. distribution, of the non-NaN array elements. The standard deviation is
  1500. computed for the flattened array by default, otherwise over the
  1501. specified axis.
  1502. For all-NaN slices or slices with zero degrees of freedom, NaN is
  1503. returned and a `RuntimeWarning` is raised.
  1504. .. versionadded:: 1.8.0
  1505. Parameters
  1506. ----------
  1507. a : array_like
  1508. Calculate the standard deviation of the non-NaN values.
  1509. axis : {int, tuple of int, None}, optional
  1510. Axis or axes along which the standard deviation is computed. The default is
  1511. to compute the standard deviation of the flattened array.
  1512. dtype : dtype, optional
  1513. Type to use in computing the standard deviation. For arrays of
  1514. integer type the default is float64, for arrays of float types it
  1515. is the same as the array type.
  1516. out : ndarray, optional
  1517. Alternative output array in which to place the result. It must have
  1518. the same shape as the expected output but the type (of the
  1519. calculated values) will be cast if necessary.
  1520. ddof : int, optional
  1521. Means Delta Degrees of Freedom. The divisor used in calculations
  1522. is ``N - ddof``, where ``N`` represents the number of non-NaN
  1523. elements. By default `ddof` is zero.
  1524. keepdims : bool, optional
  1525. If this is set to True, the axes which are reduced are left
  1526. in the result as dimensions with size one. With this option,
  1527. the result will broadcast correctly against the original `a`.
  1528. If this value is anything but the default it is passed through
  1529. as-is to the relevant functions of the sub-classes. If these
  1530. functions do not have a `keepdims` kwarg, a RuntimeError will
  1531. be raised.
  1532. where : array_like of bool, optional
  1533. Elements to include in the standard deviation.
  1534. See `~numpy.ufunc.reduce` for details.
  1535. .. versionadded:: 1.22.0
  1536. Returns
  1537. -------
  1538. standard_deviation : ndarray, see dtype parameter above.
  1539. If `out` is None, return a new array containing the standard
  1540. deviation, otherwise return a reference to the output array. If
  1541. ddof is >= the number of non-NaN elements in a slice or the slice
  1542. contains only NaNs, then the result for that slice is NaN.
  1543. See Also
  1544. --------
  1545. var, mean, std
  1546. nanvar, nanmean
  1547. :ref:`ufuncs-output-type`
  1548. Notes
  1549. -----
  1550. The standard deviation is the square root of the average of the squared
  1551. deviations from the mean: ``std = sqrt(mean(abs(x - x.mean())**2))``.
  1552. The average squared deviation is normally calculated as
  1553. ``x.sum() / N``, where ``N = len(x)``. If, however, `ddof` is
  1554. specified, the divisor ``N - ddof`` is used instead. In standard
  1555. statistical practice, ``ddof=1`` provides an unbiased estimator of the
  1556. variance of the infinite population. ``ddof=0`` provides a maximum
  1557. likelihood estimate of the variance for normally distributed variables.
  1558. The standard deviation computed in this function is the square root of
  1559. the estimated variance, so even with ``ddof=1``, it will not be an
  1560. unbiased estimate of the standard deviation per se.
  1561. Note that, for complex numbers, `std` takes the absolute value before
  1562. squaring, so that the result is always real and nonnegative.
  1563. For floating-point input, the *std* is computed using the same
  1564. precision the input has. Depending on the input data, this can cause
  1565. the results to be inaccurate, especially for float32 (see example
  1566. below). Specifying a higher-accuracy accumulator using the `dtype`
  1567. keyword can alleviate this issue.
  1568. Examples
  1569. --------
  1570. >>> a = np.array([[1, np.nan], [3, 4]])
  1571. >>> np.nanstd(a)
  1572. 1.247219128924647
  1573. >>> np.nanstd(a, axis=0)
  1574. array([1., 0.])
  1575. >>> np.nanstd(a, axis=1)
  1576. array([0., 0.5]) # may vary
  1577. """
  1578. var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  1579. keepdims=keepdims, where=where)
  1580. if isinstance(var, np.ndarray):
  1581. std = np.sqrt(var, out=var)
  1582. elif hasattr(var, 'dtype'):
  1583. std = var.dtype.type(np.sqrt(var))
  1584. else:
  1585. std = np.sqrt(var)
  1586. return std