masked.py 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391
  1. from __future__ import annotations
  2. from typing import (
  3. TYPE_CHECKING,
  4. Any,
  5. Iterator,
  6. Literal,
  7. Sequence,
  8. TypeVar,
  9. overload,
  10. )
  11. import warnings
  12. import numpy as np
  13. from pandas._libs import (
  14. lib,
  15. missing as libmissing,
  16. )
  17. from pandas._libs.tslibs import (
  18. get_unit_from_dtype,
  19. is_supported_unit,
  20. )
  21. from pandas._typing import (
  22. ArrayLike,
  23. AstypeArg,
  24. AxisInt,
  25. DtypeObj,
  26. NpDtype,
  27. PositionalIndexer,
  28. Scalar,
  29. ScalarIndexer,
  30. SequenceIndexer,
  31. Shape,
  32. npt,
  33. )
  34. from pandas.errors import AbstractMethodError
  35. from pandas.util._decorators import doc
  36. from pandas.util._validators import validate_fillna_kwargs
  37. from pandas.core.dtypes.base import ExtensionDtype
  38. from pandas.core.dtypes.common import (
  39. is_bool,
  40. is_bool_dtype,
  41. is_datetime64_dtype,
  42. is_dtype_equal,
  43. is_float_dtype,
  44. is_integer_dtype,
  45. is_list_like,
  46. is_object_dtype,
  47. is_scalar,
  48. is_string_dtype,
  49. pandas_dtype,
  50. )
  51. from pandas.core.dtypes.dtypes import BaseMaskedDtype
  52. from pandas.core.dtypes.inference import is_array_like
  53. from pandas.core.dtypes.missing import (
  54. array_equivalent,
  55. is_valid_na_for_dtype,
  56. isna,
  57. notna,
  58. )
  59. from pandas.core import (
  60. algorithms as algos,
  61. arraylike,
  62. missing,
  63. nanops,
  64. ops,
  65. )
  66. from pandas.core.algorithms import (
  67. factorize_array,
  68. isin,
  69. take,
  70. )
  71. from pandas.core.array_algos import (
  72. masked_accumulations,
  73. masked_reductions,
  74. )
  75. from pandas.core.array_algos.quantile import quantile_with_mask
  76. from pandas.core.arraylike import OpsMixin
  77. from pandas.core.arrays import ExtensionArray
  78. from pandas.core.construction import ensure_wrapped_if_datetimelike
  79. from pandas.core.indexers import check_array_indexer
  80. from pandas.core.ops import invalid_comparison
  81. if TYPE_CHECKING:
  82. from pandas import Series
  83. from pandas.core.arrays import BooleanArray
  84. from pandas._typing import (
  85. NumpySorter,
  86. NumpyValueArrayLike,
  87. )
  88. from pandas.compat.numpy import function as nv
  89. BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")
  90. class BaseMaskedArray(OpsMixin, ExtensionArray):
  91. """
  92. Base class for masked arrays (which use _data and _mask to store the data).
  93. numpy based
  94. """
  95. # The value used to fill '_data' to avoid upcasting
  96. _internal_fill_value: Scalar
  97. # our underlying data and mask are each ndarrays
  98. _data: np.ndarray
  99. _mask: npt.NDArray[np.bool_]
  100. # Fill values used for any/all
  101. _truthy_value = Scalar # bool(_truthy_value) = True
  102. _falsey_value = Scalar # bool(_falsey_value) = False
  103. def __init__(
  104. self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
  105. ) -> None:
  106. # values is supposed to already be validated in the subclass
  107. if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
  108. raise TypeError(
  109. "mask should be boolean numpy array. Use "
  110. "the 'pd.array' function instead"
  111. )
  112. if values.shape != mask.shape:
  113. raise ValueError("values.shape must match mask.shape")
  114. if copy:
  115. values = values.copy()
  116. mask = mask.copy()
  117. self._data = values
  118. self._mask = mask
  119. @classmethod
  120. def _from_sequence(
  121. cls: type[BaseMaskedArrayT], scalars, *, dtype=None, copy: bool = False
  122. ) -> BaseMaskedArrayT:
  123. values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
  124. return cls(values, mask)
  125. @property
  126. def dtype(self) -> BaseMaskedDtype:
  127. raise AbstractMethodError(self)
  128. @overload
  129. def __getitem__(self, item: ScalarIndexer) -> Any:
  130. ...
  131. @overload
  132. def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT:
  133. ...
  134. def __getitem__(
  135. self: BaseMaskedArrayT, item: PositionalIndexer
  136. ) -> BaseMaskedArrayT | Any:
  137. item = check_array_indexer(self, item)
  138. newmask = self._mask[item]
  139. if is_bool(newmask):
  140. # This is a scalar indexing
  141. if newmask:
  142. return self.dtype.na_value
  143. return self._data[item]
  144. return type(self)(self._data[item], newmask)
  145. @doc(ExtensionArray.fillna)
  146. def fillna(
  147. self: BaseMaskedArrayT, value=None, method=None, limit=None
  148. ) -> BaseMaskedArrayT:
  149. value, method = validate_fillna_kwargs(value, method)
  150. mask = self._mask
  151. if is_array_like(value):
  152. if len(value) != len(self):
  153. raise ValueError(
  154. f"Length of 'value' does not match. Got ({len(value)}) "
  155. f" expected {len(self)}"
  156. )
  157. value = value[mask]
  158. if mask.any():
  159. if method is not None:
  160. func = missing.get_fill_func(method, ndim=self.ndim)
  161. npvalues = self._data.copy().T
  162. new_mask = mask.copy().T
  163. func(npvalues, limit=limit, mask=new_mask)
  164. return type(self)(npvalues.T, new_mask.T)
  165. else:
  166. # fill with value
  167. new_values = self.copy()
  168. new_values[mask] = value
  169. else:
  170. new_values = self.copy()
  171. return new_values
  172. @classmethod
  173. def _coerce_to_array(
  174. cls, values, *, dtype: DtypeObj, copy: bool = False
  175. ) -> tuple[np.ndarray, np.ndarray]:
  176. raise AbstractMethodError(cls)
  177. def _validate_setitem_value(self, value):
  178. """
  179. Check if we have a scalar that we can cast losslessly.
  180. Raises
  181. ------
  182. TypeError
  183. """
  184. kind = self.dtype.kind
  185. # TODO: get this all from np_can_hold_element?
  186. if kind == "b":
  187. if lib.is_bool(value):
  188. return value
  189. elif kind == "f":
  190. if lib.is_integer(value) or lib.is_float(value):
  191. return value
  192. else:
  193. if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()):
  194. return value
  195. # TODO: unsigned checks
  196. # Note: without the "str" here, the f-string rendering raises in
  197. # py38 builds.
  198. raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")
  199. def __setitem__(self, key, value) -> None:
  200. key = check_array_indexer(self, key)
  201. if is_scalar(value):
  202. if is_valid_na_for_dtype(value, self.dtype):
  203. self._mask[key] = True
  204. else:
  205. value = self._validate_setitem_value(value)
  206. self._data[key] = value
  207. self._mask[key] = False
  208. return
  209. value, mask = self._coerce_to_array(value, dtype=self.dtype)
  210. self._data[key] = value
  211. self._mask[key] = mask
  212. def __iter__(self) -> Iterator:
  213. if self.ndim == 1:
  214. if not self._hasna:
  215. for val in self._data:
  216. yield val
  217. else:
  218. na_value = self.dtype.na_value
  219. for isna_, val in zip(self._mask, self._data):
  220. if isna_:
  221. yield na_value
  222. else:
  223. yield val
  224. else:
  225. for i in range(len(self)):
  226. yield self[i]
  227. def __len__(self) -> int:
  228. return len(self._data)
  229. @property
  230. def shape(self) -> Shape:
  231. return self._data.shape
  232. @property
  233. def ndim(self) -> int:
  234. return self._data.ndim
  235. def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT:
  236. data = self._data.swapaxes(axis1, axis2)
  237. mask = self._mask.swapaxes(axis1, axis2)
  238. return type(self)(data, mask)
  239. def delete(self: BaseMaskedArrayT, loc, axis: AxisInt = 0) -> BaseMaskedArrayT:
  240. data = np.delete(self._data, loc, axis=axis)
  241. mask = np.delete(self._mask, loc, axis=axis)
  242. return type(self)(data, mask)
  243. def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
  244. data = self._data.reshape(*args, **kwargs)
  245. mask = self._mask.reshape(*args, **kwargs)
  246. return type(self)(data, mask)
  247. def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
  248. # TODO: need to make sure we have the same order for data/mask
  249. data = self._data.ravel(*args, **kwargs)
  250. mask = self._mask.ravel(*args, **kwargs)
  251. return type(self)(data, mask)
  252. @property
  253. def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
  254. return type(self)(self._data.T, self._mask.T)
  255. def round(self, decimals: int = 0, *args, **kwargs):
  256. """
  257. Round each value in the array a to the given number of decimals.
  258. Parameters
  259. ----------
  260. decimals : int, default 0
  261. Number of decimal places to round to. If decimals is negative,
  262. it specifies the number of positions to the left of the decimal point.
  263. *args, **kwargs
  264. Additional arguments and keywords have no effect but might be
  265. accepted for compatibility with NumPy.
  266. Returns
  267. -------
  268. NumericArray
  269. Rounded values of the NumericArray.
  270. See Also
  271. --------
  272. numpy.around : Round values of an np.array.
  273. DataFrame.round : Round values of a DataFrame.
  274. Series.round : Round values of a Series.
  275. """
  276. nv.validate_round(args, kwargs)
  277. values = np.round(self._data, decimals=decimals, **kwargs)
  278. # Usually we'll get same type as self, but ndarray[bool] casts to float
  279. return self._maybe_mask_result(values, self._mask.copy())
  280. # ------------------------------------------------------------------
  281. # Unary Methods
  282. def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
  283. return type(self)(~self._data, self._mask.copy())
  284. def __neg__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
  285. return type(self)(-self._data, self._mask.copy())
  286. def __pos__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
  287. return self.copy()
  288. def __abs__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
  289. return type(self)(abs(self._data), self._mask.copy())
  290. # ------------------------------------------------------------------
  291. def to_numpy(
  292. self,
  293. dtype: npt.DTypeLike | None = None,
  294. copy: bool = False,
  295. na_value: object = lib.no_default,
  296. ) -> np.ndarray:
  297. """
  298. Convert to a NumPy Array.
  299. By default converts to an object-dtype NumPy array. Specify the `dtype` and
  300. `na_value` keywords to customize the conversion.
  301. Parameters
  302. ----------
  303. dtype : dtype, default object
  304. The numpy dtype to convert to.
  305. copy : bool, default False
  306. Whether to ensure that the returned value is a not a view on
  307. the array. Note that ``copy=False`` does not *ensure* that
  308. ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
  309. a copy is made, even if not strictly necessary. This is typically
  310. only possible when no missing values are present and `dtype`
  311. is the equivalent numpy dtype.
  312. na_value : scalar, optional
  313. Scalar missing value indicator to use in numpy array. Defaults
  314. to the native missing value indicator of this array (pd.NA).
  315. Returns
  316. -------
  317. numpy.ndarray
  318. Examples
  319. --------
  320. An object-dtype is the default result
  321. >>> a = pd.array([True, False, pd.NA], dtype="boolean")
  322. >>> a.to_numpy()
  323. array([True, False, <NA>], dtype=object)
  324. When no missing values are present, an equivalent dtype can be used.
  325. >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
  326. array([ True, False])
  327. >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
  328. array([1, 2])
  329. However, requesting such dtype will raise a ValueError if
  330. missing values are present and the default missing value :attr:`NA`
  331. is used.
  332. >>> a = pd.array([True, False, pd.NA], dtype="boolean")
  333. >>> a
  334. <BooleanArray>
  335. [True, False, <NA>]
  336. Length: 3, dtype: boolean
  337. >>> a.to_numpy(dtype="bool")
  338. Traceback (most recent call last):
  339. ...
  340. ValueError: cannot convert to bool numpy array in presence of missing values
  341. Specify a valid `na_value` instead
  342. >>> a.to_numpy(dtype="bool", na_value=False)
  343. array([ True, False, False])
  344. """
  345. if na_value is lib.no_default:
  346. na_value = libmissing.NA
  347. if dtype is None:
  348. dtype = object
  349. if self._hasna:
  350. if (
  351. not is_object_dtype(dtype)
  352. and not is_string_dtype(dtype)
  353. and na_value is libmissing.NA
  354. ):
  355. raise ValueError(
  356. f"cannot convert to '{dtype}'-dtype NumPy array "
  357. "with missing values. Specify an appropriate 'na_value' "
  358. "for this dtype."
  359. )
  360. # don't pass copy to astype -> always need a copy since we are mutating
  361. with warnings.catch_warnings():
  362. warnings.filterwarnings("ignore", category=RuntimeWarning)
  363. data = self._data.astype(dtype)
  364. data[self._mask] = na_value
  365. else:
  366. with warnings.catch_warnings():
  367. warnings.filterwarnings("ignore", category=RuntimeWarning)
  368. data = self._data.astype(dtype, copy=copy)
  369. return data
  370. @doc(ExtensionArray.tolist)
  371. def tolist(self):
  372. if self.ndim > 1:
  373. return [x.tolist() for x in self]
  374. dtype = None if self._hasna else self._data.dtype
  375. return self.to_numpy(dtype=dtype).tolist()
  376. @overload
  377. def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
  378. ...
  379. @overload
  380. def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
  381. ...
  382. @overload
  383. def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
  384. ...
  385. def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
  386. dtype = pandas_dtype(dtype)
  387. if is_dtype_equal(dtype, self.dtype):
  388. if copy:
  389. return self.copy()
  390. return self
  391. # if we are astyping to another nullable masked dtype, we can fastpath
  392. if isinstance(dtype, BaseMaskedDtype):
  393. # TODO deal with NaNs for FloatingArray case
  394. with warnings.catch_warnings():
  395. warnings.filterwarnings("ignore", category=RuntimeWarning)
  396. # TODO: Is rounding what we want long term?
  397. data = self._data.astype(dtype.numpy_dtype, copy=copy)
  398. # mask is copied depending on whether the data was copied, and
  399. # not directly depending on the `copy` keyword
  400. mask = self._mask if data is self._data else self._mask.copy()
  401. cls = dtype.construct_array_type()
  402. return cls(data, mask, copy=False)
  403. if isinstance(dtype, ExtensionDtype):
  404. eacls = dtype.construct_array_type()
  405. return eacls._from_sequence(self, dtype=dtype, copy=copy)
  406. na_value: float | np.datetime64 | lib.NoDefault
  407. # coerce
  408. if is_float_dtype(dtype):
  409. # In astype, we consider dtype=float to also mean na_value=np.nan
  410. na_value = np.nan
  411. elif is_datetime64_dtype(dtype):
  412. na_value = np.datetime64("NaT")
  413. else:
  414. na_value = lib.no_default
  415. # to_numpy will also raise, but we get somewhat nicer exception messages here
  416. if is_integer_dtype(dtype) and self._hasna:
  417. raise ValueError("cannot convert NA to integer")
  418. if is_bool_dtype(dtype) and self._hasna:
  419. # careful: astype_nansafe converts np.nan to True
  420. raise ValueError("cannot convert float NaN to bool")
  421. data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
  422. return data
  423. __array_priority__ = 1000 # higher than ndarray so ops dispatch to us
  424. def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
  425. """
  426. the array interface, return my values
  427. We return an object array here to preserve our scalar values
  428. """
  429. return self.to_numpy(dtype=dtype)
  430. _HANDLED_TYPES: tuple[type, ...]
  431. def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
  432. # For MaskedArray inputs, we apply the ufunc to ._data
  433. # and mask the result.
  434. out = kwargs.get("out", ())
  435. for x in inputs + out:
  436. if not isinstance(x, self._HANDLED_TYPES + (BaseMaskedArray,)):
  437. return NotImplemented
  438. # for binary ops, use our custom dunder methods
  439. result = ops.maybe_dispatch_ufunc_to_dunder_op(
  440. self, ufunc, method, *inputs, **kwargs
  441. )
  442. if result is not NotImplemented:
  443. return result
  444. if "out" in kwargs:
  445. # e.g. test_ufunc_with_out
  446. return arraylike.dispatch_ufunc_with_out(
  447. self, ufunc, method, *inputs, **kwargs
  448. )
  449. if method == "reduce":
  450. result = arraylike.dispatch_reduction_ufunc(
  451. self, ufunc, method, *inputs, **kwargs
  452. )
  453. if result is not NotImplemented:
  454. return result
  455. mask = np.zeros(len(self), dtype=bool)
  456. inputs2 = []
  457. for x in inputs:
  458. if isinstance(x, BaseMaskedArray):
  459. mask |= x._mask
  460. inputs2.append(x._data)
  461. else:
  462. inputs2.append(x)
  463. def reconstruct(x):
  464. # we don't worry about scalar `x` here, since we
  465. # raise for reduce up above.
  466. from pandas.core.arrays import (
  467. BooleanArray,
  468. FloatingArray,
  469. IntegerArray,
  470. )
  471. if is_bool_dtype(x.dtype):
  472. m = mask.copy()
  473. return BooleanArray(x, m)
  474. elif is_integer_dtype(x.dtype):
  475. m = mask.copy()
  476. return IntegerArray(x, m)
  477. elif is_float_dtype(x.dtype):
  478. m = mask.copy()
  479. if x.dtype == np.float16:
  480. # reached in e.g. np.sqrt on BooleanArray
  481. # we don't support float16
  482. x = x.astype(np.float32)
  483. return FloatingArray(x, m)
  484. else:
  485. x[mask] = np.nan
  486. return x
  487. result = getattr(ufunc, method)(*inputs2, **kwargs)
  488. if ufunc.nout > 1:
  489. # e.g. np.divmod
  490. return tuple(reconstruct(x) for x in result)
  491. elif method == "reduce":
  492. # e.g. np.add.reduce; test_ufunc_reduce_raises
  493. if self._mask.any():
  494. return self._na_value
  495. return result
  496. else:
  497. return reconstruct(result)
  def __arrow_array__(self, type=None):
      """
      Convert myself into a pyarrow Array.

      The data and the mask are handed to ``pyarrow.array`` together with
      the optional ``type``.
      """
      import pyarrow as pa

      arrow_array = pa.array(self._data, mask=self._mask, type=type)
      return arrow_array
  504. @property
  505. def _hasna(self) -> bool:
  506. # Note: this is expensive right now! The hope is that we can
  507. # make this faster by having an optional mask, but not have to change
  508. # source code using it..
  509. # error: Incompatible return value type (got "bool_", expected "bool")
  510. return self._mask.any() # type: ignore[return-value]
  511. def _propagate_mask(
  512. self, mask: npt.NDArray[np.bool_] | None, other
  513. ) -> npt.NDArray[np.bool_]:
  514. if mask is None:
  515. mask = self._mask.copy() # TODO: need test for BooleanArray needing a copy
  516. if other is libmissing.NA:
  517. # GH#45421 don't alter inplace
  518. mask = mask | True
  519. elif is_list_like(other) and len(other) == len(mask):
  520. mask = mask | isna(other)
  521. else:
  522. mask = self._mask | mask
  523. # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]",
  524. # expected "ndarray[Any, dtype[bool_]]")
  525. return mask # type: ignore[return-value]
  526. def _arith_method(self, other, op):
  527. op_name = op.__name__
  528. omask = None
  529. if (
  530. not hasattr(other, "dtype")
  531. and is_list_like(other)
  532. and len(other) == len(self)
  533. ):
  534. # Try inferring masked dtype instead of casting to object
  535. inferred_dtype = lib.infer_dtype(other, skipna=True)
  536. if inferred_dtype == "integer":
  537. from pandas.core.arrays import IntegerArray
  538. other = IntegerArray._from_sequence(other)
  539. elif inferred_dtype in ["floating", "mixed-integer-float"]:
  540. from pandas.core.arrays import FloatingArray
  541. other = FloatingArray._from_sequence(other)
  542. elif inferred_dtype in ["boolean"]:
  543. from pandas.core.arrays import BooleanArray
  544. other = BooleanArray._from_sequence(other)
  545. if isinstance(other, BaseMaskedArray):
  546. other, omask = other._data, other._mask
  547. elif is_list_like(other):
  548. if not isinstance(other, ExtensionArray):
  549. other = np.asarray(other)
  550. if other.ndim > 1:
  551. raise NotImplementedError("can only perform ops with 1-d structures")
  552. # We wrap the non-masked arithmetic logic used for numpy dtypes
  553. # in Series/Index arithmetic ops.
  554. other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
  555. pd_op = ops.get_array_op(op)
  556. other = ensure_wrapped_if_datetimelike(other)
  557. if op_name in {"pow", "rpow"} and isinstance(other, np.bool_):
  558. # Avoid DeprecationWarning: In future, it will be an error
  559. # for 'np.bool_' scalars to be interpreted as an index
  560. # e.g. test_array_scalar_like_equivalence
  561. other = bool(other)
  562. mask = self._propagate_mask(omask, other)
  563. if other is libmissing.NA:
  564. result = np.ones_like(self._data)
  565. if self.dtype.kind == "b":
  566. if op_name in {
  567. "floordiv",
  568. "rfloordiv",
  569. "pow",
  570. "rpow",
  571. "truediv",
  572. "rtruediv",
  573. }:
  574. # GH#41165 Try to match non-masked Series behavior
  575. # This is still imperfect GH#46043
  576. raise NotImplementedError(
  577. f"operator '{op_name}' not implemented for bool dtypes"
  578. )
  579. if op_name in {"mod", "rmod"}:
  580. dtype = "int8"
  581. else:
  582. dtype = "bool"
  583. result = result.astype(dtype)
  584. elif "truediv" in op_name and self.dtype.kind != "f":
  585. # The actual data here doesn't matter since the mask
  586. # will be all-True, but since this is division, we want
  587. # to end up with floating dtype.
  588. result = result.astype(np.float64)
  589. else:
  590. # Make sure we do this before the "pow" mask checks
  591. # to get an expected exception message on shape mismatch.
  592. if self.dtype.kind in ["i", "u"] and op_name in ["floordiv", "mod"]:
  593. # TODO(GH#30188) ATM we don't match the behavior of non-masked
  594. # types with respect to floordiv-by-zero
  595. pd_op = op
  596. with np.errstate(all="ignore"):
  597. result = pd_op(self._data, other)
  598. if op_name == "pow":
  599. # 1 ** x is 1.
  600. mask = np.where((self._data == 1) & ~self._mask, False, mask)
  601. # x ** 0 is 1.
  602. if omask is not None:
  603. mask = np.where((other == 0) & ~omask, False, mask)
  604. elif other is not libmissing.NA:
  605. mask = np.where(other == 0, False, mask)
  606. elif op_name == "rpow":
  607. # 1 ** x is 1.
  608. if omask is not None:
  609. mask = np.where((other == 1) & ~omask, False, mask)
  610. elif other is not libmissing.NA:
  611. mask = np.where(other == 1, False, mask)
  612. # x ** 0 is 1.
  613. mask = np.where((self._data == 0) & ~self._mask, False, mask)
  614. return self._maybe_mask_result(result, mask)
  615. _logical_method = _arith_method
  616. def _cmp_method(self, other, op) -> BooleanArray:
  617. from pandas.core.arrays import BooleanArray
  618. mask = None
  619. if isinstance(other, BaseMaskedArray):
  620. other, mask = other._data, other._mask
  621. elif is_list_like(other):
  622. other = np.asarray(other)
  623. if other.ndim > 1:
  624. raise NotImplementedError("can only perform ops with 1-d structures")
  625. if len(self) != len(other):
  626. raise ValueError("Lengths must match to compare")
  627. if other is libmissing.NA:
  628. # numpy does not handle pd.NA well as "other" scalar (it returns
  629. # a scalar False instead of an array)
  630. # This may be fixed by NA.__array_ufunc__. Revisit this check
  631. # once that's implemented.
  632. result = np.zeros(self._data.shape, dtype="bool")
  633. mask = np.ones(self._data.shape, dtype="bool")
  634. else:
  635. with warnings.catch_warnings():
  636. # numpy may show a FutureWarning or DeprecationWarning:
  637. # elementwise comparison failed; returning scalar instead,
  638. # but in the future will perform elementwise comparison
  639. # before returning NotImplemented. We fall back to the correct
  640. # behavior today, so that should be fine to ignore.
  641. warnings.filterwarnings("ignore", "elementwise", FutureWarning)
  642. warnings.filterwarnings("ignore", "elementwise", DeprecationWarning)
  643. with np.errstate(all="ignore"):
  644. method = getattr(self._data, f"__{op.__name__}__")
  645. result = method(other)
  646. if result is NotImplemented:
  647. result = invalid_comparison(self._data, other, op)
  648. mask = self._propagate_mask(mask, other)
  649. return BooleanArray(result, mask, copy=False)
  650. def _maybe_mask_result(self, result, mask):
  651. """
  652. Parameters
  653. ----------
  654. result : array-like or tuple[array-like]
  655. mask : array-like bool
  656. """
  657. if isinstance(result, tuple):
  658. # i.e. divmod
  659. div, mod = result
  660. return (
  661. self._maybe_mask_result(div, mask),
  662. self._maybe_mask_result(mod, mask),
  663. )
  664. if is_float_dtype(result.dtype):
  665. from pandas.core.arrays import FloatingArray
  666. return FloatingArray(result, mask, copy=False)
  667. elif is_bool_dtype(result.dtype):
  668. from pandas.core.arrays import BooleanArray
  669. return BooleanArray(result, mask, copy=False)
  670. elif (
  671. isinstance(result.dtype, np.dtype)
  672. and result.dtype.kind == "m"
  673. and is_supported_unit(get_unit_from_dtype(result.dtype))
  674. ):
  675. # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
  676. from pandas.core.arrays import TimedeltaArray
  677. if not isinstance(result, TimedeltaArray):
  678. result = TimedeltaArray._simple_new(result, dtype=result.dtype)
  679. result[mask] = result.dtype.type("NaT")
  680. return result
  681. elif is_integer_dtype(result.dtype):
  682. from pandas.core.arrays import IntegerArray
  683. return IntegerArray(result, mask, copy=False)
  684. else:
  685. result[mask] = np.nan
  686. return result
  687. def isna(self) -> np.ndarray:
  688. return self._mask.copy()
  689. @property
  690. def _na_value(self):
  691. return self.dtype.na_value
  692. @property
  693. def nbytes(self) -> int:
  694. return self._data.nbytes + self._mask.nbytes
  695. @classmethod
  696. def _concat_same_type(
  697. cls: type[BaseMaskedArrayT],
  698. to_concat: Sequence[BaseMaskedArrayT],
  699. axis: AxisInt = 0,
  700. ) -> BaseMaskedArrayT:
  701. data = np.concatenate([x._data for x in to_concat], axis=axis)
  702. mask = np.concatenate([x._mask for x in to_concat], axis=axis)
  703. return cls(data, mask)
  def take(
      self: BaseMaskedArrayT,
      indexer,
      *,
      allow_fill: bool = False,
      fill_value: Scalar | None = None,
      axis: AxisInt = 0,
  ) -> BaseMaskedArrayT:
      """
      Select elements by position from both the data and the mask.

      Parameters
      ----------
      indexer : sequence of int
          Positions to take.
      allow_fill : bool, default False
          Forwarded to the underlying take. When True and ``fill_value`` is
          not NA, positions where ``indexer`` equals ``-1`` receive
          ``fill_value`` and are unmasked in the result.
      fill_value : scalar, optional
          Fill for the data buffer. When it is NA, the internal placeholder
          is used for the data and filled positions stay masked.
      axis : int, default 0
          Axis along which to take.

      Returns
      -------
      Same type as the caller, built from the taken data and mask.
      """
      # When the requested fill is NA the data buffer is filled with the
      # internal placeholder instead, to avoid upcasting.
      if isna(fill_value):
          data_fill_value = self._internal_fill_value
      else:
          data_fill_value = fill_value

      taken_data = algos.take(
          self._data,
          indexer,
          fill_value=data_fill_value,
          allow_fill=allow_fill,
          axis=axis,
      )
      taken_mask = algos.take(
          self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis
      )

      # An explicit fill only applies where the indexer is -1; values that
      # were already missing in the source remain missing.
      # TODO(jreback) what if we have a non-na float as a fill value?
      if allow_fill and notna(fill_value):
          filled_positions = np.asarray(indexer) == -1
          taken_data[filled_positions] = fill_value
          taken_mask = taken_mask ^ filled_positions

      return type(self)(taken_data, taken_mask, copy=False)
  734. # error: Return type "BooleanArray" of "isin" incompatible with return type
  735. # "ndarray" in supertype "ExtensionArray"
  def isin(self, values) -> BooleanArray:  # type: ignore[override]
      """
      Flag which elements of this array are contained in ``values``.

      Parameters
      ----------
      values : array-like
          Candidates to test membership against.

      Returns
      -------
      BooleanArray
          Its mask is all False, i.e. the result never holds NA. Missing
          elements of this array map to True only when ``values`` is
          object-dtype and contains this dtype's NA marker.
      """
      from pandas.core.arrays import BooleanArray

      # The membership routine would convert values to an ndarray anyway,
      # so converting up front costs nothing extra.
      values_arr = np.asarray(values)
      matches = algos.isin(self._data, values_arr)

      if self._hasna:
          na_marker = self.dtype.na_value
          na_in_values = is_object_dtype(values_arr.dtype) and any(
              candidate is na_marker for candidate in values_arr
          )
          # For now, NA does not propagate, so the result at missing positions
          # is set according to the presence of NA in values, see
          # https://github.com/pandas-dev/pandas/pull/38379 for some discussion
          matches[self._mask] = na_in_values

      no_missing = np.zeros(self._data.shape, dtype=bool)
      return BooleanArray(matches, no_missing, copy=False)
  def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
      """
      Return a new array of the same type backed by copied buffers.

      Both the data and the mask are copied, so the result shares no memory
      with this array.
      """
      data_copy = self._data.copy()
      mask_copy = self._mask.copy()
      return type(self)(data_copy, mask_copy, copy=False)
  def unique(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
      """
      Compute the BaseMaskedArray of unique values.

      Returns
      -------
      uniques : BaseMaskedArray
          Same type as the caller, built from the unique data and the
          matching mask returned by ``algos.unique_with_mask``.
      """
      unique_data, unique_mask = algos.unique_with_mask(self._data, self._mask)
      return type(self)(unique_data, unique_mask, copy=False)
  @doc(ExtensionArray.searchsorted)
  def searchsorted(
      self,
      value: NumpyValueArrayLike | ExtensionArray,
      side: Literal["left", "right"] = "left",
      sorter: NumpySorter = None,
  ) -> npt.NDArray[np.intp] | np.intp:
      # The docstring is inherited through the decorator above.
      if self._hasna:
          raise ValueError(
              "searchsorted requires array to be sorted, which is impossible "
              "with NAs present."
          )
      # Extension arrays are converted to object before being handed to NumPy.
      needle = value.astype(object) if isinstance(value, ExtensionArray) else value
      # Search the raw data buffer directly: the base class implementation
      # would cast to object, which is *much* slower.
      return self._data.searchsorted(needle, side=side, sorter=sorter)
  @doc(ExtensionArray.factorize)
  def factorize(
      self,
      use_na_sentinel: bool = True,
  ) -> tuple[np.ndarray, ExtensionArray]:
      # The docstring is inherited through the decorator above.
      values = self._data
      na_flags = self._mask

      # Always factorize with a sentinel for NA; when the caller does not
      # want a sentinel, codes are renumbered and NA is added to the uniques
      # below.
      codes, uniques = factorize_array(values, use_na_sentinel=True, mask=na_flags)

      # check that factorize_array correctly preserves dtype.
      assert uniques.dtype == self.dtype.numpy_dtype, (uniques.dtype, self.dtype)

      # An extra slot for NA is needed only when the sentinel is disabled and
      # at least one value is actually missing.
      needs_na_slot = not use_na_sentinel and na_flags.any()
      n_uniques = (len(uniques) + 1) if needs_na_slot else len(uniques)
      uniques_mask = np.zeros(n_uniques, dtype=bool)

      if needs_na_slot:
          first_na = na_flags.argmax()
          # NA takes the code following the largest code seen before its
          # first occurrence (0 when it comes first).
          if first_na == 0:
              na_code = np.intp(0)
          else:
              # mypy error: Slice index must be an integer or None
              # https://github.com/python/mypy/issues/2410
              na_code = codes[:first_na].max() + 1  # type: ignore[misc]
          # Shift existing codes up to open the slot, then recode the sentinel.
          codes[codes >= na_code] += 1
          codes[codes == -1] = na_code
          # dummy value for uniques; not used since uniques_mask will be True
          uniques = np.insert(uniques, na_code, 0)
          uniques_mask[na_code] = True

      return codes, type(self)(uniques, uniques_mask)
  @doc(ExtensionArray._values_for_argsort)
  def _values_for_argsort(self) -> np.ndarray:
      # The docstring is inherited through the decorator above. The raw data
      # buffer is handed back as-is (no copy); the mask is not consulted here.
      raw_values = self._data
      return raw_values
  def value_counts(self, dropna: bool = True) -> Series:
      """
      Returns a Series containing counts of each unique value.

      Parameters
      ----------
      dropna : bool, default True
          Don't include counts of missing values.

      Returns
      -------
      counts : Series
          Named ``"count"``, indexed by the distinct values cast to this
          array's dtype. With ``dropna=False`` a final entry for NA is
          appended that holds the number of masked values.

      See Also
      --------
      Series.value_counts
      """
      from pandas import (
          Index,
          Series,
      )
      from pandas.arrays import IntegerArray

      # Missing values are always excluded here; they are counted from the
      # mask separately when requested.
      keys, valid_counts = algos.value_counts_arraylike(
          self._data, dropna=True, mask=self._mask
      )

      if not dropna:
          # Append one extra slot holding the number of masked values.
          n_valid = len(valid_counts)
          counts = np.empty(n_valid + 1, dtype="int64")
          counts[:-1] = valid_counts
          counts[-1] = self._mask.sum()

          index = Index(keys, dtype=self.dtype).insert(
              len(keys), self.dtype.na_value
          )
          index = index.astype(self.dtype)

          no_missing = np.zeros(len(counts), dtype="bool")
          counts_array = IntegerArray(counts, no_missing)
          return Series(counts_array, index=index, name="count", copy=False)

      res = Series(valid_counts, index=keys, name="count", copy=False)
      res.index = res.index.astype(self.dtype)
      return res.astype("Int64")
  @doc(ExtensionArray.equals)
  def equals(self, other) -> bool:
      # The docstring is inherited through the decorator above.
      # Exact type and dtype must both match.
      if type(self) != type(other) or other.dtype != self.dtype:
          return False

      # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT
      # equal, so the masks have to agree position by position.
      masks_match = np.array_equal(self._mask, other._mask)
      if not masks_match:
          return False

      # Only values at unmasked positions are compared.
      own_valid = self._data[~self._mask]
      other_valid = other._data[~other._mask]
      return array_equivalent(own_valid, other_valid, dtype_equal=True)
  def _quantile(
      self, qs: npt.NDArray[np.float64], interpolation: str
  ) -> BaseMaskedArray:
      """
      Dispatch to quantile_with_mask, needed because we do not have
      _from_factorized.

      Parameters
      ----------
      qs : np.ndarray[float64]
          Quantiles, forwarded to ``quantile_with_mask``.
      interpolation : str
          Interpolation method, forwarded to ``quantile_with_mask``.

      Returns
      -------
      Result of ``self._maybe_mask_result`` applied to the computed values
      and the output mask.

      Raises
      ------
      NotImplementedError
          If the array has missing values and is 2-dimensional.

      Notes
      -----
      We assume that all impacted cases are 1D-only.
      """
      res = quantile_with_mask(
          self._data,
          mask=self._mask,
          # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype)
          #  instead of np.nan
          fill_value=np.nan,
          qs=qs,
          interpolation=interpolation,
      )

      # The result mask is all-False unless every value is NA, in which case
      # it is all-True.
      out_mask = np.zeros(res.shape, dtype=bool)
      if self._hasna:
          if self.ndim == 2:
              # I think this should be out_mask=self.isna().all(axis=1)
              #  but am holding off until we have tests
              raise NotImplementedError
          if self.isna().all():
              out_mask = np.ones(res.shape, dtype=bool)
              if is_integer_dtype(self.dtype):
                  # Replace the values with an integer buffer so the int
                  # dtype is maintained, as in the not-all-NA case.
                  res = np.zeros(res.shape, dtype=self.dtype.numpy_dtype)

      return self._maybe_mask_result(res, mask=out_mask)
  904. # ------------------------------------------------------------------
  905. # Reductions
  def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
      """
      Apply the reduction called ``name`` to this array.

      Parameters
      ----------
      name : str
          Reduction name. Names implemented as methods on this class are
          dispatched to those methods; any other name is looked up as
          ``nan<name>`` in ``nanops``.
      skipna : bool, default True
          Forwarded to the reduction.
      **kwargs
          Forwarded to the reduction.

      Returns
      -------
      The reduction result; a NaN result from the ``nanops`` path is
      converted to ``NA``.
      """
      own_reductions = {
          "any",
          "all",
          "min",
          "max",
          "sum",
          "prod",
          "mean",
          "var",
          "std",
      }
      if name in own_reductions:
          method = getattr(self, name)
          return method(skipna=skipna, **kwargs)

      # median, skew, kurt, sem
      nan_op = getattr(nanops, f"nan{name}")
      outcome = nan_op(
          self._data, axis=0, skipna=skipna, mask=self._mask, **kwargs
      )
      return libmissing.NA if np.isnan(outcome) else outcome
  def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
      """
      Attach a mask to an array-valued reduction result.

      Parameters
      ----------
      name : str
          Name of the reduction; not used by this implementation.
      result : scalar or np.ndarray
          Raw reduction result. Anything that is not an ndarray is returned
          unchanged.
      skipna : bool
          Selects how the mask is reduced: with ``skipna`` an output position
          is masked only if all inputs along ``axis`` were masked, otherwise
          if any input was masked.
      **kwargs
          Must contain ``axis`` when ``result`` is an ndarray.

      Returns
      -------
      ``result`` itself, or ``self._maybe_mask_result(result, mask)`` for an
      ndarray.
      """
      if not isinstance(result, np.ndarray):
          return result

      axis = kwargs["axis"]
      # we only retain mask for all-NA rows/columns when skipping NAs
      reduce_mask = self._mask.all if skipna else self._mask.any
      return self._maybe_mask_result(result, reduce_mask(axis=axis))
  def sum(
      self,
      *,
      skipna: bool = True,
      min_count: int = 0,
      axis: AxisInt | None = 0,
      **kwargs,
  ):
      """
      Return the sum of the array.

      Parameters
      ----------
      skipna : bool, default True
          Forwarded to ``masked_reductions.sum``.
      min_count : int, default 0
          Forwarded to ``masked_reductions.sum``.
      axis : int or None, default 0
          Axis to reduce over.
      **kwargs
          Checked by ``nv.validate_sum``. An ``out`` entry equal to None is
          dropped.

      Returns
      -------
      The reduction result, passed through ``_wrap_reduction_result``.

      Raises
      ------
      NotImplementedError
          If ``out`` is given and is not None.
      """
      nv.validate_sum((), kwargs)

      # TODO: do this in validate_sum?
      # np.sum passes ``out``; test_floating_array_numpy_sum
      if kwargs.pop("out", None) is not None:
          raise NotImplementedError

      total = masked_reductions.sum(
          self._data,
          self._mask,
          skipna=skipna,
          min_count=min_count,
          axis=axis,
      )
      return self._wrap_reduction_result(
          "sum", total, skipna=skipna, axis=axis, **kwargs
      )
  def prod(
      self,
      *,
      skipna: bool = True,
      min_count: int = 0,
      axis: AxisInt | None = 0,
      **kwargs,
  ):
      """
      Return the product of the array.

      Parameters
      ----------
      skipna : bool, default True
          Forwarded to ``masked_reductions.prod``.
      min_count : int, default 0
          Forwarded to ``masked_reductions.prod``.
      axis : int or None, default 0
          Axis to reduce over.
      **kwargs
          Checked by ``nv.validate_prod``.

      Returns
      -------
      The reduction result, passed through ``_wrap_reduction_result``.
      """
      nv.validate_prod((), kwargs)

      reduce_args = {"skipna": skipna, "min_count": min_count, "axis": axis}
      product = masked_reductions.prod(self._data, self._mask, **reduce_args)
      return self._wrap_reduction_result(
          "prod", product, skipna=skipna, axis=axis, **kwargs
      )
  def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
      """
      Return the mean of the array.

      Parameters
      ----------
      skipna : bool, default True
          Forwarded to ``masked_reductions.mean``.
      axis : int or None, default 0
          Axis to reduce over.
      **kwargs
          Checked by ``nv.validate_mean``.

      Returns
      -------
      The reduction result, passed through ``_wrap_reduction_result``.
      """
      nv.validate_mean((), kwargs)

      average = masked_reductions.mean(
          self._data, self._mask, skipna=skipna, axis=axis
      )
      return self._wrap_reduction_result(
          "mean", average, skipna=skipna, axis=axis, **kwargs
      )
  def var(
      self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
  ):
      """
      Return the variance of the array.

      Parameters
      ----------
      skipna : bool, default True
          Forwarded to ``masked_reductions.var``.
      axis : int or None, default 0
          Axis to reduce over.
      ddof : int, default 1
          Delta degrees of freedom, forwarded to ``masked_reductions.var``.
      **kwargs
          Checked by ``nv.validate_stat_ddof_func``.

      Returns
      -------
      The reduction result, passed through ``_wrap_reduction_result``.
      """
      nv.validate_stat_ddof_func((), kwargs, fname="var")

      variance = masked_reductions.var(
          self._data, self._mask, skipna=skipna, axis=axis, ddof=ddof
      )
      return self._wrap_reduction_result(
          "var", variance, skipna=skipna, axis=axis, **kwargs
      )
  def std(
      self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
  ):
      """
      Return the standard deviation of the array.

      Parameters
      ----------
      skipna : bool, default True
          Forwarded to ``masked_reductions.std``.
      axis : int or None, default 0
          Axis to reduce over.
      ddof : int, default 1
          Delta degrees of freedom, forwarded to ``masked_reductions.std``.
      **kwargs
          Checked by ``nv.validate_stat_ddof_func``.

      Returns
      -------
      The reduction result, passed through ``_wrap_reduction_result``.
      """
      nv.validate_stat_ddof_func((), kwargs, fname="std")

      deviation = masked_reductions.std(
          self._data, self._mask, skipna=skipna, axis=axis, ddof=ddof
      )
      return self._wrap_reduction_result(
          "std", deviation, skipna=skipna, axis=axis, **kwargs
      )
  1010. def min(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
  1011. nv.validate_min((), kwargs)
  1012. return masked_reductions.min(
  1013. self._data,
  1014. self._mask,
  1015. skipna=skipna,
  1016. axis=axis,
  1017. )
  1018. def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
  1019. nv.validate_max((), kwargs)
  1020. return masked_reductions.max(
  1021. self._data,
  1022. self._mask,
  1023. skipna=skipna,
  1024. axis=axis,
  1025. )
  def any(self, *, skipna: bool = True, **kwargs):
      """
      Return whether any element is truthy.

      Returns False unless there is at least one element that is truthy.
      By default, NAs are skipped. If ``skipna=False`` is specified and
      missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
      is used as for logical operations.

      .. versionchanged:: 1.4.0

      Parameters
      ----------
      skipna : bool, default True
          Exclude NA values. If the entire array is NA and `skipna` is
          True, then the result will be False, as for an empty array.
          If `skipna` is False, the result will still be True if there is
          at least one element that is truthy, otherwise NA will be returned
          if there are NA's present.
      **kwargs : any, default None
          Additional keywords have no effect but might be accepted for
          compatibility with NumPy.

      Returns
      -------
      bool or :attr:`pandas.NA`

      See Also
      --------
      numpy.any : Numpy version of this method.
      BaseMaskedArray.all : Return whether all elements are truthy.

      Examples
      --------
      The result indicates whether any element is truthy (and by default
      skips NAs):

      >>> pd.array([True, False, True]).any()
      True
      >>> pd.array([True, False, pd.NA]).any()
      True
      >>> pd.array([False, False, pd.NA]).any()
      False
      >>> pd.array([], dtype="boolean").any()
      False
      >>> pd.array([pd.NA], dtype="boolean").any()
      False
      >>> pd.array([pd.NA], dtype="Float64").any()
      False

      With ``skipna=False``, the result can be NA if this is logically
      required (whether ``pd.NA`` is True or False influences the result):

      >>> pd.array([True, False, pd.NA]).any(skipna=False)
      True
      >>> pd.array([1, 0, pd.NA]).any(skipna=False)
      True
      >>> pd.array([False, False, pd.NA]).any(skipna=False)
      <NA>
      >>> pd.array([0, 0, pd.NA]).any(skipna=False)
      <NA>
      """
      kwargs.pop("axis", None)
      nv.validate_any((), kwargs)

      # Work on a copy in which every missing slot holds the falsey
      # placeholder, so missing values cannot make the result True.
      filled = self._data.copy()
      # error: Argument 3 to "putmask" has incompatible type "object";
      # expected "Union[_SupportsArray[dtype[Any]],
      # _NestedSequence[_SupportsArray[dtype[Any]]],
      # bool, int, float, complex, str, bytes,
      # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
      np.putmask(filled, self._mask, self._falsey_value)  # type: ignore[arg-type]
      result = filled.any()

      # Without skipping, a False result is only certain when no value is
      # missing; otherwise the answer is unknown.
      undetermined = (
          not skipna and not result and len(self) != 0 and self._mask.any()
      )
      return self.dtype.na_value if undetermined else result
  def all(self, *, skipna: bool = True, **kwargs):
      """
      Return whether all elements are truthy.

      Returns True unless there is at least one element that is falsey.
      By default, NAs are skipped. If ``skipna=False`` is specified and
      missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
      is used as for logical operations.

      .. versionchanged:: 1.4.0

      Parameters
      ----------
      skipna : bool, default True
          Exclude NA values. If the entire array is NA and `skipna` is
          True, then the result will be True, as for an empty array.
          If `skipna` is False, the result will still be False if there is
          at least one element that is falsey, otherwise NA will be returned
          if there are NA's present.
      **kwargs : any, default None
          Additional keywords have no effect but might be accepted for
          compatibility with NumPy.

      Returns
      -------
      bool or :attr:`pandas.NA`

      See Also
      --------
      numpy.all : Numpy version of this method.
      BaseMaskedArray.any : Return whether any element is truthy.

      Examples
      --------
      The result indicates whether all elements are truthy (and by default
      skips NAs):

      >>> pd.array([True, True, pd.NA]).all()
      True
      >>> pd.array([1, 1, pd.NA]).all()
      True
      >>> pd.array([True, False, pd.NA]).all()
      False
      >>> pd.array([], dtype="boolean").all()
      True
      >>> pd.array([pd.NA], dtype="boolean").all()
      True
      >>> pd.array([pd.NA], dtype="Float64").all()
      True

      With ``skipna=False``, the result can be NA if this is logically
      required (whether ``pd.NA`` is True or False influences the result):

      >>> pd.array([True, True, pd.NA]).all(skipna=False)
      <NA>
      >>> pd.array([1, 1, pd.NA]).all(skipna=False)
      <NA>
      >>> pd.array([True, False, pd.NA]).all(skipna=False)
      False
      >>> pd.array([1, 0, pd.NA]).all(skipna=False)
      False
      """
      kwargs.pop("axis", None)
      nv.validate_all((), kwargs)

      # Work on a copy in which every missing slot holds the truthy
      # placeholder, so missing values cannot make the result False.
      filled = self._data.copy()
      # error: Argument 3 to "putmask" has incompatible type "object";
      # expected "Union[_SupportsArray[dtype[Any]],
      # _NestedSequence[_SupportsArray[dtype[Any]]],
      # bool, int, float, complex, str, bytes,
      # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
      np.putmask(filled, self._mask, self._truthy_value)  # type: ignore[arg-type]
      result = filled.all()

      # Without skipping, a True result is only certain when no value is
      # missing; otherwise the answer is unknown.
      undetermined = (
          not skipna and result and len(self) != 0 and self._mask.any()
      )
      return self.dtype.na_value if undetermined else result
  def _accumulate(
      self, name: str, *, skipna: bool = True, **kwargs
  ) -> BaseMaskedArray:
      """
      Apply the cumulative operation called ``name``.

      Parameters
      ----------
      name : str
          Attribute of ``masked_accumulations`` to call.
      skipna : bool, default True
          Forwarded to the accumulation function.
      **kwargs
          Forwarded to the accumulation function.

      Returns
      -------
      Same type as the caller, built from the data and mask that the
      accumulation function returns.
      """
      accumulate = getattr(masked_accumulations, name)
      new_data, new_mask = accumulate(
          self._data, self._mask, skipna=skipna, **kwargs
      )
      return type(self)(new_data, new_mask, copy=False)