base.py 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873
  1. """
  2. An interface for extending pandas with custom arrays.
  3. .. warning::
  4. This is an experimental API and subject to breaking changes
  5. without warning.
  6. """
  7. from __future__ import annotations
  8. import operator
  9. from typing import (
  10. TYPE_CHECKING,
  11. Any,
  12. Callable,
  13. ClassVar,
  14. Iterator,
  15. Literal,
  16. Sequence,
  17. TypeVar,
  18. cast,
  19. overload,
  20. )
  21. import numpy as np
  22. from pandas._libs import lib
  23. from pandas._typing import (
  24. ArrayLike,
  25. AstypeArg,
  26. AxisInt,
  27. Dtype,
  28. FillnaOptions,
  29. PositionalIndexer,
  30. ScalarIndexer,
  31. SequenceIndexer,
  32. Shape,
  33. SortKind,
  34. TakeIndexer,
  35. npt,
  36. )
  37. from pandas.compat import set_function_name
  38. from pandas.compat.numpy import function as nv
  39. from pandas.errors import AbstractMethodError
  40. from pandas.util._decorators import (
  41. Appender,
  42. Substitution,
  43. cache_readonly,
  44. )
  45. from pandas.util._validators import (
  46. validate_bool_kwarg,
  47. validate_fillna_kwargs,
  48. validate_insert_loc,
  49. )
  50. from pandas.core.dtypes.cast import maybe_cast_to_extension_array
  51. from pandas.core.dtypes.common import (
  52. is_datetime64_dtype,
  53. is_dtype_equal,
  54. is_list_like,
  55. is_scalar,
  56. is_timedelta64_dtype,
  57. pandas_dtype,
  58. )
  59. from pandas.core.dtypes.dtypes import ExtensionDtype
  60. from pandas.core.dtypes.generic import (
  61. ABCDataFrame,
  62. ABCIndex,
  63. ABCSeries,
  64. )
  65. from pandas.core.dtypes.missing import isna
  66. from pandas.core import (
  67. arraylike,
  68. missing,
  69. roperator,
  70. )
  71. from pandas.core.algorithms import (
  72. factorize_array,
  73. isin,
  74. mode,
  75. rank,
  76. unique,
  77. )
  78. from pandas.core.array_algos.quantile import quantile_with_mask
  79. from pandas.core.sorting import (
  80. nargminmax,
  81. nargsort,
  82. )
  83. if TYPE_CHECKING:
  84. from pandas._typing import (
  85. NumpySorter,
  86. NumpyValueArrayLike,
  87. )
  88. _extension_array_shared_docs: dict[str, str] = {}
  89. ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray")
  90. class ExtensionArray:
  91. """
  92. Abstract base class for custom 1-D array types.
  93. pandas will recognize instances of this class as proper arrays
  94. with a custom type and will not attempt to coerce them to objects. They
  95. may be stored directly inside a :class:`DataFrame` or :class:`Series`.
  96. Attributes
  97. ----------
  98. dtype
  99. nbytes
  100. ndim
  101. shape
  102. Methods
  103. -------
  104. argsort
  105. astype
  106. copy
  107. dropna
  108. factorize
  109. fillna
  110. equals
  111. insert
  112. isin
  113. isna
  114. ravel
  115. repeat
  116. searchsorted
  117. shift
  118. take
  119. tolist
  120. unique
  121. view
  122. _accumulate
  123. _concat_same_type
  124. _formatter
  125. _from_factorized
  126. _from_sequence
  127. _from_sequence_of_strings
  128. _reduce
  129. _values_for_argsort
  130. _values_for_factorize
  131. Notes
  132. -----
  133. The interface includes the following abstract methods that must be
  134. implemented by subclasses:
  135. * _from_sequence
  136. * _from_factorized
  137. * __getitem__
  138. * __len__
  139. * __eq__
  140. * dtype
  141. * nbytes
  142. * isna
  143. * take
  144. * copy
  145. * _concat_same_type
  146. A default repr displaying the type, (truncated) data, length,
  147. and dtype is provided. It can be customized or replaced
  148. by overriding:
  149. * __repr__ : A default repr for the ExtensionArray.
  150. * _formatter : Print scalars inside a Series or DataFrame.
  151. Some methods require casting the ExtensionArray to an ndarray of Python
  152. objects with ``self.astype(object)``, which may be expensive. When
  153. performance is a concern, we highly recommend overriding the following
  154. methods:
  155. * fillna
  156. * dropna
  157. * unique
  158. * factorize / _values_for_factorize
  159. * argsort, argmax, argmin / _values_for_argsort
  160. * searchsorted
  161. The remaining methods implemented on this class should be performant,
  162. as they only compose abstract methods. Still, a more efficient
  163. implementation may be available, and these methods can be overridden.
  164. One can implement methods to handle array accumulations or reductions.
  165. * _accumulate
  166. * _reduce
  167. One can implement methods to handle parsing from strings that will be used
  168. in methods such as ``pandas.io.parsers.read_csv``.
  169. * _from_sequence_of_strings
  170. This class does not inherit from 'abc.ABCMeta' for performance reasons.
  171. Methods and properties required by the interface raise
  172. ``pandas.errors.AbstractMethodError`` and no ``register`` method is
  173. provided for registering virtual subclasses.
  174. ExtensionArrays are limited to 1 dimension.
  175. They may be backed by none, one, or many NumPy arrays. For example,
  176. ``pandas.Categorical`` is an extension array backed by two arrays,
  177. one for codes and one for categories. An array of IPv6 addresses may
  178. be backed by a NumPy structured array with two fields, one for the
  179. lower 64 bits and one for the upper 64 bits. Or they may be backed
  180. by some other storage type, like Python lists. Pandas makes no
  181. assumptions on how the data are stored, just that it can be converted
  182. to a NumPy array.
  183. The ExtensionArray interface does not impose any rules on how this data
  184. is stored. However, currently, the backing data cannot be stored in
  185. attributes called ``.values`` or ``._values`` to ensure full compatibility
  186. with pandas internals. But other names as ``.data``, ``._data``,
  187. ``._items``, ... can be freely used.
  188. If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
  189. that
  190. 1. You defer by returning ``NotImplemented`` when any Series are present
  191. in `inputs`. Pandas will extract the arrays and call the ufunc again.
  192. 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
  193. pandas inspects this to determine whether the ufunc is valid for the
  194. types present.
  195. See :ref:`extending.extension.ufunc` for more.
  196. By default, ExtensionArrays are not hashable. Immutable subclasses may
  197. override this behavior.
  198. """
  199. # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
  200. # Don't override this.
  201. _typ = "extension"
  202. # ------------------------------------------------------------------------
  203. # Constructors
  204. # ------------------------------------------------------------------------
  205. @classmethod
  206. def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
  207. """
  208. Construct a new ExtensionArray from a sequence of scalars.
  209. Parameters
  210. ----------
  211. scalars : Sequence
  212. Each element will be an instance of the scalar type for this
  213. array, ``cls.dtype.type`` or be converted into this type in this method.
  214. dtype : dtype, optional
  215. Construct for this particular dtype. This should be a Dtype
  216. compatible with the ExtensionArray.
  217. copy : bool, default False
  218. If True, copy the underlying data.
  219. Returns
  220. -------
  221. ExtensionArray
  222. """
  223. raise AbstractMethodError(cls)
  224. @classmethod
  225. def _from_sequence_of_strings(
  226. cls, strings, *, dtype: Dtype | None = None, copy: bool = False
  227. ):
  228. """
  229. Construct a new ExtensionArray from a sequence of strings.
  230. Parameters
  231. ----------
  232. strings : Sequence
  233. Each element will be an instance of the scalar type for this
  234. array, ``cls.dtype.type``.
  235. dtype : dtype, optional
  236. Construct for this particular dtype. This should be a Dtype
  237. compatible with the ExtensionArray.
  238. copy : bool, default False
  239. If True, copy the underlying data.
  240. Returns
  241. -------
  242. ExtensionArray
  243. """
  244. raise AbstractMethodError(cls)
  245. @classmethod
  246. def _from_factorized(cls, values, original):
  247. """
  248. Reconstruct an ExtensionArray after factorization.
  249. Parameters
  250. ----------
  251. values : ndarray
  252. An integer ndarray with the factorized values.
  253. original : ExtensionArray
  254. The original ExtensionArray that factorize was called on.
  255. See Also
  256. --------
  257. factorize : Top-level factorize method that dispatches here.
  258. ExtensionArray.factorize : Encode the extension array as an enumerated type.
  259. """
  260. raise AbstractMethodError(cls)
  261. # ------------------------------------------------------------------------
  262. # Must be a Sequence
  263. # ------------------------------------------------------------------------
  264. @overload
  265. def __getitem__(self, item: ScalarIndexer) -> Any:
  266. ...
  267. @overload
  268. def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT:
  269. ...
  270. def __getitem__(
  271. self: ExtensionArrayT, item: PositionalIndexer
  272. ) -> ExtensionArrayT | Any:
  273. """
  274. Select a subset of self.
  275. Parameters
  276. ----------
  277. item : int, slice, or ndarray
  278. * int: The position in 'self' to get.
  279. * slice: A slice object, where 'start', 'stop', and 'step' are
  280. integers or None
  281. * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
  282. * list[int]: A list of int
  283. Returns
  284. -------
  285. item : scalar or ExtensionArray
  286. Notes
  287. -----
  288. For scalar ``item``, return a scalar value suitable for the array's
  289. type. This should be an instance of ``self.dtype.type``.
  290. For slice ``key``, return an instance of ``ExtensionArray``, even
  291. if the slice is length 0 or 1.
  292. For a boolean mask, return an instance of ``ExtensionArray``, filtered
  293. to the values where ``item`` is True.
  294. """
  295. raise AbstractMethodError(self)
  296. def __setitem__(self, key, value) -> None:
  297. """
  298. Set one or more values inplace.
  299. This method is not required to satisfy the pandas extension array
  300. interface.
  301. Parameters
  302. ----------
  303. key : int, ndarray, or slice
  304. When called from, e.g. ``Series.__setitem__``, ``key`` will be
  305. one of
  306. * scalar int
  307. * ndarray of integers.
  308. * boolean ndarray
  309. * slice object
  310. value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
  311. value or values to be set of ``key``.
  312. Returns
  313. -------
  314. None
  315. """
  316. # Some notes to the ExtensionArray implementor who may have ended up
  317. # here. While this method is not required for the interface, if you
  318. # *do* choose to implement __setitem__, then some semantics should be
  319. # observed:
  320. #
  321. # * Setting multiple values : ExtensionArrays should support setting
  322. # multiple values at once, 'key' will be a sequence of integers and
  323. # 'value' will be a same-length sequence.
  324. #
  325. # * Broadcasting : For a sequence 'key' and a scalar 'value',
  326. # each position in 'key' should be set to 'value'.
  327. #
  328. # * Coercion : Most users will expect basic coercion to work. For
  329. # example, a string like '2018-01-01' is coerced to a datetime
  330. # when setting on a datetime64ns array. In general, if the
  331. # __init__ method coerces that value, then so should __setitem__
  332. # Note, also, that Series/DataFrame.where internally use __setitem__
  333. # on a copy of the data.
  334. raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
  335. def __len__(self) -> int:
  336. """
  337. Length of this array
  338. Returns
  339. -------
  340. length : int
  341. """
  342. raise AbstractMethodError(self)
  343. def __iter__(self) -> Iterator[Any]:
  344. """
  345. Iterate over elements of the array.
  346. """
  347. # This needs to be implemented so that pandas recognizes extension
  348. # arrays as list-like. The default implementation makes successive
  349. # calls to ``__getitem__``, which may be slower than necessary.
  350. for i in range(len(self)):
  351. yield self[i]
  352. def __contains__(self, item: object) -> bool | np.bool_:
  353. """
  354. Return for `item in self`.
  355. """
  356. # GH37867
  357. # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
  358. # would raise a TypeError. The implementation below works around that.
  359. if is_scalar(item) and isna(item):
  360. if not self._can_hold_na:
  361. return False
  362. elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
  363. return self._hasna
  364. else:
  365. return False
  366. else:
  367. # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
  368. # attribute "any"
  369. return (item == self).any() # type: ignore[union-attr]
  370. # error: Signature of "__eq__" incompatible with supertype "object"
  371. def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override]
  372. """
  373. Return for `self == other` (element-wise equality).
  374. """
  375. # Implementer note: this should return a boolean numpy ndarray or
  376. # a boolean ExtensionArray.
  377. # When `other` is one of Series, Index, or DataFrame, this method should
  378. # return NotImplemented (to ensure that those objects are responsible for
  379. # first unpacking the arrays, and then dispatch the operation to the
  380. # underlying arrays)
  381. raise AbstractMethodError(self)
  382. # error: Signature of "__ne__" incompatible with supertype "object"
  383. def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override]
  384. """
  385. Return for `self != other` (element-wise in-equality).
  386. """
  387. return ~(self == other)
  388. def to_numpy(
  389. self,
  390. dtype: npt.DTypeLike | None = None,
  391. copy: bool = False,
  392. na_value: object = lib.no_default,
  393. ) -> np.ndarray:
  394. """
  395. Convert to a NumPy ndarray.
  396. This is similar to :meth:`numpy.asarray`, but may provide additional control
  397. over how the conversion is done.
  398. Parameters
  399. ----------
  400. dtype : str or numpy.dtype, optional
  401. The dtype to pass to :meth:`numpy.asarray`.
  402. copy : bool, default False
  403. Whether to ensure that the returned value is a not a view on
  404. another array. Note that ``copy=False`` does not *ensure* that
  405. ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
  406. a copy is made, even if not strictly necessary.
  407. na_value : Any, optional
  408. The value to use for missing values. The default value depends
  409. on `dtype` and the type of the array.
  410. Returns
  411. -------
  412. numpy.ndarray
  413. """
  414. result = np.asarray(self, dtype=dtype)
  415. if copy or na_value is not lib.no_default:
  416. result = result.copy()
  417. if na_value is not lib.no_default:
  418. result[self.isna()] = na_value
  419. return result
  420. # ------------------------------------------------------------------------
  421. # Required attributes
  422. # ------------------------------------------------------------------------
  423. @property
  424. def dtype(self) -> ExtensionDtype:
  425. """
  426. An instance of 'ExtensionDtype'.
  427. """
  428. raise AbstractMethodError(self)
  429. @property
  430. def shape(self) -> Shape:
  431. """
  432. Return a tuple of the array dimensions.
  433. """
  434. return (len(self),)
  435. @property
  436. def size(self) -> int:
  437. """
  438. The number of elements in the array.
  439. """
  440. # error: Incompatible return value type (got "signedinteger[_64Bit]",
  441. # expected "int") [return-value]
  442. return np.prod(self.shape) # type: ignore[return-value]
  443. @property
  444. def ndim(self) -> int:
  445. """
  446. Extension Arrays are only allowed to be 1-dimensional.
  447. """
  448. return 1
  449. @property
  450. def nbytes(self) -> int:
  451. """
  452. The number of bytes needed to store this object in memory.
  453. """
  454. # If this is expensive to compute, return an approximate lower bound
  455. # on the number of bytes needed.
  456. raise AbstractMethodError(self)
  457. # ------------------------------------------------------------------------
  458. # Additional Methods
  459. # ------------------------------------------------------------------------
  460. @overload
  461. def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
  462. ...
  463. @overload
  464. def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
  465. ...
  466. @overload
  467. def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
  468. ...
  469. def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
  470. """
  471. Cast to a NumPy array or ExtensionArray with 'dtype'.
  472. Parameters
  473. ----------
  474. dtype : str or dtype
  475. Typecode or data-type to which the array is cast.
  476. copy : bool, default True
  477. Whether to copy the data, even if not necessary. If False,
  478. a copy is made only if the old dtype does not match the
  479. new dtype.
  480. Returns
  481. -------
  482. np.ndarray or pandas.api.extensions.ExtensionArray
  483. An ExtensionArray if dtype is ExtensionDtype,
  484. Otherwise a NumPy ndarray with 'dtype' for its dtype.
  485. """
  486. dtype = pandas_dtype(dtype)
  487. if is_dtype_equal(dtype, self.dtype):
  488. if not copy:
  489. return self
  490. else:
  491. return self.copy()
  492. if isinstance(dtype, ExtensionDtype):
  493. cls = dtype.construct_array_type()
  494. return cls._from_sequence(self, dtype=dtype, copy=copy)
  495. elif is_datetime64_dtype(dtype):
  496. from pandas.core.arrays import DatetimeArray
  497. return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy)
  498. elif is_timedelta64_dtype(dtype):
  499. from pandas.core.arrays import TimedeltaArray
  500. return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
  501. return np.array(self, dtype=dtype, copy=copy)
  502. def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
  503. """
  504. A 1-D array indicating if each value is missing.
  505. Returns
  506. -------
  507. numpy.ndarray or pandas.api.extensions.ExtensionArray
  508. In most cases, this should return a NumPy ndarray. For
  509. exceptional cases like ``SparseArray``, where returning
  510. an ndarray would be expensive, an ExtensionArray may be
  511. returned.
  512. Notes
  513. -----
  514. If returning an ExtensionArray, then
  515. * ``na_values._is_boolean`` should be True
  516. * `na_values` should implement :func:`ExtensionArray._reduce`
  517. * ``na_values.any`` and ``na_values.all`` should be implemented
  518. """
  519. raise AbstractMethodError(self)
  520. @property
  521. def _hasna(self) -> bool:
  522. # GH#22680
  523. """
  524. Equivalent to `self.isna().any()`.
  525. Some ExtensionArray subclasses may be able to optimize this check.
  526. """
  527. return bool(self.isna().any())
  528. def _values_for_argsort(self) -> np.ndarray:
  529. """
  530. Return values for sorting.
  531. Returns
  532. -------
  533. ndarray
  534. The transformed values should maintain the ordering between values
  535. within the array.
  536. See Also
  537. --------
  538. ExtensionArray.argsort : Return the indices that would sort this array.
  539. Notes
  540. -----
  541. The caller is responsible for *not* modifying these values in-place, so
  542. it is safe for implementors to give views on `self`.
  543. Functions that use this (e.g. ExtensionArray.argsort) should ignore
  544. entries with missing values in the original array (according to `self.isna()`).
  545. This means that the corresponding entries in the returned array don't need to
  546. be modified to sort correctly.
  547. """
  548. # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
  549. return np.array(self)
  550. def argsort(
  551. self,
  552. *,
  553. ascending: bool = True,
  554. kind: SortKind = "quicksort",
  555. na_position: str = "last",
  556. **kwargs,
  557. ) -> np.ndarray:
  558. """
  559. Return the indices that would sort this array.
  560. Parameters
  561. ----------
  562. ascending : bool, default True
  563. Whether the indices should result in an ascending
  564. or descending sort.
  565. kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
  566. Sorting algorithm.
  567. *args, **kwargs:
  568. Passed through to :func:`numpy.argsort`.
  569. Returns
  570. -------
  571. np.ndarray[np.intp]
  572. Array of indices that sort ``self``. If NaN values are contained,
  573. NaN values are placed at the end.
  574. See Also
  575. --------
  576. numpy.argsort : Sorting implementation used internally.
  577. """
  578. # Implementor note: You have two places to override the behavior of
  579. # argsort.
  580. # 1. _values_for_argsort : construct the values passed to np.argsort
  581. # 2. argsort : total control over sorting. In case of overriding this,
  582. # it is recommended to also override argmax/argmin
  583. ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)
  584. values = self._values_for_argsort()
  585. return nargsort(
  586. values,
  587. kind=kind,
  588. ascending=ascending,
  589. na_position=na_position,
  590. mask=np.asarray(self.isna()),
  591. )
  592. def argmin(self, skipna: bool = True) -> int:
  593. """
  594. Return the index of minimum value.
  595. In case of multiple occurrences of the minimum value, the index
  596. corresponding to the first occurrence is returned.
  597. Parameters
  598. ----------
  599. skipna : bool, default True
  600. Returns
  601. -------
  602. int
  603. See Also
  604. --------
  605. ExtensionArray.argmax
  606. """
  607. # Implementor note: You have two places to override the behavior of
  608. # argmin.
  609. # 1. _values_for_argsort : construct the values used in nargminmax
  610. # 2. argmin itself : total control over sorting.
  611. validate_bool_kwarg(skipna, "skipna")
  612. if not skipna and self._hasna:
  613. raise NotImplementedError
  614. return nargminmax(self, "argmin")
  615. def argmax(self, skipna: bool = True) -> int:
  616. """
  617. Return the index of maximum value.
  618. In case of multiple occurrences of the maximum value, the index
  619. corresponding to the first occurrence is returned.
  620. Parameters
  621. ----------
  622. skipna : bool, default True
  623. Returns
  624. -------
  625. int
  626. See Also
  627. --------
  628. ExtensionArray.argmin
  629. """
  630. # Implementor note: You have two places to override the behavior of
  631. # argmax.
  632. # 1. _values_for_argsort : construct the values used in nargminmax
  633. # 2. argmax itself : total control over sorting.
  634. validate_bool_kwarg(skipna, "skipna")
  635. if not skipna and self._hasna:
  636. raise NotImplementedError
  637. return nargminmax(self, "argmax")
  638. def fillna(
  639. self: ExtensionArrayT,
  640. value: object | ArrayLike | None = None,
  641. method: FillnaOptions | None = None,
  642. limit: int | None = None,
  643. ) -> ExtensionArrayT:
  644. """
  645. Fill NA/NaN values using the specified method.
  646. Parameters
  647. ----------
  648. value : scalar, array-like
  649. If a scalar value is passed it is used to fill all missing values.
  650. Alternatively, an array-like 'value' can be given. It's expected
  651. that the array-like have the same length as 'self'.
  652. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
  653. Method to use for filling holes in reindexed Series:
  654. * pad / ffill: propagate last valid observation forward to next valid.
  655. * backfill / bfill: use NEXT valid observation to fill gap.
  656. limit : int, default None
  657. If method is specified, this is the maximum number of consecutive
  658. NaN values to forward/backward fill. In other words, if there is
  659. a gap with more than this number of consecutive NaNs, it will only
  660. be partially filled. If method is not specified, this is the
  661. maximum number of entries along the entire axis where NaNs will be
  662. filled.
  663. Returns
  664. -------
  665. ExtensionArray
  666. With NA/NaN filled.
  667. """
  668. value, method = validate_fillna_kwargs(value, method)
  669. mask = self.isna()
  670. # error: Argument 2 to "check_value_size" has incompatible type
  671. # "ExtensionArray"; expected "ndarray"
  672. value = missing.check_value_size(
  673. value, mask, len(self) # type: ignore[arg-type]
  674. )
  675. if mask.any():
  676. if method is not None:
  677. func = missing.get_fill_func(method)
  678. npvalues = self.astype(object)
  679. func(npvalues, limit=limit, mask=mask)
  680. new_values = self._from_sequence(npvalues, dtype=self.dtype)
  681. else:
  682. # fill with value
  683. new_values = self.copy()
  684. new_values[mask] = value
  685. else:
  686. new_values = self.copy()
  687. return new_values
  688. def dropna(self: ExtensionArrayT) -> ExtensionArrayT:
  689. """
  690. Return ExtensionArray without NA values.
  691. Returns
  692. -------
  693. pandas.api.extensions.ExtensionArray
  694. """
  695. # error: Unsupported operand type for ~ ("ExtensionArray")
  696. return self[~self.isna()] # type: ignore[operator]
  697. def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
  698. """
  699. Shift values by desired number.
  700. Newly introduced missing values are filled with
  701. ``self.dtype.na_value``.
  702. Parameters
  703. ----------
  704. periods : int, default 1
  705. The number of periods to shift. Negative values are allowed
  706. for shifting backwards.
  707. fill_value : object, optional
  708. The scalar value to use for newly introduced missing values.
  709. The default is ``self.dtype.na_value``.
  710. Returns
  711. -------
  712. ExtensionArray
  713. Shifted.
  714. Notes
  715. -----
  716. If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
  717. returned.
  718. If ``periods > len(self)``, then an array of size
  719. len(self) is returned, with all values filled with
  720. ``self.dtype.na_value``.
  721. """
  722. # Note: this implementation assumes that `self.dtype.na_value` can be
  723. # stored in an instance of your ExtensionArray with `self.dtype`.
  724. if not len(self) or periods == 0:
  725. return self.copy()
  726. if isna(fill_value):
  727. fill_value = self.dtype.na_value
  728. empty = self._from_sequence(
  729. [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
  730. )
  731. if periods > 0:
  732. a = empty
  733. b = self[:-periods]
  734. else:
  735. a = self[abs(periods) :]
  736. b = empty
  737. return self._concat_same_type([a, b])
  738. def unique(self: ExtensionArrayT) -> ExtensionArrayT:
  739. """
  740. Compute the ExtensionArray of unique values.
  741. Returns
  742. -------
  743. pandas.api.extensions.ExtensionArray
  744. """
  745. uniques = unique(self.astype(object))
  746. return self._from_sequence(uniques, dtype=self.dtype)
  747. def searchsorted(
  748. self,
  749. value: NumpyValueArrayLike | ExtensionArray,
  750. side: Literal["left", "right"] = "left",
  751. sorter: NumpySorter = None,
  752. ) -> npt.NDArray[np.intp] | np.intp:
  753. """
  754. Find indices where elements should be inserted to maintain order.
  755. Find the indices into a sorted array `self` (a) such that, if the
  756. corresponding elements in `value` were inserted before the indices,
  757. the order of `self` would be preserved.
  758. Assuming that `self` is sorted:
  759. ====== ================================
  760. `side` returned index `i` satisfies
  761. ====== ================================
  762. left ``self[i-1] < value <= self[i]``
  763. right ``self[i-1] <= value < self[i]``
  764. ====== ================================
  765. Parameters
  766. ----------
  767. value : array-like, list or scalar
  768. Value(s) to insert into `self`.
  769. side : {'left', 'right'}, optional
  770. If 'left', the index of the first suitable location found is given.
  771. If 'right', return the last such index. If there is no suitable
  772. index, return either 0 or N (where N is the length of `self`).
  773. sorter : 1-D array-like, optional
  774. Optional array of integer indices that sort array a into ascending
  775. order. They are typically the result of argsort.
  776. Returns
  777. -------
  778. array of ints or int
  779. If value is array-like, array of insertion points.
  780. If value is scalar, a single integer.
  781. See Also
  782. --------
  783. numpy.searchsorted : Similar method from NumPy.
  784. """
  785. # Note: the base tests provided by pandas only test the basics.
  786. # We do not test
  787. # 1. Values outside the range of the `data_for_sorting` fixture
  788. # 2. Values between the values in the `data_for_sorting` fixture
  789. # 3. Missing values.
  790. arr = self.astype(object)
  791. if isinstance(value, ExtensionArray):
  792. value = value.astype(object)
  793. return arr.searchsorted(value, side=side, sorter=sorter)
  794. def equals(self, other: object) -> bool:
  795. """
  796. Return if another array is equivalent to this array.
  797. Equivalent means that both arrays have the same shape and dtype, and
  798. all values compare equal. Missing values in the same location are
  799. considered equal (in contrast with normal equality).
  800. Parameters
  801. ----------
  802. other : ExtensionArray
  803. Array to compare to this Array.
  804. Returns
  805. -------
  806. boolean
  807. Whether the arrays are equivalent.
  808. """
  809. if type(self) != type(other):
  810. return False
  811. other = cast(ExtensionArray, other)
  812. if not is_dtype_equal(self.dtype, other.dtype):
  813. return False
  814. elif len(self) != len(other):
  815. return False
  816. else:
  817. equal_values = self == other
  818. if isinstance(equal_values, ExtensionArray):
  819. # boolean array with NA -> fill with False
  820. equal_values = equal_values.fillna(False)
  821. # error: Unsupported left operand type for & ("ExtensionArray")
  822. equal_na = self.isna() & other.isna() # type: ignore[operator]
  823. return bool((equal_values | equal_na).all())
  824. def isin(self, values) -> npt.NDArray[np.bool_]:
  825. """
  826. Pointwise comparison for set containment in the given values.
  827. Roughly equivalent to `np.array([x in values for x in self])`
  828. Parameters
  829. ----------
  830. values : Sequence
  831. Returns
  832. -------
  833. np.ndarray[bool]
  834. """
  835. return isin(np.asarray(self), values)
  836. def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
  837. """
  838. Return an array and missing value suitable for factorization.
  839. Returns
  840. -------
  841. values : ndarray
  842. An array suitable for factorization. This should maintain order
  843. and be a supported dtype (Float64, Int64, UInt64, String, Object).
  844. By default, the extension array is cast to object dtype.
  845. na_value : object
  846. The value in `values` to consider missing. This will be treated
  847. as NA in the factorization routines, so it will be coded as
  848. `-1` and not included in `uniques`. By default,
  849. ``np.nan`` is used.
  850. Notes
  851. -----
  852. The values returned by this method are also used in
  853. :func:`pandas.util.hash_pandas_object`.
  854. """
  855. return self.astype(object), np.nan
  856. def factorize(
  857. self,
  858. use_na_sentinel: bool = True,
  859. ) -> tuple[np.ndarray, ExtensionArray]:
  860. """
  861. Encode the extension array as an enumerated type.
  862. Parameters
  863. ----------
  864. use_na_sentinel : bool, default True
  865. If True, the sentinel -1 will be used for NaN values. If False,
  866. NaN values will be encoded as non-negative integers and will not drop the
  867. NaN from the uniques of the values.
  868. .. versionadded:: 1.5.0
  869. Returns
  870. -------
  871. codes : ndarray
  872. An integer NumPy array that's an indexer into the original
  873. ExtensionArray.
  874. uniques : ExtensionArray
  875. An ExtensionArray containing the unique values of `self`.
  876. .. note::
  877. uniques will *not* contain an entry for the NA value of
  878. the ExtensionArray if there are any missing values present
  879. in `self`.
  880. See Also
  881. --------
  882. factorize : Top-level factorize method that dispatches here.
  883. Notes
  884. -----
  885. :meth:`pandas.factorize` offers a `sort` keyword as well.
  886. """
  887. # Implementer note: There are two ways to override the behavior of
  888. # pandas.factorize
  889. # 1. _values_for_factorize and _from_factorize.
  890. # Specify the values passed to pandas' internal factorization
  891. # routines, and how to convert from those values back to the
  892. # original ExtensionArray.
  893. # 2. ExtensionArray.factorize.
  894. # Complete control over factorization.
  895. arr, na_value = self._values_for_factorize()
  896. codes, uniques = factorize_array(
  897. arr, use_na_sentinel=use_na_sentinel, na_value=na_value
  898. )
  899. uniques_ea = self._from_factorized(uniques, self)
  900. return codes, uniques_ea
    # Docstring template for ``repeat``. ``%(klass)s`` is a placeholder for the
    # concrete class name; the template is attached to ``repeat`` below through
    # the ``Substitution`` and ``Appender`` decorators.
    _extension_array_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.
        ExtensionArray.take : Take arbitrary positions.

        Examples
        --------
        >>> cat = pd.Categorical(['a', 'b', 'c'])
        >>> cat
        ['a', 'b', 'c']
        Categories (3, object): ['a', 'b', 'c']
        >>> cat.repeat(2)
        ['a', 'a', 'b', 'b', 'c', 'c']
        Categories (3, object): ['a', 'b', 'c']
        >>> cat.repeat([1, 2, 3])
        ['a', 'b', 'b', 'c', 'c', 'c']
        Categories (3, object): ['a', 'b', 'c']
        """
  939. @Substitution(klass="ExtensionArray")
  940. @Appender(_extension_array_shared_docs["repeat"])
  941. def repeat(
  942. self: ExtensionArrayT, repeats: int | Sequence[int], axis: AxisInt | None = None
  943. ) -> ExtensionArrayT:
  944. nv.validate_repeat((), {"axis": axis})
  945. ind = np.arange(len(self)).repeat(repeats)
  946. return self.take(ind)
  947. # ------------------------------------------------------------------------
  948. # Indexing methods
  949. # ------------------------------------------------------------------------
    def take(
        self: ExtensionArrayT,
        indices: TakeIndexer,
        *,
        allow_fill: bool = False,
        fill_value: Any = None,
    ) -> ExtensionArrayT:
        """
        Take elements from an array.

        This base implementation is abstract: it only raises
        ``AbstractMethodError``, so subclasses have to provide the behaviour
        described below.

        Parameters
        ----------
        indices : sequence of int or one-dimensional np.ndarray of int
            Indices to be taken.
        allow_fill : bool, default False
            How to handle negative values in `indices`.

            * False: negative values in `indices` indicate positional indices
              from the right (the default). This is similar to
              :func:`numpy.take`.

            * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any
              other negative values raise a ``ValueError``.

        fill_value : any, optional
            Fill value to use for NA-indices when `allow_fill` is True.
            This may be ``None``, in which case the default NA value for
            the type, ``self.dtype.na_value``, is used.

            For many ExtensionArrays, there will be two representations of
            `fill_value`: a user-facing "boxed" scalar, and a low-level
            physical NA value. `fill_value` should be the user-facing version,
            and the implementation should handle translating that to the
            physical version for processing the take if necessary.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        IndexError
            When the indices are out of bounds for the array.
        ValueError
            When `indices` contains negative values other than ``-1``
            and `allow_fill` is True.

        See Also
        --------
        numpy.take : Take elements from an array along an axis.
        api.extensions.take : Take elements from an array.

        Notes
        -----
        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
        ``iloc``, when `indices` is a sequence of values. Additionally,
        it's called by :meth:`Series.reindex`, or any other method
        that causes realignment, with a `fill_value`.

        Examples
        --------
        Here's an example implementation, which relies on casting the
        extension array to object dtype. This uses the helper method
        :func:`pandas.api.extensions.take`.

        .. code-block:: python

           def take(self, indices, allow_fill=False, fill_value=None):
               from pandas.core.algorithms import take

               # If the ExtensionArray is backed by an ndarray, then
               # just pass that here instead of coercing to object.
               data = self.astype(object)

               if allow_fill and fill_value is None:
                   fill_value = self.dtype.na_value

               # fill value should always be translated from the scalar
               # type for the array, to the physical storage type for
               # the data, before passing to take.

               result = take(data, indices, fill_value=fill_value,
                             allow_fill=allow_fill)
               return self._from_sequence(result, dtype=self.dtype)
        """
        # Implementer note: The `fill_value` parameter should be a user-facing
        # value, an instance of self.dtype.type. When passed `fill_value=None`,
        # the default of `self.dtype.na_value` should be used.
        # This may differ from the physical storage type your ExtensionArray
        # uses. In this case, your implementation is responsible for casting
        # the user-facing type to the storage type, before using
        # pandas.api.extensions.take
        raise AbstractMethodError(self)
    def copy(self: ExtensionArrayT) -> ExtensionArrayT:
        """
        Return a copy of the array.

        This base implementation is abstract and only raises
        ``AbstractMethodError``; subclasses must override it.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, unless overridden by a subclass.
        """
        raise AbstractMethodError(self)
  1036. def view(self, dtype: Dtype | None = None) -> ArrayLike:
  1037. """
  1038. Return a view on the array.
  1039. Parameters
  1040. ----------
  1041. dtype : str, np.dtype, or ExtensionDtype, optional
  1042. Default None.
  1043. Returns
  1044. -------
  1045. ExtensionArray or np.ndarray
  1046. A view on the :class:`ExtensionArray`'s data.
  1047. """
  1048. # NB:
  1049. # - This must return a *new* object referencing the same data, not self.
  1050. # - The only case that *must* be implemented is with dtype=None,
  1051. # giving a view with the same dtype as self.
  1052. if dtype is not None:
  1053. raise NotImplementedError(dtype)
  1054. return self[:]
  1055. # ------------------------------------------------------------------------
  1056. # Printing
  1057. # ------------------------------------------------------------------------
  1058. def __repr__(self) -> str:
  1059. if self.ndim > 1:
  1060. return self._repr_2d()
  1061. from pandas.io.formats.printing import format_object_summary
  1062. # the short repr has no trailing newline, while the truncated
  1063. # repr does. So we include a newline in our template, and strip
  1064. # any trailing newlines from format_object_summary
  1065. data = format_object_summary(
  1066. self, self._formatter(), indent_for_name=False
  1067. ).rstrip(", \n")
  1068. class_name = f"<{type(self).__name__}>\n"
  1069. return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
  1070. def _repr_2d(self) -> str:
  1071. from pandas.io.formats.printing import format_object_summary
  1072. # the short repr has no trailing newline, while the truncated
  1073. # repr does. So we include a newline in our template, and strip
  1074. # any trailing newlines from format_object_summary
  1075. lines = [
  1076. format_object_summary(x, self._formatter(), indent_for_name=False).rstrip(
  1077. ", \n"
  1078. )
  1079. for x in self
  1080. ]
  1081. data = ",\n".join(lines)
  1082. class_name = f"<{type(self).__name__}>"
  1083. return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
  1084. def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
  1085. """
  1086. Formatting function for scalar values.
  1087. This is used in the default '__repr__'. The returned formatting
  1088. function receives instances of your scalar type.
  1089. Parameters
  1090. ----------
  1091. boxed : bool, default False
  1092. An indicated for whether or not your array is being printed
  1093. within a Series, DataFrame, or Index (True), or just by
  1094. itself (False). This may be useful if you want scalar values
  1095. to appear differently within a Series versus on its own (e.g.
  1096. quoted or not).
  1097. Returns
  1098. -------
  1099. Callable[[Any], str]
  1100. A callable that gets instances of the scalar type and
  1101. returns a string. By default, :func:`repr` is used
  1102. when ``boxed=False`` and :func:`str` is used when
  1103. ``boxed=True``.
  1104. """
  1105. if boxed:
  1106. return str
  1107. return repr
  1108. # ------------------------------------------------------------------------
  1109. # Reshaping
  1110. # ------------------------------------------------------------------------
    def transpose(self, *axes: int) -> ExtensionArray:
        """
        Return a transposed view on this array.

        Because ExtensionArrays are always 1D, this is a no-op. It is included
        for compatibility with np.ndarray.

        Parameters
        ----------
        *axes : int
            Accepted but not used by this implementation.

        Returns
        -------
        ExtensionArray
            The result of the full slice ``self[:]``.
        """
        return self[:]
    @property
    def T(self) -> ExtensionArray:
        """
        Shorthand for ``self.transpose()`` called without arguments.
        """
        return self.transpose()
    def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
        """
        Return a flattened view on this array.

        Parameters
        ----------
        order : {None, 'C', 'F', 'A', 'K'}, default 'C'

        Returns
        -------
        ExtensionArray
            This implementation returns ``self`` itself, not a new object.

        Notes
        -----
        - Because ExtensionArrays are 1D-only, this is a no-op.
        - The "order" argument is ignored, is for compatibility with NumPy.
        """
        return self
    @classmethod
    def _concat_same_type(
        cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
    ) -> ExtensionArrayT:
        """
        Concatenate multiple array of this dtype.

        This base implementation is abstract and only raises
        ``AbstractMethodError``; subclasses must override it.

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, unless overridden by a subclass.
        """
        # Implementer note: this method will only be called with a sequence of
        # ExtensionArrays of this class and with the same dtype as self. This
        # should allow "easy" concatenation (no upcasting needed), and result
        # in a new ExtensionArray of the same dtype.
        # Note: this strict behaviour is only guaranteed starting with pandas 1.1
        raise AbstractMethodError(cls)
    # The _can_hold_na attribute is set to True so that pandas internals
    # will use the ExtensionDtype.na_value as the NA value in operations
    # such as take(), reindex(), shift(), etc.  In addition, those results
    # will then be of the ExtensionArray subclass rather than an array
    # of objects
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """
        Whether this array can hold NA values.

        The answer is read from ``self.dtype._can_hold_na``.
        """
        # NOTE(review): ``cache_readonly`` presumably computes this once per
        # instance and caches it - confirm against the decorator.
        return self.dtype._can_hold_na
    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> ExtensionArray:
        """
        Return an ExtensionArray performing an accumulation operation.

        The underlying data type might change.

        This base implementation does not perform any accumulation: it
        always raises ``NotImplementedError``, so subclasses that support
        accumulations must override it.

        Parameters
        ----------
        name : str
            Name of the function, supported values are:
            - cummin
            - cummax
            - cumsum
            - cumprod
        skipna : bool, default True
            If True, skip NA values.
        **kwargs
            Additional keyword arguments passed to the accumulation function.
            Currently, there is no supported kwarg.

        Returns
        -------
        array

        Raises
        ------
        NotImplementedError : subclass does not define accumulations
        """
        raise NotImplementedError(f"cannot perform {name} with type {self.dtype}")
  1190. def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
  1191. """
  1192. Return a scalar result of performing the reduction operation.
  1193. Parameters
  1194. ----------
  1195. name : str
  1196. Name of the function, supported values are:
  1197. { any, all, min, max, sum, mean, median, prod,
  1198. std, var, sem, kurt, skew }.
  1199. skipna : bool, default True
  1200. If True, skip NaN values.
  1201. **kwargs
  1202. Additional keyword arguments passed to the reduction function.
  1203. Currently, `ddof` is the only supported kwarg.
  1204. Returns
  1205. -------
  1206. scalar
  1207. Raises
  1208. ------
  1209. TypeError : subclass does not define reductions
  1210. """
  1211. meth = getattr(self, name, None)
  1212. if meth is None:
  1213. raise TypeError(
  1214. f"'{type(self).__name__}' with dtype {self.dtype} "
  1215. f"does not support reduction '{name}'"
  1216. )
  1217. return meth(skipna=skipna, **kwargs)
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    # This line is only a class-level annotation typing ``__hash__`` as None.
    # NOTE(review): presumably the arrays are meant to be unhashable; the
    # place where ``__hash__`` is actually set is not visible here - confirm.
    __hash__: ClassVar[None]  # type: ignore[assignment]
  1222. # ------------------------------------------------------------------------
  1223. # Non-Optimized Default Methods; in the case of the private methods here,
  1224. # these are not guaranteed to be stable across pandas versions.
  1225. def tolist(self) -> list:
  1226. """
  1227. Return a list of the values.
  1228. These are each a scalar type, which is a Python scalar
  1229. (for str, int, float) or a pandas scalar
  1230. (for Timestamp/Timedelta/Interval/Period)
  1231. Returns
  1232. -------
  1233. list
  1234. """
  1235. if self.ndim > 1:
  1236. return [x.tolist() for x in self]
  1237. return list(self)
  1238. def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT:
  1239. indexer = np.delete(np.arange(len(self)), loc)
  1240. return self.take(indexer)
  1241. def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT:
  1242. """
  1243. Insert an item at the given position.
  1244. Parameters
  1245. ----------
  1246. loc : int
  1247. item : scalar-like
  1248. Returns
  1249. -------
  1250. same type as self
  1251. Notes
  1252. -----
  1253. This method should be both type and dtype-preserving. If the item
  1254. cannot be held in an array of this type/dtype, either ValueError or
  1255. TypeError should be raised.
  1256. The default implementation relies on _from_sequence to raise on invalid
  1257. items.
  1258. """
  1259. loc = validate_insert_loc(loc, len(self))
  1260. item_arr = type(self)._from_sequence([item], dtype=self.dtype)
  1261. return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]])
  1262. def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
  1263. """
  1264. Analogue to np.putmask(self, mask, value)
  1265. Parameters
  1266. ----------
  1267. mask : np.ndarray[bool]
  1268. value : scalar or listlike
  1269. If listlike, must be arraylike with same length as self.
  1270. Returns
  1271. -------
  1272. None
  1273. Notes
  1274. -----
  1275. Unlike np.putmask, we do not repeat listlike values with mismatched length.
  1276. 'value' should either be a scalar or an arraylike with the same length
  1277. as self.
  1278. """
  1279. if is_list_like(value):
  1280. val = value[mask]
  1281. else:
  1282. val = value
  1283. self[mask] = val
  1284. def _where(
  1285. self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value
  1286. ) -> ExtensionArrayT:
  1287. """
  1288. Analogue to np.where(mask, self, value)
  1289. Parameters
  1290. ----------
  1291. mask : np.ndarray[bool]
  1292. value : scalar or listlike
  1293. Returns
  1294. -------
  1295. same type as self
  1296. """
  1297. result = self.copy()
  1298. if is_list_like(value):
  1299. val = value[~mask]
  1300. else:
  1301. val = value
  1302. result[~mask] = val
  1303. return result
  1304. def _fill_mask_inplace(
  1305. self, method: str, limit, mask: npt.NDArray[np.bool_]
  1306. ) -> None:
  1307. """
  1308. Replace values in locations specified by 'mask' using pad or backfill.
  1309. See also
  1310. --------
  1311. ExtensionArray.fillna
  1312. """
  1313. func = missing.get_fill_func(method)
  1314. npvalues = self.astype(object)
  1315. # NB: if we don't copy mask here, it may be altered inplace, which
  1316. # would mess up the `self[mask] = ...` below.
  1317. func(npvalues, limit=limit, mask=mask.copy())
  1318. new_values = self._from_sequence(npvalues, dtype=self.dtype)
  1319. self[mask] = new_values[mask]
  1320. def _rank(
  1321. self,
  1322. *,
  1323. axis: AxisInt = 0,
  1324. method: str = "average",
  1325. na_option: str = "keep",
  1326. ascending: bool = True,
  1327. pct: bool = False,
  1328. ):
  1329. """
  1330. See Series.rank.__doc__.
  1331. """
  1332. if axis != 0:
  1333. raise NotImplementedError
  1334. return rank(
  1335. self,
  1336. axis=axis,
  1337. method=method,
  1338. na_option=na_option,
  1339. ascending=ascending,
  1340. pct=pct,
  1341. )
  1342. @classmethod
  1343. def _empty(cls, shape: Shape, dtype: ExtensionDtype):
  1344. """
  1345. Create an ExtensionArray with the given shape and dtype.
  1346. See also
  1347. --------
  1348. ExtensionDtype.empty
  1349. ExtensionDtype.empty is the 'official' public version of this API.
  1350. """
  1351. # Implementer note: while ExtensionDtype.empty is the public way to
  1352. # call this method, it is still required to implement this `_empty`
  1353. # method as well (it is called internally in pandas)
  1354. obj = cls._from_sequence([], dtype=dtype)
  1355. taker = np.broadcast_to(np.intp(-1), shape)
  1356. result = obj.take(taker, allow_fill=True)
  1357. if not isinstance(result, cls) or dtype != result.dtype:
  1358. raise NotImplementedError(
  1359. f"Default 'empty' implementation is invalid for dtype='{dtype}'"
  1360. )
  1361. return result
  1362. def _quantile(
  1363. self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
  1364. ) -> ExtensionArrayT:
  1365. """
  1366. Compute the quantiles of self for each quantile in `qs`.
  1367. Parameters
  1368. ----------
  1369. qs : np.ndarray[float64]
  1370. interpolation: str
  1371. Returns
  1372. -------
  1373. same type as self
  1374. """
  1375. mask = np.asarray(self.isna())
  1376. arr = np.asarray(self)
  1377. fill_value = np.nan
  1378. res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
  1379. return type(self)._from_sequence(res_values)
  1380. def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
  1381. """
  1382. Returns the mode(s) of the ExtensionArray.
  1383. Always returns `ExtensionArray` even if only one value.
  1384. Parameters
  1385. ----------
  1386. dropna : bool, default True
  1387. Don't consider counts of NA values.
  1388. Returns
  1389. -------
  1390. same type as self
  1391. Sorted, if possible.
  1392. """
  1393. # error: Incompatible return value type (got "Union[ExtensionArray,
  1394. # ndarray[Any, Any]]", expected "ExtensionArrayT")
  1395. return mode(self, dropna=dropna) # type: ignore[return-value]
  1396. def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
  1397. if any(
  1398. isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
  1399. ):
  1400. return NotImplemented
  1401. result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
  1402. self, ufunc, method, *inputs, **kwargs
  1403. )
  1404. if result is not NotImplemented:
  1405. return result
  1406. if "out" in kwargs:
  1407. return arraylike.dispatch_ufunc_with_out(
  1408. self, ufunc, method, *inputs, **kwargs
  1409. )
  1410. if method == "reduce":
  1411. result = arraylike.dispatch_reduction_ufunc(
  1412. self, ufunc, method, *inputs, **kwargs
  1413. )
  1414. if result is not NotImplemented:
  1415. return result
  1416. return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
class ExtensionArraySupportsAnyAll(ExtensionArray):
    """
    ExtensionArray that declares ``any`` and ``all`` reductions.

    Both methods are abstract here: they only raise ``AbstractMethodError``
    and must be overridden by subclasses.
    """

    def any(self, *, skipna: bool = True) -> bool:
        """
        Abstract ``any`` reduction; this base version always raises.
        """
        raise AbstractMethodError(self)

    def all(self, *, skipna: bool = True) -> bool:
        """
        Abstract ``all`` reduction; this base version always raises.
        """
        raise AbstractMethodError(self)
  1422. class ExtensionOpsMixin:
  1423. """
  1424. A base class for linking the operators to their dunder names.
  1425. .. note::
  1426. You may want to set ``__array_priority__`` if you want your
  1427. implementation to be called when involved in binary operations
  1428. with NumPy arrays.
  1429. """
  1430. @classmethod
  1431. def _create_arithmetic_method(cls, op):
  1432. raise AbstractMethodError(cls)
  1433. @classmethod
  1434. def _add_arithmetic_ops(cls) -> None:
  1435. setattr(cls, "__add__", cls._create_arithmetic_method(operator.add))
  1436. setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd))
  1437. setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub))
  1438. setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub))
  1439. setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul))
  1440. setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul))
  1441. setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow))
  1442. setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow))
  1443. setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod))
  1444. setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod))
  1445. setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv))
  1446. setattr(
  1447. cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv)
  1448. )
  1449. setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv))
  1450. setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv))
  1451. setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod))
  1452. setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod))
  1453. @classmethod
  1454. def _create_comparison_method(cls, op):
  1455. raise AbstractMethodError(cls)
  1456. @classmethod
  1457. def _add_comparison_ops(cls) -> None:
  1458. setattr(cls, "__eq__", cls._create_comparison_method(operator.eq))
  1459. setattr(cls, "__ne__", cls._create_comparison_method(operator.ne))
  1460. setattr(cls, "__lt__", cls._create_comparison_method(operator.lt))
  1461. setattr(cls, "__gt__", cls._create_comparison_method(operator.gt))
  1462. setattr(cls, "__le__", cls._create_comparison_method(operator.le))
  1463. setattr(cls, "__ge__", cls._create_comparison_method(operator.ge))
  1464. @classmethod
  1465. def _create_logical_method(cls, op):
  1466. raise AbstractMethodError(cls)
  1467. @classmethod
  1468. def _add_logical_ops(cls) -> None:
  1469. setattr(cls, "__and__", cls._create_logical_method(operator.and_))
  1470. setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_))
  1471. setattr(cls, "__or__", cls._create_logical_method(operator.or_))
  1472. setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_))
  1473. setattr(cls, "__xor__", cls._create_logical_method(operator.xor))
  1474. setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor))
  class ExtensionScalarOpsMixin(ExtensionOpsMixin):
      """
      A mixin for defining ops on an ExtensionArray.

      It is assumed that the underlying scalar objects have the operators
      already defined.

      Notes
      -----
      If you have defined a subclass MyExtensionArray(ExtensionArray), then
      use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
      get the arithmetic operators. After the definition of MyExtensionArray,
      insert the lines

      MyExtensionArray._add_arithmetic_ops()
      MyExtensionArray._add_comparison_ops()

      to link the operators to your class.

      .. note::

         You may want to set ``__array_priority__`` if you want your
         implementation to be called when involved in binary operations
         with NumPy arrays.
      """

      @classmethod
      def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None):
          """
          A class method that returns a method that will correspond to an
          operator for an ExtensionArray subclass, by dispatching to the
          relevant operator defined on the individual elements of the
          ExtensionArray.

          Parameters
          ----------
          op : function
              An operator that takes arguments op(a, b)
          coerce_to_dtype : bool, default True
              boolean indicating whether to attempt to convert
              the result to the underlying ExtensionArray dtype.
              If it's not possible to create a new ExtensionArray with the
              values, an ndarray is returned instead.
          result_dtype : dtype, default None
              dtype handed to ``np.asarray`` when building the result while
              ``coerce_to_dtype`` is False. It is not used when
              ``coerce_to_dtype`` is True.

          Returns
          -------
          Callable[[Any, Any], Union[ndarray, ExtensionArray]]
              A method that can be bound to a class. When used, the method
              receives the two arguments, one of which is the instance of
              this class, and should return an ExtensionArray or an ndarray.

              Returning an ndarray may be necessary when the result of the
              `op` cannot be stored in the ExtensionArray. The dtype of the
              ndarray uses NumPy's normal inference rules.

              For ``divmod`` and ``rdivmod`` the method returns a 2-tuple
              (one converted result per element of each pair).

          Examples
          --------
          Given an ExtensionArray subclass called MyExtensionArray, use

              __add__ = cls._create_method(operator.add)

          in the class definition of MyExtensionArray to create the operator
          for addition, that will be based on the operator implementation
          of the underlying elements of the ExtensionArray
          """

          def _binop(self, other):
              def convert_values(param):
                  if isinstance(param, ExtensionArray) or is_list_like(param):
                      ovalues = param
                  else:  # Assume it's a scalar: repeat it once per element
                      ovalues = [param] * len(self)
                  return ovalues

              if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
                  # rely on pandas to unbox and dispatch to us
                  return NotImplemented

              lvalues = self
              rvalues = convert_values(other)

              # If the operator is not defined for the underlying objects,
              # a TypeError should be raised
              res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

              def _maybe_convert(arr):
                  if coerce_to_dtype:
                      # https://github.com/pandas-dev/pandas/issues/22850
                      # We catch all regular exceptions here, and fall back
                      # to an ndarray.
                      res = maybe_cast_to_extension_array(type(self), arr)
                      if not isinstance(res, type(self)):
                          # exception raised in _from_sequence; ensure we have
                          # ndarray
                          res = np.asarray(arr)
                  else:
                      res = np.asarray(arr, dtype=result_dtype)
                  return res

              if op.__name__ in {"divmod", "rdivmod"}:
                  # Each element of ``res`` is a pair; transpose into two
                  # sequences. zip(*[]) yields nothing to unpack, so an empty
                  # operand gets two explicit empty halves instead of raising
                  # ValueError.
                  a, b = zip(*res) if res else ((), ())
                  return _maybe_convert(a), _maybe_convert(b)

              return _maybe_convert(res)

          op_name = f"__{op.__name__}__"
          return set_function_name(_binop, op_name, cls)

      @classmethod
      def _create_arithmetic_method(cls, op):
          """Return the element-wise method for ``op``, coerced to this array type."""
          return cls._create_method(op)

      @classmethod
      def _create_comparison_method(cls, op):
          """Return the element-wise method for ``op``, producing a bool ndarray."""
          return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)