arraylike.py

  1. """
  2. Methods that can be shared by many array-like classes or subclasses:
  3. Series
  4. Index
  5. ExtensionArray
  6. """
  7. from __future__ import annotations
  8. import operator
  9. from typing import Any
  10. import numpy as np
  11. from pandas._libs import lib
  12. from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
  13. from pandas.core.dtypes.generic import ABCNDFrame
  14. from pandas.core import roperator
  15. from pandas.core.construction import extract_array
  16. from pandas.core.ops.common import unpack_zerodim_and_defer
  17. REDUCTION_ALIASES = {
  18. "maximum": "max",
  19. "minimum": "min",
  20. "add": "sum",
  21. "multiply": "prod",
  22. }
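
# Maps a ufunc's __name__ to the pandas reduction method it is forwarded to by
# dispatch_reduction_ufunc below, e.g. np.maximum.reduce -> .max().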


class OpsMixin:
    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)
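
    # Illustrative sketch: a subclass only implements the _cmp_method hook and
    # the dunders above funnel every comparison through it. A hypothetical
    # MyArray(OpsMixin) holding its values in a `_data` ndarray could define
    #
    #     def _cmp_method(self, other, op):
    #         return op(self._data, other)
    #
    # so that `arr == 3` resolves to `arr._cmp_method(3, operator.eq)`.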

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        """
        Get Addition of DataFrame and other, column-wise.

        Equivalent to ``DataFrame.add(other)``.

        Parameters
        ----------
        other : scalar, sequence, Series, dict or DataFrame
            Object to be added to the DataFrame.

        Returns
        -------
        DataFrame
            The result of adding ``other`` to DataFrame.

        See Also
        --------
        DataFrame.add : Add a DataFrame and another object, with option for index-
            or column-oriented addition.

        Examples
        --------
        >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
        ...                   index=['elk', 'moose'])
        >>> df
               height  weight
        elk       1.5     500
        moose     2.6     800

        Adding a scalar affects all rows and columns.

        >>> df[['height', 'weight']] + 1.5
               height  weight
        elk       3.0   501.5
        moose     4.1   801.5

        Each element of a list is added to a column of the DataFrame, in order.

        >>> df[['height', 'weight']] + [0.5, 1.5]
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        Keys of a dictionary are aligned to the DataFrame, based on column names;
        each value in the dictionary is added to the corresponding column.

        >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
               height  weight
        elk       2.0   501.5
        moose     3.1   801.5

        When `other` is a :class:`Series`, the index of `other` is aligned with the
        columns of the DataFrame.

        >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
        >>> df[['height', 'weight']] + s1
               height  weight
        elk       3.0   500.5
        moose     4.1   800.5

        Even when the index of `other` is the same as the index of the DataFrame,
        the :class:`Series` will not be reoriented. If index-wise alignment is desired,
        :meth:`DataFrame.add` should be used with `axis='index'`.

        >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
        >>> df[['height', 'weight']] + s2
               elk  height  moose  weight
        elk    NaN     NaN    NaN     NaN
        moose  NaN     NaN    NaN     NaN

        >>> df[['height', 'weight']].add(s2, axis='index')
               height  weight
        elk       2.0   500.5
        moose     4.1   801.5

        When `other` is a :class:`DataFrame`, both column names and the
        index are aligned.

        >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
        ...                      index=['elk', 'moose', 'deer'])
        >>> df[['height', 'weight']] + other
               height  weight
        deer      NaN     NaN
        elk       1.7     NaN
        moose     3.0     NaN
        """
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)


# -----------------------------------------------------------------------------
# Helpers to implement __array_ufunc__


def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
    """
    Compatibility with numpy ufuncs.

    See also
    --------
    numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
    """
    from pandas.core.frame import (
        DataFrame,
        Series,
    )
    from pandas.core.generic import NDFrame
    from pandas.core.internals import BlockManager

    cls = type(self)

    kwargs = _standardize_out_kwarg(**kwargs)

    # for binary ops, use our custom dunder methods
    result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
    if result is not NotImplemented:
        return result

    # Determine if we should defer.
    no_defer = (
        np.ndarray.__array_ufunc__,
        cls.__array_ufunc__,
    )

    for item in inputs:
        higher_priority = (
            hasattr(item, "__array_priority__")
            and item.__array_priority__ > self.__array_priority__
        )
        has_array_ufunc = (
            hasattr(item, "__array_ufunc__")
            and type(item).__array_ufunc__ not in no_defer
            and not isinstance(item, self._HANDLED_TYPES)
        )
        if higher_priority or has_array_ufunc:
            return NotImplemented

    # align all the inputs.
    types = tuple(type(x) for x in inputs)
    alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]

    if len(alignable) > 1:
        # This triggers alignment.
        # At the moment, there aren't any ufuncs with more than two inputs
        # so this ends up just being x1.index | x2.index, but we write
        # it to handle *args.
        set_types = set(types)
        if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
            # We currently don't handle ufunc(DataFrame, Series)
            # well. Previously this raised an internal ValueError. We might
            # support it someday, so raise a NotImplementedError.
            raise NotImplementedError(
                f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
            )
        axes = self.axes
        for obj in alignable[1:]:
            # this relies on the fact that we aren't handling mixed
            # series / frame ufuncs.
            for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
                axes[i] = ax1.union(ax2)

        reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
        inputs = tuple(
            x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
            for x, t in zip(inputs, types)
        )
    else:
        reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))

    if self.ndim == 1:
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        name = names[0] if len(set(names)) == 1 else None
        reconstruct_kwargs = {"name": name}
    else:
        reconstruct_kwargs = {}

    def reconstruct(result):
        if ufunc.nout > 1:
            # np.modf, np.frexp, np.divmod
            return tuple(_reconstruct(x) for x in result)

        return _reconstruct(result)

    def _reconstruct(result):
        if lib.is_scalar(result):
            return result

        if result.ndim != self.ndim:
            if method == "outer":
                raise NotImplementedError
            return result
        if isinstance(result, BlockManager):
            # we went through BlockManager.apply e.g. np.sqrt
            result = self._constructor(result, **reconstruct_kwargs, copy=False)
        else:
            # we converted an array, lost our axes
            result = self._constructor(
                result, **reconstruct_axes, **reconstruct_kwargs, copy=False
            )
        # TODO: When we support multiple values in __finalize__, this
        # should pass alignable to `__finalize__` instead of self.
        # Then `np.add(a, b)` would consider attrs from both a and b
        # when a and b are NDFrames.
        if len(alignable) == 1:
            result = result.__finalize__(self)
        return result

    if "out" in kwargs:
        # e.g. test_multiindex_get_loc
        result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
        return reconstruct(result)

    if method == "reduce":
        # e.g. test.series.test_ufunc.test_reduce
        result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
        if result is not NotImplemented:
            return result

    # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
    # and `dtype` and `keepdims` for np.ptp

    if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
        # Just give up on preserving types in the complex case.
        # In theory we could preserve them in these cases:
        #  * nout>1 is doable if BlockManager.apply took nout and
        #    returned a Tuple[BlockManager].
        #  * len(inputs) > 1 is doable when we know that we have
        #    aligned blocks / dtypes.

        # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
        inputs = tuple(np.asarray(x) for x in inputs)
        # Note: we can't use default_array_ufunc here bc reindexing means
        #  that `self` may not be among `inputs`
        result = getattr(ufunc, method)(*inputs, **kwargs)
    elif self.ndim == 1:
        # ufunc(series, ...)
        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)
    else:
        # ufunc(dataframe)
        if method == "__call__" and not kwargs:
            # for np.<ufunc>(..) calls
            # kwargs cannot necessarily be handled block-by-block, so only
            # take this path if there are no kwargs
            mgr = inputs[0]._mgr
            result = mgr.apply(getattr(ufunc, method))
        else:
            # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
            # Those can have an axis keyword and thus can't be called block-by-block
            result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
            # e.g. np.negative (only one reached), with "where" and "out" in kwargs

    result = reconstruct(result)
    return result
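
# Usage sketch (assuming Series `ser`, `s1`, `s2` and a DataFrame `df`):
# NDFrame.__array_ufunc__ delegates here, so that, roughly,
#   np.sqrt(ser)    -> ufunc applied to the extracted array, rewrapped as a Series
#   np.sqrt(df)     -> applied block-by-block via BlockManager.apply
#   np.add(s1, s2)  -> routed to s1.__add__(s2) by maybe_dispatch_ufunc_to_dunder_op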


def _standardize_out_kwarg(**kwargs) -> dict:
    """
    If kwargs contain "out1" and "out2", replace that with a tuple "out".

    np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
    `out1=out1, out2=out2`.
    """
    if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
        out1 = kwargs.pop("out1")
        out2 = kwargs.pop("out2")
        out = (out1, out2)
        kwargs["out"] = out
    return kwargs
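
# e.g., roughly: _standardize_out_kwarg(out1=a, out2=b) returns {"out": (a, b)},
# while kwargs that already contain "out" are passed through unchanged.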


def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    If we have an `out` keyword, then call the ufunc without `out` and then
    set the result into the given `out`.
    """

    # Note: we assume _standardize_out_kwarg has already been called.
    out = kwargs.pop("out")
    where = kwargs.pop("where", None)

    result = getattr(ufunc, method)(*inputs, **kwargs)

    if result is NotImplemented:
        return NotImplemented

    if isinstance(result, tuple):
        # i.e. np.divmod, np.modf, np.frexp
        if not isinstance(out, tuple) or len(out) != len(result):
            raise NotImplementedError

        for arr, res in zip(out, result):
            _assign_where(arr, res, where)

        return out

    if isinstance(out, tuple):
        if len(out) == 1:
            out = out[0]
        else:
            raise NotImplementedError

    _assign_where(out, result, where)
    return out
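
# Usage sketch: for a call like np.add(ser, 1, out=arr) the ufunc is evaluated
# without `out`, and the result is then written into `arr` by _assign_where
# (masked by `where` if one was passed).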


def _assign_where(out, result, where) -> None:
    """
    Set a ufunc result into 'out', masking with a 'where' argument if necessary.
    """
    if where is None:
        # no 'where' arg passed to ufunc
        out[:] = result
    else:
        np.putmask(out, where, result)
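
# e.g. with where=np.array([True, False]) only the first position of `out` is
# overwritten (via np.putmask); with where=None the whole of `out` is assigned.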


def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Fallback to the behavior we would get if we did not define __array_ufunc__.

    Notes
    -----
    We are assuming that `self` is among `inputs`.
    """
    if not any(x is self for x in inputs):
        raise NotImplementedError

    new_inputs = [x if x is not self else np.asarray(x) for x in inputs]

    return getattr(ufunc, method)(*new_inputs, **kwargs)
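
# Illustrative sketch: with `self` swapped for a plain ndarray, numpy's regular
# dispatch takes over, so default_array_ufunc(ser, np.add, "__call__", ser, 1)
# behaves like np.add(np.asarray(ser), 1).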


def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Dispatch ufunc reductions to self's reduction methods.
    """
    assert method == "reduce"

    if len(inputs) != 1 or inputs[0] is not self:
        return NotImplemented

    if ufunc.__name__ not in REDUCTION_ALIASES:
        return NotImplemented

    method_name = REDUCTION_ALIASES[ufunc.__name__]

    # NB: we are assuming that min/max represent minimum/maximum methods,
    #  which would not be accurate for e.g. Timestamp.min
    if not hasattr(self, method_name):
        return NotImplemented

    if self.ndim > 1:
        if isinstance(self, ABCNDFrame):
            # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
            kwargs["numeric_only"] = False

        if "axis" not in kwargs:
            # For DataFrame reductions we don't want the default axis=0
            # Note: np.min is not a ufunc, but uses array_function_dispatch,
            #  so calls DataFrame.min (without ever getting here) with the np.min
            #  default of axis=None, which DataFrame.min catches and changes to axis=0.
            # np.minimum.reduce(df) gets here bc axis is not in kwargs,
            #  so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
            kwargs["axis"] = 0

    # By default, numpy's reductions do not skip NaNs, so we have to
    #  pass skipna=False
    return getattr(self, method_name)(skipna=False, **kwargs)
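
# Usage sketch: np.maximum.reduce(df) arrives here with method="reduce" and is
# forwarded, roughly, as df.max(axis=0, skipna=False, numeric_only=False), while
# np.add.reduce(ser) becomes ser.sum(skipna=False).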