- """
- Methods that can be shared by many array-like classes or subclasses:
- Series
- Index
- ExtensionArray
- """
- from __future__ import annotations
- import operator
- from typing import Any
- import numpy as np
- from pandas._libs import lib
- from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
- from pandas.core.dtypes.generic import ABCNDFrame
- from pandas.core import roperator
- from pandas.core.construction import extract_array
- from pandas.core.ops.common import unpack_zerodim_and_defer
- REDUCTION_ALIASES = {
- "maximum": "max",
- "minimum": "min",
- "add": "sum",
- "multiply": "prod",
- }
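- # The mapping above translates ufunc names into the corresponding pandas
- # reduction methods; it is consumed by dispatch_reduction_ufunc at the
- # bottom of this module.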
- class OpsMixin:
- # -------------------------------------------------------------
- # Comparisons
- def _cmp_method(self, other, op):
- return NotImplemented
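- # Hypothetical sketch (not part of pandas): a subclass supplies the actual
- # comparison logic by overriding the hook above; _logical_method and
- # _arith_method below follow the same pattern.
- #
- #     class MyArray(OpsMixin):
- #         def _cmp_method(self, other, op):
- #             # op is e.g. operator.eq; `other` has already been unpacked
- #             # by unpack_zerodim_and_defer
- #             return op(np.asarray(self), np.asarray(other))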
- @unpack_zerodim_and_defer("__eq__")
- def __eq__(self, other):
- return self._cmp_method(other, operator.eq)
- @unpack_zerodim_and_defer("__ne__")
- def __ne__(self, other):
- return self._cmp_method(other, operator.ne)
- @unpack_zerodim_and_defer("__lt__")
- def __lt__(self, other):
- return self._cmp_method(other, operator.lt)
- @unpack_zerodim_and_defer("__le__")
- def __le__(self, other):
- return self._cmp_method(other, operator.le)
- @unpack_zerodim_and_defer("__gt__")
- def __gt__(self, other):
- return self._cmp_method(other, operator.gt)
- @unpack_zerodim_and_defer("__ge__")
- def __ge__(self, other):
- return self._cmp_method(other, operator.ge)
- # -------------------------------------------------------------
- # Logical Methods
- def _logical_method(self, other, op):
- return NotImplemented
- @unpack_zerodim_and_defer("__and__")
- def __and__(self, other):
- return self._logical_method(other, operator.and_)
- @unpack_zerodim_and_defer("__rand__")
- def __rand__(self, other):
- return self._logical_method(other, roperator.rand_)
- @unpack_zerodim_and_defer("__or__")
- def __or__(self, other):
- return self._logical_method(other, operator.or_)
- @unpack_zerodim_and_defer("__ror__")
- def __ror__(self, other):
- return self._logical_method(other, roperator.ror_)
- @unpack_zerodim_and_defer("__xor__")
- def __xor__(self, other):
- return self._logical_method(other, operator.xor)
- @unpack_zerodim_and_defer("__rxor__")
- def __rxor__(self, other):
- return self._logical_method(other, roperator.rxor)
- # -------------------------------------------------------------
- # Arithmetic Methods
- def _arith_method(self, other, op):
- return NotImplemented
- @unpack_zerodim_and_defer("__add__")
- def __add__(self, other):
- """
- Get addition of DataFrame and other, column-wise.
- Equivalent to ``DataFrame.add(other)``.
- Parameters
- ----------
- other : scalar, sequence, Series, dict or DataFrame
- Object to be added to the DataFrame.
- Returns
- -------
- DataFrame
- The result of adding ``other`` to DataFrame.
- See Also
- --------
- DataFrame.add : Add a DataFrame and another object, with option for index-
- or column-oriented addition.
- Examples
- --------
- >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
- ... index=['elk', 'moose'])
- >>> df
- height weight
- elk 1.5 500
- moose 2.6 800
- Adding a scalar affects all rows and columns.
- >>> df[['height', 'weight']] + 1.5
- height weight
- elk 3.0 501.5
- moose 4.1 801.5
- Each element of a list is added to a column of the DataFrame, in order.
- >>> df[['height', 'weight']] + [0.5, 1.5]
- height weight
- elk 2.0 501.5
- moose 3.1 801.5
- Keys of a dictionary are aligned to the DataFrame, based on column names;
- each value in the dictionary is added to the corresponding column.
- >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
- height weight
- elk 2.0 501.5
- moose 3.1 801.5
- When `other` is a :class:`Series`, the index of `other` is aligned with the
- columns of the DataFrame.
- >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
- >>> df[['height', 'weight']] + s1
- height weight
- elk 3.0 500.5
- moose 4.1 800.5
- Even when the index of `other` is the same as the index of the DataFrame,
- the :class:`Series` will not be reoriented. If index-wise alignment is desired,
- :meth:`DataFrame.add` should be used with `axis='index'`.
- >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
- >>> df[['height', 'weight']] + s2
- elk height moose weight
- elk NaN NaN NaN NaN
- moose NaN NaN NaN NaN
- >>> df[['height', 'weight']].add(s2, axis='index')
- height weight
- elk 2.0 500.5
- moose 4.1 801.5
- When `other` is a :class:`DataFrame`, both column names and the
- index are aligned.
- >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
- ... index=['elk', 'moose', 'deer'])
- >>> df[['height', 'weight']] + other
- height weight
- deer NaN NaN
- elk 1.7 NaN
- moose 3.0 NaN
- """
- return self._arith_method(other, operator.add)
- @unpack_zerodim_and_defer("__radd__")
- def __radd__(self, other):
- return self._arith_method(other, roperator.radd)
- @unpack_zerodim_and_defer("__sub__")
- def __sub__(self, other):
- return self._arith_method(other, operator.sub)
- @unpack_zerodim_and_defer("__rsub__")
- def __rsub__(self, other):
- return self._arith_method(other, roperator.rsub)
- @unpack_zerodim_and_defer("__mul__")
- def __mul__(self, other):
- return self._arith_method(other, operator.mul)
- @unpack_zerodim_and_defer("__rmul__")
- def __rmul__(self, other):
- return self._arith_method(other, roperator.rmul)
- @unpack_zerodim_and_defer("__truediv__")
- def __truediv__(self, other):
- return self._arith_method(other, operator.truediv)
- @unpack_zerodim_and_defer("__rtruediv__")
- def __rtruediv__(self, other):
- return self._arith_method(other, roperator.rtruediv)
- @unpack_zerodim_and_defer("__floordiv__")
- def __floordiv__(self, other):
- return self._arith_method(other, operator.floordiv)
- @unpack_zerodim_and_defer("__rfloordiv")
- def __rfloordiv__(self, other):
- return self._arith_method(other, roperator.rfloordiv)
- @unpack_zerodim_and_defer("__mod__")
- def __mod__(self, other):
- return self._arith_method(other, operator.mod)
- @unpack_zerodim_and_defer("__rmod__")
- def __rmod__(self, other):
- return self._arith_method(other, roperator.rmod)
- @unpack_zerodim_and_defer("__divmod__")
- def __divmod__(self, other):
- return self._arith_method(other, divmod)
- @unpack_zerodim_and_defer("__rdivmod__")
- def __rdivmod__(self, other):
- return self._arith_method(other, roperator.rdivmod)
- @unpack_zerodim_and_defer("__pow__")
- def __pow__(self, other):
- return self._arith_method(other, operator.pow)
- @unpack_zerodim_and_defer("__rpow__")
- def __rpow__(self, other):
- return self._arith_method(other, roperator.rpow)
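- # Illustrative note (not executed): each arithmetic dunder above simply
- # defers to the _arith_method hook, so for a Series subclass implementing
- # that hook, ``ser + other`` resolves to
- # ``ser._arith_method(other, operator.add)``.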
- # -----------------------------------------------------------------------------
- # Helpers to implement __array_ufunc__
- def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any):
- """
- Compatibility with numpy ufuncs.
- See also
- --------
- numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
- """
- from pandas.core.frame import (
- DataFrame,
- Series,
- )
- from pandas.core.generic import NDFrame
- from pandas.core.internals import BlockManager
- cls = type(self)
- kwargs = _standardize_out_kwarg(**kwargs)
- # for binary ops, use our custom dunder methods
- result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
- if result is not NotImplemented:
- return result
- # Determine if we should defer.
- no_defer = (
- np.ndarray.__array_ufunc__,
- cls.__array_ufunc__,
- )
- for item in inputs:
- higher_priority = (
- hasattr(item, "__array_priority__")
- and item.__array_priority__ > self.__array_priority__
- )
- has_array_ufunc = (
- hasattr(item, "__array_ufunc__")
- and type(item).__array_ufunc__ not in no_defer
- and not isinstance(item, self._HANDLED_TYPES)
- )
- if higher_priority or has_array_ufunc:
- return NotImplemented
- # align all the inputs.
- types = tuple(type(x) for x in inputs)
- alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
- if len(alignable) > 1:
- # This triggers alignment.
- # At the moment, there aren't any ufuncs with more than two inputs
- # so this ends up just being x1.index | x2.index, but we write
- # it to handle *args.
- set_types = set(types)
- if len(set_types) > 1 and {DataFrame, Series}.issubset(set_types):
- # We currently don't handle ufunc(DataFrame, Series)
- # well. Previously this raised an internal ValueError. We might
- # support it someday, so raise a NotImplementedError.
- raise NotImplementedError(
- f"Cannot apply ufunc {ufunc} to mixed DataFrame and Series inputs."
- )
- axes = self.axes
- for obj in alignable[1:]:
- # this relies on the fact that we aren't handling mixed
- # series / frame ufuncs.
- for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
- axes[i] = ax1.union(ax2)
- reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
- inputs = tuple(
- x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
- for x, t in zip(inputs, types)
- )
- else:
- reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
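- # e.g. (illustrative, not executed): np.logaddexp on two Series indexed
- # ["a", "b"] and ["b", "c"] reindexes both operands to the union index
- # ["a", "b", "c"] above, so non-overlapping labels produce NaN in the result.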
- if self.ndim == 1:
- names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
- name = names[0] if len(set(names)) == 1 else None
- reconstruct_kwargs = {"name": name}
- else:
- reconstruct_kwargs = {}
- def reconstruct(result):
- if ufunc.nout > 1:
- # np.modf, np.frexp, np.divmod
- return tuple(_reconstruct(x) for x in result)
- return _reconstruct(result)
- def _reconstruct(result):
- if lib.is_scalar(result):
- return result
- if result.ndim != self.ndim:
- if method == "outer":
- raise NotImplementedError
- return result
- if isinstance(result, BlockManager):
- # we went through BlockManager.apply e.g. np.sqrt
- result = self._constructor(result, **reconstruct_kwargs, copy=False)
- else:
- # we converted an array, lost our axes
- result = self._constructor(
- result, **reconstruct_axes, **reconstruct_kwargs, copy=False
- )
- # TODO: When we support multiple values in __finalize__, this
- # should pass alignable to `__finalize__` instead of self.
- # Then `np.add(a, b)` would consider attrs from both a and b
- # when a and b are NDFrames.
- if len(alignable) == 1:
- result = result.__finalize__(self)
- return result
- if "out" in kwargs:
- # e.g. test_multiindex_get_loc
- result = dispatch_ufunc_with_out(self, ufunc, method, *inputs, **kwargs)
- return reconstruct(result)
- if method == "reduce":
- # e.g. test.series.test_ufunc.test_reduce
- result = dispatch_reduction_ufunc(self, ufunc, method, *inputs, **kwargs)
- if result is not NotImplemented:
- return result
- # We still get here with kwargs `axis` for e.g. np.maximum.accumulate
- # and `dtype` and `keepdims` for np.ptp
- if self.ndim > 1 and (len(inputs) > 1 or ufunc.nout > 1):
- # Just give up on preserving types in the complex case.
- # In theory we could preserve them in these cases:
- # * nout>1 is doable if BlockManager.apply took nout and
- # returned a Tuple[BlockManager].
- # * len(inputs) > 1 is doable when we know that we have
- # aligned blocks / dtypes.
- # e.g. my_ufunc, modf, logaddexp, heaviside, subtract, add
- inputs = tuple(np.asarray(x) for x in inputs)
- # Note: we can't use default_array_ufunc here because reindexing means
- # that `self` may not be among `inputs`
- result = getattr(ufunc, method)(*inputs, **kwargs)
- elif self.ndim == 1:
- # ufunc(series, ...)
- inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
- result = getattr(ufunc, method)(*inputs, **kwargs)
- else:
- # ufunc(dataframe)
- if method == "__call__" and not kwargs:
- # for np.<ufunc>(..) calls
- # kwargs cannot necessarily be handled block-by-block, so only
- # take this path if there are no kwargs
- mgr = inputs[0]._mgr
- result = mgr.apply(getattr(ufunc, method))
- else:
- # otherwise specific ufunc methods (eg np.<ufunc>.accumulate(..))
- # Those can have an axis keyword and thus can't be called block-by-block
- result = default_array_ufunc(inputs[0], ufunc, method, *inputs, **kwargs)
- # e.g. np.negative (only one reached), with "where" and "out" in kwargs
- result = reconstruct(result)
- return result
- def _standardize_out_kwarg(**kwargs) -> dict:
- """
- If kwargs contain "out1" and "out2", replace that with a tuple "out"
- np.divmod, np.modf, np.frexp can have either `out=(out1, out2)` or
- `out1=out1, out2=out2)`
- """
- if "out" not in kwargs and "out1" in kwargs and "out2" in kwargs:
- out1 = kwargs.pop("out1")
- out2 = kwargs.pop("out2")
- out = (out1, out2)
- kwargs["out"] = out
- return kwargs
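- # Illustrative usage of the helper above (not executed):
- #   _standardize_out_kwarg(out1=a, out2=b)  -> {"out": (a, b)}
- #   _standardize_out_kwarg(out=(a, b))      -> {"out": (a, b)}  (unchanged)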
- def dispatch_ufunc_with_out(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
- """
- If we have an `out` keyword, then call the ufunc without `out` and then
- set the result into the given `out`.
- """
- # Note: we assume _standardize_out_kwarg has already been called.
- out = kwargs.pop("out")
- where = kwargs.pop("where", None)
- result = getattr(ufunc, method)(*inputs, **kwargs)
- if result is NotImplemented:
- return NotImplemented
- if isinstance(result, tuple):
- # i.e. np.divmod, np.modf, np.frexp
- if not isinstance(out, tuple) or len(out) != len(result):
- raise NotImplementedError
- for arr, res in zip(out, result):
- _assign_where(arr, res, where)
- return out
- if isinstance(out, tuple):
- if len(out) == 1:
- out = out[0]
- else:
- raise NotImplementedError
- _assign_where(out, result, where)
- return out
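- # e.g. (illustrative, not executed): np.add(ser, 1, out=arr) reaches this
- # helper from array_ufunc; the ufunc is evaluated without ``out`` and the
- # result is then written into ``arr`` via _assign_where below.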
- def _assign_where(out, result, where) -> None:
- """
- Set a ufunc result into 'out', masking with a 'where' argument if necessary.
- """
- if where is None:
- # no 'where' arg passed to ufunc
- out[:] = result
- else:
- np.putmask(out, where, result)
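- # Illustrative behavior (not executed): with where=None the result is copied
- # into ``out`` wholesale; with a boolean mask such as
- #   _assign_where(out, result, where=np.array([True, False]))
- # np.putmask only overwrites the positions where the mask is True.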
- def default_array_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
- """
- Fallback to the behavior we would get if we did not define __array_ufunc__.
- Notes
- -----
- We are assuming that `self` is among `inputs`.
- """
- if not any(x is self for x in inputs):
- raise NotImplementedError
- new_inputs = [x if x is not self else np.asarray(x) for x in inputs]
- return getattr(ufunc, method)(*new_inputs, **kwargs)
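- # Illustrative note (not executed): replacing ``self`` with ``np.asarray(self)``
- # and re-invoking the ufunc reproduces what NumPy would do if
- # __array_ufunc__ were not defined, e.g.
- #   default_array_ufunc(ser, np.sqrt, "__call__", ser)
- # returns a plain ndarray rather than a Series.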
- def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
- """
- Dispatch ufunc reductions to self's reduction methods.
- """
- assert method == "reduce"
- if len(inputs) != 1 or inputs[0] is not self:
- return NotImplemented
- if ufunc.__name__ not in REDUCTION_ALIASES:
- return NotImplemented
- method_name = REDUCTION_ALIASES[ufunc.__name__]
- # NB: we are assuming that min/max represent minimum/maximum methods,
- # which would not be accurate for e.g. Timestamp.min
- if not hasattr(self, method_name):
- return NotImplemented
- if self.ndim > 1:
- if isinstance(self, ABCNDFrame):
- # TODO: test cases where this doesn't hold, i.e. 2D DTA/TDA
- kwargs["numeric_only"] = False
- if "axis" not in kwargs:
- # For DataFrame reductions we don't want the default axis=0
- # Note: np.min is not a ufunc, but uses array_function_dispatch,
- # so calls DataFrame.min (without ever getting here) with the np.min
- # default of axis=None, which DataFrame.min catches and changes to axis=0.
- # np.minimum.reduce(df) gets here because axis is not in kwargs,
- # so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
- kwargs["axis"] = 0
- # By default, numpy's reductions do not skip NaNs, so we have to
- # pass skipna=False
- return getattr(self, method_name)(skipna=False, **kwargs)
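- # Illustrative examples of the reduction dispatch above (not executed):
- #   np.add.reduce(ser)     -> ser.sum(skipna=False)
- #   np.maximum.reduce(df)  -> df.max(axis=0, skipna=False, numeric_only=False)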
|