123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
- """
- Missing data handling for arithmetic operations.
- In particular, pandas conventions regarding division by zero differ
- from numpy in the following ways:
- 1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
- gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
- the remaining pairs
- (the remaining being dtype1==dtype2==intN and dtype==dtype2==uintN).
- pandas convention is to return [-inf, nan, inf] for all dtype
- combinations.
- Note: the numpy behavior described here is py3-specific.
- 2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
- gives precisely the same results as the // operation.
- pandas convention is to return [nan, nan, nan] for all dtype
- combinations.
- 3) divmod behavior consistent with 1) and 2).
- """
- from __future__ import annotations
- import operator
- import numpy as np
- from pandas.core.dtypes.common import (
- is_float_dtype,
- is_integer_dtype,
- is_scalar,
- )
- from pandas.core.ops import roperator
- def _fill_zeros(result, x, y):
- """
- If this is a reversed op, then flip x,y
- If we have an integer value (or array in y)
- and we have 0's, fill them with np.nan,
- return the result.
- Mask the nan's from x.
- """
- if is_float_dtype(result.dtype):
- return result
- is_variable_type = hasattr(y, "dtype")
- is_scalar_type = is_scalar(y)
- if not is_variable_type and not is_scalar_type:
- return result
- if is_scalar_type:
- y = np.array(y)
- if is_integer_dtype(y.dtype):
- ymask = y == 0
- if ymask.any():
- # GH#7325, mask and nans must be broadcastable
- mask = ymask & ~np.isnan(result)
- # GH#9308 doing ravel on result and mask can improve putmask perf,
- # but can also make unwanted copies.
- result = result.astype("float64", copy=False)
- np.putmask(result, mask, np.nan)
- return result
- def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray:
- """
- Set results of 0 // 0 to np.nan, regardless of the dtypes
- of the numerator or the denominator.
- Parameters
- ----------
- x : ndarray
- y : ndarray
- result : ndarray
- Returns
- -------
- ndarray
- The filled result.
- Examples
- --------
- >>> x = np.array([1, 0, -1], dtype=np.int64)
- >>> x
- array([ 1, 0, -1])
- >>> y = 0 # int 0; numpy behavior is different with float
- >>> result = x // y
- >>> result # raw numpy result does not fill division by zero
- array([0, 0, 0])
- >>> mask_zero_div_zero(x, y, result)
- array([ inf, nan, -inf])
- """
- if not hasattr(y, "dtype"):
- # e.g. scalar, tuple
- y = np.array(y)
- if not hasattr(x, "dtype"):
- # e.g scalar, tuple
- x = np.array(x)
- zmask = y == 0
- if zmask.any():
- # Flip sign if necessary for -0.0
- zneg_mask = zmask & np.signbit(y)
- zpos_mask = zmask & ~zneg_mask
- x_lt0 = x < 0
- x_gt0 = x > 0
- nan_mask = zmask & (x == 0)
- with np.errstate(invalid="ignore"):
- neginf_mask = (zpos_mask & x_lt0) | (zneg_mask & x_gt0)
- posinf_mask = (zpos_mask & x_gt0) | (zneg_mask & x_lt0)
- if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
- # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
- result = result.astype("float64", copy=False)
- result[nan_mask] = np.nan
- result[posinf_mask] = np.inf
- result[neginf_mask] = -np.inf
- return result
- def dispatch_fill_zeros(op, left, right, result):
- """
- Call _fill_zeros with the appropriate fill value depending on the operation,
- with special logic for divmod and rdivmod.
- Parameters
- ----------
- op : function (operator.add, operator.div, ...)
- left : object (np.ndarray for non-reversed ops)
- right : object (np.ndarray for reversed ops)
- result : ndarray
- Returns
- -------
- result : np.ndarray
- Notes
- -----
- For divmod and rdivmod, the `result` parameter and returned `result`
- is a 2-tuple of ndarray objects.
- """
- if op is divmod:
- result = (
- mask_zero_div_zero(left, right, result[0]),
- _fill_zeros(result[1], left, right),
- )
- elif op is roperator.rdivmod:
- result = (
- mask_zero_div_zero(right, left, result[0]),
- _fill_zeros(result[1], right, left),
- )
- elif op is operator.floordiv:
- # Note: no need to do this for truediv; in py3 numpy behaves the way
- # we want.
- result = mask_zero_div_zero(left, right, result)
- elif op is roperator.rfloordiv:
- # Note: no need to do this for rtruediv; in py3 numpy behaves the way
- # we want.
- result = mask_zero_div_zero(right, left, result)
- elif op is operator.mod:
- result = _fill_zeros(result, left, right)
- elif op is roperator.rmod:
- result = _fill_zeros(result, right, left)
- return result
|