missing.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. """
  2. Missing data handling for arithmetic operations.
  3. In particular, pandas conventions regarding division by zero differ
  4. from numpy in the following ways:
  5. 1) np.array([-1, 0, 1], dtype=dtype1) // np.array([0, 0, 0], dtype=dtype2)
  6. gives [nan, nan, nan] for most dtype combinations, and [0, 0, 0] for
  7. the remaining pairs
  8. (the remaining being dtype1==dtype2==intN and dtype1==dtype2==uintN).
  9. pandas convention is to return [-inf, nan, inf] for all dtype
  10. combinations.
  11. Note: the numpy behavior described here is py3-specific.
  12. 2) np.array([-1, 0, 1], dtype=dtype1) % np.array([0, 0, 0], dtype=dtype2)
  13. gives precisely the same results as the // operation.
  14. pandas convention is to return [nan, nan, nan] for all dtype
  15. combinations.
  16. 3) divmod behavior consistent with 1) and 2).
  17. """
  18. from __future__ import annotations
  19. import operator
  20. import numpy as np
  21. from pandas.core.dtypes.common import (
  22. is_float_dtype,
  23. is_integer_dtype,
  24. is_scalar,
  25. )
  26. from pandas.core.ops import roperator
  27. def _fill_zeros(result, x, y):
  28. """
  29. If this is a reversed op, then flip x,y
  30. If we have an integer value (or array in y)
  31. and we have 0's, fill them with np.nan,
  32. return the result.
  33. Mask the nan's from x.
  34. """
  35. if is_float_dtype(result.dtype):
  36. return result
  37. is_variable_type = hasattr(y, "dtype")
  38. is_scalar_type = is_scalar(y)
  39. if not is_variable_type and not is_scalar_type:
  40. return result
  41. if is_scalar_type:
  42. y = np.array(y)
  43. if is_integer_dtype(y.dtype):
  44. ymask = y == 0
  45. if ymask.any():
  46. # GH#7325, mask and nans must be broadcastable
  47. mask = ymask & ~np.isnan(result)
  48. # GH#9308 doing ravel on result and mask can improve putmask perf,
  49. # but can also make unwanted copies.
  50. result = result.astype("float64", copy=False)
  51. np.putmask(result, mask, np.nan)
  52. return result
  53. def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray:
  54. """
  55. Set results of 0 // 0 to np.nan, regardless of the dtypes
  56. of the numerator or the denominator.
  57. Parameters
  58. ----------
  59. x : ndarray
  60. y : ndarray
  61. result : ndarray
  62. Returns
  63. -------
  64. ndarray
  65. The filled result.
  66. Examples
  67. --------
  68. >>> x = np.array([1, 0, -1], dtype=np.int64)
  69. >>> x
  70. array([ 1, 0, -1])
  71. >>> y = 0 # int 0; numpy behavior is different with float
  72. >>> result = x // y
  73. >>> result # raw numpy result does not fill division by zero
  74. array([0, 0, 0])
  75. >>> mask_zero_div_zero(x, y, result)
  76. array([ inf, nan, -inf])
  77. """
  78. if not hasattr(y, "dtype"):
  79. # e.g. scalar, tuple
  80. y = np.array(y)
  81. if not hasattr(x, "dtype"):
  82. # e.g scalar, tuple
  83. x = np.array(x)
  84. zmask = y == 0
  85. if zmask.any():
  86. # Flip sign if necessary for -0.0
  87. zneg_mask = zmask & np.signbit(y)
  88. zpos_mask = zmask & ~zneg_mask
  89. x_lt0 = x < 0
  90. x_gt0 = x > 0
  91. nan_mask = zmask & (x == 0)
  92. with np.errstate(invalid="ignore"):
  93. neginf_mask = (zpos_mask & x_lt0) | (zneg_mask & x_gt0)
  94. posinf_mask = (zpos_mask & x_gt0) | (zneg_mask & x_lt0)
  95. if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
  96. # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
  97. result = result.astype("float64", copy=False)
  98. result[nan_mask] = np.nan
  99. result[posinf_mask] = np.inf
  100. result[neginf_mask] = -np.inf
  101. return result
  102. def dispatch_fill_zeros(op, left, right, result):
  103. """
  104. Call _fill_zeros with the appropriate fill value depending on the operation,
  105. with special logic for divmod and rdivmod.
  106. Parameters
  107. ----------
  108. op : function (operator.add, operator.div, ...)
  109. left : object (np.ndarray for non-reversed ops)
  110. right : object (np.ndarray for reversed ops)
  111. result : ndarray
  112. Returns
  113. -------
  114. result : np.ndarray
  115. Notes
  116. -----
  117. For divmod and rdivmod, the `result` parameter and returned `result`
  118. is a 2-tuple of ndarray objects.
  119. """
  120. if op is divmod:
  121. result = (
  122. mask_zero_div_zero(left, right, result[0]),
  123. _fill_zeros(result[1], left, right),
  124. )
  125. elif op is roperator.rdivmod:
  126. result = (
  127. mask_zero_div_zero(right, left, result[0]),
  128. _fill_zeros(result[1], right, left),
  129. )
  130. elif op is operator.floordiv:
  131. # Note: no need to do this for truediv; in py3 numpy behaves the way
  132. # we want.
  133. result = mask_zero_div_zero(left, right, result)
  134. elif op is roperator.rfloordiv:
  135. # Note: no need to do this for rtruediv; in py3 numpy behaves the way
  136. # we want.
  137. result = mask_zero_div_zero(right, left, result)
  138. elif op is operator.mod:
  139. result = _fill_zeros(result, left, right)
  140. elif op is roperator.rmod:
  141. result = _fill_zeros(result, right, left)
  142. return result