mask_ops.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. """
  2. Ops for masked arrays.
  3. """
  4. from __future__ import annotations
  5. import numpy as np
  6. from pandas._libs import (
  7. lib,
  8. missing as libmissing,
  9. )
  10. def kleene_or(
  11. left: bool | np.ndarray | libmissing.NAType,
  12. right: bool | np.ndarray | libmissing.NAType,
  13. left_mask: np.ndarray | None,
  14. right_mask: np.ndarray | None,
  15. ):
  16. """
  17. Boolean ``or`` using Kleene logic.
  18. Values are NA where we have ``NA | NA`` or ``NA | False``.
  19. ``NA | True`` is considered True.
  20. Parameters
  21. ----------
  22. left, right : ndarray, NA, or bool
  23. The values of the array.
  24. left_mask, right_mask : ndarray, optional
  25. The masks. Only one of these may be None, which implies that
  26. the associated `left` or `right` value is a scalar.
  27. Returns
  28. -------
  29. result, mask: ndarray[bool]
  30. The result of the logical or, and the new mask.
  31. """
  32. # To reduce the number of cases, we ensure that `left` & `left_mask`
  33. # always come from an array, not a scalar. This is safe, since
  34. # A | B == B | A
  35. if left_mask is None:
  36. return kleene_or(right, left, right_mask, left_mask)
  37. if not isinstance(left, np.ndarray):
  38. raise TypeError("Either `left` or `right` need to be a np.ndarray.")
  39. raise_for_nan(right, method="or")
  40. if right is libmissing.NA:
  41. result = left.copy()
  42. else:
  43. result = left | right
  44. if right_mask is not None:
  45. # output is unknown where (False & NA), (NA & False), (NA & NA)
  46. left_false = ~(left | left_mask)
  47. right_false = ~(right | right_mask)
  48. mask = (
  49. (left_false & right_mask)
  50. | (right_false & left_mask)
  51. | (left_mask & right_mask)
  52. )
  53. else:
  54. if right is True:
  55. mask = np.zeros_like(left_mask)
  56. elif right is libmissing.NA:
  57. mask = (~left & ~left_mask) | left_mask
  58. else:
  59. # False
  60. mask = left_mask.copy()
  61. return result, mask
  62. def kleene_xor(
  63. left: bool | np.ndarray | libmissing.NAType,
  64. right: bool | np.ndarray | libmissing.NAType,
  65. left_mask: np.ndarray | None,
  66. right_mask: np.ndarray | None,
  67. ):
  68. """
  69. Boolean ``xor`` using Kleene logic.
  70. This is the same as ``or``, with the following adjustments
  71. * True, True -> False
  72. * True, NA -> NA
  73. Parameters
  74. ----------
  75. left, right : ndarray, NA, or bool
  76. The values of the array.
  77. left_mask, right_mask : ndarray, optional
  78. The masks. Only one of these may be None, which implies that
  79. the associated `left` or `right` value is a scalar.
  80. Returns
  81. -------
  82. result, mask: ndarray[bool]
  83. The result of the logical xor, and the new mask.
  84. """
  85. # To reduce the number of cases, we ensure that `left` & `left_mask`
  86. # always come from an array, not a scalar. This is safe, since
  87. # A ^ B == B ^ A
  88. if left_mask is None:
  89. return kleene_xor(right, left, right_mask, left_mask)
  90. if not isinstance(left, np.ndarray):
  91. raise TypeError("Either `left` or `right` need to be a np.ndarray.")
  92. raise_for_nan(right, method="xor")
  93. if right is libmissing.NA:
  94. result = np.zeros_like(left)
  95. else:
  96. result = left ^ right
  97. if right_mask is None:
  98. if right is libmissing.NA:
  99. mask = np.ones_like(left_mask)
  100. else:
  101. mask = left_mask.copy()
  102. else:
  103. mask = left_mask | right_mask
  104. return result, mask
  105. def kleene_and(
  106. left: bool | libmissing.NAType | np.ndarray,
  107. right: bool | libmissing.NAType | np.ndarray,
  108. left_mask: np.ndarray | None,
  109. right_mask: np.ndarray | None,
  110. ):
  111. """
  112. Boolean ``and`` using Kleene logic.
  113. Values are ``NA`` for ``NA & NA`` or ``True & NA``.
  114. Parameters
  115. ----------
  116. left, right : ndarray, NA, or bool
  117. The values of the array.
  118. left_mask, right_mask : ndarray, optional
  119. The masks. Only one of these may be None, which implies that
  120. the associated `left` or `right` value is a scalar.
  121. Returns
  122. -------
  123. result, mask: ndarray[bool]
  124. The result of the logical xor, and the new mask.
  125. """
  126. # To reduce the number of cases, we ensure that `left` & `left_mask`
  127. # always come from an array, not a scalar. This is safe, since
  128. # A & B == B & A
  129. if left_mask is None:
  130. return kleene_and(right, left, right_mask, left_mask)
  131. if not isinstance(left, np.ndarray):
  132. raise TypeError("Either `left` or `right` need to be a np.ndarray.")
  133. raise_for_nan(right, method="and")
  134. if right is libmissing.NA:
  135. result = np.zeros_like(left)
  136. else:
  137. result = left & right
  138. if right_mask is None:
  139. # Scalar `right`
  140. if right is libmissing.NA:
  141. mask = (left & ~left_mask) | left_mask
  142. else:
  143. mask = left_mask.copy()
  144. if right is False:
  145. # unmask everything
  146. mask[:] = False
  147. else:
  148. # unmask where either left or right is False
  149. left_false = ~(left | left_mask)
  150. right_false = ~(right | right_mask)
  151. mask = (left_mask & ~right_false) | (right_mask & ~left_false)
  152. return result, mask
  153. def raise_for_nan(value, method: str) -> None:
  154. if lib.is_float(value) and np.isnan(value):
  155. raise ValueError(f"Cannot perform logical '{method}' with floating NaN")