putmask.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. """
  2. EA-compatible analogue to np.putmask
  3. """
  4. from __future__ import annotations
  5. from typing import (
  6. TYPE_CHECKING,
  7. Any,
  8. )
  9. import numpy as np
  10. from pandas._libs import lib
  11. from pandas._typing import (
  12. ArrayLike,
  13. npt,
  14. )
  15. from pandas.compat import np_version_under1p21
  16. from pandas.core.dtypes.cast import infer_dtype_from
  17. from pandas.core.dtypes.common import is_list_like
  18. from pandas.core.arrays import ExtensionArray
  19. if TYPE_CHECKING:
  20. from pandas import MultiIndex
  21. def putmask_inplace(values: ArrayLike, mask: npt.NDArray[np.bool_], value: Any) -> None:
  22. """
  23. ExtensionArray-compatible implementation of np.putmask. The main
  24. difference is we do not handle repeating or truncating like numpy.
  25. Parameters
  26. ----------
  27. values: np.ndarray or ExtensionArray
  28. mask : np.ndarray[bool]
  29. We assume extract_bool_array has already been called.
  30. value : Any
  31. """
  32. if (
  33. not isinstance(values, np.ndarray)
  34. or (values.dtype == object and not lib.is_scalar(value))
  35. # GH#43424: np.putmask raises TypeError if we cannot cast between types with
  36. # rule = "safe", a stricter guarantee we may not have here
  37. or (
  38. isinstance(value, np.ndarray) and not np.can_cast(value.dtype, values.dtype)
  39. )
  40. ):
  41. # GH#19266 using np.putmask gives unexpected results with listlike value
  42. # along with object dtype
  43. if is_list_like(value) and len(value) == len(values):
  44. values[mask] = value[mask]
  45. else:
  46. values[mask] = value
  47. else:
  48. # GH#37833 np.putmask is more performant than __setitem__
  49. np.putmask(values, mask, value)
  50. def putmask_without_repeat(
  51. values: np.ndarray, mask: npt.NDArray[np.bool_], new: Any
  52. ) -> None:
  53. """
  54. np.putmask will truncate or repeat if `new` is a listlike with
  55. len(new) != len(values). We require an exact match.
  56. Parameters
  57. ----------
  58. values : np.ndarray
  59. mask : np.ndarray[bool]
  60. new : Any
  61. """
  62. if np_version_under1p21:
  63. new = setitem_datetimelike_compat(values, mask.sum(), new)
  64. if getattr(new, "ndim", 0) >= 1:
  65. new = new.astype(values.dtype, copy=False)
  66. # TODO: this prob needs some better checking for 2D cases
  67. nlocs = mask.sum()
  68. if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1:
  69. shape = np.shape(new)
  70. # np.shape compat for if setitem_datetimelike_compat
  71. # changed arraylike to list e.g. test_where_dt64_2d
  72. if nlocs == shape[-1]:
  73. # GH#30567
  74. # If length of ``new`` is less than the length of ``values``,
  75. # `np.putmask` would first repeat the ``new`` array and then
  76. # assign the masked values hence produces incorrect result.
  77. # `np.place` on the other hand uses the ``new`` values at it is
  78. # to place in the masked locations of ``values``
  79. np.place(values, mask, new)
  80. # i.e. values[mask] = new
  81. elif mask.shape[-1] == shape[-1] or shape[-1] == 1:
  82. np.putmask(values, mask, new)
  83. else:
  84. raise ValueError("cannot assign mismatch length to masked array")
  85. else:
  86. np.putmask(values, mask, new)
  87. def validate_putmask(
  88. values: ArrayLike | MultiIndex, mask: np.ndarray
  89. ) -> tuple[npt.NDArray[np.bool_], bool]:
  90. """
  91. Validate mask and check if this putmask operation is a no-op.
  92. """
  93. mask = extract_bool_array(mask)
  94. if mask.shape != values.shape:
  95. raise ValueError("putmask: mask and data must be the same size")
  96. noop = not mask.any()
  97. return mask, noop
  98. def extract_bool_array(mask: ArrayLike) -> npt.NDArray[np.bool_]:
  99. """
  100. If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
  101. """
  102. if isinstance(mask, ExtensionArray):
  103. # We could have BooleanArray, Sparse[bool], ...
  104. # Except for BooleanArray, this is equivalent to just
  105. # np.asarray(mask, dtype=bool)
  106. mask = mask.to_numpy(dtype=bool, na_value=False)
  107. mask = np.asarray(mask, dtype=bool)
  108. return mask
  109. def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
  110. """
  111. Parameters
  112. ----------
  113. values : np.ndarray
  114. num_set : int
  115. For putmask, this is mask.sum()
  116. other : Any
  117. """
  118. if values.dtype == object:
  119. dtype, _ = infer_dtype_from(other, pandas_dtype=True)
  120. if isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]:
  121. # https://github.com/numpy/numpy/issues/12550
  122. # timedelta64 will incorrectly cast to int
  123. if not is_list_like(other):
  124. other = [other] * num_set
  125. else:
  126. other = list(other)
  127. return other