take.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. from __future__ import annotations
  2. import functools
  3. from typing import (
  4. TYPE_CHECKING,
  5. cast,
  6. overload,
  7. )
  8. import numpy as np
  9. from pandas._libs import (
  10. algos as libalgos,
  11. lib,
  12. )
  13. from pandas._typing import (
  14. ArrayLike,
  15. AxisInt,
  16. npt,
  17. )
  18. from pandas.core.dtypes.cast import maybe_promote
  19. from pandas.core.dtypes.common import (
  20. ensure_platform_int,
  21. is_1d_only_ea_obj,
  22. )
  23. from pandas.core.dtypes.missing import na_value_for_dtype
  24. from pandas.core.construction import ensure_wrapped_if_datetimelike
  25. if TYPE_CHECKING:
  26. from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
  27. from pandas.core.arrays.base import ExtensionArray
  28. @overload
  29. def take_nd(
  30. arr: np.ndarray,
  31. indexer,
  32. axis: AxisInt = ...,
  33. fill_value=...,
  34. allow_fill: bool = ...,
  35. ) -> np.ndarray:
  36. ...
  37. @overload
  38. def take_nd(
  39. arr: ExtensionArray,
  40. indexer,
  41. axis: AxisInt = ...,
  42. fill_value=...,
  43. allow_fill: bool = ...,
  44. ) -> ArrayLike:
  45. ...
  46. def take_nd(
  47. arr: ArrayLike,
  48. indexer,
  49. axis: AxisInt = 0,
  50. fill_value=lib.no_default,
  51. allow_fill: bool = True,
  52. ) -> ArrayLike:
  53. """
  54. Specialized Cython take which sets NaN values in one pass
  55. This dispatches to ``take`` defined on ExtensionArrays. It does not
  56. currently dispatch to ``SparseArray.take`` for sparse ``arr``.
  57. Note: this function assumes that the indexer is a valid(ated) indexer with
  58. no out of bound indices.
  59. Parameters
  60. ----------
  61. arr : np.ndarray or ExtensionArray
  62. Input array.
  63. indexer : ndarray
  64. 1-D array of indices to take, subarrays corresponding to -1 value
  65. indices are filed with fill_value
  66. axis : int, default 0
  67. Axis to take from
  68. fill_value : any, default np.nan
  69. Fill value to replace -1 values with
  70. allow_fill : bool, default True
  71. If False, indexer is assumed to contain no -1 values so no filling
  72. will be done. This short-circuits computation of a mask. Result is
  73. undefined if allow_fill == False and -1 is present in indexer.
  74. Returns
  75. -------
  76. subarray : np.ndarray or ExtensionArray
  77. May be the same type as the input, or cast to an ndarray.
  78. """
  79. if fill_value is lib.no_default:
  80. fill_value = na_value_for_dtype(arr.dtype, compat=False)
  81. elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM":
  82. dtype, fill_value = maybe_promote(arr.dtype, fill_value)
  83. if arr.dtype != dtype:
  84. # EA.take is strict about returning a new object of the same type
  85. # so for that case cast upfront
  86. arr = arr.astype(dtype)
  87. if not isinstance(arr, np.ndarray):
  88. # i.e. ExtensionArray,
  89. # includes for EA to catch DatetimeArray, TimedeltaArray
  90. if not is_1d_only_ea_obj(arr):
  91. # i.e. DatetimeArray, TimedeltaArray
  92. arr = cast("NDArrayBackedExtensionArray", arr)
  93. return arr.take(
  94. indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
  95. )
  96. return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
  97. arr = np.asarray(arr)
  98. return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
  99. def _take_nd_ndarray(
  100. arr: np.ndarray,
  101. indexer: npt.NDArray[np.intp] | None,
  102. axis: AxisInt,
  103. fill_value,
  104. allow_fill: bool,
  105. ) -> np.ndarray:
  106. if indexer is None:
  107. indexer = np.arange(arr.shape[axis], dtype=np.intp)
  108. dtype, fill_value = arr.dtype, arr.dtype.type()
  109. else:
  110. indexer = ensure_platform_int(indexer)
  111. dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
  112. arr, indexer, fill_value, allow_fill
  113. )
  114. flip_order = False
  115. if arr.ndim == 2 and arr.flags.f_contiguous:
  116. flip_order = True
  117. if flip_order:
  118. arr = arr.T
  119. axis = arr.ndim - axis - 1
  120. # at this point, it's guaranteed that dtype can hold both the arr values
  121. # and the fill_value
  122. out_shape_ = list(arr.shape)
  123. out_shape_[axis] = len(indexer)
  124. out_shape = tuple(out_shape_)
  125. if arr.flags.f_contiguous and axis == arr.ndim - 1:
  126. # minor tweak that can make an order-of-magnitude difference
  127. # for dataframes initialized directly from 2-d ndarrays
  128. # (s.t. df.values is c-contiguous and df._mgr.blocks[0] is its
  129. # f-contiguous transpose)
  130. out = np.empty(out_shape, dtype=dtype, order="F")
  131. else:
  132. out = np.empty(out_shape, dtype=dtype)
  133. func = _get_take_nd_function(
  134. arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info
  135. )
  136. func(arr, indexer, out, fill_value)
  137. if flip_order:
  138. out = out.T
  139. return out
  140. def take_1d(
  141. arr: ArrayLike,
  142. indexer: npt.NDArray[np.intp],
  143. fill_value=None,
  144. allow_fill: bool = True,
  145. mask: npt.NDArray[np.bool_] | None = None,
  146. ) -> ArrayLike:
  147. """
  148. Specialized version for 1D arrays. Differences compared to `take_nd`:
  149. - Assumes input array has already been converted to numpy array / EA
  150. - Assumes indexer is already guaranteed to be intp dtype ndarray
  151. - Only works for 1D arrays
  152. To ensure the lowest possible overhead.
  153. Note: similarly to `take_nd`, this function assumes that the indexer is
  154. a valid(ated) indexer with no out of bound indices.
  155. Parameters
  156. ----------
  157. arr : np.ndarray or ExtensionArray
  158. Input array.
  159. indexer : ndarray
  160. 1-D array of indices to take (validated indices, intp dtype).
  161. fill_value : any, default np.nan
  162. Fill value to replace -1 values with
  163. allow_fill : bool, default True
  164. If False, indexer is assumed to contain no -1 values so no filling
  165. will be done. This short-circuits computation of a mask. Result is
  166. undefined if allow_fill == False and -1 is present in indexer.
  167. mask : np.ndarray, optional, default None
  168. If `allow_fill` is True, and the mask (where indexer == -1) is already
  169. known, it can be passed to avoid recomputation.
  170. """
  171. if not isinstance(arr, np.ndarray):
  172. # ExtensionArray -> dispatch to their method
  173. return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
  174. if not allow_fill:
  175. return arr.take(indexer)
  176. dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
  177. arr, indexer, fill_value, True, mask
  178. )
  179. # at this point, it's guaranteed that dtype can hold both the arr values
  180. # and the fill_value
  181. out = np.empty(indexer.shape, dtype=dtype)
  182. func = _get_take_nd_function(
  183. arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info
  184. )
  185. func(arr, indexer, out, fill_value)
  186. return out
  187. def take_2d_multi(
  188. arr: np.ndarray,
  189. indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
  190. fill_value=np.nan,
  191. ) -> np.ndarray:
  192. """
  193. Specialized Cython take which sets NaN values in one pass.
  194. """
  195. # This is only called from one place in DataFrame._reindex_multi,
  196. # so we know indexer is well-behaved.
  197. assert indexer is not None
  198. assert indexer[0] is not None
  199. assert indexer[1] is not None
  200. row_idx, col_idx = indexer
  201. row_idx = ensure_platform_int(row_idx)
  202. col_idx = ensure_platform_int(col_idx)
  203. indexer = row_idx, col_idx
  204. mask_info = None
  205. # check for promotion based on types only (do this first because
  206. # it's faster than computing a mask)
  207. dtype, fill_value = maybe_promote(arr.dtype, fill_value)
  208. if dtype != arr.dtype:
  209. # check if promotion is actually required based on indexer
  210. row_mask = row_idx == -1
  211. col_mask = col_idx == -1
  212. row_needs = row_mask.any()
  213. col_needs = col_mask.any()
  214. mask_info = (row_mask, col_mask), (row_needs, col_needs)
  215. if not (row_needs or col_needs):
  216. # if not, then depromote, set fill_value to dummy
  217. # (it won't be used but we don't want the cython code
  218. # to crash when trying to cast it to dtype)
  219. dtype, fill_value = arr.dtype, arr.dtype.type()
  220. # at this point, it's guaranteed that dtype can hold both the arr values
  221. # and the fill_value
  222. out_shape = len(row_idx), len(col_idx)
  223. out = np.empty(out_shape, dtype=dtype)
  224. func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None)
  225. if func is None and arr.dtype != out.dtype:
  226. func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None)
  227. if func is not None:
  228. func = _convert_wrapper(func, out.dtype)
  229. if func is not None:
  230. func(arr, indexer, out=out, fill_value=fill_value)
  231. else:
  232. # test_reindex_multi
  233. _take_2d_multi_object(
  234. arr, indexer, out, fill_value=fill_value, mask_info=mask_info
  235. )
  236. return out
  237. @functools.lru_cache(maxsize=128)
  238. def _get_take_nd_function_cached(
  239. ndim: int, arr_dtype: np.dtype, out_dtype: np.dtype, axis: AxisInt
  240. ):
  241. """
  242. Part of _get_take_nd_function below that doesn't need `mask_info` and thus
  243. can be cached (mask_info potentially contains a numpy ndarray which is not
  244. hashable and thus cannot be used as argument for cached function).
  245. """
  246. tup = (arr_dtype.name, out_dtype.name)
  247. if ndim == 1:
  248. func = _take_1d_dict.get(tup, None)
  249. elif ndim == 2:
  250. if axis == 0:
  251. func = _take_2d_axis0_dict.get(tup, None)
  252. else:
  253. func = _take_2d_axis1_dict.get(tup, None)
  254. if func is not None:
  255. return func
  256. # We get here with string, uint, float16, and complex dtypes that could
  257. # potentially be handled in algos_take_helper.
  258. # Also a couple with (M8[ns], object) and (m8[ns], object)
  259. tup = (out_dtype.name, out_dtype.name)
  260. if ndim == 1:
  261. func = _take_1d_dict.get(tup, None)
  262. elif ndim == 2:
  263. if axis == 0:
  264. func = _take_2d_axis0_dict.get(tup, None)
  265. else:
  266. func = _take_2d_axis1_dict.get(tup, None)
  267. if func is not None:
  268. func = _convert_wrapper(func, out_dtype)
  269. return func
  270. return None
  271. def _get_take_nd_function(
  272. ndim: int,
  273. arr_dtype: np.dtype,
  274. out_dtype: np.dtype,
  275. axis: AxisInt = 0,
  276. mask_info=None,
  277. ):
  278. """
  279. Get the appropriate "take" implementation for the given dimension, axis
  280. and dtypes.
  281. """
  282. func = None
  283. if ndim <= 2:
  284. # for this part we don't need `mask_info` -> use the cached algo lookup
  285. func = _get_take_nd_function_cached(ndim, arr_dtype, out_dtype, axis)
  286. if func is None:
  287. def func(arr, indexer, out, fill_value=np.nan) -> None:
  288. indexer = ensure_platform_int(indexer)
  289. _take_nd_object(
  290. arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info
  291. )
  292. return func
  293. def _view_wrapper(f, arr_dtype=None, out_dtype=None, fill_wrap=None):
  294. def wrapper(
  295. arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
  296. ) -> None:
  297. if arr_dtype is not None:
  298. arr = arr.view(arr_dtype)
  299. if out_dtype is not None:
  300. out = out.view(out_dtype)
  301. if fill_wrap is not None:
  302. # FIXME: if we get here with dt64/td64 we need to be sure we have
  303. # matching resos
  304. if fill_value.dtype.kind == "m":
  305. fill_value = fill_value.astype("m8[ns]")
  306. else:
  307. fill_value = fill_value.astype("M8[ns]")
  308. fill_value = fill_wrap(fill_value)
  309. f(arr, indexer, out, fill_value=fill_value)
  310. return wrapper
  311. def _convert_wrapper(f, conv_dtype):
  312. def wrapper(
  313. arr: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=np.nan
  314. ) -> None:
  315. if conv_dtype == object:
  316. # GH#39755 avoid casting dt64/td64 to integers
  317. arr = ensure_wrapped_if_datetimelike(arr)
  318. arr = arr.astype(conv_dtype)
  319. f(arr, indexer, out, fill_value=fill_value)
  320. return wrapper
  321. _take_1d_dict = {
  322. ("int8", "int8"): libalgos.take_1d_int8_int8,
  323. ("int8", "int32"): libalgos.take_1d_int8_int32,
  324. ("int8", "int64"): libalgos.take_1d_int8_int64,
  325. ("int8", "float64"): libalgos.take_1d_int8_float64,
  326. ("int16", "int16"): libalgos.take_1d_int16_int16,
  327. ("int16", "int32"): libalgos.take_1d_int16_int32,
  328. ("int16", "int64"): libalgos.take_1d_int16_int64,
  329. ("int16", "float64"): libalgos.take_1d_int16_float64,
  330. ("int32", "int32"): libalgos.take_1d_int32_int32,
  331. ("int32", "int64"): libalgos.take_1d_int32_int64,
  332. ("int32", "float64"): libalgos.take_1d_int32_float64,
  333. ("int64", "int64"): libalgos.take_1d_int64_int64,
  334. ("int64", "float64"): libalgos.take_1d_int64_float64,
  335. ("float32", "float32"): libalgos.take_1d_float32_float32,
  336. ("float32", "float64"): libalgos.take_1d_float32_float64,
  337. ("float64", "float64"): libalgos.take_1d_float64_float64,
  338. ("object", "object"): libalgos.take_1d_object_object,
  339. ("bool", "bool"): _view_wrapper(libalgos.take_1d_bool_bool, np.uint8, np.uint8),
  340. ("bool", "object"): _view_wrapper(libalgos.take_1d_bool_object, np.uint8, None),
  341. ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
  342. libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64
  343. ),
  344. ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
  345. libalgos.take_1d_int64_int64, np.int64, np.int64, np.int64
  346. ),
  347. }
  348. _take_2d_axis0_dict = {
  349. ("int8", "int8"): libalgos.take_2d_axis0_int8_int8,
  350. ("int8", "int32"): libalgos.take_2d_axis0_int8_int32,
  351. ("int8", "int64"): libalgos.take_2d_axis0_int8_int64,
  352. ("int8", "float64"): libalgos.take_2d_axis0_int8_float64,
  353. ("int16", "int16"): libalgos.take_2d_axis0_int16_int16,
  354. ("int16", "int32"): libalgos.take_2d_axis0_int16_int32,
  355. ("int16", "int64"): libalgos.take_2d_axis0_int16_int64,
  356. ("int16", "float64"): libalgos.take_2d_axis0_int16_float64,
  357. ("int32", "int32"): libalgos.take_2d_axis0_int32_int32,
  358. ("int32", "int64"): libalgos.take_2d_axis0_int32_int64,
  359. ("int32", "float64"): libalgos.take_2d_axis0_int32_float64,
  360. ("int64", "int64"): libalgos.take_2d_axis0_int64_int64,
  361. ("int64", "float64"): libalgos.take_2d_axis0_int64_float64,
  362. ("float32", "float32"): libalgos.take_2d_axis0_float32_float32,
  363. ("float32", "float64"): libalgos.take_2d_axis0_float32_float64,
  364. ("float64", "float64"): libalgos.take_2d_axis0_float64_float64,
  365. ("object", "object"): libalgos.take_2d_axis0_object_object,
  366. ("bool", "bool"): _view_wrapper(
  367. libalgos.take_2d_axis0_bool_bool, np.uint8, np.uint8
  368. ),
  369. ("bool", "object"): _view_wrapper(
  370. libalgos.take_2d_axis0_bool_object, np.uint8, None
  371. ),
  372. ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
  373. libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64
  374. ),
  375. ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
  376. libalgos.take_2d_axis0_int64_int64, np.int64, np.int64, fill_wrap=np.int64
  377. ),
  378. }
  379. _take_2d_axis1_dict = {
  380. ("int8", "int8"): libalgos.take_2d_axis1_int8_int8,
  381. ("int8", "int32"): libalgos.take_2d_axis1_int8_int32,
  382. ("int8", "int64"): libalgos.take_2d_axis1_int8_int64,
  383. ("int8", "float64"): libalgos.take_2d_axis1_int8_float64,
  384. ("int16", "int16"): libalgos.take_2d_axis1_int16_int16,
  385. ("int16", "int32"): libalgos.take_2d_axis1_int16_int32,
  386. ("int16", "int64"): libalgos.take_2d_axis1_int16_int64,
  387. ("int16", "float64"): libalgos.take_2d_axis1_int16_float64,
  388. ("int32", "int32"): libalgos.take_2d_axis1_int32_int32,
  389. ("int32", "int64"): libalgos.take_2d_axis1_int32_int64,
  390. ("int32", "float64"): libalgos.take_2d_axis1_int32_float64,
  391. ("int64", "int64"): libalgos.take_2d_axis1_int64_int64,
  392. ("int64", "float64"): libalgos.take_2d_axis1_int64_float64,
  393. ("float32", "float32"): libalgos.take_2d_axis1_float32_float32,
  394. ("float32", "float64"): libalgos.take_2d_axis1_float32_float64,
  395. ("float64", "float64"): libalgos.take_2d_axis1_float64_float64,
  396. ("object", "object"): libalgos.take_2d_axis1_object_object,
  397. ("bool", "bool"): _view_wrapper(
  398. libalgos.take_2d_axis1_bool_bool, np.uint8, np.uint8
  399. ),
  400. ("bool", "object"): _view_wrapper(
  401. libalgos.take_2d_axis1_bool_object, np.uint8, None
  402. ),
  403. ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
  404. libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64
  405. ),
  406. ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
  407. libalgos.take_2d_axis1_int64_int64, np.int64, np.int64, fill_wrap=np.int64
  408. ),
  409. }
  410. _take_2d_multi_dict = {
  411. ("int8", "int8"): libalgos.take_2d_multi_int8_int8,
  412. ("int8", "int32"): libalgos.take_2d_multi_int8_int32,
  413. ("int8", "int64"): libalgos.take_2d_multi_int8_int64,
  414. ("int8", "float64"): libalgos.take_2d_multi_int8_float64,
  415. ("int16", "int16"): libalgos.take_2d_multi_int16_int16,
  416. ("int16", "int32"): libalgos.take_2d_multi_int16_int32,
  417. ("int16", "int64"): libalgos.take_2d_multi_int16_int64,
  418. ("int16", "float64"): libalgos.take_2d_multi_int16_float64,
  419. ("int32", "int32"): libalgos.take_2d_multi_int32_int32,
  420. ("int32", "int64"): libalgos.take_2d_multi_int32_int64,
  421. ("int32", "float64"): libalgos.take_2d_multi_int32_float64,
  422. ("int64", "int64"): libalgos.take_2d_multi_int64_int64,
  423. ("int64", "float64"): libalgos.take_2d_multi_int64_float64,
  424. ("float32", "float32"): libalgos.take_2d_multi_float32_float32,
  425. ("float32", "float64"): libalgos.take_2d_multi_float32_float64,
  426. ("float64", "float64"): libalgos.take_2d_multi_float64_float64,
  427. ("object", "object"): libalgos.take_2d_multi_object_object,
  428. ("bool", "bool"): _view_wrapper(
  429. libalgos.take_2d_multi_bool_bool, np.uint8, np.uint8
  430. ),
  431. ("bool", "object"): _view_wrapper(
  432. libalgos.take_2d_multi_bool_object, np.uint8, None
  433. ),
  434. ("datetime64[ns]", "datetime64[ns]"): _view_wrapper(
  435. libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64
  436. ),
  437. ("timedelta64[ns]", "timedelta64[ns]"): _view_wrapper(
  438. libalgos.take_2d_multi_int64_int64, np.int64, np.int64, fill_wrap=np.int64
  439. ),
  440. }
  441. def _take_nd_object(
  442. arr: np.ndarray,
  443. indexer: npt.NDArray[np.intp],
  444. out: np.ndarray,
  445. axis: AxisInt,
  446. fill_value,
  447. mask_info,
  448. ) -> None:
  449. if mask_info is not None:
  450. mask, needs_masking = mask_info
  451. else:
  452. mask = indexer == -1
  453. needs_masking = mask.any()
  454. if arr.dtype != out.dtype:
  455. arr = arr.astype(out.dtype)
  456. if arr.shape[axis] > 0:
  457. arr.take(indexer, axis=axis, out=out)
  458. if needs_masking:
  459. outindexer = [slice(None)] * arr.ndim
  460. outindexer[axis] = mask
  461. out[tuple(outindexer)] = fill_value
  462. def _take_2d_multi_object(
  463. arr: np.ndarray,
  464. indexer: tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]],
  465. out: np.ndarray,
  466. fill_value,
  467. mask_info,
  468. ) -> None:
  469. # this is not ideal, performance-wise, but it's better than raising
  470. # an exception (best to optimize in Cython to avoid getting here)
  471. row_idx, col_idx = indexer # both np.intp
  472. if mask_info is not None:
  473. (row_mask, col_mask), (row_needs, col_needs) = mask_info
  474. else:
  475. row_mask = row_idx == -1
  476. col_mask = col_idx == -1
  477. row_needs = row_mask.any()
  478. col_needs = col_mask.any()
  479. if fill_value is not None:
  480. if row_needs:
  481. out[row_mask, :] = fill_value
  482. if col_needs:
  483. out[:, col_mask] = fill_value
  484. for i, u_ in enumerate(row_idx):
  485. if u_ != -1:
  486. for j, v in enumerate(col_idx):
  487. if v != -1:
  488. out[i, j] = arr[u_, v]
  489. def _take_preprocess_indexer_and_fill_value(
  490. arr: np.ndarray,
  491. indexer: npt.NDArray[np.intp],
  492. fill_value,
  493. allow_fill: bool,
  494. mask: npt.NDArray[np.bool_] | None = None,
  495. ):
  496. mask_info: tuple[np.ndarray | None, bool] | None = None
  497. if not allow_fill:
  498. dtype, fill_value = arr.dtype, arr.dtype.type()
  499. mask_info = None, False
  500. else:
  501. # check for promotion based on types only (do this first because
  502. # it's faster than computing a mask)
  503. dtype, fill_value = maybe_promote(arr.dtype, fill_value)
  504. if dtype != arr.dtype:
  505. # check if promotion is actually required based on indexer
  506. if mask is not None:
  507. needs_masking = True
  508. else:
  509. mask = indexer == -1
  510. needs_masking = bool(mask.any())
  511. mask_info = mask, needs_masking
  512. if not needs_masking:
  513. # if not, then depromote, set fill_value to dummy
  514. # (it won't be used but we don't want the cython code
  515. # to crash when trying to cast it to dtype)
  516. dtype, fill_value = arr.dtype, arr.dtype.type()
  517. return dtype, fill_value, mask_info