util.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. from __future__ import annotations
  2. import numpy as np
  3. from pandas._typing import NumpyIndexT
  4. from pandas.core.dtypes.common import is_list_like
  5. def cartesian_product(X) -> list[np.ndarray]:
  6. """
  7. Numpy version of itertools.product.
  8. Sometimes faster (for large inputs)...
  9. Parameters
  10. ----------
  11. X : list-like of list-likes
  12. Returns
  13. -------
  14. product : list of ndarrays
  15. Examples
  16. --------
  17. >>> cartesian_product([list('ABC'), [1, 2]])
  18. [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'), array([1, 2, 1, 2, 1, 2])]
  19. See Also
  20. --------
  21. itertools.product : Cartesian product of input iterables. Equivalent to
  22. nested for-loops.
  23. """
  24. msg = "Input must be a list-like of list-likes"
  25. if not is_list_like(X):
  26. raise TypeError(msg)
  27. for x in X:
  28. if not is_list_like(x):
  29. raise TypeError(msg)
  30. if len(X) == 0:
  31. return []
  32. lenX = np.fromiter((len(x) for x in X), dtype=np.intp)
  33. cumprodX = np.cumprod(lenX)
  34. if np.any(cumprodX < 0):
  35. raise ValueError("Product space too large to allocate arrays!")
  36. a = np.roll(cumprodX, 1)
  37. a[0] = 1
  38. if cumprodX[-1] != 0:
  39. b = cumprodX[-1] / cumprodX
  40. else:
  41. # if any factor is empty, the cartesian product is empty
  42. b = np.zeros_like(cumprodX)
  43. # error: Argument of type "int_" cannot be assigned to parameter "num" of
  44. # type "int" in function "tile_compat"
  45. return [
  46. tile_compat(
  47. np.repeat(x, b[i]),
  48. np.prod(a[i]), # pyright: ignore[reportGeneralTypeIssues]
  49. )
  50. for i, x in enumerate(X)
  51. ]
  52. def tile_compat(arr: NumpyIndexT, num: int) -> NumpyIndexT:
  53. """
  54. Index compat for np.tile.
  55. Notes
  56. -----
  57. Does not support multi-dimensional `num`.
  58. """
  59. if isinstance(arr, np.ndarray):
  60. return np.tile(arr, num)
  61. # Otherwise we have an Index
  62. taker = np.tile(np.arange(len(arr)), num)
  63. return arr.take(taker)