align.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. """
  2. Core eval alignment algorithms.
  3. """
  4. from __future__ import annotations
  5. from functools import (
  6. partial,
  7. wraps,
  8. )
  9. from typing import (
  10. TYPE_CHECKING,
  11. Callable,
  12. Sequence,
  13. )
  14. import warnings
  15. import numpy as np
  16. from pandas.errors import PerformanceWarning
  17. from pandas.util._exceptions import find_stack_level
  18. from pandas.core.dtypes.generic import (
  19. ABCDataFrame,
  20. ABCSeries,
  21. )
  22. from pandas.core.base import PandasObject
  23. import pandas.core.common as com
  24. from pandas.core.computation.common import result_type_many
  25. if TYPE_CHECKING:
  26. from pandas._typing import F
  27. from pandas.core.generic import NDFrame
  28. from pandas.core.indexes.api import Index
  29. def _align_core_single_unary_op(
  30. term,
  31. ) -> tuple[partial | type[NDFrame], dict[str, Index] | None]:
  32. typ: partial | type[NDFrame]
  33. axes: dict[str, Index] | None = None
  34. if isinstance(term.value, np.ndarray):
  35. typ = partial(np.asanyarray, dtype=term.value.dtype)
  36. else:
  37. typ = type(term.value)
  38. if hasattr(term.value, "axes"):
  39. axes = _zip_axes_from_type(typ, term.value.axes)
  40. return typ, axes
  41. def _zip_axes_from_type(
  42. typ: type[NDFrame], new_axes: Sequence[Index]
  43. ) -> dict[str, Index]:
  44. return {name: new_axes[i] for i, name in enumerate(typ._AXIS_ORDERS)}
  45. def _any_pandas_objects(terms) -> bool:
  46. """
  47. Check a sequence of terms for instances of PandasObject.
  48. """
  49. return any(isinstance(term.value, PandasObject) for term in terms)
  50. def _filter_special_cases(f) -> Callable[[F], F]:
  51. @wraps(f)
  52. def wrapper(terms):
  53. # single unary operand
  54. if len(terms) == 1:
  55. return _align_core_single_unary_op(terms[0])
  56. term_values = (term.value for term in terms)
  57. # we don't have any pandas objects
  58. if not _any_pandas_objects(terms):
  59. return result_type_many(*term_values), None
  60. return f(terms)
  61. return wrapper
  62. @_filter_special_cases
  63. def _align_core(terms):
  64. term_index = [i for i, term in enumerate(terms) if hasattr(term.value, "axes")]
  65. term_dims = [terms[i].value.ndim for i in term_index]
  66. from pandas import Series
  67. ndims = Series(dict(zip(term_index, term_dims)))
  68. # initial axes are the axes of the largest-axis'd term
  69. biggest = terms[ndims.idxmax()].value
  70. typ = biggest._constructor
  71. axes = biggest.axes
  72. naxes = len(axes)
  73. gt_than_one_axis = naxes > 1
  74. for value in (terms[i].value for i in term_index):
  75. is_series = isinstance(value, ABCSeries)
  76. is_series_and_gt_one_axis = is_series and gt_than_one_axis
  77. for axis, items in enumerate(value.axes):
  78. if is_series_and_gt_one_axis:
  79. ax, itm = naxes - 1, value.index
  80. else:
  81. ax, itm = axis, items
  82. if not axes[ax].is_(itm):
  83. axes[ax] = axes[ax].join(itm, how="outer")
  84. for i, ndim in ndims.items():
  85. for axis, items in zip(range(ndim), axes):
  86. ti = terms[i].value
  87. if hasattr(ti, "reindex"):
  88. transpose = isinstance(ti, ABCSeries) and naxes > 1
  89. reindexer = axes[naxes - 1] if transpose else items
  90. term_axis_size = len(ti.axes[axis])
  91. reindexer_size = len(reindexer)
  92. ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
  93. if ordm >= 1 and reindexer_size >= 10000:
  94. w = (
  95. f"Alignment difference on axis {axis} is larger "
  96. f"than an order of magnitude on term {repr(terms[i].name)}, "
  97. f"by more than {ordm:.4g}; performance may suffer."
  98. )
  99. warnings.warn(
  100. w, category=PerformanceWarning, stacklevel=find_stack_level()
  101. )
  102. f = partial(ti.reindex, reindexer, axis=axis, copy=False)
  103. terms[i].update(f())
  104. terms[i].update(terms[i].value.values)
  105. return typ, _zip_axes_from_type(typ, axes)
  106. def align_terms(terms):
  107. """
  108. Align a set of terms.
  109. """
  110. try:
  111. # flatten the parse tree (a nested list, really)
  112. terms = list(com.flatten(terms))
  113. except TypeError:
  114. # can't iterate so it must just be a constant or single variable
  115. if isinstance(terms.value, (ABCSeries, ABCDataFrame)):
  116. typ = type(terms.value)
  117. return typ, _zip_axes_from_type(typ, terms.value.axes)
  118. return np.result_type(terms.type), None
  119. # if all resolved variables are numeric scalars
  120. if all(term.is_scalar for term in terms):
  121. return result_type_many(*(term.value for term in terms)).type, None
  122. # perform the main alignment
  123. typ, axes = _align_core(terms)
  124. return typ, axes
  125. def reconstruct_object(typ, obj, axes, dtype):
  126. """
  127. Reconstruct an object given its type, raw value, and possibly empty
  128. (None) axes.
  129. Parameters
  130. ----------
  131. typ : object
  132. A type
  133. obj : object
  134. The value to use in the type constructor
  135. axes : dict
  136. The axes to use to construct the resulting pandas object
  137. Returns
  138. -------
  139. ret : typ
  140. An object of type ``typ`` with the value `obj` and possible axes
  141. `axes`.
  142. """
  143. try:
  144. typ = typ.type
  145. except AttributeError:
  146. pass
  147. res_t = np.result_type(obj.dtype, dtype)
  148. if not isinstance(typ, partial) and issubclass(typ, PandasObject):
  149. return typ(obj, dtype=res_t, **axes)
  150. # special case for pathological things like ~True/~False
  151. if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
  152. ret_value = res_t.type(obj)
  153. else:
  154. ret_value = typ(obj).astype(res_t)
  155. # The condition is to distinguish 0-dim array (returned in case of
  156. # scalar) and 1 element array
  157. # e.g. np.array(0) and np.array([0])
  158. if (
  159. len(obj.shape) == 1
  160. and len(obj) == 1
  161. and not isinstance(ret_value, np.ndarray)
  162. ):
  163. ret_value = np.array([ret_value]).astype(res_t)
  164. return ret_value