expr.py 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840
  1. """
  2. :func:`~pandas.eval` parsers.
  3. """
  4. from __future__ import annotations
  5. import ast
  6. from functools import (
  7. partial,
  8. reduce,
  9. )
  10. from keyword import iskeyword
  11. import tokenize
  12. from typing import (
  13. Callable,
  14. TypeVar,
  15. )
  16. import numpy as np
  17. from pandas.compat import PY39
  18. from pandas.errors import UndefinedVariableError
  19. import pandas.core.common as com
  20. from pandas.core.computation.ops import (
  21. ARITH_OPS_SYMS,
  22. BOOL_OPS_SYMS,
  23. CMP_OPS_SYMS,
  24. LOCAL_TAG,
  25. MATHOPS,
  26. REDUCTIONS,
  27. UNARY_OPS_SYMS,
  28. BinOp,
  29. Constant,
  30. Div,
  31. FuncNode,
  32. Op,
  33. Term,
  34. UnaryOp,
  35. is_term,
  36. )
  37. from pandas.core.computation.parsing import (
  38. clean_backtick_quoted_toks,
  39. tokenize_string,
  40. )
  41. from pandas.core.computation.scope import Scope
  42. from pandas.io.formats import printing
  43. def _rewrite_assign(tok: tuple[int, str]) -> tuple[int, str]:
  44. """
  45. Rewrite the assignment operator for PyTables expressions that use ``=``
  46. as a substitute for ``==``.
  47. Parameters
  48. ----------
  49. tok : tuple of int, str
  50. ints correspond to the all caps constants in the tokenize module
  51. Returns
  52. -------
  53. tuple of int, str
  54. Either the input or token or the replacement values
  55. """
  56. toknum, tokval = tok
  57. return toknum, "==" if tokval == "=" else tokval
  58. def _replace_booleans(tok: tuple[int, str]) -> tuple[int, str]:
  59. """
  60. Replace ``&`` with ``and`` and ``|`` with ``or`` so that bitwise
  61. precedence is changed to boolean precedence.
  62. Parameters
  63. ----------
  64. tok : tuple of int, str
  65. ints correspond to the all caps constants in the tokenize module
  66. Returns
  67. -------
  68. tuple of int, str
  69. Either the input or token or the replacement values
  70. """
  71. toknum, tokval = tok
  72. if toknum == tokenize.OP:
  73. if tokval == "&":
  74. return tokenize.NAME, "and"
  75. elif tokval == "|":
  76. return tokenize.NAME, "or"
  77. return toknum, tokval
  78. return toknum, tokval
  79. def _replace_locals(tok: tuple[int, str]) -> tuple[int, str]:
  80. """
  81. Replace local variables with a syntactically valid name.
  82. Parameters
  83. ----------
  84. tok : tuple of int, str
  85. ints correspond to the all caps constants in the tokenize module
  86. Returns
  87. -------
  88. tuple of int, str
  89. Either the input or token or the replacement values
  90. Notes
  91. -----
  92. This is somewhat of a hack in that we rewrite a string such as ``'@a'`` as
  93. ``'__pd_eval_local_a'`` by telling the tokenizer that ``__pd_eval_local_``
  94. is a ``tokenize.OP`` and to replace the ``'@'`` symbol with it.
  95. """
  96. toknum, tokval = tok
  97. if toknum == tokenize.OP and tokval == "@":
  98. return tokenize.OP, LOCAL_TAG
  99. return toknum, tokval
  100. def _compose2(f, g):
  101. """
  102. Compose 2 callables.
  103. """
  104. return lambda *args, **kwargs: f(g(*args, **kwargs))
  105. def _compose(*funcs):
  106. """
  107. Compose 2 or more callables.
  108. """
  109. assert len(funcs) > 1, "At least 2 callables must be passed to compose"
  110. return reduce(_compose2, funcs)
  def _preparse(
      source: str,
      f=_compose(
          _replace_locals, _replace_booleans, _rewrite_assign, clean_backtick_quoted_toks
      ),
  ) -> str:
      """
      Rewrite every token of ``source`` with ``f`` and rebuild the source text.

      Parameters
      ----------
      source : str
          A Python source code string
      f : callable
          This takes a tuple of (toknum, tokval) as its argument and returns a
          tuple with the same structure but possibly different elements.
          Defaults to the composition of ``_replace_locals``,
          ``_replace_booleans``, ``_rewrite_assign`` and
          ``clean_backtick_quoted_toks``; the last of these is applied to a
          token first and ``_replace_locals`` last.

      Returns
      -------
      str
          Valid Python source code

      Notes
      -----
      The `f` parameter can be any callable that takes *and* returns input of the
      form ``(toknum, tokval)``, where ``toknum`` is one of the constants from
      the ``tokenize`` module and ``tokval`` is a string.
      """
      assert callable(f), "f must be callable"
      rewritten_tokens = map(f, tokenize_string(source))
      return tokenize.untokenize(rewritten_tokens)
  140. def _is_type(t):
  141. """
  142. Factory for a type checking function of type ``t`` or tuple of types.
  143. """
  144. return lambda x: isinstance(x.value, t)
  145. _is_list = _is_type(list)
  146. _is_str = _is_type(str)
  147. # partition all AST nodes
  148. _all_nodes = frozenset(
  149. node
  150. for node in (getattr(ast, name) for name in dir(ast))
  151. if isinstance(node, type) and issubclass(node, ast.AST)
  152. )
  def _filter_nodes(superclass, all_nodes=_all_nodes):
      """
      Collect the class names of the nodes in ``all_nodes`` that are
      subclasses of ``superclass``.

      Returns
      -------
      frozenset of str
      """
      return frozenset(
          {cls.__name__ for cls in all_nodes if issubclass(cls, superclass)}
      )
  _all_node_names = frozenset(node.__name__ for node in _all_nodes)

  # node class names grouped by the AST base class they derive from
  _mod_nodes = _filter_nodes(ast.mod)
  _stmt_nodes = _filter_nodes(ast.stmt)
  _expr_nodes = _filter_nodes(ast.expr)
  _expr_context_nodes = _filter_nodes(ast.expr_context)
  _boolop_nodes = _filter_nodes(ast.boolop)
  _operator_nodes = _filter_nodes(ast.operator)
  _unary_op_nodes = _filter_nodes(ast.unaryop)
  _cmp_op_nodes = _filter_nodes(ast.cmpop)
  _comprehension_nodes = _filter_nodes(ast.comprehension)
  _handler_nodes = _filter_nodes(ast.excepthandler)
  _arguments_nodes = _filter_nodes(ast.arguments)
  _keyword_nodes = _filter_nodes(ast.keyword)
  _alias_nodes = _filter_nodes(ast.alias)

  if not PY39:
      _slice_nodes = _filter_nodes(ast.slice)

  # nodes that we don't support directly but are needed for parsing
  _hacked_nodes = frozenset({"Assign", "Module", "Expr"})

  _unsupported_expr_nodes = frozenset(
      {
          "Yield",
          "GeneratorExp",
          "IfExp",
          "DictComp",
          "SetComp",
          "Repr",
          "Lambda",
          "Set",
          "AST",
          "Is",
          "IsNot",
      }
  )

  # these nodes are low priority or won't ever be supported (e.g., AST)
  _unsupported_nodes = (
      frozenset().union(
          _stmt_nodes,
          _mod_nodes,
          _handler_nodes,
          _arguments_nodes,
          _keyword_nodes,
          _alias_nodes,
          _expr_context_nodes,
          _unsupported_expr_nodes,
      )
      - _hacked_nodes
  )

  # we're adding a different assignment in some cases to be equality comparison
  # and we don't want `stmt` and friends in there so get only the classes whose
  # names are capitalized
  _base_supported_nodes = (_all_node_names - _unsupported_nodes) | _hacked_nodes

  # sanity check: a node name must never be both supported and unsupported
  intersection = _unsupported_nodes & _base_supported_nodes
  _msg = f"cannot both support and not support {intersection}"
  assert not intersection, _msg
  210. def _node_not_implemented(node_name: str) -> Callable[..., None]:
  211. """
  212. Return a function that raises a NotImplementedError with a passed node name.
  213. """
  214. def f(self, *args, **kwargs):
  215. raise NotImplementedError(f"'{node_name}' nodes are not implemented")
  216. return f
  # should be bound by BaseExprVisitor but that creates a circular dependency:
  # _T is used in disallow, but disallow is used to define BaseExprVisitor
  # https://github.com/microsoft/pyright/issues/2315
  _T = TypeVar("_T")


  def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
      """
      Class decorator that disallows certain nodes from parsing.

      For every name in ``nodes`` the decorated class gets a ``visit_<name>``
      method that raises NotImplementedError, and the names of those methods
      are recorded as a tuple in the class attribute ``unsupported_nodes``.

      Returns
      -------
      callable
      """

      def disallowed(cls: type[_T]) -> type[_T]:
          blocked_methods = []
          for node in nodes:
              method_name = f"visit_{node}"
              setattr(cls, method_name, _node_not_implemented(node))
              blocked_methods.append(method_name)
          # error: "Type[_T]" has no attribute "unsupported_nodes"
          cls.unsupported_nodes = tuple(blocked_methods)  # type: ignore[attr-defined]
          return cls

      return disallowed
  240. def _op_maker(op_class, op_symbol):
  241. """
  242. Return a function to create an op class with its symbol already passed.
  243. Returns
  244. -------
  245. callable
  246. """
  247. def f(self, node, *args, **kwargs):
  248. """
  249. Return a partial function with an Op subclass with an operator already passed.
  250. Returns
  251. -------
  252. callable
  253. """
  254. return partial(op_class, op_symbol, *args, **kwargs)
  255. return f
  _op_classes = {"binary": BinOp, "unary": UnaryOp}


  def add_ops(op_classes):
      """
      Class decorator that installs default ``visit_<Node>`` methods for ops.

      For each ``kind -> op_class`` entry, the decorated class must provide
      ``<kind>_ops`` (the operator symbols) and ``<kind>_op_nodes_map``
      (symbol -> AST node name). Symbols mapped to ``None`` get no method.
      """

      def f(cls):
          for kind, op_class in op_classes.items():
              symbols = getattr(cls, f"{kind}_ops")
              node_name_for = getattr(cls, f"{kind}_op_nodes_map")
              for symbol in symbols:
                  node_name = node_name_for[symbol]
                  if node_name is None:
                      continue
                  setattr(cls, f"visit_{node_name}", _op_maker(op_class, symbol))
          return cls

      return f
  @disallow(_unsupported_nodes)
  @add_ops(_op_classes)
  class BaseExprVisitor(ast.NodeVisitor):
      """
      Custom ast walker. Parsers of other engines should subclass this class
      if necessary.

      Parameters
      ----------
      env : Scope
      engine : str
      parser : str
      preparser : callable
          Called with the expression string before it is handed to
          ``ast.parse``.
      """

      const_type: type[Term] = Constant
      term_type = Term

      binary_ops = CMP_OPS_SYMS + BOOL_OPS_SYMS + ARITH_OPS_SYMS
      # AST node names paired positionally with ``binary_ops``; ``None`` marks
      # a symbol for which ``add_ops`` installs no default visitor
      binary_op_nodes = (
          "Gt",
          "Lt",
          "GtE",
          "LtE",
          "Eq",
          "NotEq",
          "In",
          "NotIn",
          "BitAnd",
          "BitOr",
          "And",
          "Or",
          "Add",
          "Sub",
          "Mult",
          None,
          "Pow",
          "FloorDiv",
          "Mod",
      )
      binary_op_nodes_map = dict(zip(binary_ops, binary_op_nodes))

      unary_ops = UNARY_OPS_SYMS
      unary_op_nodes = "UAdd", "USub", "Invert", "Not"
      unary_op_nodes_map = dict(zip(unary_ops, unary_op_nodes))

      # comparison node types that may be rewritten as membership tests
      rewrite_map = {
          ast.Eq: ast.In,
          ast.NotEq: ast.NotIn,
          ast.In: ast.In,
          ast.NotIn: ast.NotIn,
      }

      unsupported_nodes: tuple[str, ...]

      def __init__(self, env, engine, parser, preparser=_preparse) -> None:
          self.env = env
          self.engine = engine
          self.parser = parser
          self.preparser = preparser
          # set by visit_Assign when the expression is an assignment
          self.assigner = None

      def visit(self, node, **kwargs):
          """
          Dispatch ``node`` to the matching ``visit_<NodeType>`` method.

          A string is first run through the preparser and parsed into an AST.

          Raises
          ------
          SyntaxError
              If the preparsed string cannot be parsed. When any
              whitespace-separated piece of it is a Python keyword, the error
              message is replaced with a more specific one.
          """
          if isinstance(node, str):
              clean = self.preparser(node)
              try:
                  node = ast.fix_missing_locations(ast.parse(clean))
              except SyntaxError as e:
                  if any(iskeyword(x) for x in clean.split()):
                      e.msg = "Python keyword not valid identifier in numexpr query"
                  raise

          method = f"visit_{type(node).__name__}"
          visitor = getattr(self, method)
          return visitor(node, **kwargs)

      def visit_Module(self, node, **kwargs):
          """Visit the single statement of a module; reject anything else."""
          if len(node.body) != 1:
              raise SyntaxError("only a single expression is allowed")
          expr = node.body[0]
          return self.visit(expr, **kwargs)

      def visit_Expr(self, node, **kwargs):
          """Visit the value wrapped by an expression statement."""
          return self.visit(node.value, **kwargs)

      def _rewrite_membership_op(self, node, left, right):
          """
          Possibly turn ``==``/``!=`` into ``in``/``not in`` and wrap string
          operands in one-element lists.

          Returns
          -------
          tuple
              ``(op, op_instance, left, right)`` where ``op`` is the result of
              visiting the (possibly replaced) operator node.
          """
          # the kind of the operator (is actually an instance)
          op_instance = node.op
          op_type = type(op_instance)

          # must be two terms and the comparison operator must be ==/!=/in/not in
          if is_term(left) and is_term(right) and op_type in self.rewrite_map:
              left_list, right_list = map(_is_list, (left, right))
              left_str, right_str = map(_is_str, (left, right))

              # if there are any strings or lists in the expression
              if left_list or right_list or left_str or right_str:
                  op_instance = self.rewrite_map[op_type]()

              # pop the string variable out of locals and replace it with a list
              # of one string, kind of a hack
              if right_str:
                  name = self.env.add_tmp([right.value])
                  right = self.term_type(name, self.env)

              if left_str:
                  name = self.env.add_tmp([left.value])
                  left = self.term_type(name, self.env)

          op = self.visit(op_instance)
          return op, op_instance, left, right

      def _maybe_transform_eq_ne(self, node, left=None, right=None):
          """
          Visit the operands of ``node`` unless they are supplied, then apply
          the membership rewrite.
          """
          if left is None:
              left = self.visit(node.left, side="left")
          if right is None:
              right = self.visit(node.right, side="right")
          op, op_class, left, right = self._rewrite_membership_op(node, left, right)
          return op, op_class, left, right

      def _maybe_downcast_constants(self, left, right):
          """
          Re-register a scalar operand as ``np.float32`` when the other operand
          is a non-scalar whose return type is float32.
          """
          f32 = np.dtype(np.float32)
          if (
              left.is_scalar
              and hasattr(left, "value")
              and not right.is_scalar
              and right.return_type == f32
          ):
              # right is a float32 array, left is a scalar
              name = self.env.add_tmp(np.float32(left.value))
              left = self.term_type(name, self.env)
          if (
              right.is_scalar
              and hasattr(right, "value")
              and not left.is_scalar
              and left.return_type == f32
          ):
              # left is a float32 array, right is a scalar
              name = self.env.add_tmp(np.float32(right.value))
              right = self.term_type(name, self.env)

          return left, right

      def _maybe_eval(self, binop, eval_in_python):
          # eval `in` and `not in` (for now) in "partial" python space
          # things that can be evaluated in "eval" space will be turned into
          # temporary variables. for example,
          # [1,2] in a + 2 * b
          # in that case a + 2 * b will be evaluated using numexpr, and the "in"
          # call will be evaluated using isin (in python space)
          return binop.evaluate(
              self.env, self.engine, self.parser, self.term_type, eval_in_python
          )

      def _maybe_evaluate_binop(
          self,
          op,
          op_class,
          lhs,
          rhs,
          eval_in_python=("in", "not in"),
          maybe_eval_in_python=("==", "!=", "<", ">", "<=", ">="),
      ):
          """
          Build the binary op and decide whether it must be evaluated eagerly.

          Raises
          ------
          TypeError
              If the resulting op reports an invalid return type.
          """
          res = op(lhs, rhs)

          if res.has_invalid_return_type:
              raise TypeError(
                  f"unsupported operand type(s) for {res.op}: "
                  f"'{lhs.type}' and '{rhs.type}'"
              )

          # NOTE(review): ``and`` binds tighter than ``or``, so a datetime rhs
          # takes this branch for any operator while a datetime lhs only does
          # so for comparisons. The grouping is spelled out to keep the
          # existing behavior -- confirm whether the asymmetry is intended.
          if self.engine != "pytables" and (
              (res.op in CMP_OPS_SYMS and getattr(lhs, "is_datetime", False))
              or getattr(rhs, "is_datetime", False)
          ):
              # all date ops must be done in python bc numexpr doesn't work
              # well with NaT
              return self._maybe_eval(res, self.binary_ops)

          if res.op in eval_in_python:
              # "in"/"not in" ops are always evaluated in python
              return self._maybe_eval(res, eval_in_python)
          elif self.engine != "pytables":
              if (
                  getattr(lhs, "return_type", None) == object
                  or getattr(rhs, "return_type", None) == object
              ):
                  # evaluate "==" and "!=" in python if either of our operands
                  # has an object return type
                  return self._maybe_eval(res, eval_in_python + maybe_eval_in_python)
          return res

      def visit_BinOp(self, node, **kwargs):
          op, op_class, left, right = self._maybe_transform_eq_ne(node)
          left, right = self._maybe_downcast_constants(left, right)
          return self._maybe_evaluate_binop(op, op_class, left, right)

      def visit_Div(self, node, **kwargs):
          return lambda lhs, rhs: Div(lhs, rhs)

      def visit_UnaryOp(self, node, **kwargs):
          op = self.visit(node.op)
          operand = self.visit(node.operand)
          return op(operand)

      def visit_Name(self, node, **kwargs):
          return self.term_type(node.id, self.env, **kwargs)

      def visit_NameConstant(self, node, **kwargs) -> Term:
          return self.const_type(node.value, self.env)

      def visit_Num(self, node, **kwargs) -> Term:
          return self.const_type(node.n, self.env)

      def visit_Constant(self, node, **kwargs) -> Term:
          # ``value`` is the canonical attribute of ast.Constant; ``n`` is only
          # a deprecated alias for it
          return self.const_type(node.value, self.env)

      def visit_Str(self, node, **kwargs):
          name = self.env.add_tmp(node.s)
          return self.term_type(name, self.env)

      def visit_List(self, node, **kwargs):
          # every element is evaluated right away and the resulting list is
          # stored as a temporary
          name = self.env.add_tmp([self.visit(e)(self.env) for e in node.elts])
          return self.term_type(name, self.env)

      visit_Tuple = visit_List

      def visit_Index(self, node, **kwargs):
          """df.index[4]"""
          return self.visit(node.value)

      def visit_Subscript(self, node, **kwargs):
          """Evaluate ``value[slice]`` eagerly and store the result as a temporary."""
          from pandas import eval as pd_eval

          value = self.visit(node.value)
          slobj = self.visit(node.slice)
          result = pd_eval(
              slobj, local_dict=self.env, engine=self.engine, parser=self.parser
          )
          try:
              # a Term instance
              v = value.value[result]
          except AttributeError:
              # an Op instance
              lhs = pd_eval(
                  value, local_dict=self.env, engine=self.engine, parser=self.parser
              )
              v = lhs[result]

          name = self.env.add_tmp(v)
          return self.term_type(name, env=self.env)

      def visit_Slice(self, node, **kwargs):
          """df.index[slice(4,6)]"""
          lower = node.lower
          if lower is not None:
              lower = self.visit(lower).value
          upper = node.upper
          if upper is not None:
              upper = self.visit(upper).value
          step = node.step
          if step is not None:
              step = self.visit(step).value

          return slice(lower, upper, step)

      def visit_Assign(self, node, **kwargs):
          """
          support a single assignment node, like

          c = a + b

          set the assigner at the top level, must be a Name node which
          might or might not exist in the resolvers

          Raises
          ------
          SyntaxError
              If there is more than one target, the target is not a plain
              name, or the assigner resolves to ``None``.
          ValueError
              If the environment has no target object.
          """
          if len(node.targets) != 1:
              raise SyntaxError("can only assign a single expression")
          if not isinstance(node.targets[0], ast.Name):
              raise SyntaxError("left hand side of an assignment must be a single name")
          if self.env.target is None:
              raise ValueError("cannot assign without a target object")

          try:
              assigner = self.visit(node.targets[0], **kwargs)
          except UndefinedVariableError:
              # the name does not exist yet; fall back to the raw identifier
              assigner = node.targets[0].id

          self.assigner = getattr(assigner, "name", assigner)
          if self.assigner is None:
              raise SyntaxError(
                  "left hand side of an assignment must be a single resolvable name"
              )

          return self.visit(node.value, **kwargs)

      def visit_Attribute(self, node, **kwargs):
          """
          Resolve ``value.attr`` eagerly and store the result as a temporary.

          Raises
          ------
          ValueError
              If the attribute is not used in a load context.
          """
          attr = node.attr
          value = node.value

          ctx = node.ctx
          if isinstance(ctx, ast.Load):
              # resolve the value
              resolved = self.visit(value).value
              try:
                  v = getattr(resolved, attr)
                  name = self.env.add_tmp(v)
                  return self.term_type(name, self.env)
              except AttributeError:
                  # something like datetime.datetime where scope is overridden
                  if isinstance(value, ast.Name) and value.id == attr:
                      return resolved
                  raise

          raise ValueError(f"Invalid Attribute context {type(ctx).__name__}")

      def visit_Call(self, node, side=None, **kwargs):
          """
          Resolve the callee, then either build a ``FuncNode`` call or call
          the resolved object eagerly and store the result as a temporary.

          Raises
          ------
          TypeError
              If the callee is neither an attribute nor a name, or if keyword
              arguments are passed to a ``FuncNode``.
          ValueError
              If the callee resolves to ``None`` or a keyword is malformed.
          """
          if isinstance(node.func, ast.Attribute) and node.func.attr != "__call__":
              res = self.visit_Attribute(node.func)
          elif not isinstance(node.func, ast.Name):
              raise TypeError("Only named functions are supported")
          else:
              try:
                  res = self.visit(node.func)
              except UndefinedVariableError:
                  # Check if this is a supported function name; a ValueError
                  # raised by FuncNode propagates to the caller unchanged
                  res = FuncNode(node.func.id)

          if res is None:
              # error: "expr" has no attribute "id"
              raise ValueError(
                  f"Invalid function call {node.func.id}"  # type: ignore[attr-defined]
              )
          if hasattr(res, "value"):
              res = res.value

          if isinstance(res, FuncNode):
              new_args = [self.visit(arg) for arg in node.args]

              if node.keywords:
                  raise TypeError(
                      f'Function "{res.name}" does not support keyword arguments'
                  )

              return res(*new_args)

          else:
              new_args = [self.visit(arg)(self.env) for arg in node.args]

              for key in node.keywords:
                  if not isinstance(key, ast.keyword):
                      # error: "expr" has no attribute "id"
                      raise ValueError(
                          "keyword error in function call "  # type: ignore[attr-defined]
                          f"'{node.func.id}'"
                      )

                  if key.arg:
                      # evaluated keyword arguments are merged into ``kwargs``
                      # before the call below
                      kwargs[key.arg] = self.visit(key.value)(self.env)

              name = self.env.add_tmp(res(*new_args, **kwargs))
              return self.term_type(name=name, env=self.env)

      def translate_In(self, op):
          return op

      def visit_Compare(self, node, **kwargs):
          """
          Visit a comparison; chained comparisons are split into pairwise
          comparisons joined with ``and``.
          """
          ops = node.ops
          comps = node.comparators

          # base case: we have something like a CMP b
          if len(comps) == 1:
              op = self.translate_In(ops[0])
              binop = ast.BinOp(op=op, left=node.left, right=comps[0])
              return self.visit(binop)

          # recursive case: we have a chained comparison, a CMP b CMP c, etc.
          left = node.left
          values = []
          for op, comp in zip(ops, comps):
              new_node = self.visit(
                  ast.Compare(comparators=[comp], left=left, ops=[self.translate_In(op)])
              )
              left = comp
              values.append(new_node)
          return self.visit(ast.BoolOp(op=ast.And(), values=values))

      def _try_visit_binop(self, bop):
          # operands that were already visited are passed through untouched
          if isinstance(bop, (Op, Term)):
              return bop
          return self.visit(bop)

      def visit_BoolOp(self, node, **kwargs):
          """Fold the operands of a boolean operation pairwise, left to right."""

          def visitor(x, y):
              lhs = self._try_visit_binop(x)
              rhs = self._try_visit_binop(y)

              op, op_class, lhs, rhs = self._maybe_transform_eq_ne(node, lhs, rhs)
              return self._maybe_evaluate_binop(op, node.op, lhs, rhs)

          operands = node.values
          return reduce(visitor, operands)
  611. _python_not_supported = frozenset(["Dict", "BoolOp", "In", "NotIn"])
  612. _numexpr_supported_calls = frozenset(REDUCTIONS + MATHOPS)
  @disallow(
      (_unsupported_nodes | _python_not_supported)
      - (_boolop_nodes | frozenset({"BoolOp", "Attribute", "In", "NotIn", "Tuple"}))
  )
  class PandasExprVisitor(BaseExprVisitor):
      """
      Visitor registered in ``PARSERS`` under the name ``"pandas"``.

      Its default preparser applies ``clean_backtick_quoted_toks``,
      ``_replace_booleans`` and ``_replace_locals`` to every token, and boolean
      operations, attribute access, ``in``/``not in`` and tuples are kept
      out of the disallowed node set.
      """

      def __init__(
          self,
          env,
          engine,
          parser,
          preparser=partial(
              _preparse,
              f=_compose(_replace_locals, _replace_booleans, clean_backtick_quoted_toks),
          ),
      ) -> None:
          super().__init__(env, engine, parser, preparser=preparser)
  @disallow(_unsupported_nodes | _python_not_supported | frozenset({"Not"}))
  class PythonExprVisitor(BaseExprVisitor):
      """
      Visitor registered in ``PARSERS`` under the name ``"python"``.

      Its default preparser returns the source unchanged, and ``Not`` nodes
      are disallowed on top of the shared unsupported set.
      """

      def __init__(
          self, env, engine, parser, preparser=lambda source, f=None: source
      ) -> None:
          super().__init__(env, engine, parser, preparser)
  635. class Expr:
  636. """
  637. Object encapsulating an expression.
  638. Parameters
  639. ----------
  640. expr : str
  641. engine : str, optional, default 'numexpr'
  642. parser : str, optional, default 'pandas'
  643. env : Scope, optional, default None
  644. level : int, optional, default 2
  645. """
  646. env: Scope
  647. engine: str
  648. parser: str
  649. def __init__(
  650. self,
  651. expr,
  652. engine: str = "numexpr",
  653. parser: str = "pandas",
  654. env: Scope | None = None,
  655. level: int = 0,
  656. ) -> None:
  657. self.expr = expr
  658. self.env = env or Scope(level=level + 1)
  659. self.engine = engine
  660. self.parser = parser
  661. self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
  662. self.terms = self.parse()
  663. @property
  664. def assigner(self):
  665. return getattr(self._visitor, "assigner", None)
  666. def __call__(self):
  667. return self.terms(self.env)
  668. def __repr__(self) -> str:
  669. return printing.pprint_thing(self.terms)
  670. def __len__(self) -> int:
  671. return len(self.expr)
  672. def parse(self):
  673. """
  674. Parse an expression.
  675. """
  676. return self._visitor.visit(self.expr)
  677. @property
  678. def names(self):
  679. """
  680. Get the names in an expression.
  681. """
  682. if is_term(self.terms):
  683. return frozenset([self.terms.name])
  684. return frozenset(term.name for term in com.flatten(self.terms))
  # parser name -> visitor class; ``Expr`` looks its visitor up here
  PARSERS = {
      "python": PythonExprVisitor,
      "pandas": PandasExprVisitor,
  }