test_eval.py 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894
  1. from __future__ import annotations
  2. from functools import reduce
  3. from itertools import product
  4. import operator
  5. import random
  6. import warnings
  7. import numpy as np
  8. import pytest
  9. from pandas.errors import (
  10. NumExprClobberingError,
  11. PerformanceWarning,
  12. UndefinedVariableError,
  13. )
  14. import pandas.util._test_decorators as td
  15. from pandas.core.dtypes.common import (
  16. is_bool,
  17. is_float,
  18. is_list_like,
  19. is_scalar,
  20. )
  21. import pandas as pd
  22. from pandas import (
  23. DataFrame,
  24. Series,
  25. date_range,
  26. )
  27. import pandas._testing as tm
  28. from pandas.core.computation import (
  29. expr,
  30. pytables,
  31. )
  32. from pandas.core.computation.engines import ENGINES
  33. from pandas.core.computation.expr import (
  34. BaseExprVisitor,
  35. PandasExprVisitor,
  36. PythonExprVisitor,
  37. )
  38. from pandas.core.computation.expressions import (
  39. NUMEXPR_INSTALLED,
  40. USE_NUMEXPR,
  41. )
  42. from pandas.core.computation.ops import (
  43. ARITH_OPS_SYMS,
  44. SPECIAL_CASE_ARITH_OPS_SYMS,
  45. _binary_math_ops,
  46. _binary_ops_dict,
  47. _unary_math_ops,
  48. )
  49. from pandas.core.computation.scope import DEFAULT_GLOBALS
  @pytest.fixture(
      params=[
          pytest.param(
              engine_name,
              marks=[
                  pytest.mark.skipif(
                      engine_name == "numexpr" and not USE_NUMEXPR,
                      reason=(
                          f"numexpr enabled->{USE_NUMEXPR}, "
                          f"installed->{NUMEXPR_INSTALLED}"
                      ),
                  ),
                  td.skip_if_no_ne,
              ],
          )
          for engine_name in ENGINES
      ]
  )
  def engine(request):
      """
      Fixture parametrized over every key of ``ENGINES``.

      The "numexpr" entry carries a ``skipif`` that fires when ``USE_NUMEXPR``
      is false; ``td.skip_if_no_ne`` is attached to every entry.
      """
      return request.param
  @pytest.fixture(params=expr.PARSERS)
  def parser(request):
      """Fixture parametrized over every entry of ``expr.PARSERS``."""
      selected_parser = request.param
      return selected_parser
  71. def _eval_single_bin(lhs, cmp1, rhs, engine):
  72. c = _binary_ops_dict[cmp1]
  73. if ENGINES[engine].has_neg_frac:
  74. try:
  75. return c(lhs, rhs)
  76. except ValueError as e:
  77. if str(e).startswith(
  78. "negative number cannot be raised to a fractional power"
  79. ):
  80. return np.nan
  81. raise
  82. return c(lhs, rhs)
  # TODO: using range(5) here is a kludge
  @pytest.fixture(
      params=list(range(5)),
      ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"],
  )
  def lhs(request):
      """
      Operand fixture: ``request.param`` indexes into five candidate operands.

      The candidates are, in order: a random frame, a random series, a series
      containing NaN, a frame whose entries above 0.5 were replaced by NaN,
      and a single random float.
      """
      frame_with_nans = DataFrame(np.random.rand(10, 5))
      frame_with_nans[frame_with_nans > 0.5] = np.nan

      # All candidates are built on every call; only one is handed back.
      candidates = (
          DataFrame(np.random.randn(10, 5)),
          Series(np.random.randn(5)),
          Series([1, 2, np.nan, np.nan, 5]),
          frame_with_nans,
          np.random.randn(),
      )
      return candidates[request.param]
  # Re-bind the ``lhs`` fixture function under two further names
  # (presumably so pytest also offers it as ``rhs`` and ``midhs`` -- confirm).
  rhs = midhs = lhs
  101. class TestEval:
  @pytest.mark.parametrize(
      "cmp1",
      ["!=", "==", "<=", ">=", "<", ">"],
      ids=["ne", "eq", "le", "ge", "lt", "gt"],
  )
  @pytest.mark.parametrize("cmp2", [">", "<"], ids=["gt", "lt"])
  @pytest.mark.parametrize("binop", expr.BOOL_OPS_SYMS)
  def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser):
      """Two comparisons joined by a boolean operator match the eager result."""
      # ``lhs`` / ``rhs`` are resolved by name from this frame inside pd.eval.
      ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)"

      if parser == "python" and binop in ("and", "or"):
          # The keyword forms are expected to be rejected by the python parser.
          msg = "'BoolOp' nodes are not implemented"
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(ex, engine=engine, parser=parser)
          return

      left_cmp = _eval_single_bin(lhs, cmp1, rhs, engine)
      right_cmp = _eval_single_bin(lhs, cmp2, rhs, engine)
      expected = _eval_single_bin(left_cmp, binop, right_cmp, engine)

      result = pd.eval(ex, engine=engine, parser=parser)
      tm.assert_equal(result, expected)
  @pytest.mark.parametrize("cmp_op", expr.CMP_OPS_SYMS)
  def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser):
      """A single comparison between boolean operands matches the eager result."""
      # Both operands are reduced to boolean masks before being compared.
      lhs = lhs < 0
      rhs = rhs < 0
      ex = f"lhs {cmp_op} rhs"
      is_membership = cmp_op in ("in", "not in")

      if parser == "python" and is_membership:
          msg = "'(In|NotIn)' nodes are not implemented"
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(ex, engine=engine, parser=parser)
          return

      if is_membership and not is_list_like(rhs):
          # A non list-like right operand cannot be searched for membership.
          msg = "|".join(
              [
                  r"only list-like( or dict-like)? objects are allowed to be "
                  r"passed to (DataFrame\.)?isin\(\), you passed a "
                  r"(\[|')bool(\]|')",
                  "argument of type 'bool' is not iterable",
              ]
          )
          with pytest.raises(TypeError, match=msg):
              pd.eval(
                  ex,
                  engine=engine,
                  parser=parser,
                  local_dict={"lhs": lhs, "rhs": rhs},
              )
          return

      expected = _eval_single_bin(lhs, cmp_op, rhs, engine)
      result = pd.eval(ex, engine=engine, parser=parser)
      tm.assert_equal(result, expected)
  @pytest.mark.parametrize("op", expr.CMP_OPS_SYMS)
  def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser):
      """``~(lhs op rhs)`` matches inverting the eagerly computed comparison."""
      membership_ops = ["in", "not in"]
      ex = f"~(lhs {op} rhs)"

      if parser == "python" and op in membership_ops:
          msg = "'(In|NotIn)' nodes are not implemented"
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(ex, engine=engine, parser=parser)
          return

      float_vs_nonfloat = is_float(lhs) and not is_float(rhs)
      if (
          float_vs_nonfloat
          and op in membership_ops
          and engine == "python"
          and parser == "pandas"
      ):
          request.node.add_marker(
              pytest.mark.xfail(
                  reason="Looks like expected is negative, unclear whether "
                  "expected is incorrect or result is incorrect"
              )
          )

      if is_scalar(rhs) and op in membership_ops:
          msg = "|".join(
              [
                  r"only list-like( or dict-like)? objects are allowed to be "
                  r"passed to (DataFrame\.)?isin\(\), you passed a "
                  r"(\[|')float(\]|')",
                  "argument of type 'float' is not iterable",
              ]
          )
          with pytest.raises(TypeError, match=msg):
              pd.eval(
                  ex,
                  engine=engine,
                  parser=parser,
                  local_dict={"lhs": lhs, "rhs": rhs},
              )
          return

      # compound
      if is_scalar(lhs) and is_scalar(rhs):
          # Scalars are boxed into one-element arrays; pd.eval below sees the
          # re-bound ``lhs`` / ``rhs``.
          lhs, rhs = np.array([lhs]), np.array([rhs])
      expected = _eval_single_bin(lhs, op, rhs, engine)
      expected = (not expected) if is_scalar(expected) else ~expected
      result = pd.eval(ex, engine=engine, parser=parser)
      tm.assert_almost_equal(expected, result)
  @pytest.mark.parametrize("cmp1", ["<", ">"])
  @pytest.mark.parametrize("cmp2", ["<", ">"])
  def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs, engine, parser):
      """A chained comparison agrees with its explicit ``and`` / ``&`` spellings."""
      # The expressions refer to the middle operand as ``mid``.
      mid = midhs
      ex1 = f"lhs {cmp1} mid {cmp2} rhs"

      if parser == "python":
          msg = "'BoolOp' nodes are not implemented"
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(ex1, engine=engine, parser=parser)
          return

      left_part = _eval_single_bin(lhs, cmp1, mid, engine)
      right_part = _eval_single_bin(mid, cmp2, rhs, engine)
      if left_part is None or right_part is None:
          return

      ex2 = f"lhs {cmp1} mid and mid {cmp2} rhs"
      ex3 = f"(lhs {cmp1} mid) & (mid {cmp2} rhs)"
      expected = _eval_single_bin(left_part, "&", right_part, engine)

      # Plain loop on purpose: pd.eval must run in this frame to see the operands.
      for ex in (ex1, ex2, ex3):
          result = pd.eval(ex, engine=engine, parser=parser)
          tm.assert_almost_equal(result, expected)
  @pytest.mark.parametrize(
      "arith1", sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS))
  )
  def test_binary_arith_ops(self, arith1, lhs, rhs, engine, parser):
      """Single and repeated use of an arithmetic operator match eager results."""
      ex = f"lhs {arith1} rhs"
      result = pd.eval(ex, engine=engine, parser=parser)
      expected = _eval_single_bin(lhs, arith1, rhs, engine)
      tm.assert_almost_equal(result, expected)

      ex = f"lhs {arith1} rhs {arith1} rhs"
      result = pd.eval(ex, engine=engine, parser=parser)
      nlhs = _eval_single_bin(lhs, arith1, rhs, engine)
      try:
          nlhs, ghs = nlhs.align(rhs)
      except (ValueError, TypeError, AttributeError):
          # ValueError: series frame or frame series align
          # TypeError, AttributeError: series or frame with scalar align
          return

      # ``nlhs`` and ``ghs`` are looked up by name from this frame below.
      if engine == "numexpr":
          import numexpr as ne

          # direct numpy comparison
          expected = ne.evaluate(f"nlhs {arith1} ghs")
          # Update assert statement due to unreliable numerical
          # precision component (GH37328)
          # TODO: update testing code so that assert_almost_equal statement
          # can be replaced again by the assert_numpy_array_equal statement
          tm.assert_almost_equal(result.values, expected)
      else:
          expected = eval(f"nlhs {arith1} ghs")
          tm.assert_almost_equal(result, expected)
  # modulus, pow, and floor division require special casing
  def test_modulus(self, lhs, rhs, engine, parser):
      """``lhs % rhs`` through ``pd.eval`` matches the eager modulus."""
      ex = r"lhs % rhs"
      result = pd.eval(ex, engine=engine, parser=parser)
      expected = lhs % rhs
      tm.assert_almost_equal(result, expected)

      if engine != "numexpr":
          expected = _eval_single_bin(expected, "%", rhs, engine)
          tm.assert_almost_equal(result, expected)
          return

      import numexpr as ne

      # The expression string names the locals ``expected`` and ``rhs``.
      expected = ne.evaluate(r"expected % rhs")
      if isinstance(result, (DataFrame, Series)):
          tm.assert_almost_equal(result.values, expected)
      else:
          tm.assert_almost_equal(result, expected.item())
  def test_floor_division(self, lhs, rhs, engine, parser):
      """``//`` evaluates on the python engine and raises on any other engine."""
      ex = "lhs // rhs"

      if engine != "python":
          msg = (
              r"unsupported operand type\(s\) for //: 'VariableNode' and "
              "'VariableNode'"
          )
          with pytest.raises(TypeError, match=msg):
              pd.eval(
                  ex,
                  local_dict={"lhs": lhs, "rhs": rhs},
                  engine=engine,
                  parser=parser,
              )
          return

      res = pd.eval(ex, engine=engine, parser=parser)
      tm.assert_equal(res, lhs // rhs)
  @td.skip_if_windows
  def test_pow(self, lhs, rhs, engine, parser):
      """``**`` and nested ``**`` through ``pd.eval`` match eager evaluation."""
      # odd failure on win32 platform, so skip
      ex = "lhs ** rhs"
      expected = _eval_single_bin(lhs, "**", rhs, engine)
      result = pd.eval(ex, engine=engine, parser=parser)

      # Scalar case where the eager result is complex but pd.eval gave NaN:
      # the two are expected to compare as different.
      complex_vs_nan = (
          is_scalar(lhs)
          and is_scalar(rhs)
          and isinstance(expected, (complex, np.complexfloating))
          and np.isnan(result)
      )
      if complex_vs_nan:
          msg = "(DataFrame.columns|numpy array) are different"
          with pytest.raises(AssertionError, match=msg):
              tm.assert_numpy_array_equal(result, expected)
      else:
          tm.assert_almost_equal(result, expected)

      ex = "(lhs ** rhs) ** rhs"
      result = pd.eval(ex, engine=engine, parser=parser)
      expected = _eval_single_bin(
          _eval_single_bin(lhs, "**", rhs, engine), "**", rhs, engine
      )
      tm.assert_almost_equal(result, expected)
  def test_check_single_invert_op(self, lhs, engine, parser):
      """``~elb`` on a boolean operand matches eager inversion."""
      # simple
      try:
          elb = lhs.astype(bool)
      except AttributeError:
          # Operand has no ``astype``: box its truth value in an array.
          elb = np.array([bool(lhs)])

      # ``elb`` is referenced by name inside the evaluated expression.
      tm.assert_almost_equal(~elb, pd.eval("~elb", engine=engine, parser=parser))
  def test_frame_invert(self, engine, parser):
      """``~lhs`` on float, int, bool and object frames."""
      ex = "~lhs"
      on_numexpr = engine == "numexpr"

      # ~ ##
      # frame
      # float always raises
      lhs = DataFrame(np.random.randn(5, 2))
      if on_numexpr:
          exc, msg = (
              NotImplementedError,
              "couldn't find matching opcode for 'invert_dd'",
          )
      else:
          exc, msg = TypeError, "ufunc 'invert' not supported for the input types"
      with pytest.raises(exc, match=msg):
          pd.eval(ex, engine=engine, parser=parser)

      # int raises on numexpr
      lhs = DataFrame(np.random.randint(5, size=(5, 2)))
      if on_numexpr:
          msg = "couldn't find matching opcode for 'invert"
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(ex, engine=engine, parser=parser)
      else:
          tm.assert_frame_equal(~lhs, pd.eval(ex, engine=engine, parser=parser))

      # bool always works
      lhs = DataFrame(np.random.rand(5, 2) > 0.5)
      tm.assert_frame_equal(~lhs, pd.eval(ex, engine=engine, parser=parser))

      # object raises
      lhs = DataFrame({"b": ["a", 1, 2.0], "c": np.random.rand(3) > 0.5})
      if on_numexpr:
          exc, msg = ValueError, "unknown type object"
      else:
          exc, msg = TypeError, "bad operand type for unary ~: 'str'"
      with pytest.raises(exc, match=msg):
          pd.eval(ex, engine=engine, parser=parser)
  def test_series_invert(self, engine, parser):
      """
      ``~lhs`` on float, int, bool and object series.

      Float and object input is expected to raise on every engine, int input
      only on numexpr, and bool input must match eager inversion.
      """
      # ~ ####
      expr = "~lhs"

      # series
      # float raises
      lhs = Series(np.random.randn(5))
      if engine == "numexpr":
          msg = "couldn't find matching opcode for 'invert_dd'"
          with pytest.raises(NotImplementedError, match=msg):
              # Nothing is bound here: the call is expected to raise.
              pd.eval(expr, engine=engine, parser=parser)
      else:
          msg = "ufunc 'invert' not supported for the input types"
          with pytest.raises(TypeError, match=msg):
              pd.eval(expr, engine=engine, parser=parser)

      # int raises on numexpr
      lhs = Series(np.random.randint(5, size=5))
      if engine == "numexpr":
          msg = "couldn't find matching opcode for 'invert"
          with pytest.raises(NotImplementedError, match=msg):
              pd.eval(expr, engine=engine, parser=parser)
      else:
          expect = ~lhs
          result = pd.eval(expr, engine=engine, parser=parser)
          tm.assert_series_equal(expect, result)

      # bool
      lhs = Series(np.random.rand(5) > 0.5)
      expect = ~lhs
      result = pd.eval(expr, engine=engine, parser=parser)
      tm.assert_series_equal(expect, result)

      # object
      lhs = Series(["a", 1, 2.0])
      if engine == "numexpr":
          with pytest.raises(ValueError, match="unknown type object"):
              pd.eval(expr, engine=engine, parser=parser)
      else:
          msg = "bad operand type for unary ~: 'str'"
          with pytest.raises(TypeError, match=msg):
              pd.eval(expr, engine=engine, parser=parser)
  def test_frame_negate(self, engine, parser):
      """``-lhs`` on float, int and bool frames."""
      ex = "-lhs"

      # float
      lhs = DataFrame(np.random.randn(5, 2))
      tm.assert_frame_equal(-lhs, pd.eval(ex, engine=engine, parser=parser))

      # int
      lhs = DataFrame(np.random.randint(5, size=(5, 2)))
      tm.assert_frame_equal(-lhs, pd.eval(ex, engine=engine, parser=parser))

      # bool doesn't work with numexpr but works elsewhere
      lhs = DataFrame(np.random.rand(5, 2) > 0.5)
      if engine != "numexpr":
          tm.assert_frame_equal(-lhs, pd.eval(ex, engine=engine, parser=parser))
          return

      msg = "couldn't find matching opcode for 'neg_bb'"
      with pytest.raises(NotImplementedError, match=msg):
          pd.eval(ex, engine=engine, parser=parser)
  def test_series_negate(self, engine, parser):
      """``-lhs`` on float, int and bool series."""
      ex = "-lhs"

      # float
      lhs = Series(np.random.randn(5))
      tm.assert_series_equal(-lhs, pd.eval(ex, engine=engine, parser=parser))

      # int
      lhs = Series(np.random.randint(5, size=5))
      tm.assert_series_equal(-lhs, pd.eval(ex, engine=engine, parser=parser))

      # bool doesn't work with numexpr but works elsewhere
      lhs = Series(np.random.rand(5) > 0.5)
      if engine != "numexpr":
          tm.assert_series_equal(-lhs, pd.eval(ex, engine=engine, parser=parser))
          return

      msg = "couldn't find matching opcode for 'neg_bb'"
      with pytest.raises(NotImplementedError, match=msg):
          pd.eval(ex, engine=engine, parser=parser)
  @pytest.mark.parametrize(
      "lhs",
      [
          # Float
          DataFrame(np.random.randn(5, 2)),
          # Int
          DataFrame(np.random.randint(5, size=(5, 2))),
          # bool doesn't work with numexpr but works elsewhere
          DataFrame(np.random.rand(5, 2) > 0.5),
      ],
  )
  def test_frame_pos(self, lhs, engine, parser):
      """Unary plus on a frame gives back an equal frame."""
      evaluated = pd.eval("+lhs", engine=engine, parser=parser)
      tm.assert_frame_equal(lhs, evaluated)
  @pytest.mark.parametrize(
      "lhs",
      [
          # Float
          Series(np.random.randn(5)),
          # Int
          Series(np.random.randint(5, size=5)),
          # bool doesn't work with numexpr but works elsewhere
          Series(np.random.rand(5) > 0.5),
      ],
  )
  def test_series_pos(self, lhs, engine, parser):
      """Unary plus on a series gives back an equal series."""
      evaluated = pd.eval("+lhs", engine=engine, parser=parser)
      tm.assert_series_equal(lhs, evaluated)
  def test_scalar_unary(self, engine, parser):
      """Unary operators on scalar literals agree with plain Python."""
      msg = "bad operand type for unary ~: 'float'"
      with pytest.raises(TypeError, match=msg):
          pd.eval("~1.0", engine=engine, parser=parser)

      # (expression text, value Python computes for the same literal)
      cases = [
          ("-1.0", -1.0),
          ("+1.0", +1.0),
          ("~1", ~1),
          ("-1", -1),
          ("+1", +1),
          ("~True", ~True),
          ("~False", ~False),
          ("-True", -True),
          ("-False", -False),
          ("+True", +True),
          ("+False", +False),
      ]
      for text, expected in cases:
          assert pd.eval(text, parser=parser, engine=engine) == expected
  487. def test_unary_in_array(self):
  488. # GH 11235
  489. # TODO: 2022-01-29: result return list with numexpr 2.7.3 in CI
  490. # but cannot reproduce locally
  491. result = np.array(
  492. pd.eval(
  493. "[-True, True, ~True, +True,"
  494. "-False, False, ~False, +False,"
  495. "-37, 37, ~37, +37]"
  496. ),
  497. dtype=np.object_,
  498. )
  499. expected = np.array(
  500. [
  501. -True,
  502. True,
  503. ~True,
  504. +True,
  505. -False,
  506. False,
  507. ~False,
  508. +False,
  509. -37,
  510. 37,
  511. ~37,
  512. +37,
  513. ],
  514. dtype=np.object_,
  515. )
  516. tm.assert_numpy_array_equal(result, expected)
  @pytest.mark.parametrize("dtype", [np.float32, np.float64])
  @pytest.mark.parametrize("expr", ["x < -0.1", "-5 > x"])
  def test_float_comparison_bin_op(self, dtype, expr):
      """Comparing a zero-valued float column with a negative literal is False."""
      # GH 16363
      zeros = np.array([0], dtype=dtype)
      df = DataFrame({"x": zeros})
      res = df.eval(expr)
      assert res.values == np.array([False])
  524. def test_unary_in_function(self):
  525. # GH 46471
  526. df = DataFrame({"x": [0, 1, np.nan]})
  527. result = df.eval("x.fillna(-1)")
  528. expected = df.x.fillna(-1)
  529. # column name becomes None if using numexpr
  530. # only check names when the engine is not numexpr
  531. tm.assert_series_equal(result, expected, check_names=not USE_NUMEXPR)
  532. result = df.eval("x.shift(1, fill_value=-1)")
  533. expected = df.x.shift(1, fill_value=-1)
  534. tm.assert_series_equal(result, expected, check_names=not USE_NUMEXPR)
  @pytest.mark.parametrize(
      "ex",
      (
          "1 or 2",
          "1 and 2",
          "a and b",
          "a or b",
          "1 or 2 and (3 + 2) > 3",
          "2 * x > 2 or 1 and 2",
          "2 * df > 3 and 1 or a",
      ),
  )
  def test_disallow_scalar_bool_ops(self, ex, engine, parser):
      """Boolean keyword operators involving scalars raise NotImplementedError."""
      # These locals exist only to be resolved by name from the expressions.
      x = np.random.randn(3)  # noqa:F841
      a = 1  # noqa:F841
      b = 2  # noqa:F841
      df = DataFrame(np.random.randn(3, 2))  # noqa:F841

      msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
      with pytest.raises(NotImplementedError, match=msg):
          pd.eval(ex, engine=engine, parser=parser)
  553. def test_identical(self, engine, parser):
  554. # see gh-10546
  555. x = 1
  556. result = pd.eval("x", engine=engine, parser=parser)
  557. assert result == 1
  558. assert is_scalar(result)
  559. x = 1.5
  560. result = pd.eval("x", engine=engine, parser=parser)
  561. assert result == 1.5
  562. assert is_scalar(result)
  563. x = False
  564. result = pd.eval("x", engine=engine, parser=parser)
  565. assert not result
  566. assert is_bool(result)
  567. assert is_scalar(result)
  568. x = np.array([1])
  569. result = pd.eval("x", engine=engine, parser=parser)
  570. tm.assert_numpy_array_equal(result, np.array([1]))
  571. assert result.shape == (1,)
  572. x = np.array([1.5])
  573. result = pd.eval("x", engine=engine, parser=parser)
  574. tm.assert_numpy_array_equal(result, np.array([1.5]))
  575. assert result.shape == (1,)
  576. x = np.array([False]) # noqa:F841
  577. result = pd.eval("x", engine=engine, parser=parser)
  578. tm.assert_numpy_array_equal(result, np.array([False]))
  579. assert result.shape == (1,)
  580. def test_line_continuation(self, engine, parser):
  581. # GH 11149
  582. exp = """1 + 2 * \
  583. 5 - 1 + 2 """
  584. result = pd.eval(exp, engine=engine, parser=parser)
  585. assert result == 12
  586. def test_float_truncation(self, engine, parser):
  587. # GH 14241
  588. exp = "1000000000.006"
  589. result = pd.eval(exp, engine=engine, parser=parser)
  590. expected = np.float64(exp)
  591. assert result == expected
  592. df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})
  593. cutoff = 1000000000.0006
  594. result = df.query(f"A < {cutoff:.4f}")
  595. assert result.empty
  596. cutoff = 1000000000.0010
  597. result = df.query(f"A > {cutoff:.4f}")
  598. expected = df.loc[[1, 2], :]
  599. tm.assert_frame_equal(expected, result)
  600. exact = 1000000000.0011
  601. result = df.query(f"A == {exact:.4f}")
  602. expected = df.loc[[1], :]
  603. tm.assert_frame_equal(expected, result)
  def test_disallow_python_keywords(self):
      """Querying a column or index named after a Python keyword is rejected."""
      # GH 18221
      msg = "Python keyword not valid identifier in numexpr query"

      # Keyword used as a column label.
      keyword_column = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"])
      with pytest.raises(SyntaxError, match=msg):
          keyword_column.query("class == 0")

      # Keyword used as the index name.
      keyword_index = DataFrame()
      keyword_index.index.name = "lambda"
      with pytest.raises(SyntaxError, match=msg):
          keyword_index.query("lambda == 0")
  614. def test_true_false_logic(self):
  615. # GH 25823
  616. assert pd.eval("not True") == -2
  617. assert pd.eval("not False") == -1
  618. assert pd.eval("True and not True") == 0
  619. def test_and_logic_string_match(self):
  620. # GH 25823
  621. event = Series({"a": "hello"})
  622. assert pd.eval(f"{event.str.match('hello').a}")
  623. assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}")
  def f(*args, **kwargs):
      """Return one ``np.random.randn()`` draw; all arguments are ignored."""
      return np.random.randn()
  # -------------------------------------
  # gh-12388: Typecasting rules consistency with python
  class TestTypeCasting:
      """Binary ops between a float frame and an integer literal keep the dtype."""

      @pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"])
      # maybe someday... numexpr has too many upcasting rules now
      # chain(*(np.sctypes[x] for x in ['uint', 'int', 'float']))
      @pytest.mark.parametrize("dt", [np.float32, np.float64])
      @pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")])
      def test_binop_typecasting(self, engine, parser, op, dt, left_right):
          """``pd.eval`` result has dtype ``dt`` and equals Python's ``eval``."""
          # ``df`` is referenced by name from the expression built below.
          df = tm.makeCustomDataframe(5, 3, data_gen_f=f, dtype=dt)
          left, right = left_right
          ex = " ".join((left, op, right))

          result = pd.eval(ex, engine=engine, parser=parser)
          assert df.values.dtype == dt
          assert result.values.dtype == dt
          tm.assert_frame_equal(result, eval(ex))
  # -------------------------------------
  # Basic and complex alignment
  def should_warn(*args):
      """
      Decide whether aligning the given indexes is expected to warn.

      True only when none of ``args`` is monotonic increasing and the
      XOR-fold of their "dtype is datetime64" flags is true.
      """
      none_monotonic = not any(idx.is_monotonic_increasing for idx in args)
      datetime_flags = [issubclass(idx.dtype.type, np.datetime64) for idx in args]
      only_one_dt = reduce(operator.xor, datetime_flags)
      return none_monotonic and only_one_dt
  649. class TestAlignment:
  650. index_types = ["i", "s", "dt"]
  651. lhs_index_types = index_types + ["s"] # 'p'
  def test_align_nested_unary_op(self, engine, parser):
      """A frame multiplied by an inverted integer literal matches eager math."""
      # ``df`` is referenced by name inside the evaluated expression.
      df = tm.makeCustomDataframe(5, 3, data_gen_f=f)
      result = pd.eval("df * ~2", engine=engine, parser=parser)
      tm.assert_frame_equal(result, df * ~2)
  @pytest.mark.parametrize("lr_idx_type", lhs_index_types)
  @pytest.mark.parametrize("rr_idx_type", index_types)
  @pytest.mark.parametrize("c_idx_type", index_types)
  def test_basic_frame_alignment(
      self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type
  ):
      """Adding two frames with different row index types matches eager ``+``."""
      shared = {"data_gen_f": f, "c_idx_type": c_idx_type}
      ex = "df + df2"

      with warnings.catch_warnings(record=True):
          warnings.simplefilter("always", RuntimeWarning)

          # ``df`` and ``df2`` are referenced by name from ``ex``.
          df = tm.makeCustomDataframe(10, 10, r_idx_type=lr_idx_type, **shared)
          df2 = tm.makeCustomDataframe(20, 10, r_idx_type=rr_idx_type, **shared)

          # only warns if not monotonic and not sortable
          if should_warn(df.index, df2.index):
              with tm.assert_produces_warning(RuntimeWarning):
                  result = pd.eval(ex, engine=engine, parser=parser)
          else:
              result = pd.eval(ex, engine=engine, parser=parser)
          tm.assert_frame_equal(result, df + df2)
  678. @pytest.mark.parametrize("r_idx_type", lhs_index_types)
  679. @pytest.mark.parametrize("c_idx_type", lhs_index_types)
  680. def test_frame_comparison(self, engine, parser, r_idx_type, c_idx_type):
  681. df = tm.makeCustomDataframe(
  682. 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
  683. )
  684. res = pd.eval("df < 2", engine=engine, parser=parser)
  685. tm.assert_frame_equal(res, df < 2)
  686. df3 = DataFrame(np.random.randn(*df.shape), index=df.index, columns=df.columns)
  687. res = pd.eval("df < df3", engine=engine, parser=parser)
  688. tm.assert_frame_equal(res, df < df3)
  689. @pytest.mark.parametrize("r1", lhs_index_types)
  690. @pytest.mark.parametrize("c1", index_types)
  691. @pytest.mark.parametrize("r2", index_types)
  692. @pytest.mark.parametrize("c2", index_types)
  693. def test_medium_complex_frame_alignment(self, engine, parser, r1, c1, r2, c2):
  694. with warnings.catch_warnings(record=True):
  695. warnings.simplefilter("always", RuntimeWarning)
  696. df = tm.makeCustomDataframe(
  697. 3, 2, data_gen_f=f, r_idx_type=r1, c_idx_type=c1
  698. )
  699. df2 = tm.makeCustomDataframe(
  700. 4, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2
  701. )
  702. df3 = tm.makeCustomDataframe(
  703. 5, 2, data_gen_f=f, r_idx_type=r2, c_idx_type=c2
  704. )
  705. if should_warn(df.index, df2.index, df3.index):
  706. with tm.assert_produces_warning(RuntimeWarning):
  707. res = pd.eval("df + df2 + df3", engine=engine, parser=parser)
  708. else:
  709. res = pd.eval("df + df2 + df3", engine=engine, parser=parser)
  710. tm.assert_frame_equal(res, df + df2 + df3)
  711. @pytest.mark.parametrize("index_name", ["index", "columns"])
  712. @pytest.mark.parametrize("c_idx_type", index_types)
  713. @pytest.mark.parametrize("r_idx_type", lhs_index_types)
  714. def test_basic_frame_series_alignment(
  715. self, engine, parser, index_name, r_idx_type, c_idx_type
  716. ):
  717. with warnings.catch_warnings(record=True):
  718. warnings.simplefilter("always", RuntimeWarning)
  719. df = tm.makeCustomDataframe(
  720. 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
  721. )
  722. index = getattr(df, index_name)
  723. s = Series(np.random.randn(5), index[:5])
  724. if should_warn(df.index, s.index):
  725. with tm.assert_produces_warning(RuntimeWarning):
  726. res = pd.eval("df + s", engine=engine, parser=parser)
  727. else:
  728. res = pd.eval("df + s", engine=engine, parser=parser)
  729. if r_idx_type == "dt" or c_idx_type == "dt":
  730. expected = df.add(s) if engine == "numexpr" else df + s
  731. else:
  732. expected = df + s
  733. tm.assert_frame_equal(res, expected)
  734. @pytest.mark.parametrize("index_name", ["index", "columns"])
  735. @pytest.mark.parametrize(
  736. "r_idx_type, c_idx_type",
  737. list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")],
  738. )
  739. @pytest.mark.filterwarnings("ignore::RuntimeWarning")
  740. def test_basic_series_frame_alignment(
  741. self, request, engine, parser, index_name, r_idx_type, c_idx_type
  742. ):
  743. if (
  744. engine == "numexpr"
  745. and parser in ("pandas", "python")
  746. and index_name == "index"
  747. and r_idx_type == "i"
  748. and c_idx_type == "s"
  749. ):
  750. reason = (
  751. f"Flaky column ordering when engine={engine}, "
  752. f"parser={parser}, index_name={index_name}, "
  753. f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}"
  754. )
  755. request.node.add_marker(pytest.mark.xfail(reason=reason, strict=False))
  756. df = tm.makeCustomDataframe(
  757. 10, 7, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
  758. )
  759. index = getattr(df, index_name)
  760. s = Series(np.random.randn(5), index[:5])
  761. if should_warn(s.index, df.index):
  762. with tm.assert_produces_warning(RuntimeWarning):
  763. res = pd.eval("s + df", engine=engine, parser=parser)
  764. else:
  765. res = pd.eval("s + df", engine=engine, parser=parser)
  766. if r_idx_type == "dt" or c_idx_type == "dt":
  767. expected = df.add(s) if engine == "numexpr" else s + df
  768. else:
  769. expected = s + df
  770. tm.assert_frame_equal(res, expected)
  771. @pytest.mark.parametrize("c_idx_type", index_types)
  772. @pytest.mark.parametrize("r_idx_type", lhs_index_types)
  773. @pytest.mark.parametrize("index_name", ["index", "columns"])
  774. @pytest.mark.parametrize("op", ["+", "*"])
  775. def test_series_frame_commutativity(
  776. self, engine, parser, index_name, op, r_idx_type, c_idx_type
  777. ):
  778. with warnings.catch_warnings(record=True):
  779. warnings.simplefilter("always", RuntimeWarning)
  780. df = tm.makeCustomDataframe(
  781. 10, 10, data_gen_f=f, r_idx_type=r_idx_type, c_idx_type=c_idx_type
  782. )
  783. index = getattr(df, index_name)
  784. s = Series(np.random.randn(5), index[:5])
  785. lhs = f"s {op} df"
  786. rhs = f"df {op} s"
  787. if should_warn(df.index, s.index):
  788. with tm.assert_produces_warning(RuntimeWarning):
  789. a = pd.eval(lhs, engine=engine, parser=parser)
  790. with tm.assert_produces_warning(RuntimeWarning):
  791. b = pd.eval(rhs, engine=engine, parser=parser)
  792. else:
  793. a = pd.eval(lhs, engine=engine, parser=parser)
  794. b = pd.eval(rhs, engine=engine, parser=parser)
  795. if r_idx_type != "dt" and c_idx_type != "dt":
  796. if engine == "numexpr":
  797. tm.assert_frame_equal(a, b)
  798. @pytest.mark.parametrize("r1", lhs_index_types)
  799. @pytest.mark.parametrize("c1", index_types)
  800. @pytest.mark.parametrize("r2", index_types)
  801. @pytest.mark.parametrize("c2", index_types)
  802. def test_complex_series_frame_alignment(self, engine, parser, r1, c1, r2, c2):
  803. n = 3
  804. m1 = 5
  805. m2 = 2 * m1
  806. with warnings.catch_warnings(record=True):
  807. warnings.simplefilter("always", RuntimeWarning)
  808. index_name = random.choice(["index", "columns"])
  809. obj_name = random.choice(["df", "df2"])
  810. df = tm.makeCustomDataframe(
  811. m1, n, data_gen_f=f, r_idx_type=r1, c_idx_type=c1
  812. )
  813. df2 = tm.makeCustomDataframe(
  814. m2, n, data_gen_f=f, r_idx_type=r2, c_idx_type=c2
  815. )
  816. index = getattr(locals().get(obj_name), index_name)
  817. ser = Series(np.random.randn(n), index[:n])
  818. if r2 == "dt" or c2 == "dt":
  819. if engine == "numexpr":
  820. expected2 = df2.add(ser)
  821. else:
  822. expected2 = df2 + ser
  823. else:
  824. expected2 = df2 + ser
  825. if r1 == "dt" or c1 == "dt":
  826. if engine == "numexpr":
  827. expected = expected2.add(df)
  828. else:
  829. expected = expected2 + df
  830. else:
  831. expected = expected2 + df
  832. if should_warn(df2.index, ser.index, df.index):
  833. with tm.assert_produces_warning(RuntimeWarning):
  834. res = pd.eval("df2 + ser + df", engine=engine, parser=parser)
  835. else:
  836. res = pd.eval("df2 + ser + df", engine=engine, parser=parser)
  837. assert res.shape == expected.shape
  838. tm.assert_frame_equal(res, expected)
  839. def test_performance_warning_for_poor_alignment(self, engine, parser):
  840. df = DataFrame(np.random.randn(1000, 10))
  841. s = Series(np.random.randn(10000))
  842. if engine == "numexpr":
  843. seen = PerformanceWarning
  844. else:
  845. seen = False
  846. with tm.assert_produces_warning(seen):
  847. pd.eval("df + s", engine=engine, parser=parser)
  848. s = Series(np.random.randn(1000))
  849. with tm.assert_produces_warning(False):
  850. pd.eval("df + s", engine=engine, parser=parser)
  851. df = DataFrame(np.random.randn(10, 10000))
  852. s = Series(np.random.randn(10000))
  853. with tm.assert_produces_warning(False):
  854. pd.eval("df + s", engine=engine, parser=parser)
  855. df = DataFrame(np.random.randn(10, 10))
  856. s = Series(np.random.randn(10000))
  857. is_python_engine = engine == "python"
  858. if not is_python_engine:
  859. wrn = PerformanceWarning
  860. else:
  861. wrn = False
  862. with tm.assert_produces_warning(wrn) as w:
  863. pd.eval("df + s", engine=engine, parser=parser)
  864. if not is_python_engine:
  865. assert len(w) == 1
  866. msg = str(w[0].message)
  867. logged = np.log10(s.size - df.shape[1])
  868. expected = (
  869. f"Alignment difference on axis 1 is larger "
  870. f"than an order of magnitude on term 'df', "
  871. f"by more than {logged:.4g}; performance may suffer."
  872. )
  873. assert msg == expected
  874. # ------------------------------------
  875. # Slightly more complex ops
  876. class TestOperations:
  877. def eval(self, *args, **kwargs):
  878. kwargs["level"] = kwargs.pop("level", 0) + 1
  879. return pd.eval(*args, **kwargs)
  880. def test_simple_arith_ops(self, engine, parser):
  881. exclude_arith = []
  882. if parser == "python":
  883. exclude_arith = ["in", "not in"]
  884. arith_ops = [
  885. op
  886. for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS
  887. if op not in exclude_arith
  888. ]
  889. ops = (op for op in arith_ops if op != "//")
  890. for op in ops:
  891. ex = f"1 {op} 1"
  892. ex2 = f"x {op} 1"
  893. ex3 = f"1 {op} (x + 1)"
  894. if op in ("in", "not in"):
  895. msg = "argument of type 'int' is not iterable"
  896. with pytest.raises(TypeError, match=msg):
  897. pd.eval(ex, engine=engine, parser=parser)
  898. else:
  899. expec = _eval_single_bin(1, op, 1, engine)
  900. x = self.eval(ex, engine=engine, parser=parser)
  901. assert x == expec
  902. expec = _eval_single_bin(x, op, 1, engine)
  903. y = self.eval(ex2, local_dict={"x": x}, engine=engine, parser=parser)
  904. assert y == expec
  905. expec = _eval_single_bin(1, op, x + 1, engine)
  906. y = self.eval(ex3, local_dict={"x": x}, engine=engine, parser=parser)
  907. assert y == expec
  908. @pytest.mark.parametrize("rhs", [True, False])
  909. @pytest.mark.parametrize("lhs", [True, False])
  910. @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS)
  911. def test_simple_bool_ops(self, rhs, lhs, op):
  912. ex = f"{lhs} {op} {rhs}"
  913. if parser == "python" and op in ["and", "or"]:
  914. msg = "'BoolOp' nodes are not implemented"
  915. with pytest.raises(NotImplementedError, match=msg):
  916. self.eval(ex)
  917. return
  918. res = self.eval(ex)
  919. exp = eval(ex)
  920. assert res == exp
  921. @pytest.mark.parametrize("rhs", [True, False])
  922. @pytest.mark.parametrize("lhs", [True, False])
  923. @pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS)
  924. def test_bool_ops_with_constants(self, rhs, lhs, op):
  925. ex = f"{lhs} {op} {rhs}"
  926. if parser == "python" and op in ["and", "or"]:
  927. msg = "'BoolOp' nodes are not implemented"
  928. with pytest.raises(NotImplementedError, match=msg):
  929. self.eval(ex)
  930. return
  931. res = self.eval(ex)
  932. exp = eval(ex)
  933. assert res == exp
  934. def test_4d_ndarray_fails(self):
  935. x = np.random.randn(3, 4, 5, 6)
  936. y = Series(np.random.randn(10))
  937. msg = "N-dimensional objects, where N > 2, are not supported with eval"
  938. with pytest.raises(NotImplementedError, match=msg):
  939. self.eval("x + y", local_dict={"x": x, "y": y})
  940. def test_constant(self):
  941. x = self.eval("1")
  942. assert x == 1
  943. def test_single_variable(self):
  944. df = DataFrame(np.random.randn(10, 2))
  945. df2 = self.eval("df", local_dict={"df": df})
  946. tm.assert_frame_equal(df, df2)
  947. def test_failing_subscript_with_name_error(self):
  948. df = DataFrame(np.random.randn(5, 3)) # noqa:F841
  949. with pytest.raises(NameError, match="name 'x' is not defined"):
  950. self.eval("df[x > 2] > 2")
  951. def test_lhs_expression_subscript(self):
  952. df = DataFrame(np.random.randn(5, 3))
  953. result = self.eval("(df + 1)[df > 2]", local_dict={"df": df})
  954. expected = (df + 1)[df > 2]
  955. tm.assert_frame_equal(result, expected)
  956. def test_attr_expression(self):
  957. df = DataFrame(np.random.randn(5, 3), columns=list("abc"))
  958. expr1 = "df.a < df.b"
  959. expec1 = df.a < df.b
  960. expr2 = "df.a + df.b + df.c"
  961. expec2 = df.a + df.b + df.c
  962. expr3 = "df.a + df.b + df.c[df.b < 0]"
  963. expec3 = df.a + df.b + df.c[df.b < 0]
  964. exprs = expr1, expr2, expr3
  965. expecs = expec1, expec2, expec3
  966. for e, expec in zip(exprs, expecs):
  967. tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df}))
  968. def test_assignment_fails(self):
  969. df = DataFrame(np.random.randn(5, 3), columns=list("abc"))
  970. df2 = DataFrame(np.random.randn(5, 3))
  971. expr1 = "df = df2"
  972. msg = "cannot assign without a target object"
  973. with pytest.raises(ValueError, match=msg):
  974. self.eval(expr1, local_dict={"df": df, "df2": df2})
  975. def test_assignment_column_multiple_raise(self):
  976. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  977. # multiple assignees
  978. with pytest.raises(SyntaxError, match="invalid syntax"):
  979. df.eval("d c = a + b")
  980. def test_assignment_column_invalid_assign(self):
  981. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  982. # invalid assignees
  983. msg = "left hand side of an assignment must be a single name"
  984. with pytest.raises(SyntaxError, match=msg):
  985. df.eval("d,c = a + b")
  986. def test_assignment_column_invalid_assign_function_call(self):
  987. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  988. msg = "cannot assign to function call"
  989. with pytest.raises(SyntaxError, match=msg):
  990. df.eval('Timestamp("20131001") = a + b')
  991. def test_assignment_single_assign_existing(self):
  992. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  993. # single assignment - existing variable
  994. expected = df.copy()
  995. expected["a"] = expected["a"] + expected["b"]
  996. df.eval("a = a + b", inplace=True)
  997. tm.assert_frame_equal(df, expected)
  998. def test_assignment_single_assign_new(self):
  999. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  1000. # single assignment - new variable
  1001. expected = df.copy()
  1002. expected["c"] = expected["a"] + expected["b"]
  1003. df.eval("c = a + b", inplace=True)
  1004. tm.assert_frame_equal(df, expected)
  1005. def test_assignment_single_assign_local_overlap(self):
  1006. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  1007. df = df.copy()
  1008. a = 1 # noqa:F841
  1009. df.eval("a = 1 + b", inplace=True)
  1010. expected = df.copy()
  1011. expected["a"] = 1 + expected["b"]
  1012. tm.assert_frame_equal(df, expected)
  1013. def test_assignment_single_assign_name(self):
  1014. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  1015. a = 1 # noqa:F841
  1016. old_a = df.a.copy()
  1017. df.eval("a = a + b", inplace=True)
  1018. result = old_a + df.b
  1019. tm.assert_series_equal(result, df.a, check_names=False)
  1020. assert result.name is None
  1021. def test_assignment_multiple_raises(self):
  1022. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  1023. # multiple assignment
  1024. df.eval("c = a + b", inplace=True)
  1025. msg = "can only assign a single expression"
  1026. with pytest.raises(SyntaxError, match=msg):
  1027. df.eval("c = a = b")
  1028. def test_assignment_explicit(self):
  1029. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  1030. # explicit targets
  1031. self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True)
  1032. expected = df.copy()
  1033. expected["c"] = expected["a"] + expected["b"]
  1034. tm.assert_frame_equal(df, expected)
  1035. def test_column_in(self):
  1036. # GH 11235
  1037. df = DataFrame({"a": [11], "b": [-32]})
  1038. result = df.eval("a in [11, -32]")
  1039. expected = Series([True])
  1040. # TODO: 2022-01-29: Name check failed with numexpr 2.7.3 in CI
  1041. # but cannot reproduce locally
  1042. tm.assert_series_equal(result, expected, check_names=False)
  1043. @pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.")
  1044. def test_assignment_not_inplace(self):
  1045. # see gh-9297
  1046. df = DataFrame(np.random.randn(5, 2), columns=list("ab"))
  1047. actual = df.eval("c = a + b", inplace=False)
  1048. assert actual is not None
  1049. expected = df.copy()
  1050. expected["c"] = expected["a"] + expected["b"]
  1051. tm.assert_frame_equal(df, expected)
  1052. def test_multi_line_expression(self):
  1053. # GH 11149
  1054. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1055. expected = df.copy()
  1056. expected["c"] = expected["a"] + expected["b"]
  1057. expected["d"] = expected["c"] + expected["b"]
  1058. answer = df.eval(
  1059. """
  1060. c = a + b
  1061. d = c + b""",
  1062. inplace=True,
  1063. )
  1064. tm.assert_frame_equal(expected, df)
  1065. assert answer is None
  1066. expected["a"] = expected["a"] - 1
  1067. expected["e"] = expected["a"] + 2
  1068. answer = df.eval(
  1069. """
  1070. a = a - 1
  1071. e = a + 2""",
  1072. inplace=True,
  1073. )
  1074. tm.assert_frame_equal(expected, df)
  1075. assert answer is None
  1076. # multi-line not valid if not all assignments
  1077. msg = "Multi-line expressions are only valid if all expressions contain"
  1078. with pytest.raises(ValueError, match=msg):
  1079. df.eval(
  1080. """
  1081. a = b + 2
  1082. b - 2""",
  1083. inplace=False,
  1084. )
  1085. def test_multi_line_expression_not_inplace(self):
  1086. # GH 11149
  1087. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1088. expected = df.copy()
  1089. expected["c"] = expected["a"] + expected["b"]
  1090. expected["d"] = expected["c"] + expected["b"]
  1091. df = df.eval(
  1092. """
  1093. c = a + b
  1094. d = c + b""",
  1095. inplace=False,
  1096. )
  1097. tm.assert_frame_equal(expected, df)
  1098. expected["a"] = expected["a"] - 1
  1099. expected["e"] = expected["a"] + 2
  1100. df = df.eval(
  1101. """
  1102. a = a - 1
  1103. e = a + 2""",
  1104. inplace=False,
  1105. )
  1106. tm.assert_frame_equal(expected, df)
  1107. def test_multi_line_expression_local_variable(self):
  1108. # GH 15342
  1109. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1110. expected = df.copy()
  1111. local_var = 7
  1112. expected["c"] = expected["a"] * local_var
  1113. expected["d"] = expected["c"] + local_var
  1114. answer = df.eval(
  1115. """
  1116. c = a * @local_var
  1117. d = c + @local_var
  1118. """,
  1119. inplace=True,
  1120. )
  1121. tm.assert_frame_equal(expected, df)
  1122. assert answer is None
  1123. def test_multi_line_expression_callable_local_variable(self):
  1124. # 26426
  1125. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1126. def local_func(a, b):
  1127. return b
  1128. expected = df.copy()
  1129. expected["c"] = expected["a"] * local_func(1, 7)
  1130. expected["d"] = expected["c"] + local_func(1, 7)
  1131. answer = df.eval(
  1132. """
  1133. c = a * @local_func(1, 7)
  1134. d = c + @local_func(1, 7)
  1135. """,
  1136. inplace=True,
  1137. )
  1138. tm.assert_frame_equal(expected, df)
  1139. assert answer is None
  1140. def test_multi_line_expression_callable_local_variable_with_kwargs(self):
  1141. # 26426
  1142. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1143. def local_func(a, b):
  1144. return b
  1145. expected = df.copy()
  1146. expected["c"] = expected["a"] * local_func(b=7, a=1)
  1147. expected["d"] = expected["c"] + local_func(b=7, a=1)
  1148. answer = df.eval(
  1149. """
  1150. c = a * @local_func(b=7, a=1)
  1151. d = c + @local_func(b=7, a=1)
  1152. """,
  1153. inplace=True,
  1154. )
  1155. tm.assert_frame_equal(expected, df)
  1156. assert answer is None
  1157. def test_assignment_in_query(self):
  1158. # GH 8664
  1159. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1160. df_orig = df.copy()
  1161. msg = "cannot assign without a target object"
  1162. with pytest.raises(ValueError, match=msg):
  1163. df.query("a = 1")
  1164. tm.assert_frame_equal(df, df_orig)
  1165. def test_query_inplace(self):
  1166. # see gh-11149
  1167. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1168. expected = df.copy()
  1169. expected = expected[expected["a"] == 2]
  1170. df.query("a == 2", inplace=True)
  1171. tm.assert_frame_equal(expected, df)
  1172. df = {}
  1173. expected = {"a": 3}
  1174. self.eval("a = 1 + 2", target=df, inplace=True)
  1175. tm.assert_dict_equal(df, expected)
  1176. @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)])
  1177. def test_cannot_item_assign(self, invalid_target):
  1178. msg = "Cannot assign expression output to target"
  1179. expression = "a = 1 + 2"
  1180. with pytest.raises(ValueError, match=msg):
  1181. self.eval(expression, target=invalid_target, inplace=True)
  1182. if hasattr(invalid_target, "copy"):
  1183. with pytest.raises(ValueError, match=msg):
  1184. self.eval(expression, target=invalid_target, inplace=False)
  1185. @pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)])
  1186. def test_cannot_copy_item(self, invalid_target):
  1187. msg = "Cannot return a copy of the target"
  1188. expression = "a = 1 + 2"
  1189. with pytest.raises(ValueError, match=msg):
  1190. self.eval(expression, target=invalid_target, inplace=False)
  1191. @pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}])
  1192. def test_inplace_no_assignment(self, target):
  1193. expression = "1 + 2"
  1194. assert self.eval(expression, target=target, inplace=False) == 3
  1195. msg = "Cannot operate inplace if there is no assignment"
  1196. with pytest.raises(ValueError, match=msg):
  1197. self.eval(expression, target=target, inplace=True)
  1198. def test_basic_period_index_boolean_expression(self):
  1199. df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i")
  1200. e = df < 2
  1201. r = self.eval("df < 2", local_dict={"df": df})
  1202. x = df < 2
  1203. tm.assert_frame_equal(r, e)
  1204. tm.assert_frame_equal(x, e)
  1205. def test_basic_period_index_subscript_expression(self):
  1206. df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i")
  1207. r = self.eval("df[df < 2 + 3]", local_dict={"df": df})
  1208. e = df[df < 2 + 3]
  1209. tm.assert_frame_equal(r, e)
  1210. def test_nested_period_index_subscript_expression(self):
  1211. df = tm.makeCustomDataframe(2, 2, data_gen_f=f, c_idx_type="p", r_idx_type="i")
  1212. r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df})
  1213. e = df[df[df < 2] < 2] + df * 2
  1214. tm.assert_frame_equal(r, e)
  1215. def test_date_boolean(self, engine, parser):
  1216. df = DataFrame(np.random.randn(5, 3))
  1217. df["dates1"] = date_range("1/1/2012", periods=5)
  1218. res = self.eval(
  1219. "df.dates1 < 20130101",
  1220. local_dict={"df": df},
  1221. engine=engine,
  1222. parser=parser,
  1223. )
  1224. expec = df.dates1 < "20130101"
  1225. tm.assert_series_equal(res, expec, check_names=False)
  1226. def test_simple_in_ops(self, engine, parser):
  1227. if parser != "python":
  1228. res = pd.eval("1 in [1, 2]", engine=engine, parser=parser)
  1229. assert res
  1230. res = pd.eval("2 in (1, 2)", engine=engine, parser=parser)
  1231. assert res
  1232. res = pd.eval("3 in (1, 2)", engine=engine, parser=parser)
  1233. assert not res
  1234. res = pd.eval("3 not in (1, 2)", engine=engine, parser=parser)
  1235. assert res
  1236. res = pd.eval("[3] not in (1, 2)", engine=engine, parser=parser)
  1237. assert res
  1238. res = pd.eval("[3] in ([3], 2)", engine=engine, parser=parser)
  1239. assert res
  1240. res = pd.eval("[[3]] in [[[3]], 2]", engine=engine, parser=parser)
  1241. assert res
  1242. res = pd.eval("(3,) in [(3,), 2]", engine=engine, parser=parser)
  1243. assert res
  1244. res = pd.eval("(3,) not in [(3,), 2]", engine=engine, parser=parser)
  1245. assert not res
  1246. res = pd.eval("[(3,)] in [[(3,)], 2]", engine=engine, parser=parser)
  1247. assert res
  1248. else:
  1249. msg = "'In' nodes are not implemented"
  1250. with pytest.raises(NotImplementedError, match=msg):
  1251. pd.eval("1 in [1, 2]", engine=engine, parser=parser)
  1252. with pytest.raises(NotImplementedError, match=msg):
  1253. pd.eval("2 in (1, 2)", engine=engine, parser=parser)
  1254. with pytest.raises(NotImplementedError, match=msg):
  1255. pd.eval("3 in (1, 2)", engine=engine, parser=parser)
  1256. with pytest.raises(NotImplementedError, match=msg):
  1257. pd.eval("[(3,)] in (1, 2, [(3,)])", engine=engine, parser=parser)
  1258. msg = "'NotIn' nodes are not implemented"
  1259. with pytest.raises(NotImplementedError, match=msg):
  1260. pd.eval("3 not in (1, 2)", engine=engine, parser=parser)
  1261. with pytest.raises(NotImplementedError, match=msg):
  1262. pd.eval("[3] not in (1, 2, [[3]])", engine=engine, parser=parser)
  1263. def test_check_many_exprs(self, engine, parser):
  1264. a = 1 # noqa:F841
  1265. expr = " * ".join("a" * 33)
  1266. expected = 1
  1267. res = pd.eval(expr, engine=engine, parser=parser)
  1268. assert res == expected
  1269. @pytest.mark.parametrize(
  1270. "expr",
  1271. [
  1272. "df > 2 and df > 3",
  1273. "df > 2 or df > 3",
  1274. "not df > 2",
  1275. ],
  1276. )
  1277. def test_fails_and_or_not(self, expr, engine, parser):
  1278. df = DataFrame(np.random.randn(5, 3))
  1279. if parser == "python":
  1280. msg = "'BoolOp' nodes are not implemented"
  1281. if "not" in expr:
  1282. msg = "'Not' nodes are not implemented"
  1283. with pytest.raises(NotImplementedError, match=msg):
  1284. pd.eval(
  1285. expr,
  1286. local_dict={"df": df},
  1287. parser=parser,
  1288. engine=engine,
  1289. )
  1290. else:
  1291. # smoke-test, should not raise
  1292. pd.eval(
  1293. expr,
  1294. local_dict={"df": df},
  1295. parser=parser,
  1296. engine=engine,
  1297. )
  1298. @pytest.mark.parametrize("char", ["|", "&"])
  1299. def test_fails_ampersand_pipe(self, char, engine, parser):
  1300. df = DataFrame(np.random.randn(5, 3)) # noqa:F841
  1301. ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)"
  1302. if parser == "python":
  1303. msg = "cannot evaluate scalar only bool ops"
  1304. with pytest.raises(NotImplementedError, match=msg):
  1305. pd.eval(ex, parser=parser, engine=engine)
  1306. else:
  1307. # smoke-test, should not raise
  1308. pd.eval(ex, parser=parser, engine=engine)
  1309. class TestMath:
  1310. def eval(self, *args, **kwargs):
  1311. kwargs["level"] = kwargs.pop("level", 0) + 1
  1312. return pd.eval(*args, **kwargs)
  1313. @pytest.mark.skipif(
  1314. not NUMEXPR_INSTALLED, reason="Unary ops only implemented for numexpr"
  1315. )
  1316. @pytest.mark.parametrize("fn", _unary_math_ops)
  1317. def test_unary_functions(self, fn):
  1318. df = DataFrame({"a": np.random.randn(10)})
  1319. a = df.a
  1320. expr = f"{fn}(a)"
  1321. got = self.eval(expr)
  1322. with np.errstate(all="ignore"):
  1323. expect = getattr(np, fn)(a)
  1324. tm.assert_series_equal(got, expect, check_names=False)
  1325. @pytest.mark.parametrize("fn", _binary_math_ops)
  1326. def test_binary_functions(self, fn):
  1327. df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)})
  1328. a = df.a
  1329. b = df.b
  1330. expr = f"{fn}(a, b)"
  1331. got = self.eval(expr)
  1332. with np.errstate(all="ignore"):
  1333. expect = getattr(np, fn)(a, b)
  1334. tm.assert_almost_equal(got, expect, check_names=False)
  1335. def test_df_use_case(self, engine, parser):
  1336. df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)})
  1337. df.eval(
  1338. "e = arctan2(sin(a), b)",
  1339. engine=engine,
  1340. parser=parser,
  1341. inplace=True,
  1342. )
  1343. got = df.e
  1344. expect = np.arctan2(np.sin(df.a), df.b)
  1345. tm.assert_series_equal(got, expect, check_names=False)
  1346. def test_df_arithmetic_subexpression(self, engine, parser):
  1347. df = DataFrame({"a": np.random.randn(10), "b": np.random.randn(10)})
  1348. df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True)
  1349. got = df.e
  1350. expect = np.sin(df.a + df.b)
  1351. tm.assert_series_equal(got, expect, check_names=False)
  1352. @pytest.mark.parametrize(
  1353. "dtype, expect_dtype",
  1354. [
  1355. (np.int32, np.float64),
  1356. (np.int64, np.float64),
  1357. (np.float32, np.float32),
  1358. (np.float64, np.float64),
  1359. pytest.param(np.complex128, np.complex128, marks=td.skip_if_windows),
  1360. ],
  1361. )
  1362. def test_result_types(self, dtype, expect_dtype, engine, parser):
  1363. # xref https://github.com/pandas-dev/pandas/issues/12293
  1364. # this fails on Windows, apparently a floating point precision issue
  1365. # Did not test complex64 because DataFrame is converting it to
  1366. # complex128. Due to https://github.com/pandas-dev/pandas/issues/10952
  1367. df = DataFrame({"a": np.random.randn(10).astype(dtype)})
  1368. assert df.a.dtype == dtype
  1369. df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True)
  1370. got = df.b
  1371. expect = np.sin(df.a)
  1372. assert expect.dtype == got.dtype
  1373. assert expect_dtype == got.dtype
  1374. tm.assert_series_equal(got, expect, check_names=False)
  1375. def test_undefined_func(self, engine, parser):
  1376. df = DataFrame({"a": np.random.randn(10)})
  1377. msg = '"mysin" is not a supported function'
  1378. with pytest.raises(ValueError, match=msg):
  1379. df.eval("mysin(a)", engine=engine, parser=parser)
  1380. def test_keyword_arg(self, engine, parser):
  1381. df = DataFrame({"a": np.random.randn(10)})
  1382. msg = 'Function "sin" does not support keyword arguments'
  1383. with pytest.raises(TypeError, match=msg):
  1384. df.eval("sin(x=a)", engine=engine, parser=parser)
  1385. _var_s = np.random.randn(10)
  1386. class TestScope:
  1387. def test_global_scope(self, engine, parser):
  1388. e = "_var_s * 2"
  1389. tm.assert_numpy_array_equal(
  1390. _var_s * 2, pd.eval(e, engine=engine, parser=parser)
  1391. )
  1392. def test_no_new_locals(self, engine, parser):
  1393. x = 1
  1394. lcls = locals().copy()
  1395. pd.eval("x + 1", local_dict=lcls, engine=engine, parser=parser)
  1396. lcls2 = locals().copy()
  1397. lcls2.pop("lcls")
  1398. assert lcls == lcls2
  1399. def test_no_new_globals(self, engine, parser):
  1400. x = 1 # noqa:F841
  1401. gbls = globals().copy()
  1402. pd.eval("x + 1", engine=engine, parser=parser)
  1403. gbls2 = globals().copy()
  1404. assert gbls == gbls2
  1405. def test_empty_locals(self, engine, parser):
  1406. # GH 47084
  1407. x = 1 # noqa: F841
  1408. msg = "name 'x' is not defined"
  1409. with pytest.raises(UndefinedVariableError, match=msg):
  1410. pd.eval("x + 1", engine=engine, parser=parser, local_dict={})
  1411. def test_empty_globals(self, engine, parser):
  1412. # GH 47084
  1413. msg = "name '_var_s' is not defined"
  1414. e = "_var_s * 2"
  1415. with pytest.raises(UndefinedVariableError, match=msg):
  1416. pd.eval(e, engine=engine, parser=parser, global_dict={})
  1417. @td.skip_if_no_ne
  1418. def test_invalid_engine():
  1419. msg = "Invalid engine 'asdf' passed"
  1420. with pytest.raises(KeyError, match=msg):
  1421. pd.eval("x + y", local_dict={"x": 1, "y": 2}, engine="asdf")
  1422. @td.skip_if_no_ne
  1423. @pytest.mark.parametrize(
  1424. ("use_numexpr", "expected"),
  1425. (
  1426. (True, "numexpr"),
  1427. (False, "python"),
  1428. ),
  1429. )
  1430. def test_numexpr_option_respected(use_numexpr, expected):
  1431. # GH 32556
  1432. from pandas.core.computation.eval import _check_engine
  1433. with pd.option_context("compute.use_numexpr", use_numexpr):
  1434. result = _check_engine(None)
  1435. assert result == expected
  1436. @td.skip_if_no_ne
  1437. def test_numexpr_option_incompatible_op():
  1438. # GH 32556
  1439. with pd.option_context("compute.use_numexpr", False):
  1440. df = DataFrame(
  1441. {"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]}
  1442. )
  1443. result = df.query("A.isnull()")
  1444. expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5])
  1445. tm.assert_frame_equal(result, expected)
  1446. @td.skip_if_no_ne
  1447. def test_invalid_parser():
  1448. msg = "Invalid parser 'asdf' passed"
  1449. with pytest.raises(KeyError, match=msg):
  1450. pd.eval("x + y", local_dict={"x": 1, "y": 2}, parser="asdf")
# Maps each parser name to the expression-visitor class exercised by
# ``test_disallowed_nodes``; the keys also serve as that test's ``parser``
# parametrization.
_parsers: dict[str, type[BaseExprVisitor]] = {
    "python": PythonExprVisitor,
    "pytables": pytables.PyTablesExprVisitor,
    "pandas": PandasExprVisitor,
}
  1456. @pytest.mark.parametrize("engine", ENGINES)
  1457. @pytest.mark.parametrize("parser", _parsers)
  1458. def test_disallowed_nodes(engine, parser):
  1459. VisitorClass = _parsers[parser]
  1460. inst = VisitorClass("x + 1", engine, parser)
  1461. for ops in VisitorClass.unsupported_nodes:
  1462. msg = "nodes are not implemented"
  1463. with pytest.raises(NotImplementedError, match=msg):
  1464. getattr(inst, ops)()
  1465. def test_syntax_error_exprs(engine, parser):
  1466. e = "s +"
  1467. with pytest.raises(SyntaxError, match="invalid syntax"):
  1468. pd.eval(e, engine=engine, parser=parser)
  1469. def test_name_error_exprs(engine, parser):
  1470. e = "s + t"
  1471. msg = "name 's' is not defined"
  1472. with pytest.raises(NameError, match=msg):
  1473. pd.eval(e, engine=engine, parser=parser)
  1474. @pytest.mark.parametrize("express", ["a + @b", "@a + b", "@a + @b"])
  1475. def test_invalid_local_variable_reference(engine, parser, express):
  1476. a, b = 1, 2 # noqa:F841
  1477. if parser != "pandas":
  1478. with pytest.raises(SyntaxError, match="The '@' prefix is only"):
  1479. pd.eval(express, engine=engine, parser=parser)
  1480. else:
  1481. with pytest.raises(SyntaxError, match="The '@' prefix is not"):
  1482. pd.eval(express, engine=engine, parser=parser)
  1483. def test_numexpr_builtin_raises(engine, parser):
  1484. sin, dotted_line = 1, 2
  1485. if engine == "numexpr":
  1486. msg = "Variables in expression .+"
  1487. with pytest.raises(NumExprClobberingError, match=msg):
  1488. pd.eval("sin + dotted_line", engine=engine, parser=parser)
  1489. else:
  1490. res = pd.eval("sin + dotted_line", engine=engine, parser=parser)
  1491. assert res == sin + dotted_line
  1492. def test_bad_resolver_raises(engine, parser):
  1493. cannot_resolve = 42, 3.0
  1494. with pytest.raises(TypeError, match="Resolver of type .+"):
  1495. pd.eval("1 + 2", resolvers=cannot_resolve, engine=engine, parser=parser)
  1496. def test_empty_string_raises(engine, parser):
  1497. # GH 13139
  1498. with pytest.raises(ValueError, match="expr cannot be an empty string"):
  1499. pd.eval("", engine=engine, parser=parser)
  1500. def test_more_than_one_expression_raises(engine, parser):
  1501. with pytest.raises(SyntaxError, match="only a single expression is allowed"):
  1502. pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser)
  1503. @pytest.mark.parametrize("cmp", ("and", "or"))
  1504. @pytest.mark.parametrize("lhs", (int, float))
  1505. @pytest.mark.parametrize("rhs", (int, float))
  1506. def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser):
  1507. gen = {int: lambda: np.random.randint(10), float: np.random.randn}
  1508. mid = gen[lhs]() # noqa:F841
  1509. lhs = gen[lhs]()
  1510. rhs = gen[rhs]()
  1511. ex1 = f"lhs {cmp} mid {cmp} rhs"
  1512. ex2 = f"lhs {cmp} mid and mid {cmp} rhs"
  1513. ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)"
  1514. for ex in (ex1, ex2, ex3):
  1515. msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not"
  1516. with pytest.raises(NotImplementedError, match=msg):
  1517. pd.eval(ex, engine=engine, parser=parser)
  1518. @pytest.mark.parametrize(
  1519. "other",
  1520. [
  1521. "'x'",
  1522. "...",
  1523. ],
  1524. )
  1525. def test_equals_various(other):
  1526. df = DataFrame({"A": ["a", "b", "c"]})
  1527. result = df.eval(f"A == {other}")
  1528. expected = Series([False, False, False], name="A")
  1529. if USE_NUMEXPR:
  1530. # https://github.com/pandas-dev/pandas/issues/10239
  1531. # lose name with numexpr engine. Remove when that's fixed.
  1532. expected.name = None
  1533. tm.assert_series_equal(result, expected)
  1534. def test_inf(engine, parser):
  1535. s = "inf + 1"
  1536. expected = np.inf
  1537. result = pd.eval(s, engine=engine, parser=parser)
  1538. assert result == expected
  1539. @pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"])
  1540. def test_query_token(engine, column):
  1541. # See: https://github.com/pandas-dev/pandas/pull/42826
  1542. df = DataFrame(np.random.randn(5, 2), columns=[column, "b"])
  1543. expected = df[df[column] > 5]
  1544. query_string = f"`{column}` > 5"
  1545. result = df.query(query_string, engine=engine)
  1546. tm.assert_frame_equal(result, expected)
  1547. def test_negate_lt_eq_le(engine, parser):
  1548. df = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"])
  1549. expected = df[~(df.cat > 0)]
  1550. result = df.query("~(cat > 0)", engine=engine, parser=parser)
  1551. tm.assert_frame_equal(result, expected)
  1552. if parser == "python":
  1553. msg = "'Not' nodes are not implemented"
  1554. with pytest.raises(NotImplementedError, match=msg):
  1555. df.query("not (cat > 0)", engine=engine, parser=parser)
  1556. else:
  1557. result = df.query("not (cat > 0)", engine=engine, parser=parser)
  1558. tm.assert_frame_equal(result, expected)
  1559. @pytest.mark.parametrize(
  1560. "column",
  1561. DEFAULT_GLOBALS.keys(),
  1562. )
  1563. def test_eval_no_support_column_name(request, column):
  1564. # GH 44603
  1565. if column in ["True", "False", "inf", "Inf"]:
  1566. request.node.add_marker(
  1567. pytest.mark.xfail(
  1568. raises=KeyError,
  1569. reason=f"GH 47859 DataFrame eval not supported with {column}",
  1570. )
  1571. )
  1572. df = DataFrame(np.random.randint(0, 100, size=(10, 2)), columns=[column, "col1"])
  1573. expected = df[df[column] > 6]
  1574. result = df.query(f"{column}>6")
  1575. tm.assert_frame_equal(result, expected)
  1576. def test_set_inplace(using_copy_on_write):
  1577. # https://github.com/pandas-dev/pandas/issues/47449
  1578. # Ensure we don't only update the DataFrame inplace, but also the actual
  1579. # column values, such that references to this column also get updated
  1580. df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
  1581. result_view = df[:]
  1582. ser = df["A"]
  1583. df.eval("A = B + C", inplace=True)
  1584. expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]})
  1585. tm.assert_frame_equal(df, expected)
  1586. if not using_copy_on_write:
  1587. tm.assert_series_equal(ser, expected["A"])
  1588. tm.assert_series_equal(result_view["A"], expected["A"])
  1589. else:
  1590. expected = Series([1, 2, 3], name="A")
  1591. tm.assert_series_equal(ser, expected)
  1592. tm.assert_series_equal(result_view["A"], expected)
  1593. class TestValidate:
  1594. @pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0])
  1595. def test_validate_bool_args(self, value):
  1596. msg = 'For argument "inplace" expected type bool, received type'
  1597. with pytest.raises(ValueError, match=msg):
  1598. pd.eval("2+2", inplace=value)