12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361 |
- import operator
- import numpy as np
- import pytest
- from pandas.errors import (
- NumExprClobberingError,
- UndefinedVariableError,
- )
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- DataFrame,
- Index,
- MultiIndex,
- Series,
- date_range,
- )
- import pandas._testing as tm
- from pandas.core.computation.check import NUMEXPR_INSTALLED
@pytest.fixture(
    params=["python", "pandas"],
    ids=lambda x: x,
)
def parser(request):
    """Parametrized fixture returning each expression parser name in turn.

    The parameter values are ``"python"`` and ``"pandas"``; the test id is the
    parameter itself.
    """
    parser_name = request.param
    return parser_name
@pytest.fixture(
    params=[
        "python",
        pytest.param("numexpr", marks=td.skip_if_no_ne),
    ],
    ids=lambda x: x,
)
def engine(request):
    """Parametrized fixture returning each evaluation engine name in turn.

    The ``"numexpr"`` parameter carries the ``td.skip_if_no_ne`` mark; the
    ``"python"`` parameter is unmarked.
    """
    engine_name = request.param
    return engine_name
def skip_if_no_pandas_parser(parser):
    """Skip the calling test unless ``parser`` is ``"pandas"``.

    Parameters
    ----------
    parser : str
        Name of the parser the test is about to use.
    """
    if parser == "pandas":
        return
    pytest.skip(f"cannot evaluate with parser {parser!r}")
class TestCompat:
    """Check ``query``/``eval`` with the default, ``None``, python and numexpr engines."""

    @pytest.fixture
    def df(self):
        """Frame with a single integer column ``A``."""
        return DataFrame({"A": [1, 2, 3]})

    @pytest.fixture
    def expected1(self, df):
        """Rows of ``df`` whose ``A`` value is positive."""
        return df[df["A"] > 0]

    @pytest.fixture
    def expected2(self, df):
        """Column ``A`` of ``df`` incremented by one."""
        return df["A"] + 1

    @staticmethod
    def _assert_query_and_eval(df, expected1, expected2, **kwargs):
        """Run ``query("A>0")`` then ``eval("A+1")`` with ``kwargs`` and compare."""
        filtered = df.query("A>0", **kwargs)
        tm.assert_frame_equal(filtered, expected1)
        evaluated = df.eval("A+1", **kwargs)
        tm.assert_series_equal(evaluated, expected2, check_names=False)

    def test_query_default(self, df, expected1, expected2):
        # GH 12749
        # this should always work, whether NUMEXPR_INSTALLED or not
        self._assert_query_and_eval(df, expected1, expected2)

    def test_query_None(self, df, expected1, expected2):
        """Passing ``engine=None`` explicitly gives the same results."""
        self._assert_query_and_eval(df, expected1, expected2, engine=None)

    def test_query_python(self, df, expected1, expected2):
        """The python engine gives the same results."""
        self._assert_query_and_eval(df, expected1, expected2, engine="python")

    def test_query_numexpr(self, df, expected1, expected2):
        """numexpr gives the same results when installed, else ImportError."""
        if NUMEXPR_INSTALLED:
            self._assert_query_and_eval(df, expected1, expected2, engine="numexpr")
            return

        # Without numexpr both entry points must refuse the engine.
        msg = (
            r"'numexpr' is not installed or an unsupported version. "
            r"Cannot use engine='numexpr' for query/eval if 'numexpr' is "
            r"not installed"
        )
        with pytest.raises(ImportError, match=msg):
            df.query("A>0", engine="numexpr")
        with pytest.raises(ImportError, match=msg):
            df.eval("A+1", engine="numexpr")
class TestDataFrameEval:
    """Tests for arithmetic evaluation paths and for ``DataFrame.eval``/``query`` arguments."""

    # smaller hits python, larger hits numexpr
    @pytest.mark.parametrize("n", [4, 4000])
    @pytest.mark.parametrize(
        "op_str,op,rop",
        [
            ("+", "__add__", "__radd__"),
            ("-", "__sub__", "__rsub__"),
            ("*", "__mul__", "__rmul__"),
            ("/", "__truediv__", "__rtruediv__"),
        ],
    )
    def test_ops(self, op_str, op, rop, n):
        """Check ``Series <op> DataFrame`` against an explicitly tiled frame.

        Parameters
        ----------
        op_str : str
            Operator symbol spliced into the evaluated expressions.
        op, rop : str
            Names of the direct and reflected dunder methods for ``op_str``.
        n : int
            Number of rows in the frame.
        """
        # tst ops and reversed ops in evaluation
        # GH7198

        # The names ``df``, ``m`` and ``base`` are referenced by the strings
        # passed to ``eval`` below and must not be renamed.
        df = DataFrame(1, index=range(n), columns=list("abcd"))
        df.iloc[0] = 2
        m = df.mean()

        base = DataFrame(  # noqa:F841
            np.tile(m.values, n).reshape(n, -1), columns=list("abcd")
        )

        expected = eval(f"base {op_str} df")

        # ops as strings
        result = eval(f"m {op_str} df")
        tm.assert_frame_equal(result, expected)

        # Dispatch on the operator symbol. Comparing the dunder name ``op``
        # against symbols never matches, which would leave both branches dead.
        # these are commutative
        if op_str in ("+", "*"):
            result = getattr(df, op)(m)
            tm.assert_frame_equal(result, expected)

        # these are not
        elif op_str in ("-", "/"):
            result = getattr(df, rop)(m)
            tm.assert_frame_equal(result, expected)

    def test_dataframe_sub_numexpr_path(self):
        """``1 - np.isnan(df)`` agrees whether sliced before or after the subtraction."""
        # GH7192: Note we need a large number of rows to ensure this
        # goes through the numexpr path
        df = DataFrame({"A": np.random.randn(25000)})
        df.iloc[0:5] = np.nan
        head = slice(0, 25)
        expected = 1 - np.isnan(df.iloc[head])
        result = (1 - np.isnan(df)).iloc[head]
        tm.assert_frame_equal(result, expected)

    def test_query_non_str(self):
        """``query`` raises ValueError for a callable and for an integer."""
        # GH 11485
        df = DataFrame({"A": [1, 2, 3], "B": ["a", "b", "b"]})

        msg = "expr must be a string to be evaluated"
        for bad_expr in (lambda x: x.B == "b", 111):
            with pytest.raises(ValueError, match=msg):
                df.query(bad_expr)

    def test_query_empty_string(self):
        """``query("")`` raises ValueError."""
        # GH 13139
        df = DataFrame({"A": [1, 2, 3]})

        msg = "expr cannot be an empty string"
        with pytest.raises(ValueError, match=msg):
            df.query("")

    def test_eval_resolvers_as_list(self):
        """Resolvers passed as a list supply ``a`` and ``b`` to both eval entry points."""
        # GH 14095
        df = DataFrame(np.random.randn(10, 2), columns=list("ab"))
        dict1 = {"a": 1}
        dict2 = {"b": 2}
        total = dict1["a"] + dict2["b"]
        assert df.eval("a + b", resolvers=[dict1, dict2]) == total
        assert pd.eval("a + b", resolvers=[dict1, dict2]) == total

    def test_eval_resolvers_combined(self):
        """User resolvers and the frame's own columns are usable in one expression."""
        # GH 34966
        df = DataFrame(np.random.randn(10, 2), columns=list("ab"))
        dict1 = {"c": 2}

        # Both input and default index/column resolvers should be usable
        result = df.eval("a + b * c", resolvers=[dict1])

        expected = df["a"] + df["b"] * dict1["c"]
        tm.assert_series_equal(result, expected)

    def test_eval_object_dtype_binop(self):
        """Assigning ``(a1 == 'Y') & True`` on a string column adds a boolean column."""
        # GH#24883
        df = DataFrame({"a1": ["Y", "N"]})
        res = df.eval("c = ((a1 == 'Y') & True)")
        expected = DataFrame({"a1": ["Y", "N"], "c": [True, False]})
        tm.assert_frame_equal(res, expected)
class TestDataFrameQueryWithMultiIndex:
    """``DataFrame.query`` against named, unnamed and partially named MultiIndex levels."""

    @staticmethod
    def _mirrored_cases(name, value, ind):
        """Build ``(query, mirrored_query, mask)`` triples for one index level.

        ``name`` is the identifier used for the level inside the query string,
        ``value`` the string label compared against, and ``ind`` a Series
        holding that level's values, from which the expected masks are built.
        """
        is_member = ind.isin([value])
        return [
            # equality
            (f'{name} == "{value}"', f'"{value}" == {name}', ind == value),
            # inequality
            (f'{name} != "{value}"', f'"{value}" != {name}', ind != value),
            # list equality (really just set membership)
            (f'{name} == ["{value}"]', f'["{value}"] == {name}', is_member),
            (f'{name} != ["{value}"]', f'["{value}"] != {name}', ~is_member),
            # in/not in ops
            (f'["{value}"] in {name}', f'"{value}" in {name}', is_member),
            (f'["{value}"] not in {name}', f'"{value}" not in {name}', ~is_member),
        ]

    @staticmethod
    def _assert_cases(df, cases, parser, engine):
        """Run both query spellings of every case and compare with ``df[mask]``."""
        for query, mirrored, mask in cases:
            res1 = df.query(query, parser=parser, engine=engine)
            res2 = df.query(mirrored, parser=parser, engine=engine)
            exp = df[mask]
            tm.assert_frame_equal(res1, exp)
            tm.assert_frame_equal(res2, exp)

    def test_query_with_named_multiindex(self, parser, engine):
        """A named level can be queried by its name (``color``)."""
        skip_if_no_pandas_parser(parser)
        a = np.random.choice(["red", "green"], size=10)
        b = np.random.choice(["eggs", "ham"], size=10)
        index = MultiIndex.from_arrays([a, b], names=["color", "food"])
        df = DataFrame(np.random.randn(10, 2), index=index)
        ind = Series(
            df.index.get_level_values("color").values, index=index, name="color"
        )

        cases = self._mirrored_cases("color", "red", ind)
        self._assert_cases(df, cases, parser, engine)

    def test_query_with_unnamed_multiindex(self, parser, engine):
        """Unnamed levels can be queried as ``ilevel_0`` and ``ilevel_1``."""
        skip_if_no_pandas_parser(parser)
        a = np.random.choice(["red", "green"], size=10)
        b = np.random.choice(["eggs", "ham"], size=10)
        index = MultiIndex.from_arrays([a, b])
        df = DataFrame(np.random.randn(10, 2), index=index)

        # Level 0 is compared against "red", level 1 against "eggs".
        for level, value in ((0, "red"), (1, "eggs")):
            ind = Series(df.index.get_level_values(level).values, index=index)
            cases = self._mirrored_cases(f"ilevel_{level}", value, ind)
            self._assert_cases(df, cases, parser, engine)

    def test_query_with_partially_named_multiindex(self, parser, engine):
        """Named and unnamed levels of one index can both be queried."""
        skip_if_no_pandas_parser(parser)
        a = np.random.choice(["red", "green"], size=10)
        b = np.arange(10)
        index = MultiIndex.from_arrays([a, b])
        index.names = [None, "rating"]
        df = DataFrame(np.random.randn(10, 2), index=index)

        rating = Series(
            df.index.get_level_values("rating").values, index=index, name="rating"
        )
        level0 = Series(df.index.get_level_values(0).values, index=index)

        cases = [
            ("rating == 1", rating == 1),
            ("rating != 1", rating != 1),
            ('ilevel_0 == "red"', level0 == "red"),
            ('ilevel_0 != "red"', level0 != "red"),
        ]
        for query, mask in cases:
            res = df.query(query, parser=parser, engine=engine)
            tm.assert_frame_equal(res, df[mask])

    def test_query_multiindex_get_index_resolvers(self):
        """``_get_index_resolvers`` exposes the index, the columns and each level."""
        df = tm.makeCustomDataframe(
            10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"]
        )
        resolvers = df._get_index_resolvers()

        def level_as_series(mi, level):
            # Level values as a Series re-indexed by the full MultiIndex.
            ser = mi.get_level_values(level).to_series()
            ser.index = mi
            return ser

        col_series = df.columns.to_series()
        expected = {
            "index": df.index,
            "columns": col_series,
            "spam": level_as_series(df.index, "spam"),
            "eggs": level_as_series(df.index, "eggs"),
            "C0": col_series,
        }
        for key, resolved in resolvers.items():
            if isinstance(resolved, Index):
                assert resolved.is_(expected[key])
                continue
            if isinstance(resolved, Series):
                tm.assert_series_equal(resolved, expected[key])
                continue
            raise AssertionError("object must be a Series or Index")
@td.skip_if_no_ne
class TestDataFrameQueryNumExprPandas:
    """``DataFrame.query`` tests run with ``engine="numexpr"`` and ``parser="pandas"``.

    Every test reads the engine and parser from class attributes set in
    ``setup_class``. Locals that a query references through ``@name`` (or that
    ``pd.eval`` looks up) are resolved from the test's own frame, so those
    locals keep fixed names and those calls are made directly in the test body.
    """

    @classmethod
    def setup_class(cls):
        cls.engine = "numexpr"
        cls.parser = "pandas"

    @classmethod
    def teardown_class(cls):
        del cls.engine, cls.parser

    @staticmethod
    def _frame_with_dates(n, names=("dates1", "dates2", "dates3")):
        """Return an ``n x 3`` random frame with the requested date columns appended."""
        starts = {"dates1": "1/1/2012", "dates2": "1/1/2013", "dates3": "1/1/2014"}
        frame = DataFrame(np.random.randn(n, 3))
        for name in names:
            frame[name] = date_range(starts[name], periods=n)
        return frame

    def test_date_query_with_attribute_access(self):
        """Chained date comparison using ``@df.<column>`` attribute access."""
        kw = {"engine": self.engine, "parser": self.parser}
        skip_if_no_pandas_parser(self.parser)
        # The query refers to this local as ``@df``; do not rename it.
        df = self._frame_with_dates(5)
        res = df.query("@df.dates1 < 20130101 < @df.dates3", **kw)
        expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
        tm.assert_frame_equal(res, expec)

    def test_date_query_no_attribute_access(self):
        """Chained date comparison using bare column names."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = self._frame_with_dates(5)
        res = df.query("dates1 < 20130101 < dates3", **kw)
        expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
        tm.assert_frame_equal(res, expec)

    def test_date_query_with_NaT(self):
        """Chained date comparison when both date columns contain NaT."""
        kw = {"engine": self.engine, "parser": self.parser}
        n = 10
        df = self._frame_with_dates(n)
        for column in ("dates1", "dates3"):
            df.loc[np.random.rand(n) > 0.5, column] = pd.NaT
        res = df.query("dates1 < 20130101 < dates3", **kw)
        expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
        tm.assert_frame_equal(res, expec)

    def test_date_index_query(self):
        """A datetime index is reachable in the query as ``index``."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = self._frame_with_dates(10, names=("dates1", "dates3"))
        assert df.set_index("dates1", inplace=True, drop=True) is None
        res = df.query("index < 20130101 < dates3", **kw)
        expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
        tm.assert_frame_equal(res, expec)

    def test_date_index_query_with_NaT(self):
        """Datetime index query on a frame whose first data cell is NaT."""
        kw = {"engine": self.engine, "parser": self.parser}
        n = 10
        # Cast to object to avoid implicit cast when setting entry to pd.NaT below
        df = DataFrame(np.random.randn(n, 3)).astype({0: object})
        df["dates1"] = date_range("1/1/2012", periods=n)
        df["dates3"] = date_range("1/1/2014", periods=n)
        df.iloc[0, 0] = pd.NaT
        assert df.set_index("dates1", inplace=True, drop=True) is None
        res = df.query("index < 20130101 < dates3", **kw)
        expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
        tm.assert_frame_equal(res, expec)

    def test_date_index_query_with_NaT_duplicates(self):
        """Query by index name when the index holds randomly placed NaT values."""
        kw = {"engine": self.engine, "parser": self.parser}
        n = 10
        df = DataFrame(
            {
                "dates1": date_range("1/1/2012", periods=n),
                "dates3": date_range("1/1/2014", periods=n),
            }
        )
        df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
        assert df.set_index("dates1", inplace=True, drop=True) is None
        res = df.query("dates1 < 20130101 < dates3", **kw)
        expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
        tm.assert_frame_equal(res, expec)

    def test_date_query_with_non_date(self):
        """Datetime vs integer column: ``==``/``!=`` work, ordering raises TypeError."""
        kw = {"engine": self.engine, "parser": self.parser}
        n = 10
        df = DataFrame(
            {"dates": date_range("1/1/2012", periods=n), "nondate": np.arange(n)}
        )

        result = df.query("dates == nondate", **kw)
        assert len(result) == 0

        result = df.query("dates != nondate", **kw)
        tm.assert_frame_equal(result, df)

        msg = r"Invalid comparison between dtype=datetime64\[ns\] and ndarray"
        for op in ("<", ">", "<=", ">="):
            with pytest.raises(TypeError, match=msg):
                df.query(f"dates {op} nondate", **kw)

    def test_query_syntax_error(self):
        """An expression ending in a dangling operator raises SyntaxError."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = DataFrame({"i": range(10), "+": range(3, 13), "r": range(4, 14)})
        with pytest.raises(SyntaxError, match="invalid syntax"):
            df.query("i - +", **kw)

    def test_query_scope(self):
        """Bare names resolve to columns; ``@`` names resolve to locals."""
        kw = {"engine": self.engine, "parser": self.parser}
        skip_if_no_pandas_parser(self.parser)

        df = DataFrame(np.random.randn(20, 2), columns=list("ab"))

        # Locals deliberately share names with the columns. There must be no
        # local called ``c`` for the error cases below.
        a, b = 1, 2  # noqa:F841
        res = df.query("a > b", **kw)
        expected = df[df.a > df.b]
        tm.assert_frame_equal(res, expected)

        res = df.query("@a > b", **kw)
        expected = df[a > df.b]
        tm.assert_frame_equal(res, expected)

        # no local variable c
        with pytest.raises(
            UndefinedVariableError, match="local variable 'c' is not defined"
        ):
            df.query("@a > b > @c", **kw)

        # no column named 'c'
        with pytest.raises(UndefinedVariableError, match="name 'c' is not defined"):
            df.query("@a > b > c", **kw)

    def test_query_doesnt_pickup_local(self):
        """An undefined bare name ``sin`` raises UndefinedVariableError."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = DataFrame(np.random.randint(10, size=(10, 3)), columns=list("abc"))

        # we don't pick up the local 'sin'
        with pytest.raises(UndefinedVariableError, match="name 'sin' is not defined"):
            df.query("sin > 5", **kw)

    def test_query_builtin(self):
        """An index named ``sin`` raises NumExprClobberingError when queried."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = DataFrame(np.random.randint(10, size=(10, 3)), columns=list("abc"))

        df.index.name = "sin"
        msg = "Variables in expression.+"
        with pytest.raises(NumExprClobberingError, match=msg):
            df.query("sin > 5", **kw)

    def test_query(self):
        """Simple comparison and arithmetic expressions over columns."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"])

        tm.assert_frame_equal(df.query("a < b", **kw), df[df.a < df.b])
        tm.assert_frame_equal(
            df.query("a + b > b * c", **kw),
            df[df.a + df.b > df.b * df.c],
        )

    def test_query_index_with_name(self):
        """A named index is reachable in the query by its name."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = DataFrame(
            np.random.randint(10, size=(10, 3)),
            index=Index(range(10), name="blob"),
            columns=["a", "b", "c"],
        )
        res = df.query("(blob < 5) & (a < b)", **kw)
        expec = df[(df.index < 5) & (df.a < df.b)]
        tm.assert_frame_equal(res, expec)

        res = df.query("blob < b", **kw)
        expec = df[df.index < df.b]
        tm.assert_frame_equal(res, expec)

    def test_query_index_without_name(self):
        """An unnamed index is reachable in the query as ``index``."""
        kw = {"engine": self.engine, "parser": self.parser}
        df = DataFrame(
            np.random.randint(10, size=(10, 3)),
            index=range(10),
            columns=["a", "b", "c"],
        )

        # "index" should refer to the index
        res = df.query("index < b", **kw)
        expec = df[df.index < df.b]
        tm.assert_frame_equal(res, expec)

        # test against a scalar
        res = df.query("index < 5", **kw)
        expec = df[df.index < 5]
        tm.assert_frame_equal(res, expec)

    def test_nested_scope(self):
        """Locals ``df``/``df2`` are usable via ``@`` in query and directly in ``pd.eval``."""
        kw = {"engine": self.engine, "parser": self.parser}
        skip_if_no_pandas_parser(self.parser)

        # The expressions below refer to these locals by name.
        df = DataFrame(np.random.randn(5, 3))
        df2 = DataFrame(np.random.randn(5, 3))
        expected = df[(df > 0) & (df2 > 0)]

        result = df.query("(@df > 0) & (@df2 > 0)", **kw)
        tm.assert_frame_equal(result, expected)

        result = pd.eval("df[df > 0 and df2 > 0]", **kw)
        tm.assert_frame_equal(result, expected)

        result = pd.eval("df[df > 0 and df2 > 0 and df[df > 0] > 0]", **kw)
        expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
        tm.assert_frame_equal(result, expected)

        result = pd.eval("df[(df>0) & (df2>0)]", **kw)
        expected = df.query("(@df>0) & (@df2>0)", **kw)
        tm.assert_frame_equal(result, expected)

    def test_nested_raises_on_local_self_reference(self):
        """Referring to the local frame without ``@`` raises UndefinedVariableError."""
        df = DataFrame(np.random.randn(5, 3))

        # can't reference ourself b/c we're a local so @ is necessary
        with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"):
            df.query("df > 0", engine=self.engine, parser=self.parser)

    def test_local_syntax(self):
        """``@b`` reads the local ``b`` while bare ``b`` reads the column."""
        skip_if_no_pandas_parser(self.parser)
        kw = {"engine": self.engine, "parser": self.parser}

        df = DataFrame(np.random.randn(100, 10), columns=list("abcdefghij"))
        b = 1
        expect = df[df.a < b]
        result = df.query("a < @b", **kw)
        tm.assert_frame_equal(result, expect)

        expect = df[df.a < df.b]
        result = df.query("a < b", **kw)
        tm.assert_frame_equal(result, expect)

    def test_chained_cmp_and_in(self):
        """Chained comparison combined with chained ``not in``."""
        skip_if_no_pandas_parser(self.parser)
        kw = {"engine": self.engine, "parser": self.parser}
        cols = list("abc")
        df = DataFrame(np.random.randn(100, len(cols)), columns=cols)
        res = df.query("a < b < c and a not in b not in c", **kw)
        ind = (df.a < df.b) & (df.b < df.c) & ~df.b.isin(df.a) & ~df.c.isin(df.b)
        tm.assert_frame_equal(res, df[ind])

    def test_local_variable_with_in(self):
        """``in`` with an arithmetic left side, using a column and then a local."""
        kw = {"engine": self.engine, "parser": self.parser}
        skip_if_no_pandas_parser(self.parser)
        a = Series(np.random.randint(3, size=15), name="a")
        b = Series(np.random.randint(10, size=15), name="b")
        df = DataFrame({"a": a, "b": b})

        expected = df.loc[(df.b - 1).isin(a)]
        result = df.query("b - 1 in a", **kw)
        tm.assert_frame_equal(expected, result)

        # Rebind the local so that ``@b`` differs from the column ``b``.
        b = Series(np.random.randint(10, size=15), name="b")
        expected = df.loc[(b - 1).isin(a)]
        result = df.query("@b - 1 in a", **kw)
        tm.assert_frame_equal(expected, result)

    def test_at_inside_string(self):
        """``@c`` inside a string literal is compared as the literal text."""
        kw = {"engine": self.engine, "parser": self.parser}
        skip_if_no_pandas_parser(self.parser)
        # A local named ``c`` exists on purpose while the query uses "@c" as text.
        c = 1  # noqa:F841
        df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]})
        result = df.query('a == "@c"', **kw)
        expected = df[df.a == "@c"]
        tm.assert_frame_equal(result, expected)

    def test_query_undefined_local(self):
        """``@c`` with no local ``c`` raises UndefinedVariableError."""
        kw = {"engine": self.engine, "parser": self.parser}
        skip_if_no_pandas_parser(self.parser)

        df = DataFrame(np.random.rand(10, 2), columns=list("ab"))
        with pytest.raises(
            UndefinedVariableError, match="local variable 'c' is not defined"
        ):
            df.query("a == @c", **kw)

    def test_index_resolvers_come_after_columns_with_the_same_name(self):
        """When a column and the index share a name, the column is used."""
        kw = {"engine": self.engine, "parser": self.parser}

        a = np.r_[20:101:20]
        df = DataFrame({"index": a, "b": np.random.randn(a.size)})
        df.index.name = "index"
        result = df.query("index > 5", **kw)
        expected = df[df["index"] > 5]
        tm.assert_frame_equal(result, expected)

        df = DataFrame({"index": a, "b": np.random.randn(a.size)})
        result = df.query("ilevel_0 > 5", **kw)
        expected = df.loc[df.index[df.index > 5]]
        tm.assert_frame_equal(result, expected)

        df = DataFrame({"a": a, "b": np.random.randn(a.size)})
        df.index.name = "a"
        result = df.query("a > 5", **kw)
        expected = df[df.a > 5]
        tm.assert_frame_equal(result, expected)

        result = df.query("index > 5", **kw)
        expected = df.loc[df.index[df.index > 5]]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("op, f", [["==", operator.eq], ["!=", operator.ne]])
    def test_inf(self, op, f):
        """``inf`` in a query compares against infinite values in column ``a``.

        Parameters
        ----------
        op : str
            Comparison symbol placed in the query string.
        f : callable
            The matching ``operator`` function used to build the expectation.
        """
        n = 10
        df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)})
        # Write the infinities into column "a". Using the label 0 here would
        # add a new column named 0 and leave "a" without any infinite value.
        df.loc[::2, "a"] = np.inf
        q = f"a {op} inf"
        expected = df[f(df.a, np.inf)]
        result = df.query(q, engine=self.engine, parser=self.parser)
        tm.assert_frame_equal(result, expected)

    def test_check_tz_aware_index_query(self, tz_aware_fixture):
        """A string timestamp compares against a tz-aware index or column named ``time``."""
        # https://github.com/pandas-dev/pandas/issues/29463
        tz = tz_aware_fixture
        df_index = date_range(
            start="2019-01-01", freq="1d", periods=10, tz=tz, name="time"
        )
        query = '"2018-01-03 00:00:00+00" < time'

        expected = DataFrame(index=df_index)
        df = DataFrame(index=df_index)
        result = df.query(query)
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(df_index)
        result = df.reset_index().query(query)
        tm.assert_frame_equal(result, expected)

    def test_method_calls_in_query(self):
        """Series methods (``astype``, ``notnull``) can be called inside a query."""
        # https://github.com/pandas-dev/pandas/issues/22435
        kw = {"engine": self.engine, "parser": self.parser}
        n = 10
        df = DataFrame({"a": 2 * np.random.rand(n), "b": np.random.rand(n)})
        expected = df[df["a"].astype("int") == 0]
        result = df.query("a.astype('int') == 0", **kw)
        tm.assert_frame_equal(result, expected)

        df = DataFrame(
            {
                "a": np.where(np.random.rand(n) < 0.5, np.nan, np.random.randn(n)),
                "b": np.random.randn(n),
            }
        )
        expected = df[df["a"].notnull()]
        result = df.query("a.notnull()", **kw)
        tm.assert_frame_equal(result, expected)
- @td.skip_if_no_ne
- class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas):
@classmethod
def setup_class(cls):
    """Run the parent setup, then set engine "numexpr" and parser "python"."""
    super().setup_class()
    cls.engine, cls.parser = "numexpr", "python"
- def test_date_query_no_attribute_access(self):
- engine, parser = self.engine, self.parser
- df = DataFrame(np.random.randn(5, 3))
- df["dates1"] = date_range("1/1/2012", periods=5)
- df["dates2"] = date_range("1/1/2013", periods=5)
- df["dates3"] = date_range("1/1/2014", periods=5)
- res = df.query(
- "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
- )
- expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
- tm.assert_frame_equal(res, expec)
- def test_date_query_with_NaT(self):
- engine, parser = self.engine, self.parser
- n = 10
- df = DataFrame(np.random.randn(n, 3))
- df["dates1"] = date_range("1/1/2012", periods=n)
- df["dates2"] = date_range("1/1/2013", periods=n)
- df["dates3"] = date_range("1/1/2014", periods=n)
- df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
- df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT
- res = df.query(
- "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
- )
- expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)]
- tm.assert_frame_equal(res, expec)
- def test_date_index_query(self):
- engine, parser = self.engine, self.parser
- n = 10
- df = DataFrame(np.random.randn(n, 3))
- df["dates1"] = date_range("1/1/2012", periods=n)
- df["dates3"] = date_range("1/1/2014", periods=n)
- return_value = df.set_index("dates1", inplace=True, drop=True)
- assert return_value is None
- res = df.query(
- "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
- )
- expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
- tm.assert_frame_equal(res, expec)
def test_date_index_query_with_NaT(self):
    """Check ``query`` on a datetime index when cell (0, 0) of the frame is NaT."""
    size = 10
    # Cast to object to avoid implicit cast when setting entry to pd.NaT below
    frame = DataFrame(np.random.randn(size, 3)).astype({0: object})
    frame["dates1"] = date_range("1/1/2012", periods=size)
    frame["dates3"] = date_range("1/1/2014", periods=size)
    frame.iloc[0, 0] = pd.NaT
    # The in-place set_index call is expected to hand back None.
    outcome = frame.set_index("dates1", inplace=True, drop=True)
    assert outcome is None

    result = frame.query(
        "(index < 20130101) & (20130101 < dates3)",
        engine=self.engine,
        parser=self.parser,
    )
    mask = (frame.index < "20130101") & ("20130101" < frame.dates3)
    expected = frame[mask]
    tm.assert_frame_equal(result, expected)
def test_date_index_query_with_NaT_duplicates(self):
    """A chained comparison in ``query`` must raise NotImplementedError here."""
    size = 10
    frame = DataFrame(np.random.randn(size, 3))
    frame["dates1"] = date_range("1/1/2012", periods=size)
    frame["dates3"] = date_range("1/1/2014", periods=size)
    # Random NaT entries; after set_index they end up in the index.
    frame.loc[np.random.rand(size) > 0.5, "dates1"] = pd.NaT
    outcome = frame.set_index("dates1", inplace=True, drop=True)
    assert outcome is None

    expected_message = r"'BoolOp' nodes are not implemented"
    with pytest.raises(NotImplementedError, match=expected_message):
        frame.query(
            "index < 20130101 < dates3", engine=self.engine, parser=self.parser
        )
def test_nested_scope(self):
    """Check how ``pd.eval`` and ``query`` resolve local names for this parser."""
    opts = {"engine": self.engine, "parser": self.parser}

    # smoke test
    # NOTE: ``x``, ``df`` and ``df2`` are referenced by name inside the
    # evaluated strings below, so these locals must keep their names.
    x = 1  # noqa:F841
    smoke = pd.eval("x + 1", **opts)
    assert smoke == 2

    df = DataFrame(np.random.randn(5, 3))
    df2 = DataFrame(np.random.randn(5, 3))

    # don't have the pandas parser
    at_prefix_message = r"The '@' prefix is only supported by the pandas parser"
    with pytest.raises(SyntaxError, match=at_prefix_message):
        df.query("(@df>0) & (@df2>0)", **opts)

    with pytest.raises(UndefinedVariableError, match="name 'df' is not defined"):
        df.query("(df>0) & (df2>0)", **opts)

    expected = df[(df > 0) & (df2 > 0)]
    result = pd.eval("df[(df > 0) & (df2 > 0)]", **opts)
    tm.assert_frame_equal(expected, result)

    expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
    result = pd.eval("df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]", **opts)
    tm.assert_frame_equal(expected, result)
def test_query_numexpr_with_min_and_max_columns(self):
    """Querying columns named ``min`` / ``max`` must raise NumExprClobberingError."""
    frame = DataFrame({"min": [1, 2, 3], "max": [4, 5, 6]})
    # Same check for each column name, in the original order.
    for builtin_name in ("min", "max"):
        pattern = (
            rf"Variables in expression \"\({builtin_name}\) == \(1\)\" "
            rf"overlap with builtins: \('{builtin_name}'\)"
        )
        with pytest.raises(NumExprClobberingError, match=pattern):
            frame.query(f"{builtin_name} == 1")
class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas):
    """Subclass configured with ``engine="python"`` and ``parser="pandas"``."""

    @classmethod
    def setup_class(cls):
        """Run the parent setup, then override the engine/parser class attributes."""
        super().setup_class()
        cls.engine, cls.parser = "python", "pandas"

    def test_query_builtin(self):
        """Query on an index whose name, ``sin``, matches a math function name."""
        size = bound = 10
        values = np.random.randint(bound, size=(size, 3))
        frame = DataFrame(values, columns=list("abc"))
        frame.index.name = "sin"

        result = frame.query("sin > 5", engine=self.engine, parser=self.parser)
        expected = frame[frame.index > 5]
        tm.assert_frame_equal(expected, result)
class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython):
    """Subclass configured with the ``"python"`` engine and ``"python"`` parser."""

    @classmethod
    def setup_class(cls):
        """Run the parent setup, then set both engine and parser to ``"python"``."""
        super().setup_class()
        cls.engine = "python"
        cls.parser = "python"

    def test_query_builtin(self):
        """Query on an index whose name, ``sin``, matches a math function name."""
        size = bound = 10
        values = np.random.randint(bound, size=(size, 3))
        frame = DataFrame(values, columns=list("abc"))
        frame.index.name = "sin"

        result = frame.query("sin > 5", engine=self.engine, parser=self.parser)
        expected = frame[frame.index > 5]
        tm.assert_frame_equal(expected, result)
class TestDataFrameQueryStrings:
    """``DataFrame.query`` tests involving string-valued columns and literals."""

    def test_str_query_method(self, parser, engine):
        """Compare a string column with a string literal via ``==`` and ``!=``."""
        df = DataFrame(np.random.randn(10, 1), columns=["b"])
        df["strings"] = Series(list("aabbccddee"))
        expect = df[df.strings == "a"]

        if parser != "pandas":
            col = "strings"
            lst = '"a"'

            left_operands = [col] * 2 + [lst] * 2
            right_operands = left_operands[::-1]

            eq, ne = "==", "!="
            ops = 2 * ([eq] + [ne])
            msg = r"'(Not)?In' nodes are not implemented"

            # Loop names differ from the lists so the iterables are not rebound.
            for left, op, right in zip(left_operands, ops, right_operands):
                ex = f"{left} {op} {right}"
                with pytest.raises(NotImplementedError, match=msg):
                    df.query(
                        ex,
                        engine=engine,
                        parser=parser,
                        local_dict={"strings": df.strings},
                    )
        else:
            res = df.query('"a" == strings', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)

            res = df.query('strings == "a"', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)
            tm.assert_frame_equal(res, df[df.strings.isin(["a"])])

            expect = df[df.strings != "a"]
            res = df.query('strings != "a"', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)

            res = df.query('"a" != strings', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)
            tm.assert_frame_equal(res, df[~df.strings.isin(["a"])])

    def test_str_list_query_method(self, parser, engine):
        """Compare a string column with a list literal via ``==`` and ``!=``."""
        df = DataFrame(np.random.randn(10, 1), columns=["b"])
        df["strings"] = Series(list("aabbccddee"))
        expect = df[df.strings.isin(["a", "b"])]

        if parser != "pandas":
            col = "strings"
            lst = '["a", "b"]'

            left_operands = [col] * 2 + [lst] * 2
            right_operands = left_operands[::-1]

            eq, ne = "==", "!="
            ops = 2 * ([eq] + [ne])
            msg = r"'(Not)?In' nodes are not implemented"

            # Loop names differ from the lists so the iterables are not rebound.
            for left, op, right in zip(left_operands, ops, right_operands):
                ex = f"{left} {op} {right}"
                with pytest.raises(NotImplementedError, match=msg):
                    df.query(ex, engine=engine, parser=parser)
        else:
            res = df.query('strings == ["a", "b"]', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)

            res = df.query('["a", "b"] == strings', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)

            expect = df[~df.strings.isin(["a", "b"])]

            res = df.query('strings != ["a", "b"]', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)

            res = df.query('["a", "b"] != strings', engine=engine, parser=parser)
            tm.assert_frame_equal(res, expect)

    def test_query_with_string_columns(self, parser, engine):
        """``in`` between two string columns: works for pandas parser, else raises."""
        df = DataFrame(
            {
                "a": list("aaaabbbbcccc"),
                "b": list("aabbccddeeff"),
                "c": np.random.randint(5, size=12),
                "d": np.random.randint(9, size=12),
            }
        )
        if parser == "pandas":
            res = df.query("a in b", parser=parser, engine=engine)
            expec = df[df.a.isin(df.b)]
            tm.assert_frame_equal(res, expec)

            res = df.query("a in b and c < d", parser=parser, engine=engine)
            expec = df[df.a.isin(df.b) & (df.c < df.d)]
            tm.assert_frame_equal(res, expec)
        else:
            msg = r"'(Not)?In' nodes are not implemented"
            with pytest.raises(NotImplementedError, match=msg):
                df.query("a in b", parser=parser, engine=engine)

            msg = r"'BoolOp' nodes are not implemented"
            with pytest.raises(NotImplementedError, match=msg):
                df.query("a in b and c < d", parser=parser, engine=engine)

    def test_object_array_eq_ne(self, parser, engine):
        """``==`` / ``!=`` between two string columns match plain comparisons."""
        df = DataFrame(
            {
                "a": list("aaaabbbbcccc"),
                "b": list("aabbccddeeff"),
                "c": np.random.randint(5, size=12),
                "d": np.random.randint(9, size=12),
            }
        )
        res = df.query("a == b", parser=parser, engine=engine)
        exp = df[df.a == df.b]
        tm.assert_frame_equal(res, exp)

        res = df.query("a != b", parser=parser, engine=engine)
        exp = df[df.a != df.b]
        tm.assert_frame_equal(res, exp)

    def test_query_with_nested_strings(self, parser, engine):
        """Query with a double-quoted string nested inside a single-quoted literal."""
        skip_if_no_pandas_parser(parser)
        events = [
            f"page {n} {act}" for n in range(1, 4) for act in ["load", "exit"]
        ] * 2
        stamps1 = date_range("2014-01-01 0:00:01", freq="30s", periods=6)
        stamps2 = date_range("2014-02-01 1:00:01", freq="30s", periods=6)
        df = DataFrame(
            {
                "id": np.arange(1, 7).repeat(2),
                "event": events,
                "timestamp": stamps1.append(stamps2),
            }
        )

        expected = df[df.event == '"page 1 load"']
        res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine)
        tm.assert_frame_equal(expected, res)

    def test_query_with_nested_special_character(self, parser, engine):
        """A string literal containing ``&`` is compared as an ordinary string."""
        skip_if_no_pandas_parser(parser)
        df = DataFrame({"a": ["a", "b", "test & test"], "b": [1, 2, 3]})
        res = df.query('a == "test & test"', parser=parser, engine=engine)
        expec = df[df.a == "test & test"]
        tm.assert_frame_equal(res, expec)

    @pytest.mark.parametrize(
        "op, func",
        [
            ["<", operator.lt],
            [">", operator.gt],
            ["<=", operator.le],
            [">=", operator.ge],
        ],
    )
    def test_query_lex_compare_strings(self, parser, engine, op, func):
        """Ordering comparisons of a string column against the literal ``"d"``."""
        a = Series(np.random.choice(list("abcde"), 20))
        b = Series(np.arange(a.size))
        df = DataFrame({"X": a, "Y": b})

        res = df.query(f'X {op} "d"', engine=engine, parser=parser)
        expected = df[func(df.X, "d")]
        tm.assert_frame_equal(res, expected)

    def test_query_single_element_booleans(self, parser, engine):
        """``&`` between boolean columns of a one-row frame."""
        columns = "bid", "bidsize", "ask", "asksize"
        data = np.random.randint(2, size=(1, len(columns))).astype(bool)
        df = DataFrame(data, columns=columns)
        res = df.query("bid & ask", engine=engine, parser=parser)
        expected = df[df.bid & df.ask]
        tm.assert_frame_equal(res, expected)

    def test_query_string_scalar_variable(self, parser, engine):
        """Compare a string column with a local string referenced as ``@symb``."""
        skip_if_no_pandas_parser(parser)
        df = DataFrame(
            {
                "Symbol": ["BUD US", "BUD US", "IBM US", "IBM US"],
                "Price": [109.70, 109.72, 183.30, 183.35],
            }
        )
        e = df[df.Symbol == "BUD US"]
        # ``symb`` is looked up by name from the query string below.
        symb = "BUD US"  # noqa:F841
        r = df.query("Symbol == @symb", parser=parser, engine=engine)
        tm.assert_frame_equal(e, r)
class TestDataFrameEvalWithFrame:
    """``DataFrame.eval`` checks, mostly against a random three-column frame."""

    @pytest.fixture
    def frame(self):
        """Return a 10x3 frame of ``np.random.randn`` values, columns a, b, c."""
        values = np.random.randn(10, 3)
        return DataFrame(values, columns=list("abc"))

    def test_simple_expr(self, frame, parser, engine):
        """``a + b`` evaluated by ``eval`` equals the plain column sum."""
        result = frame.eval("a + b", engine=engine, parser=parser)
        expected = frame.a + frame.b
        tm.assert_series_equal(result, expected)

    def test_bool_arith_expr(self, frame, parser, engine):
        """Boolean-mask indexing inside an arithmetic ``eval`` expression."""
        result = frame.eval("a[a < 1] + b", engine=engine, parser=parser)
        masked = frame.a[frame.a < 1]
        expected = masked + frame.b
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("op", ["+", "-", "*", "/"])
    def test_invalid_type_for_operator_raises(self, parser, engine, op):
        """Arithmetic between an int column and a str column raises TypeError."""
        mixed = DataFrame({"a": [1, 2], "b": ["c", "d"]})
        pattern = r"unsupported operand type\(s\) for .+: '.+' and '.+'"
        expression = f"a {op} b"
        with pytest.raises(TypeError, match=pattern):
            mixed.eval(expression, engine=engine, parser=parser)
class TestDataFrameQueryBacktickQuoting:
    """``query`` / ``eval`` tests for backtick-quoted names and EA dtypes."""

    @pytest.fixture
    def df(self):
        """
        Yields a dataframe whose column names may or may not need escaping
        by backticks. The ``it's``, ``that's``, ``☺`` and ``foo#bar`` columns
        cannot be escaped by backticks; the tests in this class expect a
        SyntaxError when they are used.
        """
        yield DataFrame(
            {
                "A": [1, 2, 3],
                "B B": [3, 2, 1],
                "C C": [4, 5, 6],
                # Two spaces: distinct from "C C" (a repeated key would
                # silently overwrite the column above).
                "C  C": [7, 4, 3],
                "C_C": [8, 9, 10],
                "D_D D": [11, 1, 101],
                "E.E": [6, 3, 5],
                "F-F": [8, 1, 10],
                "1e1": [2, 4, 8],
                "def": [10, 11, 2],
                "A (x)": [4, 1, 3],
                "B(x)": [1, 1, 5],
                "B (x)": [2, 7, 4],
                " &^ :!€$?(} > <++*'' ": [2, 5, 6],
                "": [10, 11, 1],
                " A": [4, 7, 9],
                " ": [1, 2, 1],
                "it's": [6, 3, 1],
                "that's": [9, 1, 8],
                "☺": [8, 7, 6],
                "foo#bar": [2, 4, 5],
                1: [5, 7, 9],
            }
        )

    def test_single_backtick_variable_query(self, df):
        """One backtick-quoted column name in a query."""
        res = df.query("1 < `B B`")
        expect = df[1 < df["B B"]]
        tm.assert_frame_equal(res, expect)

    def test_two_backtick_variables_query(self, df):
        """Two backtick-quoted column names combined with ``and``."""
        res = df.query("1 < `B B` and 4 < `C C`")
        expect = df[(1 < df["B B"]) & (4 < df["C C"])]
        tm.assert_frame_equal(res, expect)

    def test_single_backtick_variable_expr(self, df):
        """One backtick-quoted column name in an ``eval`` expression."""
        res = df.eval("A + `B B`")
        expect = df["A"] + df["B B"]
        tm.assert_series_equal(res, expect)

    def test_two_backtick_variables_expr(self, df):
        """Two backtick-quoted column names in an ``eval`` expression."""
        res = df.eval("`B B` + `C C`")
        expect = df["B B"] + df["C C"]
        tm.assert_series_equal(res, expect)

    def test_already_underscore_variable(self, df):
        """Backtick-quoting a name that already contains an underscore."""
        res = df.eval("`C_C` + A")
        expect = df["C_C"] + df["A"]
        tm.assert_series_equal(res, expect)

    def test_same_name_but_underscores(self, df):
        """``C_C`` and backtick-quoted ``C C`` resolve to different columns."""
        res = df.eval("C_C + `C C`")
        expect = df["C_C"] + df["C C"]
        tm.assert_series_equal(res, expect)

    def test_mixed_underscores_and_spaces(self, df):
        """A quoted name containing both an underscore and a space."""
        res = df.eval("A + `D_D D`")
        expect = df["A"] + df["D_D D"]
        tm.assert_series_equal(res, expect)

    def test_backtick_quote_name_with_no_spaces(self, df):
        """Backtick-quoting a name that has no spaces."""
        res = df.eval("A + `C_C`")
        expect = df["A"] + df["C_C"]
        tm.assert_series_equal(res, expect)

    def test_special_characters(self, df):
        """Quoted names containing ``.`` and ``-``."""
        res = df.eval("`E.E` + `F-F` - A")
        expect = df["E.E"] + df["F-F"] - df["A"]
        tm.assert_series_equal(res, expect)

    def test_start_with_digit(self, df):
        """A quoted name that starts with a digit."""
        res = df.eval("A + `1e1`")
        expect = df["A"] + df["1e1"]
        tm.assert_series_equal(res, expect)

    def test_keyword(self, df):
        """A quoted name that is a Python keyword."""
        res = df.eval("A + `def`")
        expect = df["A"] + df["def"]
        tm.assert_series_equal(res, expect)

    def test_unneeded_quoting(self, df):
        """Quoting a name that is already a valid identifier."""
        res = df.query("`A` > 2")
        expect = df[df["A"] > 2]
        tm.assert_frame_equal(res, expect)

    def test_parenthesis(self, df):
        """A quoted name containing parentheses."""
        res = df.query("`A (x)` > 2")
        expect = df[df["A (x)"] > 2]
        tm.assert_frame_equal(res, expect)

    def test_empty_string(self, df):
        """A quoted empty-string column name."""
        res = df.query("`` > 5")
        expect = df[df[""] > 5]
        tm.assert_frame_equal(res, expect)

    def test_multiple_spaces(self, df):
        """A quoted name containing consecutive spaces."""
        res = df.query("`C  C` > 5")
        expect = df[df["C  C"] > 5]
        tm.assert_frame_equal(res, expect)

    def test_start_with_spaces(self, df):
        """Quoted names that begin with, or consist of, whitespace."""
        res = df.eval("` A` + ` `")
        expect = df[" A"] + df[" "]
        tm.assert_series_equal(res, expect)

    def test_lots_of_operators_string(self, df):
        """A quoted name made up mostly of operator and punctuation characters."""
        res = df.query("` &^ :!€$?(} > <++*'' ` > 4")
        expect = df[df[" &^ :!€$?(} > <++*'' "] > 4]
        tm.assert_frame_equal(res, expect)

    def test_missing_attribute(self, df):
        """Accessing a missing attribute through ``@pd`` raises AttributeError."""
        message = "module 'pandas' has no attribute 'thing'"
        with pytest.raises(AttributeError, match=message):
            df.eval("@pd.thing")

    def test_failing_quote(self, df):
        """Quoted names containing an apostrophe raise SyntaxError."""
        msg = r"(Could not convert ).*( to a valid Python identifier.)"
        with pytest.raises(SyntaxError, match=msg):
            df.query("`it's` > `that's`")

    def test_failing_character_outside_range(self, df):
        """The quoted name ``☺`` raises SyntaxError."""
        msg = r"(Could not convert ).*( to a valid Python identifier.)"
        with pytest.raises(SyntaxError, match=msg):
            df.query("`☺` > 4")

    def test_failing_hashtag(self, df):
        """A quoted name containing ``#`` raises SyntaxError."""
        msg = "Failed to parse backticks"
        with pytest.raises(SyntaxError, match=msg):
            df.query("`foo#bar` > 4")

    def test_call_non_named_expression(self, df):
        """
        Only attributes and variables ('named functions') can be called.
        .__call__() is not an allowed attribute because that would allow
        calling anything.
        https://github.com/pandas-dev/pandas/pull/32460
        """

        def func(*_):
            return 1

        # ``func`` and ``funcs`` are looked up by name from the strings below.
        funcs = [func]  # noqa:F841

        df.eval("@func()")

        with pytest.raises(TypeError, match="Only named functions are supported"):
            df.eval("@funcs[0]()")

        with pytest.raises(TypeError, match="Only named functions are supported"):
            df.eval("@funcs[0].__call__()")

    def test_ea_dtypes(self, any_numeric_ea_and_arrow_dtype):
        """Column assignment via ``eval`` keeps the extension dtype."""
        # GH#29618
        df = DataFrame(
            [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype
        )
        # A RuntimeWarning is expected only when numexpr is installed.
        warning = RuntimeWarning if NUMEXPR_INSTALLED else None
        with tm.assert_produces_warning(warning):
            result = df.eval("c = b - a")
        expected = DataFrame(
            [[1, 2, 1], [3, 4, 1]],
            columns=["a", "b", "c"],
            dtype=any_numeric_ea_and_arrow_dtype,
        )
        tm.assert_frame_equal(result, expected)

    def test_ea_dtypes_and_scalar(self):
        """Column-minus-scalar assignment via ``eval`` on ``Float64`` columns."""
        # GH#29618
        df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"], dtype="Float64")
        # A RuntimeWarning is expected only when numexpr is installed.
        warning = RuntimeWarning if NUMEXPR_INSTALLED else None
        with tm.assert_produces_warning(warning):
            result = df.eval("c = b - 1")
        expected = DataFrame(
            [[1, 2, 1], [3, 4, 3]], columns=["a", "b", "c"], dtype="Float64"
        )
        tm.assert_frame_equal(result, expected)

    def test_ea_dtypes_and_scalar_operation(self, any_numeric_ea_and_arrow_dtype):
        """Assigning a scalar-only expression leaves the existing columns' dtype."""
        # GH#29618
        df = DataFrame(
            [[1, 2], [3, 4]], columns=["a", "b"], dtype=any_numeric_ea_and_arrow_dtype
        )
        result = df.eval("c = 2 - 1")
        expected = DataFrame(
            {
                "a": Series([1, 3], dtype=any_numeric_ea_and_arrow_dtype),
                "b": Series([2, 4], dtype=any_numeric_ea_and_arrow_dtype),
                # The dtype of the new column is taken from the result itself.
                "c": Series([1, 1], dtype=result["c"].dtype),
            }
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"])
    def test_query_ea_dtypes(self, dtype):
        """``in`` against a local set for numpy, masked and pyarrow int columns."""
        if dtype == "int64[pyarrow]":
            pytest.importorskip("pyarrow")
        # GH#50261
        df = DataFrame({"a": Series([1, 2], dtype=dtype)})
        # ``ref`` is looked up by name from the query string below.
        ref = {2}  # noqa:F841
        warning = RuntimeWarning if dtype == "Int64" and NUMEXPR_INSTALLED else None
        with tm.assert_produces_warning(warning):
            result = df.query("a in @ref")
        expected = DataFrame({"a": Series([2], dtype=dtype, index=[1])})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("engine", ["python", "numexpr"])
    @pytest.mark.parametrize("dtype", ["int64", "Int64", "int64[pyarrow]"])
    def test_query_ea_equality_comparison(self, dtype, engine):
        """``A == B`` where ``A`` is ``Int64`` and ``B`` has the parametrized dtype."""
        # GH#50261
        warning = RuntimeWarning if engine == "numexpr" else None
        if engine == "numexpr" and not NUMEXPR_INSTALLED:
            pytest.skip("numexpr not installed")
        if dtype == "int64[pyarrow]":
            pytest.importorskip("pyarrow")
        df = DataFrame(
            {"A": Series([1, 1, 2], dtype="Int64"), "B": Series([1, 2, 2], dtype=dtype)}
        )
        with tm.assert_produces_warning(warning):
            result = df.query("A == B", engine=engine)
        expected = DataFrame(
            {
                "A": Series([1, 2], dtype="Int64", index=[0, 2]),
                "B": Series([1, 2], dtype=dtype, index=[0, 2]),
            }
        )
        tm.assert_frame_equal(result, expected)
|