12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090 |
- from collections import deque
- from datetime import (
- datetime,
- timezone,
- )
- from enum import Enum
- import functools
- import operator
- import re
- import numpy as np
- import pytest
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- DataFrame,
- Index,
- MultiIndex,
- Series,
- )
- import pandas._testing as tm
- import pandas.core.common as com
- from pandas.core.computation import expressions as expr
- from pandas.core.computation.expressions import (
- _MIN_ELEMENTS,
- NUMEXPR_INSTALLED,
- )
- from pandas.tests.frame.common import (
- _check_mixed_float,
- _check_mixed_int,
- )
@pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"])
def switch_numexpr_min_elements(request):
    """Run each test once per ``expr._MIN_ELEMENTS`` setting.

    The fixture is autouse and parametrized with ``0`` (id ``"numexpr"``) and
    ``1000000`` (id ``"python"``).  It overwrites ``expr._MIN_ELEMENTS`` with
    the parameter, yields that parameter to the test, and puts the previous
    value back once the test has finished.
    """
    # remember the current setting so it can be restored on teardown
    previous_min_elements = expr._MIN_ELEMENTS
    expr._MIN_ELEMENTS = request.param

    yield request.param

    expr._MIN_ELEMENTS = previous_min_elements
class DummyElement:
    """Small array-like wrapper around a single value and a dtype.

    Parameters
    ----------
    value : object
        The wrapped value; it is handed to ``np.array`` by ``__array__``.
    dtype : object
        Anything accepted by ``np.dtype``; stored normalized as ``self.dtype``.
    """

    def __init__(self, value, dtype) -> None:
        self.value = value
        self.dtype = np.dtype(dtype)

    def __array__(self, dtype=None, copy=None):
        """Return the wrapped value as an ndarray of ``self.dtype``.

        ``dtype`` and ``copy`` are accepted because NumPy's ``__array__``
        protocol may pass them; without them such a call raises ``TypeError``.
        A requested ``dtype`` is honoured by casting the result.  ``copy`` is
        not consulted: ``np.array`` always builds a new array here.
        """
        result = np.array(self.value, dtype=self.dtype)
        if dtype is not None:
            result = result.astype(dtype, copy=False)
        return result

    def __str__(self) -> str:
        return f"DummyElement({self.value}, {self.dtype})"

    def __repr__(self) -> str:
        return str(self)

    def astype(self, dtype, copy=False):
        """Record ``dtype`` on this instance (in place) and return ``self``."""
        # the dtype is stored as given, without normalizing through np.dtype
        self.dtype = dtype
        return self

    def view(self, dtype):
        """Return a new instance wrapping ``self.value.view(dtype)``."""
        return type(self)(self.value.view(dtype), dtype)

    def any(self, axis=None):
        """Return the truthiness of the wrapped value; ``axis`` is ignored."""
        return bool(self.value)
- # -------------------------------------------------------------------
- # Comparisons
class TestFrameComparisons:
    """Tests for the DataFrame comparison operators.

    Specifically _not_ flex-comparisons: everything here goes through
    ``==``, ``!=``, ``<``, ``<=``, ``>`` and ``>=`` (or the ``operator``
    module equivalents), not through ``DataFrame.eq`` and friends.
    """

    def test_comparison_with_categorical_dtype(self):
        """``df == "foo"`` gives the same mask for object and category data."""
        # GH#12564
        frame = DataFrame({"A": ["foo", "bar", "baz"]})
        expected = DataFrame({"A": [True, False, False]})

        tm.assert_frame_equal(frame == "foo", expected)

        # casting to categorical shouldn't affect the result
        frame["A"] = frame["A"].astype("category")
        tm.assert_frame_equal(frame == "foo", expected)

    def test_frame_in_list(self):
        """``df in [None]`` raises the ambiguous-truth-value ValueError."""
        # GH#12689 this should raise at the DataFrame level, not blocks
        frame = DataFrame(np.random.randn(6, 4), columns=list("ABCD"))

        with pytest.raises(
            ValueError, match="The truth value of a DataFrame is ambiguous"
        ):
            frame in [None]

    @pytest.mark.parametrize(
        "arg, arg2",
        [
            [
                {
                    "a": np.random.randint(10, size=10),
                    "b": pd.date_range("20010101", periods=10),
                },
                {
                    "a": np.random.randint(10, size=10),
                    "b": np.random.randint(10, size=10),
                },
            ],
            [
                {
                    "a": np.random.randint(10, size=10),
                    "b": np.random.randint(10, size=10),
                },
                {
                    "a": np.random.randint(10, size=10),
                    "b": pd.date_range("20010101", periods=10),
                },
            ],
            [
                {
                    "a": pd.date_range("20010101", periods=10),
                    "b": pd.date_range("20010101", periods=10),
                },
                {
                    "a": np.random.randint(10, size=10),
                    "b": np.random.randint(10, size=10),
                },
            ],
            [
                {
                    "a": np.random.randint(10, size=10),
                    "b": pd.date_range("20010101", periods=10),
                },
                {
                    "a": pd.date_range("20010101", periods=10),
                    "b": pd.date_range("20010101", periods=10),
                },
            ],
        ],
    )
    def test_comparison_invalid(self, arg, arg2):
        """Date/int frame comparisons: ``==``/``!=`` work, inequalities raise."""
        # GH4968
        # invalid date/int comparisons
        lhs = DataFrame(arg)
        rhs = DataFrame(arg2)

        # we expect the result to match Series comparisons for
        # == and !=, inequalities should raise
        for equality_op in (operator.eq, operator.ne):
            result = equality_op(lhs, rhs)
            expected = DataFrame(
                {col: equality_op(lhs[col], rhs[col]) for col in lhs.columns},
                index=lhs.index,
                columns=lhs.columns,
            )
            tm.assert_frame_equal(result, expected)

        msg = "|".join(
            [
                r"Invalid comparison between dtype=datetime64\[ns\] and ndarray",
                "invalid type promotion",
                (
                    # npdev 1.20.0
                    r"The DTypes <class 'numpy.dtype\[.*\]'> and "
                    r"<class 'numpy.dtype\[.*\]'> do not have a common DType."
                ),
            ]
        )
        # same order as spelling out >=, >, <, <= one after another
        for inequality_op in (operator.ge, operator.gt, operator.lt, operator.le):
            with pytest.raises(TypeError, match=msg):
                inequality_op(lhs, rhs)

    @pytest.mark.parametrize(
        "left, right",
        [
            ("gt", "lt"),
            ("lt", "gt"),
            ("ge", "le"),
            ("le", "ge"),
            ("eq", "eq"),
            ("ne", "ne"),
        ],
    )
    def test_timestamp_compare(self, left, right):
        """Timestamps can sit on either side of a mixed-dtype frame comparison.

        For ``eq``/``ne`` the reflected result must equal the forward one; for
        the ordering operators both directions must raise ``TypeError``.
        """
        # make sure we can compare Timestamps on the right AND left hand side
        # GH#4982
        frame = DataFrame(
            {
                "dates1": pd.date_range("20010101", periods=10),
                "dates2": pd.date_range("20010102", periods=10),
                "intcol": np.random.randint(1000000000, size=10),
                "floatcol": np.random.randn(10),
                "stringcol": list(tm.rands(10)),
            }
        )
        frame.loc[np.random.rand(len(frame)) > 0.5, "dates2"] = pd.NaT
        forward = getattr(operator, left)
        reflected = getattr(operator, right)
        is_equality_op = left in ["eq", "ne"]

        cases = [
            # no nats
            (
                "20010109",
                "'(<|>)=?' not supported between "
                "instances of 'numpy.ndarray' and 'Timestamp'",
            ),
            # nats
            (
                "nat",
                "'(<|>)=?' not supported between "
                "instances of 'numpy.ndarray' and 'NaTType'",
            ),
        ]
        for stamp, msg in cases:
            if is_equality_op:
                expected = forward(frame, pd.Timestamp(stamp))
                result = reflected(pd.Timestamp(stamp), frame)
                tm.assert_frame_equal(result, expected)
                continue

            with pytest.raises(TypeError, match=msg):
                forward(frame, pd.Timestamp(stamp))
            with pytest.raises(TypeError, match=msg):
                reflected(pd.Timestamp(stamp), frame)

    def test_mixed_comparison(self):
        """``==`` is all-False and ``!=`` all-True for non-matching frames."""
        # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
        # not raise TypeError
        # (this appears to be fixed before GH#22163, not sure when)
        frame = DataFrame([["1989-08-01", 1], ["1989-08-01", 2]])
        other = DataFrame([["a", "b"], ["c", "d"]])

        equal_mask = frame == other
        assert not equal_mask.any().any()

        unequal_mask = frame != other
        assert unequal_mask.all().all()

    def test_df_boolean_comparison_error(self):
        """``df == tuple`` and ``df == list`` compare against each row."""
        # GH#4576, GH#22880
        # comparing DataFrame against list/tuple with len(obj) matching
        # len(df.columns) is supported as of GH#22800
        frame = DataFrame(np.arange(6).reshape((3, 2)))
        expected = DataFrame([[False, False], [True, False], [False, False]])

        tm.assert_frame_equal(frame == (2, 2), expected)
        tm.assert_frame_equal(frame == [2, 2], expected)

    def test_df_float_none_comparison(self):
        """``df.__eq__(None)`` on a float frame is False everywhere."""
        frame = DataFrame(
            np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"]
        )

        equal_to_none = frame.__eq__(None)
        assert not equal_to_none.any().any()

    def test_df_string_comparison(self):
        """Masks from int and string comparisons (and their negation) select rows."""
        frame = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
        first_row = frame.loc[0:0, :]
        second_row = frame.loc[1:1, :]

        a_above_one = frame.a > 1
        tm.assert_frame_equal(frame[a_above_one], second_row)
        tm.assert_frame_equal(frame[-a_above_one], first_row)

        b_is_foo = frame.b == "foo"
        tm.assert_frame_equal(frame[b_is_foo], first_row)
        tm.assert_frame_equal(frame[-b_is_foo], second_row)
class TestFrameFlexComparisons:
    """Tests for the flex comparison methods ``eq``/``ne``/``gt``/``lt``/``ge``/``le``."""

    # TODO: test_bool_flex_frame needs a better name
    @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_bool_flex_frame(self, op):
        """Each flex method matches its operator for frame, ndarray and scalar."""
        data = np.random.randn(5, 3)
        other_data = np.random.randn(5, 3)
        df = DataFrame(data)
        other = DataFrame(other_data)
        # df.shape + (1, 3) is a 4-dimensional shape, which cannot be coerced
        ndim_4 = np.ones(df.shape + (1, 3))

        # DataFrame
        assert df.eq(df).values.all()
        assert not df.ne(df).values.any()

        flex_method = getattr(df, op)
        operator_func = getattr(operator, op)

        # No NAs
        tm.assert_frame_equal(flex_method(other), operator_func(df, other))

        # Unaligned
        part_o = other.loc[3:, 1:].copy()
        rs = flex_method(part_o)
        xp = operator_func(df, part_o.reindex(index=df.index, columns=df.columns))
        tm.assert_frame_equal(rs, xp)

        # ndarray
        tm.assert_frame_equal(
            flex_method(other.values), operator_func(df, other.values)
        )

        # scalar
        tm.assert_frame_equal(flex_method(0), operator_func(df, 0))

        # NAs
        tm.assert_frame_equal(flex_method(np.nan), operator_func(df, np.nan))

        msg = "Unable to coerce to Series/DataFrame"
        with pytest.raises(ValueError, match=msg):
            flex_method(ndim_4)

    @pytest.mark.parametrize("box", [np.array, Series])
    def test_bool_flex_series(self, box):
        """Flex comparisons against 1-D data along both axes are consistent.

        ``box`` wraps the 1-D operand as either an ndarray or a Series.
        """
        # Series
        # list/tuple
        data = np.random.randn(5, 3)
        df = DataFrame(data)
        idx_ser = box(np.random.randn(5))
        col_ser = box(np.random.randn(3))

        # eq / ne
        idx_eq = df.eq(idx_ser, axis=0)
        col_eq = df.eq(col_ser)
        idx_ne = df.ne(idx_ser, axis=0)
        col_ne = df.ne(col_ser)
        tm.assert_frame_equal(col_eq, df == Series(col_ser))
        tm.assert_frame_equal(col_eq, -col_ne)
        tm.assert_frame_equal(idx_eq, -idx_ne)
        tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T)
        tm.assert_frame_equal(col_eq, df.eq(list(col_ser)))
        tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0))
        tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0))

        # gt / le
        idx_gt = df.gt(idx_ser, axis=0)
        col_gt = df.gt(col_ser)
        idx_le = df.le(idx_ser, axis=0)
        col_le = df.le(col_ser)
        tm.assert_frame_equal(col_gt, df > Series(col_ser))
        tm.assert_frame_equal(col_gt, -col_le)
        tm.assert_frame_equal(idx_gt, -idx_le)
        tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T)

        # ge / lt
        idx_ge = df.ge(idx_ser, axis=0)
        col_ge = df.ge(col_ser)
        idx_lt = df.lt(idx_ser, axis=0)
        col_lt = df.lt(col_ser)
        tm.assert_frame_equal(col_ge, df >= Series(col_ser))
        tm.assert_frame_equal(col_ge, -col_lt)
        tm.assert_frame_equal(idx_ge, -idx_lt)
        tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T)

    def test_bool_flex_frame_na(self):
        """A NaN cell compares False to itself for everything except ``ne``."""
        df = DataFrame(np.random.randn(5, 3))
        # NA
        df.loc[0, 0] = np.nan

        assert not df.eq(df).loc[0, 0]
        assert df.ne(df).loc[0, 0]
        assert not df.gt(df).loc[0, 0]
        assert not df.lt(df).loc[0, 0]
        assert not df.ge(df).loc[0, 0]
        assert not df.le(df).loc[0, 0]

    def test_bool_flex_frame_complex_dtype(self):
        """Ordering comparisons with complex values raise; ``ne`` still works."""
        # complex
        arr = np.array([np.nan, 1, 6, np.nan])
        arr2 = np.array([2j, np.nan, 7, None])
        df = DataFrame({"a": arr})
        df2 = DataFrame({"a": arr2})

        msg = "|".join(
            [
                "'>' not supported between instances of '.*' and 'complex'",
                r"unorderable types: .*complex\(\)",  # PY35
            ]
        )
        with pytest.raises(TypeError, match=msg):
            # inequalities are not well-defined for complex numbers
            df.gt(df2)
        with pytest.raises(TypeError, match=msg):
            # regression test that we get the same behavior for Series
            df["a"].gt(df2["a"])
        with pytest.raises(TypeError, match=msg):
            # Check that we match numpy behavior here
            df.values > df2.values

        rs = df.ne(df2)
        assert rs.values.all()

        arr3 = np.array([2j, np.nan, None])
        df3 = DataFrame({"a": arr3})

        with pytest.raises(TypeError, match=msg):
            # inequalities are not well-defined for complex numbers
            df3.gt(2j)
        with pytest.raises(TypeError, match=msg):
            # regression test that we get the same behavior for Series
            df3["a"].gt(2j)
        with pytest.raises(TypeError, match=msg):
            # Check that we match numpy behavior here
            df3.values > 2j

    def test_bool_flex_frame_object_dtype(self):
        """``ne`` on object columns flags only the differing element."""
        # corner, dtype=object
        df1 = DataFrame({"col": ["foo", np.nan, "bar"]})
        df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]})

        result = df1.ne(df2)
        exp = DataFrame({"col": [False, True, False]})
        tm.assert_frame_equal(result, exp)

    def test_flex_comparison_nat(self):
        """``eq``/``ne`` with ``pd.NaT`` agree with ``==``/``!=`` and return bools."""
        # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT,
        # and _definitely_ not be NaN
        df = DataFrame([pd.NaT])

        result = df == pd.NaT
        # result.iloc[0, 0] is a np.bool_ object
        assert result.iloc[0, 0].item() is False

        result = df.eq(pd.NaT)
        assert result.iloc[0, 0].item() is False

        result = df != pd.NaT
        assert result.iloc[0, 0].item() is True

        result = df.ne(pd.NaT)
        assert result.iloc[0, 0].item() is True

    @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_df_flex_cmp_constant_return_types(self, opname):
        """Comparing a non-empty frame to a constant gives all-bool columns."""
        # GH 15077, non-empty DataFrame
        df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
        const = 2

        result = getattr(df, opname)(const).dtypes.value_counts()
        tm.assert_series_equal(
            result, Series([2], index=[np.dtype(bool)], name="count")
        )

    @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
    def test_df_flex_cmp_constant_return_types_empty(self, opname):
        """Comparing a zero-row frame to a constant still gives bool columns."""
        # GH 15077 empty DataFrame
        df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
        const = 2

        empty = df.iloc[:0]
        result = getattr(empty, opname)(const).dtypes.value_counts()
        tm.assert_series_equal(
            result, Series([2], index=[np.dtype(bool)], name="count")
        )

    def test_df_flex_cmp_ea_dtype_with_ndarray_series(self):
        """``eq`` between interval columns and an integer Series is all False."""
        ii = pd.IntervalIndex.from_breaks([1, 2, 3])
        df = DataFrame({"A": ii, "B": ii})

        ser = Series([0, 0])
        res = df.eq(ser, axis=0)

        expected = DataFrame({"A": [False, False], "B": [False, False]})
        tm.assert_frame_equal(res, expected)

        ser2 = Series([1, 2], index=["A", "B"])
        res2 = df.eq(ser2, axis=1)
        tm.assert_frame_equal(res2, expected)
- # -------------------------------------------------------------------
- # Arithmetic
class TestFrameFlexArithmetic:
    """Tests for the flex arithmetic methods (``add``, ``sub``, ``div``, ...)."""

    def test_floordiv_axis0(self):
        """``df.floordiv(ser, axis=0)`` equals the per-column ``//`` result."""
        # make sure we df.floordiv(ser, axis=0) matches column-wise result
        ser = Series(np.arange(3))
        df = DataFrame({"A": ser, "B": ser})
        expected = DataFrame({name: df[name] // ser for name in df.columns})

        tm.assert_frame_equal(df.floordiv(ser, axis=0), expected)
        tm.assert_frame_equal(df.floordiv(ser.values, axis=0), expected)

    @pytest.mark.skipif(not NUMEXPR_INSTALLED, reason="numexpr not installed")
    @pytest.mark.parametrize("opname", ["floordiv", "pow"])
    def test_floordiv_axis0_numexpr_path(self, opname):
        """Large-frame ``floordiv``/``pow`` along axis 0 match column-wise ops."""
        # case that goes through numexpr and has to fall back to masked_arith_op
        binary_op = getattr(operator, opname)

        n_rows = _MIN_ELEMENTS // 100 + 1
        values = np.arange(_MIN_ELEMENTS + 100).reshape(n_rows, -1) * 100
        df = DataFrame(values)
        df["C"] = 1.0
        ser = df[0]
        flex_method = getattr(df, opname)

        result = flex_method(ser, axis=0)
        expected = DataFrame({name: binary_op(df[name], ser) for name in df.columns})
        tm.assert_frame_equal(result, expected)

        result2 = flex_method(ser.values, axis=0)
        tm.assert_frame_equal(result2, expected)

    def test_df_add_td64_columnwise(self):
        """Adding a timedelta Series along axis 0 applies to every column."""
        # GH 22534 Check that column-wise addition broadcasts correctly
        dti = pd.date_range("2016-01-01", periods=10)
        tdi = pd.timedelta_range("1", periods=10)
        df = DataFrame({0: dti, 1: tdi})

        result = df.add(Series(tdi), axis=0)

        expected = DataFrame({0: dti + tdi, 1: tdi + tdi})
        tm.assert_frame_equal(result, expected)

    def test_df_add_flex_filled_mixed_dtypes(self):
        """``add`` with a timedelta ``fill_value`` on datetime/timedelta columns."""
        # GH 19611
        dti = pd.date_range("2016-01-01", periods=3)
        ser = Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]")
        df = DataFrame({"A": dti, "B": ser})
        other = DataFrame({"A": ser, "B": ser})
        one_day = pd.Timedelta(days=1).to_timedelta64()

        result = df.add(other, fill_value=one_day)

        expected_a = Series(
            ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]"
        )
        expected = DataFrame({"A": expected_a, "B": ser * 2})
        tm.assert_frame_equal(result, expected)

    def test_arith_flex_frame(
        self, all_arithmetic_operators, float_frame, mixed_float_frame
    ):
        """Calling a dunder by name matches the ``operator`` module result."""
        # one instance of parametrized fixture
        op = all_arithmetic_operators

        def apply_via_operator(x, y):
            # r-versions not in operator-stdlib; get op without "r" and invert
            if not op.startswith("__r"):
                return getattr(operator, op)(x, y)
            return getattr(operator, op.replace("__r", "__"))(y, x)

        result = getattr(float_frame, op)(2 * float_frame)
        tm.assert_frame_equal(
            result, apply_via_operator(float_frame, 2 * float_frame)
        )

        # vs mix float
        result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
        tm.assert_frame_equal(
            result, apply_via_operator(mixed_float_frame, 2 * mixed_float_frame)
        )
        _check_mixed_float(result, dtype={"C": None})

    @pytest.mark.parametrize("op", ["__add__", "__sub__", "__mul__"])
    def test_arith_flex_frame_mixed(
        self,
        op,
        int_frame,
        mixed_int_frame,
        mixed_float_frame,
        switch_numexpr_min_elements,
    ):
        """add/sub/mul on mixed-int, mixed-float and plain-int frames."""
        operator_func = getattr(operator, op)

        # vs mix int
        result = getattr(mixed_int_frame, op)(2 + mixed_int_frame)
        expected = operator_func(mixed_int_frame, 2 + mixed_int_frame)

        # no overflow in the uint
        if op in ["__sub__"]:
            dtype = {"B": "uint64", "C": None}
        elif op in ["__add__", "__mul__"]:
            dtype = {"C": None}
        else:
            dtype = None
        if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
            # when using numexpr, the casting rules are slightly different:
            # in the `2 + mixed_int_frame` operation, int32 column becomes
            # and int64 column (not preserving dtype in operation with Python
            # scalar), and then the int32/int64 combo results in int64 result
            dtype["A"] = (2 + mixed_int_frame)["A"].dtype
        tm.assert_frame_equal(result, expected)
        _check_mixed_int(result, dtype=dtype)

        # vs mix float
        result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
        expected = operator_func(mixed_float_frame, 2 * mixed_float_frame)
        tm.assert_frame_equal(result, expected)
        _check_mixed_float(result, dtype={"C": None})

        # vs plain int
        result = getattr(int_frame, op)(2 * int_frame)
        expected = operator_func(int_frame, 2 * int_frame)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dim", range(3, 6))
    def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame, dim):
        """Operands with three or more dimensions raise ``ValueError``."""
        # one instance of parametrized fixture
        op = all_arithmetic_operators

        # Check that arrays with dim >= 3 raise
        too_many_dims = np.ones((1,) * dim)
        with pytest.raises(ValueError, match="Unable to coerce to Series/DataFrame"):
            getattr(float_frame, op)(too_many_dims)

    def test_arith_flex_frame_corner(self, float_frame):
        """Corner cases of ``add``: constants, empty slices, ``fill_value``."""
        tm.assert_frame_equal(float_frame.add(1), float_frame + 1)

        # corner cases
        all_nan = float_frame * np.nan
        tm.assert_frame_equal(float_frame.add(float_frame[:0]), all_nan)

        all_nan = float_frame * np.nan
        tm.assert_frame_equal(float_frame[:0].add(float_frame), all_nan)

        with pytest.raises(NotImplementedError, match="fill_value"):
            float_frame.add(float_frame.iloc[0], fill_value=3)

        with pytest.raises(NotImplementedError, match="fill_value"):
            float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)

    @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
    def test_arith_flex_series_ops(self, simple_frame, op):
        """Flex methods with a row or column Series match the operators."""
        # after arithmetic refactor, add truediv here
        frame = simple_frame
        row = frame.xs("a")
        col = frame["two"]
        flex_method = getattr(frame, op)
        operator_func = getattr(operator, op)

        tm.assert_frame_equal(flex_method(row), operator_func(frame, row))
        tm.assert_frame_equal(
            flex_method(col, axis=0), operator_func(frame.T, col).T
        )

    def test_arith_flex_series(self, simple_frame):
        """``add`` with ``axis=None`` and ``div`` along both axes match operators."""
        frame = simple_frame
        row = frame.xs("a")
        col = frame["two"]

        # special case for some reason
        tm.assert_frame_equal(frame.add(row, axis=None), frame + row)

        # cases which will be refactored after big arithmetic refactor
        tm.assert_frame_equal(frame.div(row), frame / row)
        tm.assert_frame_equal(frame.div(col, axis=0), (frame.T / col).T)

    @pytest.mark.parametrize("dtype", ["int64", "float64"])
    def test_arith_flex_series_broadcasting(self, dtype):
        """``div`` by a column along the index broadcasts across columns."""
        # broadcasting issue in GH 7325
        frame = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=dtype)

        result = frame.div(frame[0], axis="index")

        expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]])
        tm.assert_frame_equal(result, expected)

    def test_arith_flex_zero_len_raises(self):
        """``fill_value`` with a Series operand raises even for zero length."""
        # GH 19522 passing fill_value to frame flex arith methods should
        # raise even in the zero-length special cases
        empty_series = Series([], dtype=object)
        empty_frame = DataFrame(columns=["A", "B"])
        frame = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])

        with pytest.raises(NotImplementedError, match="fill_value"):
            frame.add(empty_series, fill_value="E")

        with pytest.raises(NotImplementedError, match="fill_value"):
            empty_frame.sub(frame["A"], axis=None, fill_value=3)

    def test_flex_add_scalar_fill_value(self):
        """``add(2, fill_value=0)`` equals ``fillna(0).add(2)``."""
        # GH#12723
        values = np.array([0, 1, np.nan, 3, 4, 5], dtype="float")
        frame = DataFrame({"foo": values}, index=range(6))

        expected = frame.fillna(0).add(2)
        result = frame.add(2, fill_value=0)
        tm.assert_frame_equal(result, expected)

    def test_sub_alignment_with_duplicate_index(self):
        """``sub`` aligns correctly when the left index has duplicates."""
        # GH#5185 dup aligning operations should work
        left = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])
        right = DataFrame([1, 2, 3], index=[1, 2, 3])

        result = left.sub(right)

        expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"])
    def test_arithmetic_with_duplicate_columns(self, op):
        """Arithmetic works when both columns share the same label."""
        # operations
        frame = DataFrame({"A": np.arange(10), "B": np.random.rand(10)})
        expected = getattr(frame, op)(frame)
        expected.columns = ["A", "A"]

        frame.columns = ["A", "A"]
        result = getattr(frame, op)(frame)
        tm.assert_frame_equal(result, expected)

        # smoke checks: neither the repr nor the dtypes lookup may raise
        str(result)
        result.dtypes

    @pytest.mark.parametrize("level", [0, None])
    def test_broadcast_multiindex(self, level):
        """Adding a MultiIndex-column frame to a single-level one broadcasts."""
        # GH34388
        single_level = DataFrame({"A": [0, 1, 2], "B": [1, 2, 3]})
        single_level.columns = single_level.columns.set_names("L1")

        two_level = DataFrame({("A", "C"): [0, 0, 0], ("A", "D"): [0, 0, 0]})
        two_level.columns = two_level.columns.set_names(["L1", "L2"])

        result = single_level.add(two_level, level=level)

        expected = DataFrame({("A", "C"): [0, 1, 2], ("A", "D"): [0, 1, 2]})
        expected.columns = expected.columns.set_names(["L1", "L2"])
        tm.assert_frame_equal(result, expected)

    def test_frame_multiindex_operations(self):
        """A Series indexed by a subset of the frame's levels is broadcast."""
        # GH 43321
        level_names = ["scen", "mod", "id"]
        df = DataFrame(
            {2010: [1, 2, 3], 2020: [3, 4, 5]},
            index=MultiIndex.from_product(
                [["a"], ["b"], [0, 1, 2]], names=level_names
            ),
        )
        series = Series(
            [0.4],
            index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
        )

        result = df.add(series, axis=0)

        expected = DataFrame(
            {2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
            index=MultiIndex.from_product(
                [["a"], ["b"], [0, 1, 2]], names=level_names
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_frame_multiindex_operations_series_index_to_frame_index(self):
        """The result takes the Series index when it has the extra level."""
        # GH 43321
        df = DataFrame(
            {2010: [1], 2020: [3]},
            index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]),
        )
        level_names = ["scen", "mod", "id"]
        series = Series(
            [10.0, 20.0, 30.0],
            index=MultiIndex.from_product(
                [["a"], ["b"], [0, 1, 2]], names=level_names
            ),
        )

        result = df.add(series, axis=0)

        expected = DataFrame(
            {2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]},
            index=MultiIndex.from_product(
                [["a"], ["b"], [0, 1, 2]], names=level_names
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_frame_multiindex_operations_no_align(self):
        """With no matching labels the result is all-NaN on the union index."""
        df = DataFrame(
            {2010: [1, 2, 3], 2020: [3, 4, 5]},
            index=MultiIndex.from_product(
                [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
            ),
        )
        series = Series(
            [0.4],
            index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]),
        )

        result = df.add(series, axis=0)

        expected_index = MultiIndex.from_tuples(
            [
                ("a", "b", 0),
                ("a", "b", 1),
                ("a", "b", 2),
                ("a", "c", np.nan),
            ],
            names=["scen", "mod", "id"],
        )
        expected = DataFrame({2010: np.nan, 2020: np.nan}, index=expected_index)
        tm.assert_frame_equal(result, expected)

    def test_frame_multiindex_operations_part_align(self):
        """Rows whose labels do not match the Series become NaN."""
        index_tuples = [
            ("a", "b", 0),
            ("a", "b", 1),
            ("a", "c", 2),
        ]
        level_names = ["scen", "mod", "id"]
        df = DataFrame(
            {2010: [1, 2, 3], 2020: [3, 4, 5]},
            index=MultiIndex.from_tuples(index_tuples, names=level_names),
        )
        series = Series(
            [0.4],
            index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
        )

        result = df.add(series, axis=0)

        expected = DataFrame(
            {2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]},
            index=MultiIndex.from_tuples(index_tuples, names=level_names),
        )
        tm.assert_frame_equal(result, expected)
- class TestFrameArithmetic:
def test_td64_op_nat_casting(self):
    """Multiplying an int frame by a timedelta-NaT Series stays timedelta."""
    # Make sure we don't accidentally treat timedelta64(NaT) as datetime64
    # when calling dispatch_to_series in DataFrame arithmetic
    nat_ser = Series(["NaT", "NaT"], dtype="timedelta64[ns]")
    frame = DataFrame([[1, 2], [3, 4]])

    product = frame * nat_ser

    tm.assert_frame_equal(product, DataFrame({0: nat_ser, 1: nat_ser}))
def test_df_add_2d_array_rowlike_broadcasts(self):
    """A ``(1, ncols)`` array is added to every row, from either side."""
    # GH#23000
    values = np.arange(6).reshape(3, 2)
    frame = DataFrame(values, columns=[True, False], index=["A", "B", "C"])

    rowlike = values[[1], :]  # shape --> (1, ncols)
    assert rowlike.shape == (1, frame.shape[1])

    expected = DataFrame(
        [[2, 4], [4, 6], [6, 8]],
        columns=frame.columns,
        index=frame.index,
        # specify dtype explicitly to avoid failing
        # on 32bit builds
        dtype=values.dtype,
    )
    tm.assert_frame_equal(frame + rowlike, expected)
    tm.assert_frame_equal(rowlike + frame, expected)
def test_df_add_2d_array_collike_broadcasts(self):
    """A ``(nrows, 1)`` array is added to every column, from either side."""
    # GH#23000
    values = np.arange(6).reshape(3, 2)
    frame = DataFrame(values, columns=[True, False], index=["A", "B", "C"])

    collike = values[:, [1]]  # shape --> (nrows, 1)
    assert collike.shape == (frame.shape[0], 1)

    expected = DataFrame(
        [[1, 2], [5, 6], [9, 10]],
        columns=frame.columns,
        index=frame.index,
        # specify dtype explicitly to avoid failing
        # on 32bit builds
        dtype=values.dtype,
    )
    tm.assert_frame_equal(frame + collike, expected)
    tm.assert_frame_equal(collike + frame, expected)
- def test_df_arith_2d_array_rowlike_broadcasts(
- self, request, all_arithmetic_operators, using_array_manager
- ):
- # GH#23000
- opname = all_arithmetic_operators
- if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
- # TODO(ArrayManager) decide on dtypes
- td.mark_array_manager_not_yet_implemented(request)
- arr = np.arange(6).reshape(3, 2)
- df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
- rowlike = arr[[1], :] # shape --> (1, ncols)
- assert rowlike.shape == (1, df.shape[1])
- exvals = [
- getattr(df.loc["A"], opname)(rowlike.squeeze()),
- getattr(df.loc["B"], opname)(rowlike.squeeze()),
- getattr(df.loc["C"], opname)(rowlike.squeeze()),
- ]
- expected = DataFrame(exvals, columns=df.columns, index=df.index)
- result = getattr(df, opname)(rowlike)
- tm.assert_frame_equal(result, expected)
- def test_df_arith_2d_array_collike_broadcasts(
- self, request, all_arithmetic_operators, using_array_manager
- ):
- # GH#23000
- opname = all_arithmetic_operators
- if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
- # TODO(ArrayManager) decide on dtypes
- td.mark_array_manager_not_yet_implemented(request)
- arr = np.arange(6).reshape(3, 2)
- df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
- collike = arr[:, [1]] # shape --> (nrows, 1)
- assert collike.shape == (df.shape[0], 1)
- exvals = {
- True: getattr(df[True], opname)(collike.squeeze()),
- False: getattr(df[False], opname)(collike.squeeze()),
- }
- dtype = None
- if opname in ["__rmod__", "__rfloordiv__"]:
- # Series ops may return mixed int/float dtypes in cases where
- # DataFrame op will return all-float. So we upcast `expected`
- dtype = np.common_type(*(x.values for x in exvals.values()))
- expected = DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype)
- result = getattr(df, opname)(collike)
- tm.assert_frame_equal(result, expected)
- def test_df_bool_mul_int(self):
- # GH 22047, GH 22163 multiplication by 1 should result in int dtype,
- # not object dtype
- df = DataFrame([[False, True], [False, False]])
- result = df * 1
- # On appveyor this comes back as np.int32 instead of np.int64,
- # so we check dtype.kind instead of just dtype
- kinds = result.dtypes.apply(lambda x: x.kind)
- assert (kinds == "i").all()
- result = 1 * df
- kinds = result.dtypes.apply(lambda x: x.kind)
- assert (kinds == "i").all()
- def test_arith_mixed(self):
- left = DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]})
- result = left + left
- expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]})
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("col", ["A", "B"])
- def test_arith_getitem_commute(self, all_arithmetic_functions, col):
- df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]})
- result = all_arithmetic_functions(df, 1)[col]
- expected = all_arithmetic_functions(df[col], 1)
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize(
- "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])]
- )
- def test_arith_alignment_non_pandas_object(self, values):
- # GH#17901
- df = DataFrame({"A": [1, 1], "B": [1, 1]})
- expected = DataFrame({"A": [2, 2], "B": [3, 3]})
- result = df + values
- tm.assert_frame_equal(result, expected)
- def test_arith_non_pandas_object(self):
- df = DataFrame(
- np.arange(1, 10, dtype="f8").reshape(3, 3),
- columns=["one", "two", "three"],
- index=["a", "b", "c"],
- )
- val1 = df.xs("a").values
- added = DataFrame(df.values + val1, index=df.index, columns=df.columns)
- tm.assert_frame_equal(df + val1, added)
- added = DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns)
- tm.assert_frame_equal(df.add(val1, axis=0), added)
- val2 = list(df["two"])
- added = DataFrame(df.values + val2, index=df.index, columns=df.columns)
- tm.assert_frame_equal(df + val2, added)
- added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns)
- tm.assert_frame_equal(df.add(val2, axis="index"), added)
- val3 = np.random.rand(*df.shape)
- added = DataFrame(df.values + val3, index=df.index, columns=df.columns)
- tm.assert_frame_equal(df.add(val3), added)
- def test_operations_with_interval_categories_index(self, all_arithmetic_operators):
- # GH#27415
- op = all_arithmetic_operators
- ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0))
- data = [1, 2]
- df = DataFrame([data], columns=ind)
- num = 10
- result = getattr(df, op)(num)
- expected = DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
- tm.assert_frame_equal(result, expected)
- def test_frame_with_frame_reindex(self):
- # GH#31623
- df = DataFrame(
- {
- "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
- "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
- },
- columns=["foo", "bar"],
- )
- df2 = df[["foo"]]
- result = df - df2
- expected = DataFrame(
- {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]},
- columns=["bar", "foo"],
- )
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "value, dtype",
- [
- (1, "i8"),
- (1.0, "f8"),
- (2**63, "f8"),
- (1j, "complex128"),
- (2**63, "complex128"),
- (True, "bool"),
- (np.timedelta64(20, "ns"), "<m8[ns]"),
- (np.datetime64(20, "ns"), "<M8[ns]"),
- ],
- )
- @pytest.mark.parametrize(
- "op",
- [
- operator.add,
- operator.sub,
- operator.mul,
- operator.truediv,
- operator.mod,
- operator.pow,
- ],
- ids=lambda x: x.__name__,
- )
- def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements, request):
- skip = {
- (operator.truediv, "bool"),
- (operator.pow, "bool"),
- (operator.add, "bool"),
- (operator.mul, "bool"),
- }
- elem = DummyElement(value, dtype)
- df = DataFrame({"A": [elem.value, elem.value]}, dtype=elem.dtype)
- invalid = {
- (operator.pow, "<M8[ns]"),
- (operator.mod, "<M8[ns]"),
- (operator.truediv, "<M8[ns]"),
- (operator.mul, "<M8[ns]"),
- (operator.add, "<M8[ns]"),
- (operator.pow, "<m8[ns]"),
- (operator.mul, "<m8[ns]"),
- (operator.sub, "bool"),
- (operator.mod, "complex128"),
- }
- if (op, dtype) in invalid:
- warn = None
- if (dtype == "<M8[ns]" and op == operator.add) or (
- dtype == "<m8[ns]" and op == operator.mul
- ):
- msg = None
- elif dtype == "complex128":
- msg = "ufunc 'remainder' not supported for the input types"
- elif op is operator.sub:
- msg = "numpy boolean subtract, the `-` operator, is "
- if (
- dtype == "bool"
- and expr.USE_NUMEXPR
- and switch_numexpr_min_elements == 0
- ):
- warn = UserWarning # "evaluating in Python space because ..."
- else:
- msg = (
- f"cannot perform __{op.__name__}__ with this "
- "index type: (DatetimeArray|TimedeltaArray)"
- )
- with pytest.raises(TypeError, match=msg):
- with tm.assert_produces_warning(warn):
- op(df, elem.value)
- elif (op, dtype) in skip:
- if op in [operator.add, operator.mul]:
- if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
- # "evaluating in Python space because ..."
- warn = UserWarning
- else:
- warn = None
- with tm.assert_produces_warning(warn):
- op(df, elem.value)
- else:
- msg = "operator '.*' not implemented for .* dtypes"
- with pytest.raises(NotImplementedError, match=msg):
- op(df, elem.value)
- else:
- with tm.assert_produces_warning(None):
- result = op(df, elem.value).dtypes
- expected = op(df, value).dtypes
- tm.assert_series_equal(result, expected)
- def test_arithmetic_midx_cols_different_dtypes(self):
- # GH#49769
- midx = MultiIndex.from_arrays([Series([1, 2]), Series([3, 4])])
- midx2 = MultiIndex.from_arrays([Series([1, 2], dtype="Int8"), Series([3, 4])])
- left = DataFrame([[1, 2], [3, 4]], columns=midx)
- right = DataFrame([[1, 2], [3, 4]], columns=midx2)
- result = left - right
- expected = DataFrame([[0, 0], [0, 0]], columns=midx)
- tm.assert_frame_equal(result, expected)
- def test_arithmetic_midx_cols_different_dtypes_different_order(self):
- # GH#49769
- midx = MultiIndex.from_arrays([Series([1, 2]), Series([3, 4])])
- midx2 = MultiIndex.from_arrays([Series([2, 1], dtype="Int8"), Series([4, 3])])
- left = DataFrame([[1, 2], [3, 4]], columns=midx)
- right = DataFrame([[1, 2], [3, 4]], columns=midx2)
- result = left - right
- expected = DataFrame([[-1, 1], [-1, 1]], columns=midx)
- tm.assert_frame_equal(result, expected)
def test_frame_with_zero_len_series_corner_cases():
    """Empty Series: addition gives all-NaN, ``==`` raises "not aligned"."""
    # GH#28600
    # easy all-float case
    float_frame = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "B"])
    empty = Series(dtype=np.float64)

    all_nan = DataFrame(float_frame.values * np.nan, columns=float_frame.columns)
    tm.assert_frame_equal(float_frame + empty, all_nan)

    # Automatic alignment for comparisons deprecated GH#36795, enforced 2.0
    with pytest.raises(ValueError, match="not aligned"):
        float_frame == empty

    # non-float case should not raise TypeError on comparison
    dt64_frame = DataFrame(
        float_frame.values.view("M8[ns]"), columns=float_frame.columns
    )
    # Automatic alignment for comparisons deprecated
    with pytest.raises(ValueError, match="not aligned"):
        dt64_frame == empty
def test_zero_len_frame_with_series_corner_cases():
    """Adding a Series to a zero-row float frame gives back an equal frame."""
    # GH#28600
    no_rows = DataFrame(columns=["A", "B"], dtype=np.float64)
    addend = Series([1, 2], index=["A", "B"])

    outcome = no_rows + addend
    tm.assert_frame_equal(outcome, no_rows)
def test_frame_single_columns_object_sum_axis_1():
    """Row-wise sum of a single object column returns the values, NaN as 0."""
    # GH 13758
    only_column = Series(["A", 1.2, np.nan])
    frame = DataFrame({"One": only_column})

    expected = Series(["A", 1.2, 0])
    row_sums = frame.sum(axis=1)
    tm.assert_series_equal(row_sums, expected)
- # -------------------------------------------------------------------
- # Unsorted
- # These arithmetic tests were previously in other files, eventually
- # should be parametrized and put into tests.arithmetic
class TestFrameArithmeticUnsorted:
    """Assorted DataFrame arithmetic / comparison tests gathered from other files."""

    def test_frame_add_tz_mismatch_converts_to_utc(self):
        """Adding frames indexed in different timezones yields a UTC index."""
        rng = pd.date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
        df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"])

        df_moscow = df.tz_convert("Europe/Moscow")
        result = df + df_moscow
        assert result.index.tz is timezone.utc

        result = df_moscow + df
        assert result.index.tz is timezone.utc

    def test_align_frame(self):
        """Adding every other row of a period-indexed frame leaves the rest NaN."""
        rng = pd.period_range("1/1/2000", "1/1/2010", freq="A")
        ts = DataFrame(np.random.randn(len(rng), 3), index=rng)

        result = ts + ts[::2]
        expected = ts + ts
        expected.iloc[1::2] = np.nan
        tm.assert_frame_equal(result, expected)

        # same rows in shuffled order must align to the same result
        half = ts[::2]
        result = ts + half.take(np.random.permutation(len(half)))
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "op", [operator.add, operator.sub, operator.mul, operator.truediv]
    )
    def test_operators_none_as_na(self, op):
        """``None`` in an object frame is treated as missing by arithmetic ops."""
        df = DataFrame(
            {"col1": [2, 5.0, 123, None], "col2": [1, 2, 3, 4]}, dtype=object
        )

        # since filling converts dtypes from object, changed expected to be
        # object
        filled = df.fillna(np.nan)
        result = op(df, 3)
        expected = op(filled, 3).astype(object)
        expected[com.isna(expected)] = None
        tm.assert_frame_equal(result, expected)

        result = op(df, df)
        expected = op(filled, filled).astype(object)
        expected[com.isna(expected)] = None
        tm.assert_frame_equal(result, expected)

        result = op(df, df.fillna(7))
        tm.assert_frame_equal(result, expected)

        result = op(df.fillna(7), df)
        tm.assert_frame_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
    # TODO: not sure what's correct here.
    @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning")
    def test_logical_typeerror_with_non_valid(self, op, res, float_frame):
        """Comparing a float frame with a string is all-False for ==, all-True for !=."""
        # we are comparing floats vs a string
        result = getattr(float_frame, op)("foo")
        assert bool(result.all().all()) is res

    @pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "truediv"])
    def test_binary_ops_align(self, op):
        """Flex ops with ``level=`` broadcast a Series over one MultiIndex level."""
        # test aligning binary ops

        # GH 6681
        index = MultiIndex.from_product(
            [list("abc"), ["one", "two", "three"], [1, 2, 3]],
            names=["first", "second", "third"],
        )

        df = DataFrame(
            np.arange(27 * 3).reshape(27, 3),
            index=index,
            columns=["value1", "value2", "value3"],
        ).sort_index()

        idx = pd.IndexSlice
        opa = getattr(operator, op, None)
        if opa is None:
            # the ``operator`` module has no function of this name, so there
            # is nothing to build the expectation from
            return

        x = Series([1.0, 10.0, 100.0], [1, 2, 3])
        result = getattr(df, op)(x, level="third", axis=0)

        expected = pd.concat(
            [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()]
        ).sort_index()
        tm.assert_frame_equal(result, expected)

        x = Series([1.0, 10.0], ["two", "three"])
        result = getattr(df, op)(x, level="second", axis=0)

        expected = (
            pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()])
            .reindex_like(df)
            .sort_index()
        )
        tm.assert_frame_equal(result, expected)

    def test_binary_ops_align_series_dataframe(self):
        """``mul(..., axis=1, level=...)`` aligns a Series on a column level."""
        # GH9463 (alignment level of dataframe with series)

        midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
        df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
        s = Series({"a": 1, "b": 2})

        df2 = df.copy()
        df2.columns.names = ["lvl0", "lvl1"]
        s2 = s.copy()
        s2.index.name = "lvl1"

        # different cases of integer/string level names:
        res1 = df.mul(s, axis=1, level=1)
        res2 = df.mul(s2, axis=1, level=1)
        res3 = df2.mul(s, axis=1, level=1)
        res4 = df2.mul(s2, axis=1, level=1)
        res5 = df2.mul(s, axis=1, level="lvl1")
        res6 = df2.mul(s2, axis=1, level="lvl1")

        exp = DataFrame(
            np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx
        )

        for res in [res1, res2]:
            tm.assert_frame_equal(res, exp)

        # results computed from the named-level frame carry the level names
        exp.columns.names = ["lvl0", "lvl1"]
        for res in [res3, res4, res5, res6]:
            tm.assert_frame_equal(res, exp)

    def test_add_with_dti_mismatched_tzs(self):
        """Frames indexed in Tokyo / Eastern time align on a UTC index when added."""
        base = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
        idx1 = base.tz_convert("Asia/Tokyo")[:2]
        idx2 = base.tz_convert("US/Eastern")[1:]

        df1 = DataFrame({"A": [1, 2]}, index=idx1)
        df2 = DataFrame({"A": [1, 1]}, index=idx2)
        exp = DataFrame({"A": [np.nan, 3, np.nan]}, index=base)
        tm.assert_frame_equal(df1 + df2, exp)

    def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
        """Frame + frame: alignment, NaN propagation, empties and dtype upcasts."""
        frame_copy = float_frame.reindex(float_frame.index[::2])

        del frame_copy["D"]
        # adding NAs to first 5 values of column "C"
        frame_copy.loc[: frame_copy.index[4], "C"] = np.nan

        added = float_frame + frame_copy

        indexer = added["A"].dropna().index
        exp = (float_frame["A"] * 2).copy()

        tm.assert_series_equal(added["A"].dropna(), exp.loc[indexer])

        exp.loc[~exp.index.isin(indexer)] = np.nan
        tm.assert_series_equal(added["A"], exp.loc[added["A"].index])

        assert np.isnan(added["C"].reindex(frame_copy.index)[:5]).all()

        # column "D" only exists on one side, so the sum is NaN throughout
        assert np.isnan(added["D"]).all()

        self_added = float_frame + float_frame
        tm.assert_index_equal(self_added.index, float_frame.index)

        added_rev = frame_copy + float_frame
        assert np.isnan(added_rev["D"]).all()

        # corner cases

        # empty
        plus_empty = float_frame + DataFrame()
        assert np.isnan(plus_empty.values).all()

        empty_plus = DataFrame() + float_frame
        assert np.isnan(empty_plus.values).all()

        empty_empty = DataFrame() + DataFrame()
        assert empty_empty.empty

        # out of order
        reverse = float_frame.reindex(columns=float_frame.columns[::-1])

        tm.assert_frame_equal(reverse + float_frame, float_frame * 2)

        # mix vs float64, upcast
        added = float_frame + mixed_float_frame
        _check_mixed_float(added, dtype="float64")
        added = mixed_float_frame + float_frame
        _check_mixed_float(added, dtype="float64")

        # mix vs mix
        added = mixed_float_frame + mixed_float_frame
        _check_mixed_float(added, dtype={"C": None})

        # with int
        added = float_frame + mixed_int_frame
        _check_mixed_float(added, dtype="float64")

    def test_combine_series(self, float_frame, mixed_float_frame, mixed_int_frame):
        """Frame + row Series: per-column sums, extra labels and resulting dtypes."""
        # Series
        series = float_frame.xs(float_frame.index[0])

        added = float_frame + series

        for key, s in added.items():
            tm.assert_series_equal(s, float_frame[key] + series[key])

        # a label present only in the Series shows up as an all-NaN column
        larger_series = series.to_dict()
        larger_series["E"] = 1
        larger_series = Series(larger_series)
        larger_added = float_frame + larger_series

        for key, s in float_frame.items():
            tm.assert_series_equal(larger_added[key], s + series[key])
        assert "E" in larger_added
        assert np.isnan(larger_added["E"]).all()

        # no upcast needed
        added = mixed_float_frame + series
        assert np.all(added.dtypes == series.dtype)

        # vs mix (upcast) as needed
        added = mixed_float_frame + series.astype("float32")
        _check_mixed_float(added, dtype={"C": None})
        added = mixed_float_frame + series.astype("float16")
        _check_mixed_float(added, dtype={"C": None})

        # these used to raise with numexpr as we are adding an int64 to an
        # uint64....weird vs int
        added = mixed_int_frame + (100 * series).astype("int64")
        _check_mixed_int(
            added, dtype={"A": "int64", "B": "float64", "C": "int64", "D": "int64"}
        )
        added = mixed_int_frame + (100 * series).astype("int32")
        _check_mixed_int(
            added, dtype={"A": "int32", "B": "float64", "C": "int32", "D": "int64"}
        )

    def test_combine_timeseries(self, datetime_frame):
        """Frame.add(column, axis="index") broadcasts a time series down the rows."""
        # TimeSeries
        ts = datetime_frame["A"]

        # 10890
        # we no longer allow auto timeseries broadcasting
        # and require explicit broadcasting
        added = datetime_frame.add(ts, axis="index")

        for key, col in datetime_frame.items():
            result = col + ts
            tm.assert_series_equal(added[key], result, check_names=False)
            assert added[key].name == key
            if col.name == ts.name:
                assert result.name == "A"
            else:
                assert result.name is None

        smaller_frame = datetime_frame[:-5]
        smaller_added = smaller_frame.add(ts, axis="index")

        tm.assert_index_equal(smaller_added.index, datetime_frame.index)

        smaller_ts = ts[:-5]
        smaller_added2 = datetime_frame.add(smaller_ts, axis="index")
        tm.assert_frame_equal(smaller_added, smaller_added2)

        # length 0, result is all-nan
        result = datetime_frame.add(ts[:0], axis="index")
        expected = DataFrame(
            np.nan, index=datetime_frame.index, columns=datetime_frame.columns
        )
        tm.assert_frame_equal(result, expected)

        # Frame is all-nan
        result = datetime_frame[:0].add(ts, axis="index")
        expected = DataFrame(
            np.nan, index=datetime_frame.index, columns=datetime_frame.columns
        )
        tm.assert_frame_equal(result, expected)

        # empty but with non-empty index
        frame = datetime_frame[:1].reindex(columns=[])
        result = frame.mul(ts, axis="index")
        assert len(result) == len(ts)

    def test_combineFunc(self, float_frame, mixed_float_frame):
        """Scalar multiplication matches the ndarray result, including empties."""
        result = float_frame * 2
        tm.assert_numpy_array_equal(result.values, float_frame.values * 2)

        # vs mix
        result = mixed_float_frame * 2
        for c, s in result.items():
            tm.assert_numpy_array_equal(s.values, mixed_float_frame[c].values * 2)
        _check_mixed_float(result, dtype={"C": None})

        result = DataFrame() * 2
        assert result.index.equals(DataFrame().index)
        assert len(result.columns) == 0

    @pytest.mark.parametrize(
        "func",
        [operator.eq, operator.ne, operator.lt, operator.gt, operator.ge, operator.le],
    )
    def test_comparisons(self, simple_frame, float_frame, func):
        """Comparison operators against frames, rows, scalars and bad shapes."""
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame()

        row = simple_frame.xs("a")
        ndim_5 = np.ones(df1.shape + (1, 1, 1))

        result = func(df1, df2)
        tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values))

        msg = (
            "Unable to coerce to Series/DataFrame, "
            "dimension must be <= 2: (30, 4, 1, 1, 1)"
        )
        with pytest.raises(ValueError, match=re.escape(msg)):
            func(df1, ndim_5)

        result2 = func(simple_frame, row)
        tm.assert_numpy_array_equal(
            result2.values, func(simple_frame.values, row.values)
        )

        result3 = func(float_frame, 0)
        tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0))

        msg = (
            r"Can only compare identically-labeled \(both index and columns\) "
            "DataFrame objects"
        )
        with pytest.raises(ValueError, match=msg):
            func(simple_frame, simple_frame[:2])

    def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne):
        """Ordering comparisons between a string frame and an int raise TypeError."""
        # GH 11565
        df = DataFrame(
            {x: {"x": "foo", "y": "bar", "z": "baz"} for x in ["a", "b", "c"]}
        )

        f = getattr(operator, compare_operators_no_eq_ne)
        msg = "'[<>]=?' not supported between instances of 'str' and 'int'"
        with pytest.raises(TypeError, match=msg):
            f(df, 0)

    def test_comparison_protected_from_errstate(self):
        """Comparing a frame holding NaN does not raise under errstate(invalid="raise")."""
        missing_df = tm.makeDataFrame()
        missing_df.loc[missing_df.index[0], "A"] = np.nan
        with np.errstate(invalid="ignore"):
            expected = missing_df.values < 0
        with np.errstate(invalid="raise"):
            result = (missing_df < 0).values
        tm.assert_numpy_array_equal(result, expected)

    def test_boolean_comparison(self):
        """Comparisons with arrays, lists and tuples of matching / mismatched shape."""
        # GH 4576
        # boolean comparisons with a tuple/list give unexpected results
        df = DataFrame(np.arange(6).reshape((3, 2)))
        b = np.array([2, 2])
        b_r = np.atleast_2d([2, 2])
        b_c = b_r.T
        lst = [2, 2, 2]
        tup = tuple(lst)

        # gt
        expected = DataFrame([[False, False], [False, True], [True, True]])
        result = df > b
        tm.assert_frame_equal(result, expected)

        result = df.values > b
        tm.assert_numpy_array_equal(result, expected.values)

        msg1d = "Unable to coerce to Series, length must be 2: given 3"
        msg2d = "Unable to coerce to DataFrame, shape must be"
        msg2db = "operands could not be broadcast together with shapes"
        with pytest.raises(ValueError, match=msg1d):
            # wrong shape
            df > lst

        with pytest.raises(ValueError, match=msg1d):
            # wrong shape
            df > tup

        # broadcasts like ndarray (GH#23000)
        result = df > b_r
        tm.assert_frame_equal(result, expected)

        result = df.values > b_r
        tm.assert_numpy_array_equal(result, expected.values)

        with pytest.raises(ValueError, match=msg2d):
            df > b_c

        with pytest.raises(ValueError, match=msg2db):
            df.values > b_c

        # ==
        expected = DataFrame([[False, False], [True, False], [False, False]])
        result = df == b
        tm.assert_frame_equal(result, expected)

        with pytest.raises(ValueError, match=msg1d):
            df == lst

        with pytest.raises(ValueError, match=msg1d):
            df == tup

        # broadcasts like ndarray (GH#23000)
        result = df == b_r
        tm.assert_frame_equal(result, expected)

        result = df.values == b_r
        tm.assert_numpy_array_equal(result, expected.values)

        with pytest.raises(ValueError, match=msg2d):
            df == b_c

        assert df.values.shape != b_c.shape

        # with alignment
        df = DataFrame(
            np.arange(6).reshape((3, 2)), columns=list("AB"), index=list("abc")
        )
        expected.index = df.index
        expected.columns = df.columns

        with pytest.raises(ValueError, match=msg1d):
            df == lst

        with pytest.raises(ValueError, match=msg1d):
            df == tup

    def test_inplace_ops_alignment(self):
        """``+=`` / ``-=`` on a column subset align the same way as ``+`` / ``-``."""
        # inplace ops / ops alignment
        # GH 8511

        columns = list("abcdefg")
        X_orig = DataFrame(
            np.arange(10 * len(columns)).reshape(-1, len(columns)),
            columns=columns,
            index=range(10),
        )
        Z = 100 * X_orig.iloc[:, 1:-1].copy()
        block1 = list("bedcf")
        subs = list("bcdef")

        # add
        X = X_orig.copy()
        result1 = (X[block1] + Z).reindex(columns=subs)

        X[block1] += Z
        result2 = X.reindex(columns=subs)

        X = X_orig.copy()
        result3 = (X[block1] + Z[block1]).reindex(columns=subs)

        X[block1] += Z[block1]
        result4 = X.reindex(columns=subs)

        tm.assert_frame_equal(result1, result2)
        tm.assert_frame_equal(result1, result3)
        tm.assert_frame_equal(result1, result4)

        # sub
        X = X_orig.copy()
        result1 = (X[block1] - Z).reindex(columns=subs)

        X[block1] -= Z
        result2 = X.reindex(columns=subs)

        X = X_orig.copy()
        result3 = (X[block1] - Z[block1]).reindex(columns=subs)

        X[block1] -= Z[block1]
        result4 = X.reindex(columns=subs)

        tm.assert_frame_equal(result1, result2)
        tm.assert_frame_equal(result1, result3)
        tm.assert_frame_equal(result1, result4)

    def test_inplace_ops_identity(self):
        """Augmented assignment keeps the same Series / DataFrame object."""
        # GH 5104
        # make sure that we are actually changing the object
        s_orig = Series([1, 2, 3])
        df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5))

        # no dtype change
        s = s_orig.copy()
        s2 = s
        s += 1
        tm.assert_series_equal(s, s2)
        tm.assert_series_equal(s_orig + 1, s)
        assert s is s2
        assert s._mgr is s2._mgr

        df = df_orig.copy()
        df2 = df
        df += 1
        tm.assert_frame_equal(df, df2)
        tm.assert_frame_equal(df_orig + 1, df)
        assert df is df2
        assert df._mgr is df2._mgr

        # dtype change
        s = s_orig.copy()
        s2 = s
        s += 1.5
        tm.assert_series_equal(s, s2)
        tm.assert_series_equal(s_orig + 1.5, s)

        df = df_orig.copy()
        df2 = df
        df += 1.5
        tm.assert_frame_equal(df, df2)
        tm.assert_frame_equal(df_orig + 1.5, df)
        assert df is df2
        assert df._mgr is df2._mgr

        # mixed dtype
        arr = np.random.randint(0, 10, size=5)
        df_orig = DataFrame({"A": arr.copy(), "B": "foo"})
        df = df_orig.copy()
        df2 = df
        df["A"] += 1
        expected = DataFrame({"A": arr.copy() + 1, "B": "foo"})
        tm.assert_frame_equal(df, expected)
        tm.assert_frame_equal(df2, expected)
        assert df._mgr is df2._mgr

        df = df_orig.copy()
        df2 = df
        df["A"] += 1.5
        expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"})
        tm.assert_frame_equal(df, expected)
        tm.assert_frame_equal(df2, expected)
        assert df._mgr is df2._mgr

    @pytest.mark.parametrize(
        "op",
        [
            "add",
            "and",
            "div",
            "floordiv",
            "mod",
            "mul",
            "or",
            "pow",
            "sub",
            "truediv",
            "xor",
        ],
    )
    def test_inplace_ops_identity2(self, op):
        """Calling ``__i<op>__`` directly mutates the frame and returns that frame."""
        if op == "div":
            # no ``__idiv__`` / ``__div__`` pair is exercised for this name
            return

        df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]})

        operand = 2
        if op in ("and", "or", "xor"):
            # cannot use floats for boolean ops
            df["a"] = [True, False, True]

        df_copy = df.copy()
        inplace_name = f"__i{op}__"
        binary_name = f"__{op}__"

        # value is correct
        returned = getattr(df, inplace_name)(operand)
        expected = getattr(df_copy, binary_name)(operand)
        tm.assert_frame_equal(df, expected)

        # no id change: the in-place dunder must hand back the very object it
        # was called on.  (Comparing id(df) with itself after the call, as
        # this test used to do, could never fail.)
        assert returned is df

    @pytest.mark.parametrize(
        "val",
        [
            [1, 2, 3],
            (1, 2, 3),
            np.array([1, 2, 3], dtype=np.int64),
            range(1, 4),
        ],
    )
    def test_alignment_non_pandas(self, val):
        """``align_method_FRAME`` expands a 1-D operand along the requested axis."""
        index = ["A", "B", "C"]
        columns = ["X", "Y", "Z"]
        df = DataFrame(np.random.randn(3, 3), index=index, columns=columns)

        align = pd.core.ops.align_method_FRAME

        expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index)
        tm.assert_frame_equal(align(df, val, "index")[1], expected)

        expected = DataFrame(
            {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index
        )
        tm.assert_frame_equal(align(df, val, "columns")[1], expected)

    @pytest.mark.parametrize("val", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)])
    def test_alignment_non_pandas_length_mismatch(self, val):
        """A 1-D operand of the wrong length raises ValueError on either axis."""
        index = ["A", "B", "C"]
        columns = ["X", "Y", "Z"]
        df = DataFrame(np.random.randn(3, 3), index=index, columns=columns)

        align = pd.core.ops.align_method_FRAME
        # length mismatch
        msg = "Unable to coerce to Series, length must be 3: given 2"
        with pytest.raises(ValueError, match=msg):
            align(df, val, "index")

        with pytest.raises(ValueError, match=msg):
            align(df, val, "columns")

    def test_alignment_non_pandas_index_columns(self):
        """2-D operands take the frame's labels; bad shapes / ndim raise ValueError."""
        index = ["A", "B", "C"]
        columns = ["X", "Y", "Z"]
        df = DataFrame(np.random.randn(3, 3), index=index, columns=columns)

        align = pd.core.ops.align_method_FRAME
        val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        tm.assert_frame_equal(
            align(df, val, "index")[1],
            DataFrame(val, index=df.index, columns=df.columns),
        )
        tm.assert_frame_equal(
            align(df, val, "columns")[1],
            DataFrame(val, index=df.index, columns=df.columns),
        )

        # shape mismatch
        msg = "Unable to coerce to DataFrame, shape must be"
        val = np.array([[1, 2, 3], [4, 5, 6]])
        with pytest.raises(ValueError, match=msg):
            align(df, val, "index")

        with pytest.raises(ValueError, match=msg):
            align(df, val, "columns")

        # more than two dimensions
        val = np.zeros((3, 3, 3))
        msg = re.escape(
            "Unable to coerce to Series/DataFrame, dimension must be <= 2: (3, 3, 3)"
        )
        with pytest.raises(ValueError, match=msg):
            align(df, val, "index")
        with pytest.raises(ValueError, match=msg):
            align(df, val, "columns")

    def test_no_warning(self, all_arithmetic_operators):
        """Arithmetic between a frame and one of its NaN-holding columns is silent."""
        df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
        b = df["B"]
        with tm.assert_produces_warning(None):
            getattr(df, all_arithmetic_operators)(b)

    def test_dunder_methods_binary(self, all_arithmetic_operators):
        """Arithmetic dunders reject a second positional operand with TypeError."""
        # GH#??? frame.__foo__ should only accept one argument
        df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
        b = df["B"]
        with pytest.raises(TypeError, match="takes 2 positional arguments"):
            getattr(df, all_arithmetic_operators)(b, 0)

    def test_align_int_fill_bug(self):
        """Subtracting the column means gives the same result for int and float input."""
        # GH#910
        X = np.arange(10 * 10, dtype="float64").reshape(10, 10)
        Y = np.ones((10, 1), dtype=int)

        df1 = DataFrame(X)
        df1["0.X"] = Y.squeeze()

        df2 = df1.astype(float)

        result = df1 - df1.mean()
        expected = df2 - df2.mean()
        tm.assert_frame_equal(result, expected)
def test_pow_with_realignment():
    """``**`` against a column-less frame keeps 1.0 where the base is 1."""
    # GH#32685 pow has special semantics for operating with null values
    base = DataFrame({"A": [0, 1, 2]})
    exponent = DataFrame(index=[0, 1, 2])

    expected = DataFrame({"A": [np.nan, 1.0, np.nan]})
    powered = base**exponent
    tm.assert_frame_equal(powered, expected)
- # TODO: move to tests.arithmetic and parametrize
def test_pow_nan_with_zero():
    """NaN raised to the power 0 is 1.0 for both frames and their columns."""
    base = DataFrame({"A": [np.nan, np.nan, np.nan]})
    exponent = DataFrame({"A": [0, 0, 0]})

    ones = DataFrame({"A": [1.0, 1.0, 1.0]})

    frame_result = base**exponent
    tm.assert_frame_equal(frame_result, ones)

    column_result = base["A"] ** exponent["A"]
    tm.assert_series_equal(column_result, ones["A"])
def test_dataframe_series_extension_dtypes():
    """An Int64 frame plus an int64 or Int64 Series gives an Int64 frame."""
    # https://github.com/pandas-dev/pandas/issues/34311
    labels = ["a", "b", "c"]
    numpy_frame = DataFrame(np.random.randint(0, 100, (10, 3)), columns=labels)
    row = Series([1, 2, 3], index=labels)

    # Reference values are computed on plain int64 ndarrays.
    summed = numpy_frame.to_numpy("int64") + row.to_numpy("int64").reshape(-1, 3)
    expected = DataFrame(summed, columns=numpy_frame.columns, dtype="Int64")

    masked_frame = numpy_frame.astype("Int64")

    result = masked_frame + row
    tm.assert_frame_equal(result, expected)

    result = masked_frame + row.astype("Int64")
    tm.assert_frame_equal(result, expected)
def test_dataframe_blockwise_slicelike():
    """Frame + frame equals the column-by-column sum for differing float layouts."""
    # GH#34367
    values = np.random.randint(0, 1000, (100, 10))
    df1 = DataFrame(values)

    def _float_columns_with_nan(float_cols, first_row_indexer):
        # Explicit cast to float to avoid implicit cast when setting nan
        variant = df1.copy().astype({col: "float" for col in float_cols})
        variant.iloc[0, first_row_indexer] = np.nan
        return variant

    df2 = _float_columns_with_nan([1, 3, 7], [1, 3, 7])
    df3 = _float_columns_with_nan([5], [5])
    df4 = _float_columns_with_nan([2, 3, 4], np.arange(2, 5))
    df5 = _float_columns_with_nan([4, 5, 6], np.arange(4, 7))

    pairs = [(df1, df2), (df2, df3), (df4, df5)]
    for lhs, rhs in pairs:
        whole = lhs + rhs
        by_column = DataFrame({label: lhs[label] + rhs[label] for label in lhs.columns})
        tm.assert_frame_equal(whole, by_column)
@pytest.mark.parametrize(
    "df, col_dtype",
    [
        (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"),
        (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"),
    ],
)
def test_dataframe_operation_with_non_numeric_types(df, col_dtype):
    """Adding a Series that only covers "a" leaves "b" NaN with its own dtype."""
    # GH #22663
    addend = Series([-1.0], index=list("a"))

    template = DataFrame([[0.0, np.nan], [3.0, np.nan]], columns=list("ab"))
    expected = template.astype({"b": col_dtype})

    result = df + addend
    tm.assert_frame_equal(result, expected)
def test_arith_reindex_with_duplicates():
    """Addition works when the other frame has duplicated column labels."""
    # https://github.com/pandas-dev/pandas/issues/35194
    wide_columns = ["first", "second", "second"]
    narrow = DataFrame(data=[[0]], columns=["second"])
    wide = DataFrame(data=[[0, 0, 0]], columns=wide_columns)

    expected = DataFrame([[np.nan, 0, 0]], columns=wide_columns)
    total = narrow + wide
    tm.assert_frame_equal(total, expected)
@pytest.mark.parametrize(
    "to_add", [[Series([1, 1])], [Series([1, 1]), Series([1, 1])]]
)
def test_arith_list_of_arraylike_raise(to_add):
    """Adding a list of array-likes to a DataFrame raises ValueError.

    GH 36702. Both operand orders (``df + list`` and ``list + df``) are checked.
    """
    import re

    df = DataFrame({"x": [1, 2], "y": [1, 2]})
    # The message embeds ``<class '...'>``; escape it so the dots are matched
    # literally instead of acting as regex wildcards in ``match=``.
    msg = re.escape(
        f"Unable to coerce list of {type(to_add[0])} to Series/DataFrame"
    )
    with pytest.raises(ValueError, match=msg):
        df + to_add
    with pytest.raises(ValueError, match=msg):
        to_add + df
def test_inplace_arithmetic_series_update(using_copy_on_write):
    """Check whether ``+=`` on a column Series propagates to its parent frame.

    With copy-on-write the Series gets new values and the frame is unchanged;
    otherwise the values object is reused and the frame sees the update.

    https://github.com/pandas-dev/pandas/issues/36373
    """
    df = DataFrame({"A": [1, 2, 3]})
    df_orig = df.copy()

    col = df["A"]
    values_before = col._values
    col += 1

    if not using_copy_on_write:
        # Same underlying values object, and the parent frame was modified.
        assert col._values is values_before
        expected = DataFrame({"A": [2, 3, 4]})
        tm.assert_frame_equal(df, expected)
        return

    # New values object, and the parent frame is left as it was.
    assert col._values is not values_before
    tm.assert_frame_equal(df, df_orig)
def test_arithemetic_multiindex_align():
    """
    Regression test for: https://github.com/pandas-dev/pandas/issues/33765

    Subtracts a frame with a single-level column Index named "a" from a frame
    whose columns are a two-level MultiIndex named ("a", "b").
    """
    two_level_columns = MultiIndex.from_product([[0], [1]], names=["a", "b"])
    one_level_columns = Index([0], name="a")

    minuend = DataFrame([[1]], index=["a"], columns=two_level_columns)
    subtrahend = DataFrame([[1]], index=["a"], columns=one_level_columns)
    result = minuend - subtrahend

    # The result is expected to carry the two-level columns.
    expected = DataFrame([[0]], index=["a"], columns=two_level_columns)
    tm.assert_frame_equal(result, expected)
def test_bool_frame_mult_float():
    """Multiplying an all-True frame by 1.0 gives a frame of float ones.

    GH 18549
    """
    row_labels = list("ab")
    col_labels = list("cd")

    bool_frame = DataFrame(True, index=row_labels, columns=col_labels)
    expected = DataFrame(np.ones((2, 2)), index=row_labels, columns=col_labels)

    tm.assert_frame_equal(bool_frame * 1.0, expected)
def test_frame_sub_nullable_int(any_int_ea_dtype):
    """Subtract single-column frames built from Series of ``any_int_ea_dtype``.

    The missing entry in the left operand is expected to stay missing.

    GH 32822
    """
    minuend = Series([1, 2, None], dtype=any_int_ea_dtype).to_frame()
    subtrahend = Series([1, 2, 3], dtype=any_int_ea_dtype).to_frame()

    result = minuend - subtrahend
    expected = DataFrame([0, 0, None], dtype=any_int_ea_dtype)
    tm.assert_frame_equal(result, expected)
def test_frame_op_subclass_nonclass_constructor():
    """Arithmetic on a DataFrame subclass whose ``_constructor`` is a partial.

    GH#43201 subclass._constructor is a function, not the subclass itself
    """

    class SubclassedSeries(Series):
        @property
        def _constructor(self):
            return SubclassedSeries

        @property
        def _constructor_expanddim(self):
            return SubclassedDataFrame

    class SubclassedDataFrame(DataFrame):
        _metadata = ["my_extra_data"]

        def __init__(self, my_extra_data, *args, **kwargs) -> None:
            self.my_extra_data = my_extra_data
            super().__init__(*args, **kwargs)

        @property
        def _constructor(self):
            # A partial, not the class: it pre-binds the extra leading argument.
            return functools.partial(type(self), self.my_extra_data)

        @property
        def _constructor_sliced(self):
            return SubclassedSeries

    original_data = {"A": [1, 2, 3], "B": [4, 5, 6]}
    doubled_data = {"A": [2, 4, 6], "B": [8, 10, 12]}

    sdf = SubclassedDataFrame("some_data", original_data)
    expected = SubclassedDataFrame("some_data", doubled_data)

    # Scalar multiplication and frame + frame must both give the doubled frame.
    for result in (sdf * 2, sdf + sdf):
        tm.assert_frame_equal(result, expected)
def test_enum_column_equality():
    """Compare columns that are labelled with an Enum member."""
    Cols = Enum("Cols", "col1 col2")
    label = Cols.col1

    left = DataFrame({label: [1, 2, 3]})
    right = DataFrame({label: [1, 2, 3]})

    result = left[label] == right[label]
    # The comparison result is expected to keep the Enum member as its name.
    expected = Series([True, True, True], name=label)
    tm.assert_series_equal(result, expected)
|