test_arithmetic.py 70 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090
  1. from collections import deque
  2. from datetime import (
  3. datetime,
  4. timezone,
  5. )
  6. from enum import Enum
  7. import functools
  8. import operator
  9. import re
  10. import numpy as np
  11. import pytest
  12. import pandas.util._test_decorators as td
  13. import pandas as pd
  14. from pandas import (
  15. DataFrame,
  16. Index,
  17. MultiIndex,
  18. Series,
  19. )
  20. import pandas._testing as tm
  21. import pandas.core.common as com
  22. from pandas.core.computation import expressions as expr
  23. from pandas.core.computation.expressions import (
  24. _MIN_ELEMENTS,
  25. NUMEXPR_INSTALLED,
  26. )
  27. from pandas.tests.frame.common import (
  28. _check_mixed_float,
  29. _check_mixed_int,
  30. )
  31. @pytest.fixture(autouse=True, params=[0, 1000000], ids=["numexpr", "python"])
  32. def switch_numexpr_min_elements(request):
  33. _MIN_ELEMENTS = expr._MIN_ELEMENTS
  34. expr._MIN_ELEMENTS = request.param
  35. yield request.param
  36. expr._MIN_ELEMENTS = _MIN_ELEMENTS
  37. class DummyElement:
  38. def __init__(self, value, dtype) -> None:
  39. self.value = value
  40. self.dtype = np.dtype(dtype)
  41. def __array__(self):
  42. return np.array(self.value, dtype=self.dtype)
  43. def __str__(self) -> str:
  44. return f"DummyElement({self.value}, {self.dtype})"
  45. def __repr__(self) -> str:
  46. return str(self)
  47. def astype(self, dtype, copy=False):
  48. self.dtype = dtype
  49. return self
  50. def view(self, dtype):
  51. return type(self)(self.value.view(dtype), dtype)
  52. def any(self, axis=None):
  53. return bool(self.value)
  54. # -------------------------------------------------------------------
  55. # Comparisons
  56. class TestFrameComparisons:
  57. # Specifically _not_ flex-comparisons
  58. def test_comparison_with_categorical_dtype(self):
  59. # GH#12564
  60. df = DataFrame({"A": ["foo", "bar", "baz"]})
  61. exp = DataFrame({"A": [True, False, False]})
  62. res = df == "foo"
  63. tm.assert_frame_equal(res, exp)
  64. # casting to categorical shouldn't affect the result
  65. df["A"] = df["A"].astype("category")
  66. res = df == "foo"
  67. tm.assert_frame_equal(res, exp)
  68. def test_frame_in_list(self):
  69. # GH#12689 this should raise at the DataFrame level, not blocks
  70. df = DataFrame(np.random.randn(6, 4), columns=list("ABCD"))
  71. msg = "The truth value of a DataFrame is ambiguous"
  72. with pytest.raises(ValueError, match=msg):
  73. df in [None]
  74. @pytest.mark.parametrize(
  75. "arg, arg2",
  76. [
  77. [
  78. {
  79. "a": np.random.randint(10, size=10),
  80. "b": pd.date_range("20010101", periods=10),
  81. },
  82. {
  83. "a": np.random.randint(10, size=10),
  84. "b": np.random.randint(10, size=10),
  85. },
  86. ],
  87. [
  88. {
  89. "a": np.random.randint(10, size=10),
  90. "b": np.random.randint(10, size=10),
  91. },
  92. {
  93. "a": np.random.randint(10, size=10),
  94. "b": pd.date_range("20010101", periods=10),
  95. },
  96. ],
  97. [
  98. {
  99. "a": pd.date_range("20010101", periods=10),
  100. "b": pd.date_range("20010101", periods=10),
  101. },
  102. {
  103. "a": np.random.randint(10, size=10),
  104. "b": np.random.randint(10, size=10),
  105. },
  106. ],
  107. [
  108. {
  109. "a": np.random.randint(10, size=10),
  110. "b": pd.date_range("20010101", periods=10),
  111. },
  112. {
  113. "a": pd.date_range("20010101", periods=10),
  114. "b": pd.date_range("20010101", periods=10),
  115. },
  116. ],
  117. ],
  118. )
  119. def test_comparison_invalid(self, arg, arg2):
  120. # GH4968
  121. # invalid date/int comparisons
  122. x = DataFrame(arg)
  123. y = DataFrame(arg2)
  124. # we expect the result to match Series comparisons for
  125. # == and !=, inequalities should raise
  126. result = x == y
  127. expected = DataFrame(
  128. {col: x[col] == y[col] for col in x.columns},
  129. index=x.index,
  130. columns=x.columns,
  131. )
  132. tm.assert_frame_equal(result, expected)
  133. result = x != y
  134. expected = DataFrame(
  135. {col: x[col] != y[col] for col in x.columns},
  136. index=x.index,
  137. columns=x.columns,
  138. )
  139. tm.assert_frame_equal(result, expected)
  140. msgs = [
  141. r"Invalid comparison between dtype=datetime64\[ns\] and ndarray",
  142. "invalid type promotion",
  143. (
  144. # npdev 1.20.0
  145. r"The DTypes <class 'numpy.dtype\[.*\]'> and "
  146. r"<class 'numpy.dtype\[.*\]'> do not have a common DType."
  147. ),
  148. ]
  149. msg = "|".join(msgs)
  150. with pytest.raises(TypeError, match=msg):
  151. x >= y
  152. with pytest.raises(TypeError, match=msg):
  153. x > y
  154. with pytest.raises(TypeError, match=msg):
  155. x < y
  156. with pytest.raises(TypeError, match=msg):
  157. x <= y
  158. @pytest.mark.parametrize(
  159. "left, right",
  160. [
  161. ("gt", "lt"),
  162. ("lt", "gt"),
  163. ("ge", "le"),
  164. ("le", "ge"),
  165. ("eq", "eq"),
  166. ("ne", "ne"),
  167. ],
  168. )
  169. def test_timestamp_compare(self, left, right):
  170. # make sure we can compare Timestamps on the right AND left hand side
  171. # GH#4982
  172. df = DataFrame(
  173. {
  174. "dates1": pd.date_range("20010101", periods=10),
  175. "dates2": pd.date_range("20010102", periods=10),
  176. "intcol": np.random.randint(1000000000, size=10),
  177. "floatcol": np.random.randn(10),
  178. "stringcol": list(tm.rands(10)),
  179. }
  180. )
  181. df.loc[np.random.rand(len(df)) > 0.5, "dates2"] = pd.NaT
  182. left_f = getattr(operator, left)
  183. right_f = getattr(operator, right)
  184. # no nats
  185. if left in ["eq", "ne"]:
  186. expected = left_f(df, pd.Timestamp("20010109"))
  187. result = right_f(pd.Timestamp("20010109"), df)
  188. tm.assert_frame_equal(result, expected)
  189. else:
  190. msg = (
  191. "'(<|>)=?' not supported between "
  192. "instances of 'numpy.ndarray' and 'Timestamp'"
  193. )
  194. with pytest.raises(TypeError, match=msg):
  195. left_f(df, pd.Timestamp("20010109"))
  196. with pytest.raises(TypeError, match=msg):
  197. right_f(pd.Timestamp("20010109"), df)
  198. # nats
  199. if left in ["eq", "ne"]:
  200. expected = left_f(df, pd.Timestamp("nat"))
  201. result = right_f(pd.Timestamp("nat"), df)
  202. tm.assert_frame_equal(result, expected)
  203. else:
  204. msg = (
  205. "'(<|>)=?' not supported between "
  206. "instances of 'numpy.ndarray' and 'NaTType'"
  207. )
  208. with pytest.raises(TypeError, match=msg):
  209. left_f(df, pd.Timestamp("nat"))
  210. with pytest.raises(TypeError, match=msg):
  211. right_f(pd.Timestamp("nat"), df)
  212. def test_mixed_comparison(self):
  213. # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False,
  214. # not raise TypeError
  215. # (this appears to be fixed before GH#22163, not sure when)
  216. df = DataFrame([["1989-08-01", 1], ["1989-08-01", 2]])
  217. other = DataFrame([["a", "b"], ["c", "d"]])
  218. result = df == other
  219. assert not result.any().any()
  220. result = df != other
  221. assert result.all().all()
  222. def test_df_boolean_comparison_error(self):
  223. # GH#4576, GH#22880
  224. # comparing DataFrame against list/tuple with len(obj) matching
  225. # len(df.columns) is supported as of GH#22800
  226. df = DataFrame(np.arange(6).reshape((3, 2)))
  227. expected = DataFrame([[False, False], [True, False], [False, False]])
  228. result = df == (2, 2)
  229. tm.assert_frame_equal(result, expected)
  230. result = df == [2, 2]
  231. tm.assert_frame_equal(result, expected)
  232. def test_df_float_none_comparison(self):
  233. df = DataFrame(np.random.randn(8, 3), index=range(8), columns=["A", "B", "C"])
  234. result = df.__eq__(None)
  235. assert not result.any().any()
  236. def test_df_string_comparison(self):
  237. df = DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
  238. mask_a = df.a > 1
  239. tm.assert_frame_equal(df[mask_a], df.loc[1:1, :])
  240. tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :])
  241. mask_b = df.b == "foo"
  242. tm.assert_frame_equal(df[mask_b], df.loc[0:0, :])
  243. tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :])
  244. class TestFrameFlexComparisons:
  245. # TODO: test_bool_flex_frame needs a better name
  246. @pytest.mark.parametrize("op", ["eq", "ne", "gt", "lt", "ge", "le"])
  247. def test_bool_flex_frame(self, op):
  248. data = np.random.randn(5, 3)
  249. other_data = np.random.randn(5, 3)
  250. df = DataFrame(data)
  251. other = DataFrame(other_data)
  252. ndim_5 = np.ones(df.shape + (1, 3))
  253. # DataFrame
  254. assert df.eq(df).values.all()
  255. assert not df.ne(df).values.any()
  256. f = getattr(df, op)
  257. o = getattr(operator, op)
  258. # No NAs
  259. tm.assert_frame_equal(f(other), o(df, other))
  260. # Unaligned
  261. part_o = other.loc[3:, 1:].copy()
  262. rs = f(part_o)
  263. xp = o(df, part_o.reindex(index=df.index, columns=df.columns))
  264. tm.assert_frame_equal(rs, xp)
  265. # ndarray
  266. tm.assert_frame_equal(f(other.values), o(df, other.values))
  267. # scalar
  268. tm.assert_frame_equal(f(0), o(df, 0))
  269. # NAs
  270. msg = "Unable to coerce to Series/DataFrame"
  271. tm.assert_frame_equal(f(np.nan), o(df, np.nan))
  272. with pytest.raises(ValueError, match=msg):
  273. f(ndim_5)
  274. @pytest.mark.parametrize("box", [np.array, Series])
  275. def test_bool_flex_series(self, box):
  276. # Series
  277. # list/tuple
  278. data = np.random.randn(5, 3)
  279. df = DataFrame(data)
  280. idx_ser = box(np.random.randn(5))
  281. col_ser = box(np.random.randn(3))
  282. idx_eq = df.eq(idx_ser, axis=0)
  283. col_eq = df.eq(col_ser)
  284. idx_ne = df.ne(idx_ser, axis=0)
  285. col_ne = df.ne(col_ser)
  286. tm.assert_frame_equal(col_eq, df == Series(col_ser))
  287. tm.assert_frame_equal(col_eq, -col_ne)
  288. tm.assert_frame_equal(idx_eq, -idx_ne)
  289. tm.assert_frame_equal(idx_eq, df.T.eq(idx_ser).T)
  290. tm.assert_frame_equal(col_eq, df.eq(list(col_ser)))
  291. tm.assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0))
  292. tm.assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0))
  293. idx_gt = df.gt(idx_ser, axis=0)
  294. col_gt = df.gt(col_ser)
  295. idx_le = df.le(idx_ser, axis=0)
  296. col_le = df.le(col_ser)
  297. tm.assert_frame_equal(col_gt, df > Series(col_ser))
  298. tm.assert_frame_equal(col_gt, -col_le)
  299. tm.assert_frame_equal(idx_gt, -idx_le)
  300. tm.assert_frame_equal(idx_gt, df.T.gt(idx_ser).T)
  301. idx_ge = df.ge(idx_ser, axis=0)
  302. col_ge = df.ge(col_ser)
  303. idx_lt = df.lt(idx_ser, axis=0)
  304. col_lt = df.lt(col_ser)
  305. tm.assert_frame_equal(col_ge, df >= Series(col_ser))
  306. tm.assert_frame_equal(col_ge, -col_lt)
  307. tm.assert_frame_equal(idx_ge, -idx_lt)
  308. tm.assert_frame_equal(idx_ge, df.T.ge(idx_ser).T)
  309. idx_ser = Series(np.random.randn(5))
  310. col_ser = Series(np.random.randn(3))
  311. def test_bool_flex_frame_na(self):
  312. df = DataFrame(np.random.randn(5, 3))
  313. # NA
  314. df.loc[0, 0] = np.nan
  315. rs = df.eq(df)
  316. assert not rs.loc[0, 0]
  317. rs = df.ne(df)
  318. assert rs.loc[0, 0]
  319. rs = df.gt(df)
  320. assert not rs.loc[0, 0]
  321. rs = df.lt(df)
  322. assert not rs.loc[0, 0]
  323. rs = df.ge(df)
  324. assert not rs.loc[0, 0]
  325. rs = df.le(df)
  326. assert not rs.loc[0, 0]
  327. def test_bool_flex_frame_complex_dtype(self):
  328. # complex
  329. arr = np.array([np.nan, 1, 6, np.nan])
  330. arr2 = np.array([2j, np.nan, 7, None])
  331. df = DataFrame({"a": arr})
  332. df2 = DataFrame({"a": arr2})
  333. msg = "|".join(
  334. [
  335. "'>' not supported between instances of '.*' and 'complex'",
  336. r"unorderable types: .*complex\(\)", # PY35
  337. ]
  338. )
  339. with pytest.raises(TypeError, match=msg):
  340. # inequalities are not well-defined for complex numbers
  341. df.gt(df2)
  342. with pytest.raises(TypeError, match=msg):
  343. # regression test that we get the same behavior for Series
  344. df["a"].gt(df2["a"])
  345. with pytest.raises(TypeError, match=msg):
  346. # Check that we match numpy behavior here
  347. df.values > df2.values
  348. rs = df.ne(df2)
  349. assert rs.values.all()
  350. arr3 = np.array([2j, np.nan, None])
  351. df3 = DataFrame({"a": arr3})
  352. with pytest.raises(TypeError, match=msg):
  353. # inequalities are not well-defined for complex numbers
  354. df3.gt(2j)
  355. with pytest.raises(TypeError, match=msg):
  356. # regression test that we get the same behavior for Series
  357. df3["a"].gt(2j)
  358. with pytest.raises(TypeError, match=msg):
  359. # Check that we match numpy behavior here
  360. df3.values > 2j
  361. def test_bool_flex_frame_object_dtype(self):
  362. # corner, dtype=object
  363. df1 = DataFrame({"col": ["foo", np.nan, "bar"]})
  364. df2 = DataFrame({"col": ["foo", datetime.now(), "bar"]})
  365. result = df1.ne(df2)
  366. exp = DataFrame({"col": [False, True, False]})
  367. tm.assert_frame_equal(result, exp)
  368. def test_flex_comparison_nat(self):
  369. # GH 15697, GH 22163 df.eq(pd.NaT) should behave like df == pd.NaT,
  370. # and _definitely_ not be NaN
  371. df = DataFrame([pd.NaT])
  372. result = df == pd.NaT
  373. # result.iloc[0, 0] is a np.bool_ object
  374. assert result.iloc[0, 0].item() is False
  375. result = df.eq(pd.NaT)
  376. assert result.iloc[0, 0].item() is False
  377. result = df != pd.NaT
  378. assert result.iloc[0, 0].item() is True
  379. result = df.ne(pd.NaT)
  380. assert result.iloc[0, 0].item() is True
  381. @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
  382. def test_df_flex_cmp_constant_return_types(self, opname):
  383. # GH 15077, non-empty DataFrame
  384. df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
  385. const = 2
  386. result = getattr(df, opname)(const).dtypes.value_counts()
  387. tm.assert_series_equal(
  388. result, Series([2], index=[np.dtype(bool)], name="count")
  389. )
  390. @pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
  391. def test_df_flex_cmp_constant_return_types_empty(self, opname):
  392. # GH 15077 empty DataFrame
  393. df = DataFrame({"x": [1, 2, 3], "y": [1.0, 2.0, 3.0]})
  394. const = 2
  395. empty = df.iloc[:0]
  396. result = getattr(empty, opname)(const).dtypes.value_counts()
  397. tm.assert_series_equal(
  398. result, Series([2], index=[np.dtype(bool)], name="count")
  399. )
  400. def test_df_flex_cmp_ea_dtype_with_ndarray_series(self):
  401. ii = pd.IntervalIndex.from_breaks([1, 2, 3])
  402. df = DataFrame({"A": ii, "B": ii})
  403. ser = Series([0, 0])
  404. res = df.eq(ser, axis=0)
  405. expected = DataFrame({"A": [False, False], "B": [False, False]})
  406. tm.assert_frame_equal(res, expected)
  407. ser2 = Series([1, 2], index=["A", "B"])
  408. res2 = df.eq(ser2, axis=1)
  409. tm.assert_frame_equal(res2, expected)
  410. # -------------------------------------------------------------------
  411. # Arithmetic
  412. class TestFrameFlexArithmetic:
  413. def test_floordiv_axis0(self):
  414. # make sure we df.floordiv(ser, axis=0) matches column-wise result
  415. arr = np.arange(3)
  416. ser = Series(arr)
  417. df = DataFrame({"A": ser, "B": ser})
  418. result = df.floordiv(ser, axis=0)
  419. expected = DataFrame({col: df[col] // ser for col in df.columns})
  420. tm.assert_frame_equal(result, expected)
  421. result2 = df.floordiv(ser.values, axis=0)
  422. tm.assert_frame_equal(result2, expected)
  423. @pytest.mark.skipif(not NUMEXPR_INSTALLED, reason="numexpr not installed")
  424. @pytest.mark.parametrize("opname", ["floordiv", "pow"])
  425. def test_floordiv_axis0_numexpr_path(self, opname):
  426. # case that goes through numexpr and has to fall back to masked_arith_op
  427. op = getattr(operator, opname)
  428. arr = np.arange(_MIN_ELEMENTS + 100).reshape(_MIN_ELEMENTS // 100 + 1, -1) * 100
  429. df = DataFrame(arr)
  430. df["C"] = 1.0
  431. ser = df[0]
  432. result = getattr(df, opname)(ser, axis=0)
  433. expected = DataFrame({col: op(df[col], ser) for col in df.columns})
  434. tm.assert_frame_equal(result, expected)
  435. result2 = getattr(df, opname)(ser.values, axis=0)
  436. tm.assert_frame_equal(result2, expected)
  437. def test_df_add_td64_columnwise(self):
  438. # GH 22534 Check that column-wise addition broadcasts correctly
  439. dti = pd.date_range("2016-01-01", periods=10)
  440. tdi = pd.timedelta_range("1", periods=10)
  441. tser = Series(tdi)
  442. df = DataFrame({0: dti, 1: tdi})
  443. result = df.add(tser, axis=0)
  444. expected = DataFrame({0: dti + tdi, 1: tdi + tdi})
  445. tm.assert_frame_equal(result, expected)
  446. def test_df_add_flex_filled_mixed_dtypes(self):
  447. # GH 19611
  448. dti = pd.date_range("2016-01-01", periods=3)
  449. ser = Series(["1 Day", "NaT", "2 Days"], dtype="timedelta64[ns]")
  450. df = DataFrame({"A": dti, "B": ser})
  451. other = DataFrame({"A": ser, "B": ser})
  452. fill = pd.Timedelta(days=1).to_timedelta64()
  453. result = df.add(other, fill_value=fill)
  454. expected = DataFrame(
  455. {
  456. "A": Series(
  457. ["2016-01-02", "2016-01-03", "2016-01-05"], dtype="datetime64[ns]"
  458. ),
  459. "B": ser * 2,
  460. }
  461. )
  462. tm.assert_frame_equal(result, expected)
  463. def test_arith_flex_frame(
  464. self, all_arithmetic_operators, float_frame, mixed_float_frame
  465. ):
  466. # one instance of parametrized fixture
  467. op = all_arithmetic_operators
  468. def f(x, y):
  469. # r-versions not in operator-stdlib; get op without "r" and invert
  470. if op.startswith("__r"):
  471. return getattr(operator, op.replace("__r", "__"))(y, x)
  472. return getattr(operator, op)(x, y)
  473. result = getattr(float_frame, op)(2 * float_frame)
  474. expected = f(float_frame, 2 * float_frame)
  475. tm.assert_frame_equal(result, expected)
  476. # vs mix float
  477. result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
  478. expected = f(mixed_float_frame, 2 * mixed_float_frame)
  479. tm.assert_frame_equal(result, expected)
  480. _check_mixed_float(result, dtype={"C": None})
  481. @pytest.mark.parametrize("op", ["__add__", "__sub__", "__mul__"])
  482. def test_arith_flex_frame_mixed(
  483. self,
  484. op,
  485. int_frame,
  486. mixed_int_frame,
  487. mixed_float_frame,
  488. switch_numexpr_min_elements,
  489. ):
  490. f = getattr(operator, op)
  491. # vs mix int
  492. result = getattr(mixed_int_frame, op)(2 + mixed_int_frame)
  493. expected = f(mixed_int_frame, 2 + mixed_int_frame)
  494. # no overflow in the uint
  495. dtype = None
  496. if op in ["__sub__"]:
  497. dtype = {"B": "uint64", "C": None}
  498. elif op in ["__add__", "__mul__"]:
  499. dtype = {"C": None}
  500. if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
  501. # when using numexpr, the casting rules are slightly different:
  502. # in the `2 + mixed_int_frame` operation, int32 column becomes
  503. # and int64 column (not preserving dtype in operation with Python
  504. # scalar), and then the int32/int64 combo results in int64 result
  505. dtype["A"] = (2 + mixed_int_frame)["A"].dtype
  506. tm.assert_frame_equal(result, expected)
  507. _check_mixed_int(result, dtype=dtype)
  508. # vs mix float
  509. result = getattr(mixed_float_frame, op)(2 * mixed_float_frame)
  510. expected = f(mixed_float_frame, 2 * mixed_float_frame)
  511. tm.assert_frame_equal(result, expected)
  512. _check_mixed_float(result, dtype={"C": None})
  513. # vs plain int
  514. result = getattr(int_frame, op)(2 * int_frame)
  515. expected = f(int_frame, 2 * int_frame)
  516. tm.assert_frame_equal(result, expected)
  517. @pytest.mark.parametrize("dim", range(3, 6))
  518. def test_arith_flex_frame_raise(self, all_arithmetic_operators, float_frame, dim):
  519. # one instance of parametrized fixture
  520. op = all_arithmetic_operators
  521. # Check that arrays with dim >= 3 raise
  522. arr = np.ones((1,) * dim)
  523. msg = "Unable to coerce to Series/DataFrame"
  524. with pytest.raises(ValueError, match=msg):
  525. getattr(float_frame, op)(arr)
  526. def test_arith_flex_frame_corner(self, float_frame):
  527. const_add = float_frame.add(1)
  528. tm.assert_frame_equal(const_add, float_frame + 1)
  529. # corner cases
  530. result = float_frame.add(float_frame[:0])
  531. tm.assert_frame_equal(result, float_frame * np.nan)
  532. result = float_frame[:0].add(float_frame)
  533. tm.assert_frame_equal(result, float_frame * np.nan)
  534. with pytest.raises(NotImplementedError, match="fill_value"):
  535. float_frame.add(float_frame.iloc[0], fill_value=3)
  536. with pytest.raises(NotImplementedError, match="fill_value"):
  537. float_frame.add(float_frame.iloc[0], axis="index", fill_value=3)
  538. @pytest.mark.parametrize("op", ["add", "sub", "mul", "mod"])
  539. def test_arith_flex_series_ops(self, simple_frame, op):
  540. # after arithmetic refactor, add truediv here
  541. df = simple_frame
  542. row = df.xs("a")
  543. col = df["two"]
  544. f = getattr(df, op)
  545. op = getattr(operator, op)
  546. tm.assert_frame_equal(f(row), op(df, row))
  547. tm.assert_frame_equal(f(col, axis=0), op(df.T, col).T)
  548. def test_arith_flex_series(self, simple_frame):
  549. df = simple_frame
  550. row = df.xs("a")
  551. col = df["two"]
  552. # special case for some reason
  553. tm.assert_frame_equal(df.add(row, axis=None), df + row)
  554. # cases which will be refactored after big arithmetic refactor
  555. tm.assert_frame_equal(df.div(row), df / row)
  556. tm.assert_frame_equal(df.div(col, axis=0), (df.T / col).T)
  557. @pytest.mark.parametrize("dtype", ["int64", "float64"])
  558. def test_arith_flex_series_broadcasting(self, dtype):
  559. # broadcasting issue in GH 7325
  560. df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype=dtype)
  561. expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]])
  562. result = df.div(df[0], axis="index")
  563. tm.assert_frame_equal(result, expected)
  564. def test_arith_flex_zero_len_raises(self):
  565. # GH 19522 passing fill_value to frame flex arith methods should
  566. # raise even in the zero-length special cases
  567. ser_len0 = Series([], dtype=object)
  568. df_len0 = DataFrame(columns=["A", "B"])
  569. df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
  570. with pytest.raises(NotImplementedError, match="fill_value"):
  571. df.add(ser_len0, fill_value="E")
  572. with pytest.raises(NotImplementedError, match="fill_value"):
  573. df_len0.sub(df["A"], axis=None, fill_value=3)
  574. def test_flex_add_scalar_fill_value(self):
  575. # GH#12723
  576. dat = np.array([0, 1, np.nan, 3, 4, 5], dtype="float")
  577. df = DataFrame({"foo": dat}, index=range(6))
  578. exp = df.fillna(0).add(2)
  579. res = df.add(2, fill_value=0)
  580. tm.assert_frame_equal(res, exp)
  581. def test_sub_alignment_with_duplicate_index(self):
  582. # GH#5185 dup aligning operations should work
  583. df1 = DataFrame([1, 2, 3, 4, 5], index=[1, 2, 1, 2, 3])
  584. df2 = DataFrame([1, 2, 3], index=[1, 2, 3])
  585. expected = DataFrame([0, 2, 0, 2, 2], index=[1, 1, 2, 2, 3])
  586. result = df1.sub(df2)
  587. tm.assert_frame_equal(result, expected)
  588. @pytest.mark.parametrize("op", ["__add__", "__mul__", "__sub__", "__truediv__"])
  589. def test_arithmetic_with_duplicate_columns(self, op):
  590. # operations
  591. df = DataFrame({"A": np.arange(10), "B": np.random.rand(10)})
  592. expected = getattr(df, op)(df)
  593. expected.columns = ["A", "A"]
  594. df.columns = ["A", "A"]
  595. result = getattr(df, op)(df)
  596. tm.assert_frame_equal(result, expected)
  597. str(result)
  598. result.dtypes
  599. @pytest.mark.parametrize("level", [0, None])
  600. def test_broadcast_multiindex(self, level):
  601. # GH34388
  602. df1 = DataFrame({"A": [0, 1, 2], "B": [1, 2, 3]})
  603. df1.columns = df1.columns.set_names("L1")
  604. df2 = DataFrame({("A", "C"): [0, 0, 0], ("A", "D"): [0, 0, 0]})
  605. df2.columns = df2.columns.set_names(["L1", "L2"])
  606. result = df1.add(df2, level=level)
  607. expected = DataFrame({("A", "C"): [0, 1, 2], ("A", "D"): [0, 1, 2]})
  608. expected.columns = expected.columns.set_names(["L1", "L2"])
  609. tm.assert_frame_equal(result, expected)
  610. def test_frame_multiindex_operations(self):
  611. # GH 43321
  612. df = DataFrame(
  613. {2010: [1, 2, 3], 2020: [3, 4, 5]},
  614. index=MultiIndex.from_product(
  615. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  616. ),
  617. )
  618. series = Series(
  619. [0.4],
  620. index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
  621. )
  622. expected = DataFrame(
  623. {2010: [1.4, 2.4, 3.4], 2020: [3.4, 4.4, 5.4]},
  624. index=MultiIndex.from_product(
  625. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  626. ),
  627. )
  628. result = df.add(series, axis=0)
  629. tm.assert_frame_equal(result, expected)
  630. def test_frame_multiindex_operations_series_index_to_frame_index(self):
  631. # GH 43321
  632. df = DataFrame(
  633. {2010: [1], 2020: [3]},
  634. index=MultiIndex.from_product([["a"], ["b"]], names=["scen", "mod"]),
  635. )
  636. series = Series(
  637. [10.0, 20.0, 30.0],
  638. index=MultiIndex.from_product(
  639. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  640. ),
  641. )
  642. expected = DataFrame(
  643. {2010: [11.0, 21, 31.0], 2020: [13.0, 23.0, 33.0]},
  644. index=MultiIndex.from_product(
  645. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  646. ),
  647. )
  648. result = df.add(series, axis=0)
  649. tm.assert_frame_equal(result, expected)
  650. def test_frame_multiindex_operations_no_align(self):
  651. df = DataFrame(
  652. {2010: [1, 2, 3], 2020: [3, 4, 5]},
  653. index=MultiIndex.from_product(
  654. [["a"], ["b"], [0, 1, 2]], names=["scen", "mod", "id"]
  655. ),
  656. )
  657. series = Series(
  658. [0.4],
  659. index=MultiIndex.from_product([["c"], ["a"]], names=["mod", "scen"]),
  660. )
  661. expected = DataFrame(
  662. {2010: np.nan, 2020: np.nan},
  663. index=MultiIndex.from_tuples(
  664. [
  665. ("a", "b", 0),
  666. ("a", "b", 1),
  667. ("a", "b", 2),
  668. ("a", "c", np.nan),
  669. ],
  670. names=["scen", "mod", "id"],
  671. ),
  672. )
  673. result = df.add(series, axis=0)
  674. tm.assert_frame_equal(result, expected)
  675. def test_frame_multiindex_operations_part_align(self):
  676. df = DataFrame(
  677. {2010: [1, 2, 3], 2020: [3, 4, 5]},
  678. index=MultiIndex.from_tuples(
  679. [
  680. ("a", "b", 0),
  681. ("a", "b", 1),
  682. ("a", "c", 2),
  683. ],
  684. names=["scen", "mod", "id"],
  685. ),
  686. )
  687. series = Series(
  688. [0.4],
  689. index=MultiIndex.from_product([["b"], ["a"]], names=["mod", "scen"]),
  690. )
  691. expected = DataFrame(
  692. {2010: [1.4, 2.4, np.nan], 2020: [3.4, 4.4, np.nan]},
  693. index=MultiIndex.from_tuples(
  694. [
  695. ("a", "b", 0),
  696. ("a", "b", 1),
  697. ("a", "c", 2),
  698. ],
  699. names=["scen", "mod", "id"],
  700. ),
  701. )
  702. result = df.add(series, axis=0)
  703. tm.assert_frame_equal(result, expected)
  704. class TestFrameArithmetic:
  705. def test_td64_op_nat_casting(self):
  706. # Make sure we don't accidentally treat timedelta64(NaT) as datetime64
  707. # when calling dispatch_to_series in DataFrame arithmetic
  708. ser = Series(["NaT", "NaT"], dtype="timedelta64[ns]")
  709. df = DataFrame([[1, 2], [3, 4]])
  710. result = df * ser
  711. expected = DataFrame({0: ser, 1: ser})
  712. tm.assert_frame_equal(result, expected)
  713. def test_df_add_2d_array_rowlike_broadcasts(self):
  714. # GH#23000
  715. arr = np.arange(6).reshape(3, 2)
  716. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  717. rowlike = arr[[1], :] # shape --> (1, ncols)
  718. assert rowlike.shape == (1, df.shape[1])
  719. expected = DataFrame(
  720. [[2, 4], [4, 6], [6, 8]],
  721. columns=df.columns,
  722. index=df.index,
  723. # specify dtype explicitly to avoid failing
  724. # on 32bit builds
  725. dtype=arr.dtype,
  726. )
  727. result = df + rowlike
  728. tm.assert_frame_equal(result, expected)
  729. result = rowlike + df
  730. tm.assert_frame_equal(result, expected)
  731. def test_df_add_2d_array_collike_broadcasts(self):
  732. # GH#23000
  733. arr = np.arange(6).reshape(3, 2)
  734. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  735. collike = arr[:, [1]] # shape --> (nrows, 1)
  736. assert collike.shape == (df.shape[0], 1)
  737. expected = DataFrame(
  738. [[1, 2], [5, 6], [9, 10]],
  739. columns=df.columns,
  740. index=df.index,
  741. # specify dtype explicitly to avoid failing
  742. # on 32bit builds
  743. dtype=arr.dtype,
  744. )
  745. result = df + collike
  746. tm.assert_frame_equal(result, expected)
  747. result = collike + df
  748. tm.assert_frame_equal(result, expected)
  749. def test_df_arith_2d_array_rowlike_broadcasts(
  750. self, request, all_arithmetic_operators, using_array_manager
  751. ):
  752. # GH#23000
  753. opname = all_arithmetic_operators
  754. if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
  755. # TODO(ArrayManager) decide on dtypes
  756. td.mark_array_manager_not_yet_implemented(request)
  757. arr = np.arange(6).reshape(3, 2)
  758. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  759. rowlike = arr[[1], :] # shape --> (1, ncols)
  760. assert rowlike.shape == (1, df.shape[1])
  761. exvals = [
  762. getattr(df.loc["A"], opname)(rowlike.squeeze()),
  763. getattr(df.loc["B"], opname)(rowlike.squeeze()),
  764. getattr(df.loc["C"], opname)(rowlike.squeeze()),
  765. ]
  766. expected = DataFrame(exvals, columns=df.columns, index=df.index)
  767. result = getattr(df, opname)(rowlike)
  768. tm.assert_frame_equal(result, expected)
  769. def test_df_arith_2d_array_collike_broadcasts(
  770. self, request, all_arithmetic_operators, using_array_manager
  771. ):
  772. # GH#23000
  773. opname = all_arithmetic_operators
  774. if using_array_manager and opname in ("__rmod__", "__rfloordiv__"):
  775. # TODO(ArrayManager) decide on dtypes
  776. td.mark_array_manager_not_yet_implemented(request)
  777. arr = np.arange(6).reshape(3, 2)
  778. df = DataFrame(arr, columns=[True, False], index=["A", "B", "C"])
  779. collike = arr[:, [1]] # shape --> (nrows, 1)
  780. assert collike.shape == (df.shape[0], 1)
  781. exvals = {
  782. True: getattr(df[True], opname)(collike.squeeze()),
  783. False: getattr(df[False], opname)(collike.squeeze()),
  784. }
  785. dtype = None
  786. if opname in ["__rmod__", "__rfloordiv__"]:
  787. # Series ops may return mixed int/float dtypes in cases where
  788. # DataFrame op will return all-float. So we upcast `expected`
  789. dtype = np.common_type(*(x.values for x in exvals.values()))
  790. expected = DataFrame(exvals, columns=df.columns, index=df.index, dtype=dtype)
  791. result = getattr(df, opname)(collike)
  792. tm.assert_frame_equal(result, expected)
  793. def test_df_bool_mul_int(self):
  794. # GH 22047, GH 22163 multiplication by 1 should result in int dtype,
  795. # not object dtype
  796. df = DataFrame([[False, True], [False, False]])
  797. result = df * 1
  798. # On appveyor this comes back as np.int32 instead of np.int64,
  799. # so we check dtype.kind instead of just dtype
  800. kinds = result.dtypes.apply(lambda x: x.kind)
  801. assert (kinds == "i").all()
  802. result = 1 * df
  803. kinds = result.dtypes.apply(lambda x: x.kind)
  804. assert (kinds == "i").all()
  805. def test_arith_mixed(self):
  806. left = DataFrame({"A": ["a", "b", "c"], "B": [1, 2, 3]})
  807. result = left + left
  808. expected = DataFrame({"A": ["aa", "bb", "cc"], "B": [2, 4, 6]})
  809. tm.assert_frame_equal(result, expected)
  810. @pytest.mark.parametrize("col", ["A", "B"])
  811. def test_arith_getitem_commute(self, all_arithmetic_functions, col):
  812. df = DataFrame({"A": [1.1, 3.3], "B": [2.5, -3.9]})
  813. result = all_arithmetic_functions(df, 1)[col]
  814. expected = all_arithmetic_functions(df[col], 1)
  815. tm.assert_series_equal(result, expected)
  816. @pytest.mark.parametrize(
  817. "values", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3), deque([1, 2])]
  818. )
  819. def test_arith_alignment_non_pandas_object(self, values):
  820. # GH#17901
  821. df = DataFrame({"A": [1, 1], "B": [1, 1]})
  822. expected = DataFrame({"A": [2, 2], "B": [3, 3]})
  823. result = df + values
  824. tm.assert_frame_equal(result, expected)
  825. def test_arith_non_pandas_object(self):
  826. df = DataFrame(
  827. np.arange(1, 10, dtype="f8").reshape(3, 3),
  828. columns=["one", "two", "three"],
  829. index=["a", "b", "c"],
  830. )
  831. val1 = df.xs("a").values
  832. added = DataFrame(df.values + val1, index=df.index, columns=df.columns)
  833. tm.assert_frame_equal(df + val1, added)
  834. added = DataFrame((df.values.T + val1).T, index=df.index, columns=df.columns)
  835. tm.assert_frame_equal(df.add(val1, axis=0), added)
  836. val2 = list(df["two"])
  837. added = DataFrame(df.values + val2, index=df.index, columns=df.columns)
  838. tm.assert_frame_equal(df + val2, added)
  839. added = DataFrame((df.values.T + val2).T, index=df.index, columns=df.columns)
  840. tm.assert_frame_equal(df.add(val2, axis="index"), added)
  841. val3 = np.random.rand(*df.shape)
  842. added = DataFrame(df.values + val3, index=df.index, columns=df.columns)
  843. tm.assert_frame_equal(df.add(val3), added)
  844. def test_operations_with_interval_categories_index(self, all_arithmetic_operators):
  845. # GH#27415
  846. op = all_arithmetic_operators
  847. ind = pd.CategoricalIndex(pd.interval_range(start=0.0, end=2.0))
  848. data = [1, 2]
  849. df = DataFrame([data], columns=ind)
  850. num = 10
  851. result = getattr(df, op)(num)
  852. expected = DataFrame([[getattr(n, op)(num) for n in data]], columns=ind)
  853. tm.assert_frame_equal(result, expected)
  854. def test_frame_with_frame_reindex(self):
  855. # GH#31623
  856. df = DataFrame(
  857. {
  858. "foo": [pd.Timestamp("2019"), pd.Timestamp("2020")],
  859. "bar": [pd.Timestamp("2018"), pd.Timestamp("2021")],
  860. },
  861. columns=["foo", "bar"],
  862. )
  863. df2 = df[["foo"]]
  864. result = df - df2
  865. expected = DataFrame(
  866. {"foo": [pd.Timedelta(0), pd.Timedelta(0)], "bar": [np.nan, np.nan]},
  867. columns=["bar", "foo"],
  868. )
  869. tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "value, dtype",
    [
        (1, "i8"),
        (1.0, "f8"),
        (2**63, "f8"),
        (1j, "complex128"),
        (2**63, "complex128"),
        (True, "bool"),
        (np.timedelta64(20, "ns"), "<m8[ns]"),
        (np.datetime64(20, "ns"), "<M8[ns]"),
    ],
)
@pytest.mark.parametrize(
    "op",
    [
        operator.add,
        operator.sub,
        operator.mul,
        operator.truediv,
        operator.mod,
        operator.pow,
    ],
    ids=lambda x: x.__name__,
)
def test_binop_other(self, op, value, dtype, switch_numexpr_min_elements, request):
    # Applies `op` between a single-column frame and a DummyElement's value.
    # Each (op, dtype) pair falls into one of three cases:
    #   * `invalid`: the op must raise TypeError,
    #   * `skip`: bool add/mul only check for a warning, bool truediv/pow
    #     must raise NotImplementedError,
    #   * anything else: result dtypes must match those of `op(df, value)`.
    # NOTE(review): DummyElement is defined outside this block; presumably it
    # wraps `value` with the given dtype -- confirm against its definition.
    skip = {
        (operator.truediv, "bool"),
        (operator.pow, "bool"),
        (operator.add, "bool"),
        (operator.mul, "bool"),
    }

    elem = DummyElement(value, dtype)
    df = DataFrame({"A": [elem.value, elem.value]}, dtype=elem.dtype)

    invalid = {
        (operator.pow, "<M8[ns]"),
        (operator.mod, "<M8[ns]"),
        (operator.truediv, "<M8[ns]"),
        (operator.mul, "<M8[ns]"),
        (operator.add, "<M8[ns]"),
        (operator.pow, "<m8[ns]"),
        (operator.mul, "<m8[ns]"),
        (operator.sub, "bool"),
        (operator.mod, "complex128"),
    }

    if (op, dtype) in invalid:
        # `warn` stays None unless the bool-subtract + numexpr case below
        # switches it to UserWarning.
        warn = None
        if (dtype == "<M8[ns]" and op == operator.add) or (
            dtype == "<m8[ns]" and op == operator.mul
        ):
            # msg=None: only the exception type is checked for these two pairs
            msg = None
        elif dtype == "complex128":
            msg = "ufunc 'remainder' not supported for the input types"
        elif op is operator.sub:
            msg = "numpy boolean subtract, the `-` operator, is "
            if (
                dtype == "bool"
                and expr.USE_NUMEXPR
                and switch_numexpr_min_elements == 0
            ):
                warn = UserWarning  # "evaluating in Python space because ..."
        else:
            msg = (
                f"cannot perform __{op.__name__}__ with this "
                "index type: (DatetimeArray|TimedeltaArray)"
            )

        with pytest.raises(TypeError, match=msg):
            with tm.assert_produces_warning(warn):
                op(df, elem.value)

    elif (op, dtype) in skip:
        if op in [operator.add, operator.mul]:
            # bool add/mul does not raise; only the warning is checked
            if expr.USE_NUMEXPR and switch_numexpr_min_elements == 0:
                # "evaluating in Python space because ..."
                warn = UserWarning
            else:
                warn = None
            with tm.assert_produces_warning(warn):
                op(df, elem.value)

        else:
            # remaining `skip` pairs: bool truediv and bool pow
            msg = "operator '.*' not implemented for .* dtypes"
            with pytest.raises(NotImplementedError, match=msg):
                op(df, elem.value)

    else:
        # Valid pair: no warning expected, and the wrapped element must give
        # the same result dtypes as the raw value.
        with tm.assert_produces_warning(None):
            result = op(df, elem.value).dtypes
            expected = op(df, value).dtypes
        tm.assert_series_equal(result, expected)
  957. def test_arithmetic_midx_cols_different_dtypes(self):
  958. # GH#49769
  959. midx = MultiIndex.from_arrays([Series([1, 2]), Series([3, 4])])
  960. midx2 = MultiIndex.from_arrays([Series([1, 2], dtype="Int8"), Series([3, 4])])
  961. left = DataFrame([[1, 2], [3, 4]], columns=midx)
  962. right = DataFrame([[1, 2], [3, 4]], columns=midx2)
  963. result = left - right
  964. expected = DataFrame([[0, 0], [0, 0]], columns=midx)
  965. tm.assert_frame_equal(result, expected)
  966. def test_arithmetic_midx_cols_different_dtypes_different_order(self):
  967. # GH#49769
  968. midx = MultiIndex.from_arrays([Series([1, 2]), Series([3, 4])])
  969. midx2 = MultiIndex.from_arrays([Series([2, 1], dtype="Int8"), Series([4, 3])])
  970. left = DataFrame([[1, 2], [3, 4]], columns=midx)
  971. right = DataFrame([[1, 2], [3, 4]], columns=midx2)
  972. result = left - right
  973. expected = DataFrame([[-1, 1], [-1, 1]], columns=midx)
  974. tm.assert_frame_equal(result, expected)
  975. def test_frame_with_zero_len_series_corner_cases():
  976. # GH#28600
  977. # easy all-float case
  978. df = DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "B"])
  979. ser = Series(dtype=np.float64)
  980. result = df + ser
  981. expected = DataFrame(df.values * np.nan, columns=df.columns)
  982. tm.assert_frame_equal(result, expected)
  983. with pytest.raises(ValueError, match="not aligned"):
  984. # Automatic alignment for comparisons deprecated GH#36795, enforced 2.0
  985. df == ser
  986. # non-float case should not raise TypeError on comparison
  987. df2 = DataFrame(df.values.view("M8[ns]"), columns=df.columns)
  988. with pytest.raises(ValueError, match="not aligned"):
  989. # Automatic alignment for comparisons deprecated
  990. df2 == ser
  991. def test_zero_len_frame_with_series_corner_cases():
  992. # GH#28600
  993. df = DataFrame(columns=["A", "B"], dtype=np.float64)
  994. ser = Series([1, 2], index=["A", "B"])
  995. result = df + ser
  996. expected = df
  997. tm.assert_frame_equal(result, expected)
  998. def test_frame_single_columns_object_sum_axis_1():
  999. # GH 13758
  1000. data = {
  1001. "One": Series(["A", 1.2, np.nan]),
  1002. }
  1003. df = DataFrame(data)
  1004. result = df.sum(axis=1)
  1005. expected = Series(["A", 1.2, 0])
  1006. tm.assert_series_equal(result, expected)
  1007. # -------------------------------------------------------------------
  1008. # Unsorted
  1009. # These arithmetic tests were previously in other files, eventually
  1010. # should be parametrized and put into tests.arithmetic
  1011. class TestFrameArithmeticUnsorted:
  1012. def test_frame_add_tz_mismatch_converts_to_utc(self):
  1013. rng = pd.date_range("1/1/2011", periods=10, freq="H", tz="US/Eastern")
  1014. df = DataFrame(np.random.randn(len(rng)), index=rng, columns=["a"])
  1015. df_moscow = df.tz_convert("Europe/Moscow")
  1016. result = df + df_moscow
  1017. assert result.index.tz is timezone.utc
  1018. result = df_moscow + df
  1019. assert result.index.tz is timezone.utc
  1020. def test_align_frame(self):
  1021. rng = pd.period_range("1/1/2000", "1/1/2010", freq="A")
  1022. ts = DataFrame(np.random.randn(len(rng), 3), index=rng)
  1023. result = ts + ts[::2]
  1024. expected = ts + ts
  1025. expected.iloc[1::2] = np.nan
  1026. tm.assert_frame_equal(result, expected)
  1027. half = ts[::2]
  1028. result = ts + half.take(np.random.permutation(len(half)))
  1029. tm.assert_frame_equal(result, expected)
  1030. @pytest.mark.parametrize(
  1031. "op", [operator.add, operator.sub, operator.mul, operator.truediv]
  1032. )
  1033. def test_operators_none_as_na(self, op):
  1034. df = DataFrame(
  1035. {"col1": [2, 5.0, 123, None], "col2": [1, 2, 3, 4]}, dtype=object
  1036. )
  1037. # since filling converts dtypes from object, changed expected to be
  1038. # object
  1039. filled = df.fillna(np.nan)
  1040. result = op(df, 3)
  1041. expected = op(filled, 3).astype(object)
  1042. expected[com.isna(expected)] = None
  1043. tm.assert_frame_equal(result, expected)
  1044. result = op(df, df)
  1045. expected = op(filled, filled).astype(object)
  1046. expected[com.isna(expected)] = None
  1047. tm.assert_frame_equal(result, expected)
  1048. result = op(df, df.fillna(7))
  1049. tm.assert_frame_equal(result, expected)
  1050. result = op(df.fillna(7), df)
  1051. tm.assert_frame_equal(result, expected, check_dtype=False)
  1052. @pytest.mark.parametrize("op,res", [("__eq__", False), ("__ne__", True)])
  1053. # TODO: not sure what's correct here.
  1054. @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning")
  1055. def test_logical_typeerror_with_non_valid(self, op, res, float_frame):
  1056. # we are comparing floats vs a string
  1057. result = getattr(float_frame, op)("foo")
  1058. assert bool(result.all().all()) is res
  1059. @pytest.mark.parametrize("op", ["add", "sub", "mul", "div", "truediv"])
  1060. def test_binary_ops_align(self, op):
  1061. # test aligning binary ops
  1062. # GH 6681
  1063. index = MultiIndex.from_product(
  1064. [list("abc"), ["one", "two", "three"], [1, 2, 3]],
  1065. names=["first", "second", "third"],
  1066. )
  1067. df = DataFrame(
  1068. np.arange(27 * 3).reshape(27, 3),
  1069. index=index,
  1070. columns=["value1", "value2", "value3"],
  1071. ).sort_index()
  1072. idx = pd.IndexSlice
  1073. opa = getattr(operator, op, None)
  1074. if opa is None:
  1075. return
  1076. x = Series([1.0, 10.0, 100.0], [1, 2, 3])
  1077. result = getattr(df, op)(x, level="third", axis=0)
  1078. expected = pd.concat(
  1079. [opa(df.loc[idx[:, :, i], :], v) for i, v in x.items()]
  1080. ).sort_index()
  1081. tm.assert_frame_equal(result, expected)
  1082. x = Series([1.0, 10.0], ["two", "three"])
  1083. result = getattr(df, op)(x, level="second", axis=0)
  1084. expected = (
  1085. pd.concat([opa(df.loc[idx[:, i], :], v) for i, v in x.items()])
  1086. .reindex_like(df)
  1087. .sort_index()
  1088. )
  1089. tm.assert_frame_equal(result, expected)
  1090. def test_binary_ops_align_series_dataframe(self):
  1091. # GH9463 (alignment level of dataframe with series)
  1092. midx = MultiIndex.from_product([["A", "B"], ["a", "b"]])
  1093. df = DataFrame(np.ones((2, 4), dtype="int64"), columns=midx)
  1094. s = Series({"a": 1, "b": 2})
  1095. df2 = df.copy()
  1096. df2.columns.names = ["lvl0", "lvl1"]
  1097. s2 = s.copy()
  1098. s2.index.name = "lvl1"
  1099. # different cases of integer/string level names:
  1100. res1 = df.mul(s, axis=1, level=1)
  1101. res2 = df.mul(s2, axis=1, level=1)
  1102. res3 = df2.mul(s, axis=1, level=1)
  1103. res4 = df2.mul(s2, axis=1, level=1)
  1104. res5 = df2.mul(s, axis=1, level="lvl1")
  1105. res6 = df2.mul(s2, axis=1, level="lvl1")
  1106. exp = DataFrame(
  1107. np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype="int64"), columns=midx
  1108. )
  1109. for res in [res1, res2]:
  1110. tm.assert_frame_equal(res, exp)
  1111. exp.columns.names = ["lvl0", "lvl1"]
  1112. for res in [res3, res4, res5, res6]:
  1113. tm.assert_frame_equal(res, exp)
  1114. def test_add_with_dti_mismatched_tzs(self):
  1115. base = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz="UTC")
  1116. idx1 = base.tz_convert("Asia/Tokyo")[:2]
  1117. idx2 = base.tz_convert("US/Eastern")[1:]
  1118. df1 = DataFrame({"A": [1, 2]}, index=idx1)
  1119. df2 = DataFrame({"A": [1, 1]}, index=idx2)
  1120. exp = DataFrame({"A": [np.nan, 3, np.nan]}, index=base)
  1121. tm.assert_frame_equal(df1 + df2, exp)
  1122. def test_combineFrame(self, float_frame, mixed_float_frame, mixed_int_frame):
  1123. frame_copy = float_frame.reindex(float_frame.index[::2])
  1124. del frame_copy["D"]
  1125. # adding NAs to first 5 values of column "C"
  1126. frame_copy.loc[: frame_copy.index[4], "C"] = np.nan
  1127. added = float_frame + frame_copy
  1128. indexer = added["A"].dropna().index
  1129. exp = (float_frame["A"] * 2).copy()
  1130. tm.assert_series_equal(added["A"].dropna(), exp.loc[indexer])
  1131. exp.loc[~exp.index.isin(indexer)] = np.nan
  1132. tm.assert_series_equal(added["A"], exp.loc[added["A"].index])
  1133. assert np.isnan(added["C"].reindex(frame_copy.index)[:5]).all()
  1134. # assert(False)
  1135. assert np.isnan(added["D"]).all()
  1136. self_added = float_frame + float_frame
  1137. tm.assert_index_equal(self_added.index, float_frame.index)
  1138. added_rev = frame_copy + float_frame
  1139. assert np.isnan(added["D"]).all()
  1140. assert np.isnan(added_rev["D"]).all()
  1141. # corner cases
  1142. # empty
  1143. plus_empty = float_frame + DataFrame()
  1144. assert np.isnan(plus_empty.values).all()
  1145. empty_plus = DataFrame() + float_frame
  1146. assert np.isnan(empty_plus.values).all()
  1147. empty_empty = DataFrame() + DataFrame()
  1148. assert empty_empty.empty
  1149. # out of order
  1150. reverse = float_frame.reindex(columns=float_frame.columns[::-1])
  1151. tm.assert_frame_equal(reverse + float_frame, float_frame * 2)
  1152. # mix vs float64, upcast
  1153. added = float_frame + mixed_float_frame
  1154. _check_mixed_float(added, dtype="float64")
  1155. added = mixed_float_frame + float_frame
  1156. _check_mixed_float(added, dtype="float64")
  1157. # mix vs mix
  1158. added = mixed_float_frame + mixed_float_frame
  1159. _check_mixed_float(added, dtype={"C": None})
  1160. # with int
  1161. added = float_frame + mixed_int_frame
  1162. _check_mixed_float(added, dtype="float64")
  1163. def test_combine_series(self, float_frame, mixed_float_frame, mixed_int_frame):
  1164. # Series
  1165. series = float_frame.xs(float_frame.index[0])
  1166. added = float_frame + series
  1167. for key, s in added.items():
  1168. tm.assert_series_equal(s, float_frame[key] + series[key])
  1169. larger_series = series.to_dict()
  1170. larger_series["E"] = 1
  1171. larger_series = Series(larger_series)
  1172. larger_added = float_frame + larger_series
  1173. for key, s in float_frame.items():
  1174. tm.assert_series_equal(larger_added[key], s + series[key])
  1175. assert "E" in larger_added
  1176. assert np.isnan(larger_added["E"]).all()
  1177. # no upcast needed
  1178. added = mixed_float_frame + series
  1179. assert np.all(added.dtypes == series.dtype)
  1180. # vs mix (upcast) as needed
  1181. added = mixed_float_frame + series.astype("float32")
  1182. _check_mixed_float(added, dtype={"C": None})
  1183. added = mixed_float_frame + series.astype("float16")
  1184. _check_mixed_float(added, dtype={"C": None})
  1185. # these used to raise with numexpr as we are adding an int64 to an
  1186. # uint64....weird vs int
  1187. added = mixed_int_frame + (100 * series).astype("int64")
  1188. _check_mixed_int(
  1189. added, dtype={"A": "int64", "B": "float64", "C": "int64", "D": "int64"}
  1190. )
  1191. added = mixed_int_frame + (100 * series).astype("int32")
  1192. _check_mixed_int(
  1193. added, dtype={"A": "int32", "B": "float64", "C": "int32", "D": "int64"}
  1194. )
  1195. def test_combine_timeseries(self, datetime_frame):
  1196. # TimeSeries
  1197. ts = datetime_frame["A"]
  1198. # 10890
  1199. # we no longer allow auto timeseries broadcasting
  1200. # and require explicit broadcasting
  1201. added = datetime_frame.add(ts, axis="index")
  1202. for key, col in datetime_frame.items():
  1203. result = col + ts
  1204. tm.assert_series_equal(added[key], result, check_names=False)
  1205. assert added[key].name == key
  1206. if col.name == ts.name:
  1207. assert result.name == "A"
  1208. else:
  1209. assert result.name is None
  1210. smaller_frame = datetime_frame[:-5]
  1211. smaller_added = smaller_frame.add(ts, axis="index")
  1212. tm.assert_index_equal(smaller_added.index, datetime_frame.index)
  1213. smaller_ts = ts[:-5]
  1214. smaller_added2 = datetime_frame.add(smaller_ts, axis="index")
  1215. tm.assert_frame_equal(smaller_added, smaller_added2)
  1216. # length 0, result is all-nan
  1217. result = datetime_frame.add(ts[:0], axis="index")
  1218. expected = DataFrame(
  1219. np.nan, index=datetime_frame.index, columns=datetime_frame.columns
  1220. )
  1221. tm.assert_frame_equal(result, expected)
  1222. # Frame is all-nan
  1223. result = datetime_frame[:0].add(ts, axis="index")
  1224. expected = DataFrame(
  1225. np.nan, index=datetime_frame.index, columns=datetime_frame.columns
  1226. )
  1227. tm.assert_frame_equal(result, expected)
  1228. # empty but with non-empty index
  1229. frame = datetime_frame[:1].reindex(columns=[])
  1230. result = frame.mul(ts, axis="index")
  1231. assert len(result) == len(ts)
  1232. def test_combineFunc(self, float_frame, mixed_float_frame):
  1233. result = float_frame * 2
  1234. tm.assert_numpy_array_equal(result.values, float_frame.values * 2)
  1235. # vs mix
  1236. result = mixed_float_frame * 2
  1237. for c, s in result.items():
  1238. tm.assert_numpy_array_equal(s.values, mixed_float_frame[c].values * 2)
  1239. _check_mixed_float(result, dtype={"C": None})
  1240. result = DataFrame() * 2
  1241. assert result.index.equals(DataFrame().index)
  1242. assert len(result.columns) == 0
  1243. @pytest.mark.parametrize(
  1244. "func",
  1245. [operator.eq, operator.ne, operator.lt, operator.gt, operator.ge, operator.le],
  1246. )
  1247. def test_comparisons(self, simple_frame, float_frame, func):
  1248. df1 = tm.makeTimeDataFrame()
  1249. df2 = tm.makeTimeDataFrame()
  1250. row = simple_frame.xs("a")
  1251. ndim_5 = np.ones(df1.shape + (1, 1, 1))
  1252. result = func(df1, df2)
  1253. tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values))
  1254. msg = (
  1255. "Unable to coerce to Series/DataFrame, "
  1256. "dimension must be <= 2: (30, 4, 1, 1, 1)"
  1257. )
  1258. with pytest.raises(ValueError, match=re.escape(msg)):
  1259. func(df1, ndim_5)
  1260. result2 = func(simple_frame, row)
  1261. tm.assert_numpy_array_equal(
  1262. result2.values, func(simple_frame.values, row.values)
  1263. )
  1264. result3 = func(float_frame, 0)
  1265. tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0))
  1266. msg = (
  1267. r"Can only compare identically-labeled \(both index and columns\) "
  1268. "DataFrame objects"
  1269. )
  1270. with pytest.raises(ValueError, match=msg):
  1271. func(simple_frame, simple_frame[:2])
  1272. def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne):
  1273. # GH 11565
  1274. df = DataFrame(
  1275. {x: {"x": "foo", "y": "bar", "z": "baz"} for x in ["a", "b", "c"]}
  1276. )
  1277. f = getattr(operator, compare_operators_no_eq_ne)
  1278. msg = "'[<>]=?' not supported between instances of 'str' and 'int'"
  1279. with pytest.raises(TypeError, match=msg):
  1280. f(df, 0)
  1281. def test_comparison_protected_from_errstate(self):
  1282. missing_df = tm.makeDataFrame()
  1283. missing_df.loc[missing_df.index[0], "A"] = np.nan
  1284. with np.errstate(invalid="ignore"):
  1285. expected = missing_df.values < 0
  1286. with np.errstate(invalid="raise"):
  1287. result = (missing_df < 0).values
  1288. tm.assert_numpy_array_equal(result, expected)
  1289. def test_boolean_comparison(self):
  1290. # GH 4576
  1291. # boolean comparisons with a tuple/list give unexpected results
  1292. df = DataFrame(np.arange(6).reshape((3, 2)))
  1293. b = np.array([2, 2])
  1294. b_r = np.atleast_2d([2, 2])
  1295. b_c = b_r.T
  1296. lst = [2, 2, 2]
  1297. tup = tuple(lst)
  1298. # gt
  1299. expected = DataFrame([[False, False], [False, True], [True, True]])
  1300. result = df > b
  1301. tm.assert_frame_equal(result, expected)
  1302. result = df.values > b
  1303. tm.assert_numpy_array_equal(result, expected.values)
  1304. msg1d = "Unable to coerce to Series, length must be 2: given 3"
  1305. msg2d = "Unable to coerce to DataFrame, shape must be"
  1306. msg2db = "operands could not be broadcast together with shapes"
  1307. with pytest.raises(ValueError, match=msg1d):
  1308. # wrong shape
  1309. df > lst
  1310. with pytest.raises(ValueError, match=msg1d):
  1311. # wrong shape
  1312. df > tup
  1313. # broadcasts like ndarray (GH#23000)
  1314. result = df > b_r
  1315. tm.assert_frame_equal(result, expected)
  1316. result = df.values > b_r
  1317. tm.assert_numpy_array_equal(result, expected.values)
  1318. with pytest.raises(ValueError, match=msg2d):
  1319. df > b_c
  1320. with pytest.raises(ValueError, match=msg2db):
  1321. df.values > b_c
  1322. # ==
  1323. expected = DataFrame([[False, False], [True, False], [False, False]])
  1324. result = df == b
  1325. tm.assert_frame_equal(result, expected)
  1326. with pytest.raises(ValueError, match=msg1d):
  1327. df == lst
  1328. with pytest.raises(ValueError, match=msg1d):
  1329. df == tup
  1330. # broadcasts like ndarray (GH#23000)
  1331. result = df == b_r
  1332. tm.assert_frame_equal(result, expected)
  1333. result = df.values == b_r
  1334. tm.assert_numpy_array_equal(result, expected.values)
  1335. with pytest.raises(ValueError, match=msg2d):
  1336. df == b_c
  1337. assert df.values.shape != b_c.shape
  1338. # with alignment
  1339. df = DataFrame(
  1340. np.arange(6).reshape((3, 2)), columns=list("AB"), index=list("abc")
  1341. )
  1342. expected.index = df.index
  1343. expected.columns = df.columns
  1344. with pytest.raises(ValueError, match=msg1d):
  1345. df == lst
  1346. with pytest.raises(ValueError, match=msg1d):
  1347. df == tup
  1348. def test_inplace_ops_alignment(self):
  1349. # inplace ops / ops alignment
  1350. # GH 8511
  1351. columns = list("abcdefg")
  1352. X_orig = DataFrame(
  1353. np.arange(10 * len(columns)).reshape(-1, len(columns)),
  1354. columns=columns,
  1355. index=range(10),
  1356. )
  1357. Z = 100 * X_orig.iloc[:, 1:-1].copy()
  1358. block1 = list("bedcf")
  1359. subs = list("bcdef")
  1360. # add
  1361. X = X_orig.copy()
  1362. result1 = (X[block1] + Z).reindex(columns=subs)
  1363. X[block1] += Z
  1364. result2 = X.reindex(columns=subs)
  1365. X = X_orig.copy()
  1366. result3 = (X[block1] + Z[block1]).reindex(columns=subs)
  1367. X[block1] += Z[block1]
  1368. result4 = X.reindex(columns=subs)
  1369. tm.assert_frame_equal(result1, result2)
  1370. tm.assert_frame_equal(result1, result3)
  1371. tm.assert_frame_equal(result1, result4)
  1372. # sub
  1373. X = X_orig.copy()
  1374. result1 = (X[block1] - Z).reindex(columns=subs)
  1375. X[block1] -= Z
  1376. result2 = X.reindex(columns=subs)
  1377. X = X_orig.copy()
  1378. result3 = (X[block1] - Z[block1]).reindex(columns=subs)
  1379. X[block1] -= Z[block1]
  1380. result4 = X.reindex(columns=subs)
  1381. tm.assert_frame_equal(result1, result2)
  1382. tm.assert_frame_equal(result1, result3)
  1383. tm.assert_frame_equal(result1, result4)
  1384. def test_inplace_ops_identity(self):
  1385. # GH 5104
  1386. # make sure that we are actually changing the object
  1387. s_orig = Series([1, 2, 3])
  1388. df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5))
  1389. # no dtype change
  1390. s = s_orig.copy()
  1391. s2 = s
  1392. s += 1
  1393. tm.assert_series_equal(s, s2)
  1394. tm.assert_series_equal(s_orig + 1, s)
  1395. assert s is s2
  1396. assert s._mgr is s2._mgr
  1397. df = df_orig.copy()
  1398. df2 = df
  1399. df += 1
  1400. tm.assert_frame_equal(df, df2)
  1401. tm.assert_frame_equal(df_orig + 1, df)
  1402. assert df is df2
  1403. assert df._mgr is df2._mgr
  1404. # dtype change
  1405. s = s_orig.copy()
  1406. s2 = s
  1407. s += 1.5
  1408. tm.assert_series_equal(s, s2)
  1409. tm.assert_series_equal(s_orig + 1.5, s)
  1410. df = df_orig.copy()
  1411. df2 = df
  1412. df += 1.5
  1413. tm.assert_frame_equal(df, df2)
  1414. tm.assert_frame_equal(df_orig + 1.5, df)
  1415. assert df is df2
  1416. assert df._mgr is df2._mgr
  1417. # mixed dtype
  1418. arr = np.random.randint(0, 10, size=5)
  1419. df_orig = DataFrame({"A": arr.copy(), "B": "foo"})
  1420. df = df_orig.copy()
  1421. df2 = df
  1422. df["A"] += 1
  1423. expected = DataFrame({"A": arr.copy() + 1, "B": "foo"})
  1424. tm.assert_frame_equal(df, expected)
  1425. tm.assert_frame_equal(df2, expected)
  1426. assert df._mgr is df2._mgr
  1427. df = df_orig.copy()
  1428. df2 = df
  1429. df["A"] += 1.5
  1430. expected = DataFrame({"A": arr.copy() + 1.5, "B": "foo"})
  1431. tm.assert_frame_equal(df, expected)
  1432. tm.assert_frame_equal(df2, expected)
  1433. assert df._mgr is df2._mgr
  1434. @pytest.mark.parametrize(
  1435. "op",
  1436. [
  1437. "add",
  1438. "and",
  1439. "div",
  1440. "floordiv",
  1441. "mod",
  1442. "mul",
  1443. "or",
  1444. "pow",
  1445. "sub",
  1446. "truediv",
  1447. "xor",
  1448. ],
  1449. )
  1450. def test_inplace_ops_identity2(self, op):
  1451. if op == "div":
  1452. return
  1453. df = DataFrame({"a": [1.0, 2.0, 3.0], "b": [1, 2, 3]})
  1454. operand = 2
  1455. if op in ("and", "or", "xor"):
  1456. # cannot use floats for boolean ops
  1457. df["a"] = [True, False, True]
  1458. df_copy = df.copy()
  1459. iop = f"__i{op}__"
  1460. op = f"__{op}__"
  1461. # no id change and value is correct
  1462. getattr(df, iop)(operand)
  1463. expected = getattr(df_copy, op)(operand)
  1464. tm.assert_frame_equal(df, expected)
  1465. expected = id(df)
  1466. assert id(df) == expected
  1467. @pytest.mark.parametrize(
  1468. "val",
  1469. [
  1470. [1, 2, 3],
  1471. (1, 2, 3),
  1472. np.array([1, 2, 3], dtype=np.int64),
  1473. range(1, 4),
  1474. ],
  1475. )
  1476. def test_alignment_non_pandas(self, val):
  1477. index = ["A", "B", "C"]
  1478. columns = ["X", "Y", "Z"]
  1479. df = DataFrame(np.random.randn(3, 3), index=index, columns=columns)
  1480. align = pd.core.ops.align_method_FRAME
  1481. expected = DataFrame({"X": val, "Y": val, "Z": val}, index=df.index)
  1482. tm.assert_frame_equal(align(df, val, "index")[1], expected)
  1483. expected = DataFrame(
  1484. {"X": [1, 1, 1], "Y": [2, 2, 2], "Z": [3, 3, 3]}, index=df.index
  1485. )
  1486. tm.assert_frame_equal(align(df, val, "columns")[1], expected)
  1487. @pytest.mark.parametrize("val", [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)])
  1488. def test_alignment_non_pandas_length_mismatch(self, val):
  1489. index = ["A", "B", "C"]
  1490. columns = ["X", "Y", "Z"]
  1491. df = DataFrame(np.random.randn(3, 3), index=index, columns=columns)
  1492. align = pd.core.ops.align_method_FRAME
  1493. # length mismatch
  1494. msg = "Unable to coerce to Series, length must be 3: given 2"
  1495. with pytest.raises(ValueError, match=msg):
  1496. align(df, val, "index")
  1497. with pytest.raises(ValueError, match=msg):
  1498. align(df, val, "columns")
  1499. def test_alignment_non_pandas_index_columns(self):
  1500. index = ["A", "B", "C"]
  1501. columns = ["X", "Y", "Z"]
  1502. df = DataFrame(np.random.randn(3, 3), index=index, columns=columns)
  1503. align = pd.core.ops.align_method_FRAME
  1504. val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
  1505. tm.assert_frame_equal(
  1506. align(df, val, "index")[1],
  1507. DataFrame(val, index=df.index, columns=df.columns),
  1508. )
  1509. tm.assert_frame_equal(
  1510. align(df, val, "columns")[1],
  1511. DataFrame(val, index=df.index, columns=df.columns),
  1512. )
  1513. # shape mismatch
  1514. msg = "Unable to coerce to DataFrame, shape must be"
  1515. val = np.array([[1, 2, 3], [4, 5, 6]])
  1516. with pytest.raises(ValueError, match=msg):
  1517. align(df, val, "index")
  1518. with pytest.raises(ValueError, match=msg):
  1519. align(df, val, "columns")
  1520. val = np.zeros((3, 3, 3))
  1521. msg = re.escape(
  1522. "Unable to coerce to Series/DataFrame, dimension must be <= 2: (3, 3, 3)"
  1523. )
  1524. with pytest.raises(ValueError, match=msg):
  1525. align(df, val, "index")
  1526. with pytest.raises(ValueError, match=msg):
  1527. align(df, val, "columns")
  1528. def test_no_warning(self, all_arithmetic_operators):
  1529. df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
  1530. b = df["B"]
  1531. with tm.assert_produces_warning(None):
  1532. getattr(df, all_arithmetic_operators)(b)
  1533. def test_dunder_methods_binary(self, all_arithmetic_operators):
  1534. # GH#??? frame.__foo__ should only accept one argument
  1535. df = DataFrame({"A": [0.0, 0.0], "B": [0.0, None]})
  1536. b = df["B"]
  1537. with pytest.raises(TypeError, match="takes 2 positional arguments"):
  1538. getattr(df, all_arithmetic_operators)(b, 0)
  1539. def test_align_int_fill_bug(self):
  1540. # GH#910
  1541. X = np.arange(10 * 10, dtype="float64").reshape(10, 10)
  1542. Y = np.ones((10, 1), dtype=int)
  1543. df1 = DataFrame(X)
  1544. df1["0.X"] = Y.squeeze()
  1545. df2 = df1.astype(float)
  1546. result = df1 - df1.mean()
  1547. expected = df2 - df2.mean()
  1548. tm.assert_frame_equal(result, expected)
  1549. def test_pow_with_realignment():
  1550. # GH#32685 pow has special semantics for operating with null values
  1551. left = DataFrame({"A": [0, 1, 2]})
  1552. right = DataFrame(index=[0, 1, 2])
  1553. result = left**right
  1554. expected = DataFrame({"A": [np.nan, 1.0, np.nan]})
  1555. tm.assert_frame_equal(result, expected)
  1556. # TODO: move to tests.arithmetic and parametrize
  1557. def test_pow_nan_with_zero():
  1558. left = DataFrame({"A": [np.nan, np.nan, np.nan]})
  1559. right = DataFrame({"A": [0, 0, 0]})
  1560. expected = DataFrame({"A": [1.0, 1.0, 1.0]})
  1561. result = left**right
  1562. tm.assert_frame_equal(result, expected)
  1563. result = left["A"] ** right["A"]
  1564. tm.assert_series_equal(result, expected["A"])
  1565. def test_dataframe_series_extension_dtypes():
  1566. # https://github.com/pandas-dev/pandas/issues/34311
  1567. df = DataFrame(np.random.randint(0, 100, (10, 3)), columns=["a", "b", "c"])
  1568. ser = Series([1, 2, 3], index=["a", "b", "c"])
  1569. expected = df.to_numpy("int64") + ser.to_numpy("int64").reshape(-1, 3)
  1570. expected = DataFrame(expected, columns=df.columns, dtype="Int64")
  1571. df_ea = df.astype("Int64")
  1572. result = df_ea + ser
  1573. tm.assert_frame_equal(result, expected)
  1574. result = df_ea + ser.astype("Int64")
  1575. tm.assert_frame_equal(result, expected)
  1576. def test_dataframe_blockwise_slicelike():
  1577. # GH#34367
  1578. arr = np.random.randint(0, 1000, (100, 10))
  1579. df1 = DataFrame(arr)
  1580. # Explicit cast to float to avoid implicit cast when setting nan
  1581. df2 = df1.copy().astype({1: "float", 3: "float", 7: "float"})
  1582. df2.iloc[0, [1, 3, 7]] = np.nan
  1583. # Explicit cast to float to avoid implicit cast when setting nan
  1584. df3 = df1.copy().astype({5: "float"})
  1585. df3.iloc[0, [5]] = np.nan
  1586. # Explicit cast to float to avoid implicit cast when setting nan
  1587. df4 = df1.copy().astype({2: "float", 3: "float", 4: "float"})
  1588. df4.iloc[0, np.arange(2, 5)] = np.nan
  1589. # Explicit cast to float to avoid implicit cast when setting nan
  1590. df5 = df1.copy().astype({4: "float", 5: "float", 6: "float"})
  1591. df5.iloc[0, np.arange(4, 7)] = np.nan
  1592. for left, right in [(df1, df2), (df2, df3), (df4, df5)]:
  1593. res = left + right
  1594. expected = DataFrame({i: left[i] + right[i] for i in left.columns})
  1595. tm.assert_frame_equal(res, expected)
  1596. @pytest.mark.parametrize(
  1597. "df, col_dtype",
  1598. [
  1599. (DataFrame([[1.0, 2.0], [4.0, 5.0]], columns=list("ab")), "float64"),
  1600. (DataFrame([[1.0, "b"], [4.0, "b"]], columns=list("ab")), "object"),
  1601. ],
  1602. )
  1603. def test_dataframe_operation_with_non_numeric_types(df, col_dtype):
  1604. # GH #22663
  1605. expected = DataFrame([[0.0, np.nan], [3.0, np.nan]], columns=list("ab"))
  1606. expected = expected.astype({"b": col_dtype})
  1607. result = df + Series([-1.0], index=list("a"))
  1608. tm.assert_frame_equal(result, expected)
  1609. def test_arith_reindex_with_duplicates():
  1610. # https://github.com/pandas-dev/pandas/issues/35194
  1611. df1 = DataFrame(data=[[0]], columns=["second"])
  1612. df2 = DataFrame(data=[[0, 0, 0]], columns=["first", "second", "second"])
  1613. result = df1 + df2
  1614. expected = DataFrame([[np.nan, 0, 0]], columns=["first", "second", "second"])
  1615. tm.assert_frame_equal(result, expected)
  1616. @pytest.mark.parametrize("to_add", [[Series([1, 1])], [Series([1, 1]), Series([1, 1])]])
  1617. def test_arith_list_of_arraylike_raise(to_add):
  1618. # GH 36702. Raise when trying to add list of array-like to DataFrame
  1619. df = DataFrame({"x": [1, 2], "y": [1, 2]})
  1620. msg = f"Unable to coerce list of {type(to_add[0])} to Series/DataFrame"
  1621. with pytest.raises(ValueError, match=msg):
  1622. df + to_add
  1623. with pytest.raises(ValueError, match=msg):
  1624. to_add + df
  1625. def test_inplace_arithmetic_series_update(using_copy_on_write):
  1626. # https://github.com/pandas-dev/pandas/issues/36373
  1627. df = DataFrame({"A": [1, 2, 3]})
  1628. df_orig = df.copy()
  1629. series = df["A"]
  1630. vals = series._values
  1631. series += 1
  1632. if using_copy_on_write:
  1633. assert series._values is not vals
  1634. tm.assert_frame_equal(df, df_orig)
  1635. else:
  1636. assert series._values is vals
  1637. expected = DataFrame({"A": [2, 3, 4]})
  1638. tm.assert_frame_equal(df, expected)
  1639. def test_arithemetic_multiindex_align():
  1640. """
  1641. Regression test for: https://github.com/pandas-dev/pandas/issues/33765
  1642. """
  1643. df1 = DataFrame(
  1644. [[1]],
  1645. index=["a"],
  1646. columns=MultiIndex.from_product([[0], [1]], names=["a", "b"]),
  1647. )
  1648. df2 = DataFrame([[1]], index=["a"], columns=Index([0], name="a"))
  1649. expected = DataFrame(
  1650. [[0]],
  1651. index=["a"],
  1652. columns=MultiIndex.from_product([[0], [1]], names=["a", "b"]),
  1653. )
  1654. result = df1 - df2
  1655. tm.assert_frame_equal(result, expected)
  1656. def test_bool_frame_mult_float():
  1657. # GH 18549
  1658. df = DataFrame(True, list("ab"), list("cd"))
  1659. result = df * 1.0
  1660. expected = DataFrame(np.ones((2, 2)), list("ab"), list("cd"))
  1661. tm.assert_frame_equal(result, expected)
  1662. def test_frame_sub_nullable_int(any_int_ea_dtype):
  1663. # GH 32822
  1664. series1 = Series([1, 2, None], dtype=any_int_ea_dtype)
  1665. series2 = Series([1, 2, 3], dtype=any_int_ea_dtype)
  1666. expected = DataFrame([0, 0, None], dtype=any_int_ea_dtype)
  1667. result = series1.to_frame() - series2.to_frame()
  1668. tm.assert_frame_equal(result, expected)
  1669. def test_frame_op_subclass_nonclass_constructor():
  1670. # GH#43201 subclass._constructor is a function, not the subclass itself
  1671. class SubclassedSeries(Series):
  1672. @property
  1673. def _constructor(self):
  1674. return SubclassedSeries
  1675. @property
  1676. def _constructor_expanddim(self):
  1677. return SubclassedDataFrame
  1678. class SubclassedDataFrame(DataFrame):
  1679. _metadata = ["my_extra_data"]
  1680. def __init__(self, my_extra_data, *args, **kwargs) -> None:
  1681. self.my_extra_data = my_extra_data
  1682. super().__init__(*args, **kwargs)
  1683. @property
  1684. def _constructor(self):
  1685. return functools.partial(type(self), self.my_extra_data)
  1686. @property
  1687. def _constructor_sliced(self):
  1688. return SubclassedSeries
  1689. sdf = SubclassedDataFrame("some_data", {"A": [1, 2, 3], "B": [4, 5, 6]})
  1690. result = sdf * 2
  1691. expected = SubclassedDataFrame("some_data", {"A": [2, 4, 6], "B": [8, 10, 12]})
  1692. tm.assert_frame_equal(result, expected)
  1693. result = sdf + sdf
  1694. tm.assert_frame_equal(result, expected)
  1695. def test_enum_column_equality():
  1696. Cols = Enum("Cols", "col1 col2")
  1697. q1 = DataFrame({Cols.col1: [1, 2, 3]})
  1698. q2 = DataFrame({Cols.col1: [1, 2, 3]})
  1699. result = q1[Cols.col1] == q2[Cols.col1]
  1700. expected = Series([True, True, True], name=Cols.col1)
  1701. tm.assert_series_equal(result, expected)