test_reductions.py 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. import pandas as pd
  8. from pandas import (
  9. Categorical,
  10. DataFrame,
  11. DatetimeIndex,
  12. Index,
  13. NaT,
  14. Period,
  15. PeriodIndex,
  16. RangeIndex,
  17. Series,
  18. Timedelta,
  19. TimedeltaIndex,
  20. Timestamp,
  21. date_range,
  22. isna,
  23. timedelta_range,
  24. to_timedelta,
  25. )
  26. import pandas._testing as tm
  27. from pandas.core import nanops
  28. def get_objs():
  29. indexes = [
  30. tm.makeBoolIndex(10, name="a"),
  31. tm.makeIntIndex(10, name="a"),
  32. tm.makeFloatIndex(10, name="a"),
  33. tm.makeDateIndex(10, name="a"),
  34. tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"),
  35. tm.makePeriodIndex(10, name="a"),
  36. tm.makeStringIndex(10, name="a"),
  37. ]
  38. arr = np.random.randn(10)
  39. series = [Series(arr, index=idx, name="a") for idx in indexes]
  40. objs = indexes + series
  41. return objs
  42. objs = get_objs()
  43. class TestReductions:
  44. @pytest.mark.parametrize("opname", ["max", "min"])
  45. @pytest.mark.parametrize("obj", objs)
  46. def test_ops(self, opname, obj):
  47. result = getattr(obj, opname)()
  48. if not isinstance(obj, PeriodIndex):
  49. expected = getattr(obj.values, opname)()
  50. else:
  51. expected = Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)
  52. if getattr(obj, "tz", None) is not None:
  53. # We need to de-localize before comparing to the numpy-produced result
  54. expected = expected.astype("M8[ns]").astype("int64")
  55. assert result._value == expected
  56. else:
  57. assert result == expected
  58. @pytest.mark.parametrize("opname", ["max", "min"])
  59. @pytest.mark.parametrize(
  60. "dtype, val",
  61. [
  62. ("object", 2.0),
  63. ("float64", 2.0),
  64. ("datetime64[ns]", datetime(2011, 11, 1)),
  65. ("Int64", 2),
  66. ("boolean", True),
  67. ],
  68. )
  69. def test_nanminmax(self, opname, dtype, val, index_or_series):
  70. # GH#7261
  71. klass = index_or_series
  72. def check_missing(res):
  73. if dtype == "datetime64[ns]":
  74. return res is NaT
  75. elif dtype in ["Int64", "boolean"]:
  76. return res is pd.NA
  77. else:
  78. return isna(res)
  79. obj = klass([None], dtype=dtype)
  80. assert check_missing(getattr(obj, opname)())
  81. assert check_missing(getattr(obj, opname)(skipna=False))
  82. obj = klass([], dtype=dtype)
  83. assert check_missing(getattr(obj, opname)())
  84. assert check_missing(getattr(obj, opname)(skipna=False))
  85. if dtype == "object":
  86. # generic test with object only works for empty / all NaN
  87. return
  88. obj = klass([None, val], dtype=dtype)
  89. assert getattr(obj, opname)() == val
  90. assert check_missing(getattr(obj, opname)(skipna=False))
  91. obj = klass([None, val, None], dtype=dtype)
  92. assert getattr(obj, opname)() == val
  93. assert check_missing(getattr(obj, opname)(skipna=False))
  94. @pytest.mark.parametrize("opname", ["max", "min"])
  95. def test_nanargminmax(self, opname, index_or_series):
  96. # GH#7261
  97. klass = index_or_series
  98. arg_op = "arg" + opname if klass is Index else "idx" + opname
  99. obj = klass([NaT, datetime(2011, 11, 1)])
  100. assert getattr(obj, arg_op)() == 1
  101. result = getattr(obj, arg_op)(skipna=False)
  102. if klass is Series:
  103. assert np.isnan(result)
  104. else:
  105. assert result == -1
  106. obj = klass([NaT, datetime(2011, 11, 1), NaT])
  107. # check DatetimeIndex non-monotonic path
  108. assert getattr(obj, arg_op)() == 1
  109. result = getattr(obj, arg_op)(skipna=False)
  110. if klass is Series:
  111. assert np.isnan(result)
  112. else:
  113. assert result == -1
  114. @pytest.mark.parametrize("opname", ["max", "min"])
  115. @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
  116. def test_nanops_empty_object(self, opname, index_or_series, dtype):
  117. klass = index_or_series
  118. arg_op = "arg" + opname if klass is Index else "idx" + opname
  119. obj = klass([], dtype=dtype)
  120. assert getattr(obj, opname)() is NaT
  121. assert getattr(obj, opname)(skipna=False) is NaT
  122. with pytest.raises(ValueError, match="empty sequence"):
  123. getattr(obj, arg_op)()
  124. with pytest.raises(ValueError, match="empty sequence"):
  125. getattr(obj, arg_op)(skipna=False)
  126. def test_argminmax(self):
  127. obj = Index(np.arange(5, dtype="int64"))
  128. assert obj.argmin() == 0
  129. assert obj.argmax() == 4
  130. obj = Index([np.nan, 1, np.nan, 2])
  131. assert obj.argmin() == 1
  132. assert obj.argmax() == 3
  133. assert obj.argmin(skipna=False) == -1
  134. assert obj.argmax(skipna=False) == -1
  135. obj = Index([np.nan])
  136. assert obj.argmin() == -1
  137. assert obj.argmax() == -1
  138. assert obj.argmin(skipna=False) == -1
  139. assert obj.argmax(skipna=False) == -1
  140. obj = Index([NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), NaT])
  141. assert obj.argmin() == 1
  142. assert obj.argmax() == 2
  143. assert obj.argmin(skipna=False) == -1
  144. assert obj.argmax(skipna=False) == -1
  145. obj = Index([NaT])
  146. assert obj.argmin() == -1
  147. assert obj.argmax() == -1
  148. assert obj.argmin(skipna=False) == -1
  149. assert obj.argmax(skipna=False) == -1
  150. @pytest.mark.parametrize("op, expected_col", [["max", "a"], ["min", "b"]])
  151. def test_same_tz_min_max_axis_1(self, op, expected_col):
  152. # GH 10390
  153. df = DataFrame(
  154. date_range("2016-01-01 00:00:00", periods=3, tz="UTC"), columns=["a"]
  155. )
  156. df["b"] = df.a.subtract(Timedelta(seconds=3600))
  157. result = getattr(df, op)(axis=1)
  158. expected = df[expected_col].rename(None)
  159. tm.assert_series_equal(result, expected)
  160. @pytest.mark.parametrize("func", ["maximum", "minimum"])
  161. def test_numpy_reduction_with_tz_aware_dtype(self, tz_aware_fixture, func):
  162. # GH 15552
  163. tz = tz_aware_fixture
  164. arg = pd.to_datetime(["2019"]).tz_localize(tz)
  165. expected = Series(arg)
  166. result = getattr(np, func)(expected, expected)
  167. tm.assert_series_equal(result, expected)
  168. def test_nan_int_timedelta_sum(self):
  169. # GH 27185
  170. df = DataFrame(
  171. {
  172. "A": Series([1, 2, NaT], dtype="timedelta64[ns]"),
  173. "B": Series([1, 2, np.nan], dtype="Int64"),
  174. }
  175. )
  176. expected = Series({"A": Timedelta(3), "B": 3})
  177. result = df.sum()
  178. tm.assert_series_equal(result, expected)
  179. class TestIndexReductions:
  180. # Note: the name TestIndexReductions indicates these tests
  181. # were moved from a Index-specific test file, _not_ that these tests are
  182. # intended long-term to be Index-specific
  183. @pytest.mark.parametrize(
  184. "start,stop,step",
  185. [
  186. (0, 400, 3),
  187. (500, 0, -6),
  188. (-(10**6), 10**6, 4),
  189. (10**6, -(10**6), -4),
  190. (0, 10, 20),
  191. ],
  192. )
  193. def test_max_min_range(self, start, stop, step):
  194. # GH#17607
  195. idx = RangeIndex(start, stop, step)
  196. expected = idx._values.max()
  197. result = idx.max()
  198. assert result == expected
  199. # skipna should be irrelevant since RangeIndex should never have NAs
  200. result2 = idx.max(skipna=False)
  201. assert result2 == expected
  202. expected = idx._values.min()
  203. result = idx.min()
  204. assert result == expected
  205. # skipna should be irrelevant since RangeIndex should never have NAs
  206. result2 = idx.min(skipna=False)
  207. assert result2 == expected
  208. # empty
  209. idx = RangeIndex(start, stop, -step)
  210. assert isna(idx.max())
  211. assert isna(idx.min())
  212. def test_minmax_timedelta64(self):
  213. # monotonic
  214. idx1 = TimedeltaIndex(["1 days", "2 days", "3 days"])
  215. assert idx1.is_monotonic_increasing
  216. # non-monotonic
  217. idx2 = TimedeltaIndex(["1 days", np.nan, "3 days", "NaT"])
  218. assert not idx2.is_monotonic_increasing
  219. for idx in [idx1, idx2]:
  220. assert idx.min() == Timedelta("1 days")
  221. assert idx.max() == Timedelta("3 days")
  222. assert idx.argmin() == 0
  223. assert idx.argmax() == 2
  224. @pytest.mark.parametrize("op", ["min", "max"])
  225. def test_minmax_timedelta_empty_or_na(self, op):
  226. # Return NaT
  227. obj = TimedeltaIndex([])
  228. assert getattr(obj, op)() is NaT
  229. obj = TimedeltaIndex([NaT])
  230. assert getattr(obj, op)() is NaT
  231. obj = TimedeltaIndex([NaT, NaT, NaT])
  232. assert getattr(obj, op)() is NaT
  233. def test_numpy_minmax_timedelta64(self):
  234. td = timedelta_range("16815 days", "16820 days", freq="D")
  235. assert np.min(td) == Timedelta("16815 days")
  236. assert np.max(td) == Timedelta("16820 days")
  237. errmsg = "the 'out' parameter is not supported"
  238. with pytest.raises(ValueError, match=errmsg):
  239. np.min(td, out=0)
  240. with pytest.raises(ValueError, match=errmsg):
  241. np.max(td, out=0)
  242. assert np.argmin(td) == 0
  243. assert np.argmax(td) == 5
  244. errmsg = "the 'out' parameter is not supported"
  245. with pytest.raises(ValueError, match=errmsg):
  246. np.argmin(td, out=0)
  247. with pytest.raises(ValueError, match=errmsg):
  248. np.argmax(td, out=0)
  249. def test_timedelta_ops(self):
  250. # GH#4984
  251. # make sure ops return Timedelta
  252. s = Series(
  253. [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)]
  254. )
  255. td = s.diff()
  256. result = td.mean()
  257. expected = to_timedelta(timedelta(seconds=9))
  258. assert result == expected
  259. result = td.to_frame().mean()
  260. assert result[0] == expected
  261. result = td.quantile(0.1)
  262. expected = Timedelta(np.timedelta64(2600, "ms"))
  263. assert result == expected
  264. result = td.median()
  265. expected = to_timedelta("00:00:09")
  266. assert result == expected
  267. result = td.to_frame().median()
  268. assert result[0] == expected
  269. # GH#6462
  270. # consistency in returned values for sum
  271. result = td.sum()
  272. expected = to_timedelta("00:01:21")
  273. assert result == expected
  274. result = td.to_frame().sum()
  275. assert result[0] == expected
  276. # std
  277. result = td.std()
  278. expected = to_timedelta(Series(td.dropna().values).std())
  279. assert result == expected
  280. result = td.to_frame().std()
  281. assert result[0] == expected
  282. # GH#10040
  283. # make sure NaT is properly handled by median()
  284. s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")])
  285. assert s.diff().median() == timedelta(days=4)
  286. s = Series(
  287. [Timestamp("2015-02-03"), Timestamp("2015-02-07"), Timestamp("2015-02-15")]
  288. )
  289. assert s.diff().median() == timedelta(days=6)
  290. @pytest.mark.parametrize("opname", ["skew", "kurt", "sem", "prod", "var"])
  291. def test_invalid_td64_reductions(self, opname):
  292. s = Series(
  293. [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)]
  294. )
  295. td = s.diff()
  296. msg = "|".join(
  297. [
  298. f"reduction operation '{opname}' not allowed for this dtype",
  299. rf"cannot perform {opname} with type timedelta64\[ns\]",
  300. f"does not support reduction '{opname}'",
  301. ]
  302. )
  303. with pytest.raises(TypeError, match=msg):
  304. getattr(td, opname)()
  305. with pytest.raises(TypeError, match=msg):
  306. getattr(td.to_frame(), opname)(numeric_only=False)
  307. def test_minmax_tz(self, tz_naive_fixture):
  308. tz = tz_naive_fixture
  309. # monotonic
  310. idx1 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz=tz)
  311. assert idx1.is_monotonic_increasing
  312. # non-monotonic
  313. idx2 = DatetimeIndex(
  314. ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], tz=tz
  315. )
  316. assert not idx2.is_monotonic_increasing
  317. for idx in [idx1, idx2]:
  318. assert idx.min() == Timestamp("2011-01-01", tz=tz)
  319. assert idx.max() == Timestamp("2011-01-03", tz=tz)
  320. assert idx.argmin() == 0
  321. assert idx.argmax() == 2
  322. @pytest.mark.parametrize("op", ["min", "max"])
  323. def test_minmax_nat_datetime64(self, op):
  324. # Return NaT
  325. obj = DatetimeIndex([])
  326. assert isna(getattr(obj, op)())
  327. obj = DatetimeIndex([NaT])
  328. assert isna(getattr(obj, op)())
  329. obj = DatetimeIndex([NaT, NaT, NaT])
  330. assert isna(getattr(obj, op)())
  331. def test_numpy_minmax_integer(self):
  332. # GH#26125
  333. idx = Index([1, 2, 3])
  334. expected = idx.values.max()
  335. result = np.max(idx)
  336. assert result == expected
  337. expected = idx.values.min()
  338. result = np.min(idx)
  339. assert result == expected
  340. errmsg = "the 'out' parameter is not supported"
  341. with pytest.raises(ValueError, match=errmsg):
  342. np.min(idx, out=0)
  343. with pytest.raises(ValueError, match=errmsg):
  344. np.max(idx, out=0)
  345. expected = idx.values.argmax()
  346. result = np.argmax(idx)
  347. assert result == expected
  348. expected = idx.values.argmin()
  349. result = np.argmin(idx)
  350. assert result == expected
  351. errmsg = "the 'out' parameter is not supported"
  352. with pytest.raises(ValueError, match=errmsg):
  353. np.argmin(idx, out=0)
  354. with pytest.raises(ValueError, match=errmsg):
  355. np.argmax(idx, out=0)
  356. def test_numpy_minmax_range(self):
  357. # GH#26125
  358. idx = RangeIndex(0, 10, 3)
  359. result = np.max(idx)
  360. assert result == 9
  361. result = np.min(idx)
  362. assert result == 0
  363. errmsg = "the 'out' parameter is not supported"
  364. with pytest.raises(ValueError, match=errmsg):
  365. np.min(idx, out=0)
  366. with pytest.raises(ValueError, match=errmsg):
  367. np.max(idx, out=0)
  368. # No need to test again argmax/argmin compat since the implementation
  369. # is the same as basic integer index
  370. def test_numpy_minmax_datetime64(self):
  371. dr = date_range(start="2016-01-15", end="2016-01-20")
  372. assert np.min(dr) == Timestamp("2016-01-15 00:00:00")
  373. assert np.max(dr) == Timestamp("2016-01-20 00:00:00")
  374. errmsg = "the 'out' parameter is not supported"
  375. with pytest.raises(ValueError, match=errmsg):
  376. np.min(dr, out=0)
  377. with pytest.raises(ValueError, match=errmsg):
  378. np.max(dr, out=0)
  379. assert np.argmin(dr) == 0
  380. assert np.argmax(dr) == 5
  381. errmsg = "the 'out' parameter is not supported"
  382. with pytest.raises(ValueError, match=errmsg):
  383. np.argmin(dr, out=0)
  384. with pytest.raises(ValueError, match=errmsg):
  385. np.argmax(dr, out=0)
  386. def test_minmax_period(self):
  387. # monotonic
  388. idx1 = PeriodIndex([NaT, "2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
  389. assert not idx1.is_monotonic_increasing
  390. assert idx1[1:].is_monotonic_increasing
  391. # non-monotonic
  392. idx2 = PeriodIndex(
  393. ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], freq="D"
  394. )
  395. assert not idx2.is_monotonic_increasing
  396. for idx in [idx1, idx2]:
  397. assert idx.min() == Period("2011-01-01", freq="D")
  398. assert idx.max() == Period("2011-01-03", freq="D")
  399. assert idx1.argmin() == 1
  400. assert idx2.argmin() == 0
  401. assert idx1.argmax() == 3
  402. assert idx2.argmax() == 2
  403. @pytest.mark.parametrize("op", ["min", "max"])
  404. @pytest.mark.parametrize("data", [[], [NaT], [NaT, NaT, NaT]])
  405. def test_minmax_period_empty_nat(self, op, data):
  406. # Return NaT
  407. obj = PeriodIndex(data, freq="M")
  408. result = getattr(obj, op)()
  409. assert result is NaT
  410. def test_numpy_minmax_period(self):
  411. pr = pd.period_range(start="2016-01-15", end="2016-01-20")
  412. assert np.min(pr) == Period("2016-01-15", freq="D")
  413. assert np.max(pr) == Period("2016-01-20", freq="D")
  414. errmsg = "the 'out' parameter is not supported"
  415. with pytest.raises(ValueError, match=errmsg):
  416. np.min(pr, out=0)
  417. with pytest.raises(ValueError, match=errmsg):
  418. np.max(pr, out=0)
  419. assert np.argmin(pr) == 0
  420. assert np.argmax(pr) == 5
  421. errmsg = "the 'out' parameter is not supported"
  422. with pytest.raises(ValueError, match=errmsg):
  423. np.argmin(pr, out=0)
  424. with pytest.raises(ValueError, match=errmsg):
  425. np.argmax(pr, out=0)
  426. def test_min_max_categorical(self):
  427. ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
  428. msg = (
  429. r"Categorical is not ordered for operation min\n"
  430. r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n"
  431. )
  432. with pytest.raises(TypeError, match=msg):
  433. ci.min()
  434. msg = (
  435. r"Categorical is not ordered for operation max\n"
  436. r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n"
  437. )
  438. with pytest.raises(TypeError, match=msg):
  439. ci.max()
  440. ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=True)
  441. assert ci.min() == "c"
  442. assert ci.max() == "b"
  443. class TestSeriesReductions:
  444. # Note: the name TestSeriesReductions indicates these tests
  445. # were moved from a series-specific test file, _not_ that these tests are
  446. # intended long-term to be series-specific
  447. def test_sum_inf(self):
  448. s = Series(np.random.randn(10))
  449. s2 = s.copy()
  450. s[5:8] = np.inf
  451. s2[5:8] = np.nan
  452. assert np.isinf(s.sum())
  453. arr = np.random.randn(100, 100).astype("f4")
  454. arr[:, 2] = np.inf
  455. with pd.option_context("mode.use_inf_as_na", True):
  456. tm.assert_almost_equal(s.sum(), s2.sum())
  457. res = nanops.nansum(arr, axis=1)
  458. assert np.isinf(res).all()
  459. @pytest.mark.parametrize(
  460. "dtype", ["float64", "Float32", "Int64", "boolean", "object"]
  461. )
  462. @pytest.mark.parametrize("use_bottleneck", [True, False])
  463. @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
  464. def test_empty(self, method, unit, use_bottleneck, dtype):
  465. with pd.option_context("use_bottleneck", use_bottleneck):
  466. # GH#9422 / GH#18921
  467. # Entirely empty
  468. s = Series([], dtype=dtype)
  469. # NA by default
  470. result = getattr(s, method)()
  471. assert result == unit
  472. # Explicit
  473. result = getattr(s, method)(min_count=0)
  474. assert result == unit
  475. result = getattr(s, method)(min_count=1)
  476. assert isna(result)
  477. # Skipna, default
  478. result = getattr(s, method)(skipna=True)
  479. result == unit
  480. # Skipna, explicit
  481. result = getattr(s, method)(skipna=True, min_count=0)
  482. assert result == unit
  483. result = getattr(s, method)(skipna=True, min_count=1)
  484. assert isna(result)
  485. result = getattr(s, method)(skipna=False, min_count=0)
  486. assert result == unit
  487. result = getattr(s, method)(skipna=False, min_count=1)
  488. assert isna(result)
  489. # All-NA
  490. s = Series([np.nan], dtype=dtype)
  491. # NA by default
  492. result = getattr(s, method)()
  493. assert result == unit
  494. # Explicit
  495. result = getattr(s, method)(min_count=0)
  496. assert result == unit
  497. result = getattr(s, method)(min_count=1)
  498. assert isna(result)
  499. # Skipna, default
  500. result = getattr(s, method)(skipna=True)
  501. result == unit
  502. # skipna, explicit
  503. result = getattr(s, method)(skipna=True, min_count=0)
  504. assert result == unit
  505. result = getattr(s, method)(skipna=True, min_count=1)
  506. assert isna(result)
  507. # Mix of valid, empty
  508. s = Series([np.nan, 1], dtype=dtype)
  509. # Default
  510. result = getattr(s, method)()
  511. assert result == 1.0
  512. # Explicit
  513. result = getattr(s, method)(min_count=0)
  514. assert result == 1.0
  515. result = getattr(s, method)(min_count=1)
  516. assert result == 1.0
  517. # Skipna
  518. result = getattr(s, method)(skipna=True)
  519. assert result == 1.0
  520. result = getattr(s, method)(skipna=True, min_count=0)
  521. assert result == 1.0
  522. # GH#844 (changed in GH#9422)
  523. df = DataFrame(np.empty((10, 0)), dtype=dtype)
  524. assert (getattr(df, method)(1) == unit).all()
  525. s = Series([1], dtype=dtype)
  526. result = getattr(s, method)(min_count=2)
  527. assert isna(result)
  528. result = getattr(s, method)(skipna=False, min_count=2)
  529. assert isna(result)
  530. s = Series([np.nan], dtype=dtype)
  531. result = getattr(s, method)(min_count=2)
  532. assert isna(result)
  533. s = Series([np.nan, 1], dtype=dtype)
  534. result = getattr(s, method)(min_count=2)
  535. assert isna(result)
  536. @pytest.mark.parametrize("method", ["mean", "var"])
  537. @pytest.mark.parametrize("dtype", ["Float64", "Int64", "boolean"])
  538. def test_ops_consistency_on_empty_nullable(self, method, dtype):
  539. # GH#34814
  540. # consistency for nullable dtypes on empty or ALL-NA mean
  541. # empty series
  542. eser = Series([], dtype=dtype)
  543. result = getattr(eser, method)()
  544. assert result is pd.NA
  545. # ALL-NA series
  546. nser = Series([np.nan], dtype=dtype)
  547. result = getattr(nser, method)()
  548. assert result is pd.NA
  549. @pytest.mark.parametrize("method", ["mean", "median", "std", "var"])
  550. def test_ops_consistency_on_empty(self, method):
  551. # GH#7869
  552. # consistency on empty
  553. # float
  554. result = getattr(Series(dtype=float), method)()
  555. assert isna(result)
  556. # timedelta64[ns]
  557. tdser = Series([], dtype="m8[ns]")
  558. if method == "var":
  559. msg = "|".join(
  560. [
  561. "operation 'var' not allowed",
  562. r"cannot perform var with type timedelta64\[ns\]",
  563. "does not support reduction 'var'",
  564. ]
  565. )
  566. with pytest.raises(TypeError, match=msg):
  567. getattr(tdser, method)()
  568. else:
  569. result = getattr(tdser, method)()
  570. assert result is NaT
  571. def test_nansum_buglet(self):
  572. ser = Series([1.0, np.nan], index=[0, 1])
  573. result = np.nansum(ser)
  574. tm.assert_almost_equal(result, 1)
  575. @pytest.mark.parametrize("use_bottleneck", [True, False])
  576. @pytest.mark.parametrize("dtype", ["int32", "int64"])
  577. def test_sum_overflow_int(self, use_bottleneck, dtype):
  578. with pd.option_context("use_bottleneck", use_bottleneck):
  579. # GH#6915
  580. # overflowing on the smaller int dtypes
  581. v = np.arange(5000000, dtype=dtype)
  582. s = Series(v)
  583. result = s.sum(skipna=False)
  584. assert int(result) == v.sum(dtype="int64")
  585. result = s.min(skipna=False)
  586. assert int(result) == 0
  587. result = s.max(skipna=False)
  588. assert int(result) == v[-1]
  589. @pytest.mark.parametrize("use_bottleneck", [True, False])
  590. @pytest.mark.parametrize("dtype", ["float32", "float64"])
  591. def test_sum_overflow_float(self, use_bottleneck, dtype):
  592. with pd.option_context("use_bottleneck", use_bottleneck):
  593. v = np.arange(5000000, dtype=dtype)
  594. s = Series(v)
  595. result = s.sum(skipna=False)
  596. assert result == v.sum(dtype=dtype)
  597. result = s.min(skipna=False)
  598. assert np.allclose(float(result), 0.0)
  599. result = s.max(skipna=False)
  600. assert np.allclose(float(result), v[-1])
  601. def test_mean_masked_overflow(self):
  602. # GH#48378
  603. val = 100_000_000_000_000_000
  604. n_elements = 100
  605. na = np.array([val] * n_elements)
  606. ser = Series([val] * n_elements, dtype="Int64")
  607. result_numpy = np.mean(na)
  608. result_masked = ser.mean()
  609. assert result_masked - result_numpy == 0
  610. assert result_masked == 1e17
  611. @pytest.mark.parametrize("ddof, exp", [(1, 2.5), (0, 2.0)])
  612. def test_var_masked_array(self, ddof, exp):
  613. # GH#48379
  614. ser = Series([1, 2, 3, 4, 5], dtype="Int64")
  615. ser_numpy_dtype = Series([1, 2, 3, 4, 5], dtype="int64")
  616. result = ser.var(ddof=ddof)
  617. result_numpy_dtype = ser_numpy_dtype.var(ddof=ddof)
  618. assert result == result_numpy_dtype
  619. assert result == exp
  620. @pytest.mark.parametrize("dtype", ("m8[ns]", "m8[ns]", "M8[ns]", "M8[ns, UTC]"))
  621. @pytest.mark.parametrize("skipna", [True, False])
  622. def test_empty_timeseries_reductions_return_nat(self, dtype, skipna):
  623. # covers GH#11245
  624. assert Series([], dtype=dtype).min(skipna=skipna) is NaT
  625. assert Series([], dtype=dtype).max(skipna=skipna) is NaT
  626. def test_numpy_argmin(self):
  627. # See GH#16830
  628. data = np.arange(1, 11)
  629. s = Series(data, index=data)
  630. result = np.argmin(s)
  631. expected = np.argmin(data)
  632. assert result == expected
  633. result = s.argmin()
  634. assert result == expected
  635. msg = "the 'out' parameter is not supported"
  636. with pytest.raises(ValueError, match=msg):
  637. np.argmin(s, out=data)
  638. def test_numpy_argmax(self):
  639. # See GH#16830
  640. data = np.arange(1, 11)
  641. s = Series(data, index=data)
  642. result = np.argmax(s)
  643. expected = np.argmax(data)
  644. assert result == expected
  645. result = s.argmax()
  646. assert result == expected
  647. msg = "the 'out' parameter is not supported"
  648. with pytest.raises(ValueError, match=msg):
  649. np.argmax(s, out=data)
  650. def test_idxmin(self):
  651. # test idxmin
  652. # _check_stat_op approach can not be used here because of isna check.
  653. string_series = tm.makeStringSeries().rename("series")
  654. # add some NaNs
  655. string_series[5:15] = np.NaN
  656. # skipna or no
  657. assert string_series[string_series.idxmin()] == string_series.min()
  658. assert isna(string_series.idxmin(skipna=False))
  659. # no NaNs
  660. nona = string_series.dropna()
  661. assert nona[nona.idxmin()] == nona.min()
  662. assert nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin()
  663. # all NaNs
  664. allna = string_series * np.nan
  665. assert isna(allna.idxmin())
  666. # datetime64[ns]
  667. s = Series(date_range("20130102", periods=6))
  668. result = s.idxmin()
  669. assert result == 0
  670. s[0] = np.nan
  671. result = s.idxmin()
  672. assert result == 1
  673. def test_idxmax(self):
  674. # test idxmax
  675. # _check_stat_op approach can not be used here because of isna check.
  676. string_series = tm.makeStringSeries().rename("series")
  677. # add some NaNs
  678. string_series[5:15] = np.NaN
  679. # skipna or no
  680. assert string_series[string_series.idxmax()] == string_series.max()
  681. assert isna(string_series.idxmax(skipna=False))
  682. # no NaNs
  683. nona = string_series.dropna()
  684. assert nona[nona.idxmax()] == nona.max()
  685. assert nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax()
  686. # all NaNs
  687. allna = string_series * np.nan
  688. assert isna(allna.idxmax())
  689. s = Series(date_range("20130102", periods=6))
  690. result = s.idxmax()
  691. assert result == 5
  692. s[5] = np.nan
  693. result = s.idxmax()
  694. assert result == 4
  695. # Index with float64 dtype
  696. # GH#5914
  697. s = Series([1, 2, 3], [1.1, 2.1, 3.1])
  698. result = s.idxmax()
  699. assert result == 3.1
  700. result = s.idxmin()
  701. assert result == 1.1
  702. s = Series(s.index, s.index)
  703. result = s.idxmax()
  704. assert result == 3.1
  705. result = s.idxmin()
  706. assert result == 1.1
  707. def test_all_any(self):
  708. ts = tm.makeTimeSeries()
  709. bool_series = ts > 0
  710. assert not bool_series.all()
  711. assert bool_series.any()
  712. # Alternative types, with implicit 'object' dtype.
  713. s = Series(["abc", True])
  714. assert s.any()
  715. def test_numpy_all_any(self, index_or_series):
  716. # GH#40180
  717. idx = index_or_series([0, 1, 2])
  718. assert not np.all(idx)
  719. assert np.any(idx)
  720. idx = Index([1, 2, 3])
  721. assert np.all(idx)
  722. def test_all_any_skipna(self):
  723. # Check skipna, with implicit 'object' dtype.
  724. s1 = Series([np.nan, True])
  725. s2 = Series([np.nan, False])
  726. assert s1.all(skipna=False) # nan && True => True
  727. assert s1.all(skipna=True)
  728. assert s2.any(skipna=False)
  729. assert not s2.any(skipna=True)
  730. def test_all_any_bool_only(self):
  731. s = Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2])
  732. # GH#47500 - test bool_only works
  733. assert s.any(bool_only=True)
  734. assert not s.all(bool_only=True)
  735. @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
  736. @pytest.mark.parametrize("skipna", [True, False])
  737. def test_any_all_object_dtype(self, bool_agg_func, skipna):
  738. # GH#12863
  739. ser = Series(["a", "b", "c", "d", "e"], dtype=object)
  740. result = getattr(ser, bool_agg_func)(skipna=skipna)
  741. expected = True
  742. assert result == expected
  743. @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
  744. @pytest.mark.parametrize(
  745. "data", [[False, None], [None, False], [False, np.nan], [np.nan, False]]
  746. )
  747. def test_any_all_object_dtype_missing(self, data, bool_agg_func):
  748. # GH#27709
  749. ser = Series(data)
  750. result = getattr(ser, bool_agg_func)(skipna=False)
  751. # None is treated is False, but np.nan is treated as True
  752. expected = bool_agg_func == "any" and None not in data
  753. assert result == expected
  754. @pytest.mark.parametrize("dtype", ["boolean", "Int64", "UInt64", "Float64"])
  755. @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
  756. @pytest.mark.parametrize("skipna", [True, False])
  757. @pytest.mark.parametrize(
  758. # expected_data indexed as [[skipna=False/any, skipna=False/all],
  759. # [skipna=True/any, skipna=True/all]]
  760. "data,expected_data",
  761. [
  762. ([0, 0, 0], [[False, False], [False, False]]),
  763. ([1, 1, 1], [[True, True], [True, True]]),
  764. ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]),
  765. ([0, pd.NA, 0], [[pd.NA, False], [False, False]]),
  766. ([1, pd.NA, 1], [[True, pd.NA], [True, True]]),
  767. ([1, pd.NA, 0], [[True, False], [True, False]]),
  768. ],
  769. )
  770. def test_any_all_nullable_kleene_logic(
  771. self, bool_agg_func, skipna, data, dtype, expected_data
  772. ):
  773. # GH-37506, GH-41967
  774. ser = Series(data, dtype=dtype)
  775. expected = expected_data[skipna][bool_agg_func == "all"]
  776. result = getattr(ser, bool_agg_func)(skipna=skipna)
  777. assert (result is pd.NA and expected is pd.NA) or result == expected
  778. def test_any_axis1_bool_only(self):
  779. # GH#32432
  780. df = DataFrame({"A": [True, False], "B": [1, 2]})
  781. result = df.any(axis=1, bool_only=True)
  782. expected = Series([True, False])
  783. tm.assert_series_equal(result, expected)
  784. def test_any_all_datetimelike(self):
  785. # GH#38723 these may not be the desired long-term behavior (GH#34479)
  786. # but in the interim should be internally consistent
  787. dta = date_range("1995-01-02", periods=3)._data
  788. ser = Series(dta)
  789. df = DataFrame(ser)
  790. msg = "'(any|all)' with datetime64 dtypes is deprecated"
  791. with tm.assert_produces_warning(FutureWarning, match=msg):
  792. # GH#34479
  793. assert dta.all()
  794. assert dta.any()
  795. assert ser.all()
  796. assert ser.any()
  797. assert df.any().all()
  798. assert df.all().all()
  799. dta = dta.tz_localize("UTC")
  800. ser = Series(dta)
  801. df = DataFrame(ser)
  802. with tm.assert_produces_warning(FutureWarning, match=msg):
  803. # GH#34479
  804. assert dta.all()
  805. assert dta.any()
  806. assert ser.all()
  807. assert ser.any()
  808. assert df.any().all()
  809. assert df.all().all()
  810. tda = dta - dta[0]
  811. ser = Series(tda)
  812. df = DataFrame(ser)
  813. assert tda.any()
  814. assert not tda.all()
  815. assert ser.any()
  816. assert not ser.all()
  817. assert df.any().all()
  818. assert not df.all().any()
  819. def test_timedelta64_analytics(self):
  820. # index min/max
  821. dti = date_range("2012-1-1", periods=3, freq="D")
  822. td = Series(dti) - Timestamp("20120101")
  823. result = td.idxmin()
  824. assert result == 0
  825. result = td.idxmax()
  826. assert result == 2
  827. # GH#2982
  828. # with NaT
  829. td[0] = np.nan
  830. result = td.idxmin()
  831. assert result == 1
  832. result = td.idxmax()
  833. assert result == 2
  834. # abs
  835. s1 = Series(date_range("20120101", periods=3))
  836. s2 = Series(date_range("20120102", periods=3))
  837. expected = Series(s2 - s1)
  838. result = np.abs(s1 - s2)
  839. tm.assert_series_equal(result, expected)
  840. result = (s1 - s2).abs()
  841. tm.assert_series_equal(result, expected)
  842. # max/min
  843. result = td.max()
  844. expected = Timedelta("2 days")
  845. assert result == expected
  846. result = td.min()
  847. expected = Timedelta("1 days")
  848. assert result == expected
  849. @pytest.mark.parametrize(
  850. "test_input,error_type",
  851. [
  852. (Series([], dtype="float64"), ValueError),
  853. # For strings, or any Series with dtype 'O'
  854. (Series(["foo", "bar", "baz"]), TypeError),
  855. (Series([(1,), (2,)]), TypeError),
  856. # For mixed data types
  857. (Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]), TypeError),
  858. ],
  859. )
  860. def test_assert_idxminmax_raises(self, test_input, error_type):
  861. """
  862. Cases where ``Series.argmax`` and related should raise an exception
  863. """
  864. msg = (
  865. "reduction operation 'argmin' not allowed for this dtype|"
  866. "attempt to get argmin of an empty sequence"
  867. )
  868. with pytest.raises(error_type, match=msg):
  869. test_input.idxmin()
  870. with pytest.raises(error_type, match=msg):
  871. test_input.idxmin(skipna=False)
  872. msg = (
  873. "reduction operation 'argmax' not allowed for this dtype|"
  874. "attempt to get argmax of an empty sequence"
  875. )
  876. with pytest.raises(error_type, match=msg):
  877. test_input.idxmax()
  878. with pytest.raises(error_type, match=msg):
  879. test_input.idxmax(skipna=False)
  880. def test_idxminmax_with_inf(self):
  881. # For numeric data with NA and Inf (GH #13595)
  882. s = Series([0, -np.inf, np.inf, np.nan])
  883. assert s.idxmin() == 1
  884. assert np.isnan(s.idxmin(skipna=False))
  885. assert s.idxmax() == 2
  886. assert np.isnan(s.idxmax(skipna=False))
  887. # Using old-style behavior that treats floating point nan, -inf, and
  888. # +inf as missing
  889. with pd.option_context("mode.use_inf_as_na", True):
  890. assert s.idxmin() == 0
  891. assert np.isnan(s.idxmin(skipna=False))
  892. assert s.idxmax() == 0
  893. np.isnan(s.idxmax(skipna=False))
  894. class TestDatetime64SeriesReductions:
  895. # Note: the name TestDatetime64SeriesReductions indicates these tests
  896. # were moved from a series-specific test file, _not_ that these tests are
  897. # intended long-term to be series-specific
  898. @pytest.mark.parametrize(
  899. "nat_ser",
  900. [
  901. Series([NaT, NaT]),
  902. Series([NaT, Timedelta("nat")]),
  903. Series([Timedelta("nat"), Timedelta("nat")]),
  904. ],
  905. )
  906. def test_minmax_nat_series(self, nat_ser):
  907. # GH#23282
  908. assert nat_ser.min() is NaT
  909. assert nat_ser.max() is NaT
  910. assert nat_ser.min(skipna=False) is NaT
  911. assert nat_ser.max(skipna=False) is NaT
  912. @pytest.mark.parametrize(
  913. "nat_df",
  914. [
  915. DataFrame([NaT, NaT]),
  916. DataFrame([NaT, Timedelta("nat")]),
  917. DataFrame([Timedelta("nat"), Timedelta("nat")]),
  918. ],
  919. )
  920. def test_minmax_nat_dataframe(self, nat_df):
  921. # GH#23282
  922. assert nat_df.min()[0] is NaT
  923. assert nat_df.max()[0] is NaT
  924. assert nat_df.min(skipna=False)[0] is NaT
  925. assert nat_df.max(skipna=False)[0] is NaT
  926. def test_min_max(self):
  927. rng = date_range("1/1/2000", "12/31/2000")
  928. rng2 = rng.take(np.random.permutation(len(rng)))
  929. the_min = rng2.min()
  930. the_max = rng2.max()
  931. assert isinstance(the_min, Timestamp)
  932. assert isinstance(the_max, Timestamp)
  933. assert the_min == rng[0]
  934. assert the_max == rng[-1]
  935. assert rng.min() == rng[0]
  936. assert rng.max() == rng[-1]
  937. def test_min_max_series(self):
  938. rng = date_range("1/1/2000", periods=10, freq="4h")
  939. lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"]
  940. df = DataFrame({"TS": rng, "V": np.random.randn(len(rng)), "L": lvls})
  941. result = df.TS.max()
  942. exp = Timestamp(df.TS.iat[-1])
  943. assert isinstance(result, Timestamp)
  944. assert result == exp
  945. result = df.TS.min()
  946. exp = Timestamp(df.TS.iat[0])
  947. assert isinstance(result, Timestamp)
  948. assert result == exp
  949. class TestCategoricalSeriesReductions:
  950. # Note: the name TestCategoricalSeriesReductions indicates these tests
  951. # were moved from a series-specific test file, _not_ that these tests are
  952. # intended long-term to be series-specific
  953. @pytest.mark.parametrize("function", ["min", "max"])
  954. def test_min_max_unordered_raises(self, function):
  955. # unordered cats have no min/max
  956. cat = Series(Categorical(["a", "b", "c", "d"], ordered=False))
  957. msg = f"Categorical is not ordered for operation {function}"
  958. with pytest.raises(TypeError, match=msg):
  959. getattr(cat, function)()
  960. @pytest.mark.parametrize(
  961. "values, categories",
  962. [
  963. (list("abc"), list("abc")),
  964. (list("abc"), list("cba")),
  965. (list("abc") + [np.nan], list("cba")),
  966. ([1, 2, 3], [3, 2, 1]),
  967. ([1, 2, 3, np.nan], [3, 2, 1]),
  968. ],
  969. )
  970. @pytest.mark.parametrize("function", ["min", "max"])
  971. def test_min_max_ordered(self, values, categories, function):
  972. # GH 25303
  973. cat = Series(Categorical(values, categories=categories, ordered=True))
  974. result = getattr(cat, function)(skipna=True)
  975. expected = categories[0] if function == "min" else categories[2]
  976. assert result == expected
  977. @pytest.mark.parametrize("function", ["min", "max"])
  978. @pytest.mark.parametrize("skipna", [True, False])
  979. def test_min_max_ordered_with_nan_only(self, function, skipna):
  980. # https://github.com/pandas-dev/pandas/issues/33450
  981. cat = Series(Categorical([np.nan], categories=[1, 2], ordered=True))
  982. result = getattr(cat, function)(skipna=skipna)
  983. assert result is np.nan
  984. @pytest.mark.parametrize("function", ["min", "max"])
  985. @pytest.mark.parametrize("skipna", [True, False])
  986. def test_min_max_skipna(self, function, skipna):
  987. cat = Series(
  988. Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True)
  989. )
  990. result = getattr(cat, function)(skipna=skipna)
  991. if skipna is True:
  992. expected = "b" if function == "min" else "a"
  993. assert result == expected
  994. else:
  995. assert result is np.nan
  996. class TestSeriesMode:
  997. # Note: the name TestSeriesMode indicates these tests
  998. # were moved from a series-specific test file, _not_ that these tests are
  999. # intended long-term to be series-specific
  1000. @pytest.mark.parametrize(
  1001. "dropna, expected",
  1002. [(True, Series([], dtype=np.float64)), (False, Series([], dtype=np.float64))],
  1003. )
  1004. def test_mode_empty(self, dropna, expected):
  1005. s = Series([], dtype=np.float64)
  1006. result = s.mode(dropna)
  1007. tm.assert_series_equal(result, expected)
  1008. @pytest.mark.parametrize(
  1009. "dropna, data, expected",
  1010. [
  1011. (True, [1, 1, 1, 2], [1]),
  1012. (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
  1013. (False, [1, 1, 1, 2], [1]),
  1014. (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
  1015. ],
  1016. )
  1017. @pytest.mark.parametrize(
  1018. "dt", list(np.typecodes["AllInteger"] + np.typecodes["Float"])
  1019. )
  1020. def test_mode_numerical(self, dropna, data, expected, dt):
  1021. s = Series(data, dtype=dt)
  1022. result = s.mode(dropna)
  1023. expected = Series(expected, dtype=dt)
  1024. tm.assert_series_equal(result, expected)
  1025. @pytest.mark.parametrize("dropna, expected", [(True, [1.0]), (False, [1, np.nan])])
  1026. def test_mode_numerical_nan(self, dropna, expected):
  1027. s = Series([1, 1, 2, np.nan, np.nan])
  1028. result = s.mode(dropna)
  1029. expected = Series(expected)
  1030. tm.assert_series_equal(result, expected)
  1031. @pytest.mark.parametrize(
  1032. "dropna, expected1, expected2, expected3",
  1033. [(True, ["b"], ["bar"], ["nan"]), (False, ["b"], [np.nan], ["nan"])],
  1034. )
  1035. def test_mode_str_obj(self, dropna, expected1, expected2, expected3):
  1036. # Test string and object types.
  1037. data = ["a"] * 2 + ["b"] * 3
  1038. s = Series(data, dtype="c")
  1039. result = s.mode(dropna)
  1040. expected1 = Series(expected1, dtype="c")
  1041. tm.assert_series_equal(result, expected1)
  1042. data = ["foo", "bar", "bar", np.nan, np.nan, np.nan]
  1043. s = Series(data, dtype=object)
  1044. result = s.mode(dropna)
  1045. expected2 = Series(expected2, dtype=object)
  1046. tm.assert_series_equal(result, expected2)
  1047. data = ["foo", "bar", "bar", np.nan, np.nan, np.nan]
  1048. s = Series(data, dtype=object).astype(str)
  1049. result = s.mode(dropna)
  1050. expected3 = Series(expected3, dtype=str)
  1051. tm.assert_series_equal(result, expected3)
  1052. @pytest.mark.parametrize(
  1053. "dropna, expected1, expected2",
  1054. [(True, ["foo"], ["foo"]), (False, ["foo"], [np.nan])],
  1055. )
  1056. def test_mode_mixeddtype(self, dropna, expected1, expected2):
  1057. s = Series([1, "foo", "foo"])
  1058. result = s.mode(dropna)
  1059. expected = Series(expected1)
  1060. tm.assert_series_equal(result, expected)
  1061. s = Series([1, "foo", "foo", np.nan, np.nan, np.nan])
  1062. result = s.mode(dropna)
  1063. expected = Series(expected2, dtype=object)
  1064. tm.assert_series_equal(result, expected)
  1065. @pytest.mark.parametrize(
  1066. "dropna, expected1, expected2",
  1067. [
  1068. (
  1069. True,
  1070. ["1900-05-03", "2011-01-03", "2013-01-02"],
  1071. ["2011-01-03", "2013-01-02"],
  1072. ),
  1073. (False, [np.nan], [np.nan, "2011-01-03", "2013-01-02"]),
  1074. ],
  1075. )
  1076. def test_mode_datetime(self, dropna, expected1, expected2):
  1077. s = Series(
  1078. ["2011-01-03", "2013-01-02", "1900-05-03", "nan", "nan"], dtype="M8[ns]"
  1079. )
  1080. result = s.mode(dropna)
  1081. expected1 = Series(expected1, dtype="M8[ns]")
  1082. tm.assert_series_equal(result, expected1)
  1083. s = Series(
  1084. [
  1085. "2011-01-03",
  1086. "2013-01-02",
  1087. "1900-05-03",
  1088. "2011-01-03",
  1089. "2013-01-02",
  1090. "nan",
  1091. "nan",
  1092. ],
  1093. dtype="M8[ns]",
  1094. )
  1095. result = s.mode(dropna)
  1096. expected2 = Series(expected2, dtype="M8[ns]")
  1097. tm.assert_series_equal(result, expected2)
  1098. @pytest.mark.parametrize(
  1099. "dropna, expected1, expected2",
  1100. [
  1101. (True, ["-1 days", "0 days", "1 days"], ["2 min", "1 day"]),
  1102. (False, [np.nan], [np.nan, "2 min", "1 day"]),
  1103. ],
  1104. )
  1105. def test_mode_timedelta(self, dropna, expected1, expected2):
  1106. # gh-5986: Test timedelta types.
  1107. s = Series(
  1108. ["1 days", "-1 days", "0 days", "nan", "nan"], dtype="timedelta64[ns]"
  1109. )
  1110. result = s.mode(dropna)
  1111. expected1 = Series(expected1, dtype="timedelta64[ns]")
  1112. tm.assert_series_equal(result, expected1)
  1113. s = Series(
  1114. [
  1115. "1 day",
  1116. "1 day",
  1117. "-1 day",
  1118. "-1 day 2 min",
  1119. "2 min",
  1120. "2 min",
  1121. "nan",
  1122. "nan",
  1123. ],
  1124. dtype="timedelta64[ns]",
  1125. )
  1126. result = s.mode(dropna)
  1127. expected2 = Series(expected2, dtype="timedelta64[ns]")
  1128. tm.assert_series_equal(result, expected2)
  1129. @pytest.mark.parametrize(
  1130. "dropna, expected1, expected2, expected3",
  1131. [
  1132. (
  1133. True,
  1134. Categorical([1, 2], categories=[1, 2]),
  1135. Categorical(["a"], categories=[1, "a"]),
  1136. Categorical([3, 1], categories=[3, 2, 1], ordered=True),
  1137. ),
  1138. (
  1139. False,
  1140. Categorical([np.nan], categories=[1, 2]),
  1141. Categorical([np.nan, "a"], categories=[1, "a"]),
  1142. Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True),
  1143. ),
  1144. ],
  1145. )
  1146. def test_mode_category(self, dropna, expected1, expected2, expected3):
  1147. s = Series(Categorical([1, 2, np.nan, np.nan]))
  1148. result = s.mode(dropna)
  1149. expected1 = Series(expected1, dtype="category")
  1150. tm.assert_series_equal(result, expected1)
  1151. s = Series(Categorical([1, "a", "a", np.nan, np.nan]))
  1152. result = s.mode(dropna)
  1153. expected2 = Series(expected2, dtype="category")
  1154. tm.assert_series_equal(result, expected2)
  1155. s = Series(
  1156. Categorical(
  1157. [1, 1, 2, 3, 3, np.nan, np.nan], categories=[3, 2, 1], ordered=True
  1158. )
  1159. )
  1160. result = s.mode(dropna)
  1161. expected3 = Series(expected3, dtype="category")
  1162. tm.assert_series_equal(result, expected3)
  1163. @pytest.mark.parametrize(
  1164. "dropna, expected1, expected2",
  1165. [(True, [2**63], [1, 2**63]), (False, [2**63], [1, 2**63])],
  1166. )
  1167. def test_mode_intoverflow(self, dropna, expected1, expected2):
  1168. # Test for uint64 overflow.
  1169. s = Series([1, 2**63, 2**63], dtype=np.uint64)
  1170. result = s.mode(dropna)
  1171. expected1 = Series(expected1, dtype=np.uint64)
  1172. tm.assert_series_equal(result, expected1)
  1173. s = Series([1, 2**63], dtype=np.uint64)
  1174. result = s.mode(dropna)
  1175. expected2 = Series(expected2, dtype=np.uint64)
  1176. tm.assert_series_equal(result, expected2)
  1177. def test_mode_sortwarning(self):
  1178. # Check for the warning that is raised when the mode
  1179. # results cannot be sorted
  1180. expected = Series(["foo", np.nan])
  1181. s = Series([1, "foo", "foo", np.nan, np.nan])
  1182. with tm.assert_produces_warning(UserWarning):
  1183. result = s.mode(dropna=False)
  1184. result = result.sort_values().reset_index(drop=True)
  1185. tm.assert_series_equal(result, expected)
  1186. def test_mode_boolean_with_na(self):
  1187. # GH#42107
  1188. ser = Series([True, False, True, pd.NA], dtype="boolean")
  1189. result = ser.mode()
  1190. expected = Series({0: True}, dtype="boolean")
  1191. tm.assert_series_equal(result, expected)
  1192. @pytest.mark.parametrize(
  1193. "array,expected,dtype",
  1194. [
  1195. (
  1196. [0, 1j, 1, 1, 1 + 1j, 1 + 2j],
  1197. Series([1], dtype=np.complex128),
  1198. np.complex128,
  1199. ),
  1200. (
  1201. [0, 1j, 1, 1, 1 + 1j, 1 + 2j],
  1202. Series([1], dtype=np.complex64),
  1203. np.complex64,
  1204. ),
  1205. (
  1206. [1 + 1j, 2j, 1 + 1j],
  1207. Series([1 + 1j], dtype=np.complex128),
  1208. np.complex128,
  1209. ),
  1210. ],
  1211. )
  1212. def test_single_mode_value_complex(self, array, expected, dtype):
  1213. result = Series(array, dtype=dtype).mode()
  1214. tm.assert_series_equal(result, expected)
  1215. @pytest.mark.parametrize(
  1216. "array,expected,dtype",
  1217. [
  1218. (
  1219. # no modes
  1220. [0, 1j, 1, 1 + 1j, 1 + 2j],
  1221. Series([0j, 1j, 1 + 0j, 1 + 1j, 1 + 2j], dtype=np.complex128),
  1222. np.complex128,
  1223. ),
  1224. (
  1225. [1 + 1j, 2j, 1 + 1j, 2j, 3],
  1226. Series([2j, 1 + 1j], dtype=np.complex64),
  1227. np.complex64,
  1228. ),
  1229. ],
  1230. )
  1231. def test_multimode_complex(self, array, expected, dtype):
  1232. # GH 17927
  1233. # mode tries to sort multimodal series.
  1234. # Complex numbers are sorted by their magnitude
  1235. result = Series(array, dtype=dtype).mode()
  1236. tm.assert_series_equal(result, expected)