# test_resample_api.py
  1. from datetime import datetime
  2. import numpy as np
  3. import pytest
  4. from pandas._libs import lib
  5. from pandas.errors import UnsupportedFunctionCall
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. NamedAgg,
  10. Series,
  11. )
  12. import pandas._testing as tm
  13. from pandas.core.indexes.datetimes import date_range
  14. dti = date_range(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="Min")
  15. test_series = Series(np.random.rand(len(dti)), dti)
  16. _test_frame = DataFrame({"A": test_series, "B": test_series, "C": np.arange(len(dti))})
  17. @pytest.fixture
  18. def test_frame():
  19. return _test_frame.copy()
  20. def test_str():
  21. r = test_series.resample("H")
  22. assert (
  23. "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
  24. "label=left, convention=start, origin=start_day]" in str(r)
  25. )
  26. r = test_series.resample("H", origin="2000-01-01")
  27. assert (
  28. "DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, "
  29. "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r)
  30. )
  31. def test_api():
  32. r = test_series.resample("H")
  33. result = r.mean()
  34. assert isinstance(result, Series)
  35. assert len(result) == 217
  36. r = test_series.to_frame().resample("H")
  37. result = r.mean()
  38. assert isinstance(result, DataFrame)
  39. assert len(result) == 217
  40. def test_groupby_resample_api():
  41. # GH 12448
  42. # .groupby(...).resample(...) hitting warnings
  43. # when appropriate
  44. df = DataFrame(
  45. {
  46. "date": date_range(start="2016-01-01", periods=4, freq="W"),
  47. "group": [1, 1, 2, 2],
  48. "val": [5, 6, 7, 8],
  49. }
  50. ).set_index("date")
  51. # replication step
  52. i = (
  53. date_range("2016-01-03", periods=8).tolist()
  54. + date_range("2016-01-17", periods=8).tolist()
  55. )
  56. index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i], names=["group", "date"])
  57. expected = DataFrame({"val": [5] * 7 + [6] + [7] * 7 + [8]}, index=index)
  58. result = df.groupby("group").apply(lambda x: x.resample("1D").ffill())[["val"]]
  59. tm.assert_frame_equal(result, expected)
  60. def test_groupby_resample_on_api():
  61. # GH 15021
  62. # .groupby(...).resample(on=...) results in an unexpected
  63. # keyword warning.
  64. df = DataFrame(
  65. {
  66. "key": ["A", "B"] * 5,
  67. "dates": date_range("2016-01-01", periods=10),
  68. "values": np.random.randn(10),
  69. }
  70. )
  71. expected = df.set_index("dates").groupby("key").resample("D").mean()
  72. result = df.groupby("key").resample("D", on="dates").mean()
  73. tm.assert_frame_equal(result, expected)
  74. def test_resample_group_keys():
  75. df = DataFrame({"A": 1, "B": 2}, index=date_range("2000", periods=10))
  76. expected = df.copy()
  77. # group_keys=False
  78. g = df.resample("5D", group_keys=False)
  79. result = g.apply(lambda x: x)
  80. tm.assert_frame_equal(result, expected)
  81. # group_keys defaults to False
  82. g = df.resample("5D")
  83. result = g.apply(lambda x: x)
  84. tm.assert_frame_equal(result, expected)
  85. # group_keys=True
  86. expected.index = pd.MultiIndex.from_arrays(
  87. [pd.to_datetime(["2000-01-01", "2000-01-06"]).repeat(5), expected.index]
  88. )
  89. g = df.resample("5D", group_keys=True)
  90. result = g.apply(lambda x: x)
  91. tm.assert_frame_equal(result, expected)
  92. def test_pipe(test_frame):
  93. # GH17905
  94. # series
  95. r = test_series.resample("H")
  96. expected = r.max() - r.mean()
  97. result = r.pipe(lambda x: x.max() - x.mean())
  98. tm.assert_series_equal(result, expected)
  99. # dataframe
  100. r = test_frame.resample("H")
  101. expected = r.max() - r.mean()
  102. result = r.pipe(lambda x: x.max() - x.mean())
  103. tm.assert_frame_equal(result, expected)
  104. def test_getitem(test_frame):
  105. r = test_frame.resample("H")
  106. tm.assert_index_equal(r._selected_obj.columns, test_frame.columns)
  107. r = test_frame.resample("H")["B"]
  108. assert r._selected_obj.name == test_frame.columns[1]
  109. # technically this is allowed
  110. r = test_frame.resample("H")["A", "B"]
  111. tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
  112. r = test_frame.resample("H")["A", "B"]
  113. tm.assert_index_equal(r._selected_obj.columns, test_frame.columns[[0, 1]])
  114. @pytest.mark.parametrize("key", [["D"], ["A", "D"]])
  115. def test_select_bad_cols(key, test_frame):
  116. g = test_frame.resample("H")
  117. # 'A' should not be referenced as a bad column...
  118. # will have to rethink regex if you change message!
  119. msg = r"^\"Columns not found: 'D'\"$"
  120. with pytest.raises(KeyError, match=msg):
  121. g[key]
  122. def test_attribute_access(test_frame):
  123. r = test_frame.resample("H")
  124. tm.assert_series_equal(r.A.sum(), r["A"].sum())
  125. @pytest.mark.parametrize("attr", ["groups", "ngroups", "indices"])
  126. def test_api_compat_before_use(attr):
  127. # make sure that we are setting the binner
  128. # on these attributes
  129. rng = date_range("1/1/2012", periods=100, freq="S")
  130. ts = Series(np.arange(len(rng)), index=rng)
  131. rs = ts.resample("30s")
  132. # before use
  133. getattr(rs, attr)
  134. # after grouper is initialized is ok
  135. rs.mean()
  136. getattr(rs, attr)
  137. def tests_raises_on_nuisance(test_frame):
  138. df = test_frame
  139. df["D"] = "foo"
  140. r = df.resample("H")
  141. result = r[["A", "B"]].mean()
  142. expected = pd.concat([r.A.mean(), r.B.mean()], axis=1)
  143. tm.assert_frame_equal(result, expected)
  144. expected = r[["A", "B", "C"]].mean()
  145. with pytest.raises(TypeError, match="Could not convert"):
  146. r.mean()
  147. result = r.mean(numeric_only=True)
  148. tm.assert_frame_equal(result, expected)
  149. def test_downsample_but_actually_upsampling():
  150. # this is reindex / asfreq
  151. rng = date_range("1/1/2012", periods=100, freq="S")
  152. ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
  153. result = ts.resample("20s").asfreq()
  154. expected = Series(
  155. [0, 20, 40, 60, 80],
  156. index=date_range("2012-01-01 00:00:00", freq="20s", periods=5),
  157. )
  158. tm.assert_series_equal(result, expected)
  159. def test_combined_up_downsampling_of_irregular():
  160. # since we are really doing an operation like this
  161. # ts2.resample('2s').mean().ffill()
  162. # preserve these semantics
  163. rng = date_range("1/1/2012", periods=100, freq="S")
  164. ts = Series(np.arange(len(rng)), index=rng)
  165. ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]
  166. result = ts2.resample("2s").mean().ffill()
  167. expected = Series(
  168. [
  169. 0.5,
  170. 2.5,
  171. 5.0,
  172. 7.0,
  173. 7.0,
  174. 11.0,
  175. 11.0,
  176. 15.0,
  177. 16.0,
  178. 16.0,
  179. 16.0,
  180. 16.0,
  181. 25.0,
  182. 25.0,
  183. 25.0,
  184. 30.0,
  185. ],
  186. index=pd.DatetimeIndex(
  187. [
  188. "2012-01-01 00:00:00",
  189. "2012-01-01 00:00:02",
  190. "2012-01-01 00:00:04",
  191. "2012-01-01 00:00:06",
  192. "2012-01-01 00:00:08",
  193. "2012-01-01 00:00:10",
  194. "2012-01-01 00:00:12",
  195. "2012-01-01 00:00:14",
  196. "2012-01-01 00:00:16",
  197. "2012-01-01 00:00:18",
  198. "2012-01-01 00:00:20",
  199. "2012-01-01 00:00:22",
  200. "2012-01-01 00:00:24",
  201. "2012-01-01 00:00:26",
  202. "2012-01-01 00:00:28",
  203. "2012-01-01 00:00:30",
  204. ],
  205. dtype="datetime64[ns]",
  206. freq="2S",
  207. ),
  208. )
  209. tm.assert_series_equal(result, expected)
  210. def test_transform_series():
  211. r = test_series.resample("20min")
  212. expected = test_series.groupby(pd.Grouper(freq="20min")).transform("mean")
  213. result = r.transform("mean")
  214. tm.assert_series_equal(result, expected)
  215. @pytest.mark.parametrize("on", [None, "date"])
  216. def test_transform_frame(on):
  217. # GH#47079
  218. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  219. index.name = "date"
  220. df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
  221. expected = df.groupby(pd.Grouper(freq="20min")).transform("mean")
  222. if on == "date":
  223. # Move date to being a column; result will then have a RangeIndex
  224. expected = expected.reset_index(drop=True)
  225. df = df.reset_index()
  226. r = df.resample("20min", on=on)
  227. result = r.transform("mean")
  228. tm.assert_frame_equal(result, expected)
  229. def test_fillna():
  230. # need to upsample here
  231. rng = date_range("1/1/2012", periods=10, freq="2S")
  232. ts = Series(np.arange(len(rng), dtype="int64"), index=rng)
  233. r = ts.resample("s")
  234. expected = r.ffill()
  235. result = r.fillna(method="ffill")
  236. tm.assert_series_equal(result, expected)
  237. expected = r.bfill()
  238. result = r.fillna(method="bfill")
  239. tm.assert_series_equal(result, expected)
  240. msg = (
  241. r"Invalid fill method\. Expecting pad \(ffill\), backfill "
  242. r"\(bfill\) or nearest\. Got 0"
  243. )
  244. with pytest.raises(ValueError, match=msg):
  245. r.fillna(0)
  246. @pytest.mark.parametrize(
  247. "func",
  248. [
  249. lambda x: x.resample("20min", group_keys=False),
  250. lambda x: x.groupby(pd.Grouper(freq="20min"), group_keys=False),
  251. ],
  252. ids=["resample", "groupby"],
  253. )
  254. def test_apply_without_aggregation(func):
  255. # both resample and groupby should work w/o aggregation
  256. t = func(test_series)
  257. result = t.apply(lambda x: x)
  258. tm.assert_series_equal(result, test_series)
  259. def test_apply_without_aggregation2():
  260. grouped = test_series.to_frame(name="foo").resample("20min", group_keys=False)
  261. result = grouped["foo"].apply(lambda x: x)
  262. tm.assert_series_equal(result, test_series.rename("foo"))
  263. def test_agg_consistency():
  264. # make sure that we are consistent across
  265. # similar aggregations with and w/o selection list
  266. df = DataFrame(
  267. np.random.randn(1000, 3),
  268. index=date_range("1/1/2012", freq="S", periods=1000),
  269. columns=["A", "B", "C"],
  270. )
  271. r = df.resample("3T")
  272. msg = r"Column\(s\) \['r1', 'r2'\] do not exist"
  273. with pytest.raises(KeyError, match=msg):
  274. r.agg({"r1": "mean", "r2": "sum"})
  275. def test_agg_consistency_int_str_column_mix():
  276. # GH#39025
  277. df = DataFrame(
  278. np.random.randn(1000, 2),
  279. index=date_range("1/1/2012", freq="S", periods=1000),
  280. columns=[1, "a"],
  281. )
  282. r = df.resample("3T")
  283. msg = r"Column\(s\) \[2, 'b'\] do not exist"
  284. with pytest.raises(KeyError, match=msg):
  285. r.agg({2: "mean", "b": "sum"})
  286. # TODO(GH#14008): once GH 14008 is fixed, move these tests into
  287. # `Base` test class
  288. def test_agg():
  289. # test with all three Resampler apis and TimeGrouper
  290. np.random.seed(1234)
  291. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  292. index.name = "date"
  293. df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
  294. df_col = df.reset_index()
  295. df_mult = df_col.copy()
  296. df_mult.index = pd.MultiIndex.from_arrays(
  297. [range(10), df.index], names=["index", "date"]
  298. )
  299. r = df.resample("2D")
  300. cases = [
  301. r,
  302. df_col.resample("2D", on="date"),
  303. df_mult.resample("2D", level="date"),
  304. df.groupby(pd.Grouper(freq="2D")),
  305. ]
  306. a_mean = r["A"].mean()
  307. a_std = r["A"].std()
  308. a_sum = r["A"].sum()
  309. b_mean = r["B"].mean()
  310. b_std = r["B"].std()
  311. b_sum = r["B"].sum()
  312. expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
  313. expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]])
  314. for t in cases:
  315. # In case 2, "date" is an index and a column, so get included in the agg
  316. if t == cases[2]:
  317. date_mean = t["date"].mean()
  318. date_std = t["date"].std()
  319. exp = pd.concat([date_mean, date_std, expected], axis=1)
  320. exp.columns = pd.MultiIndex.from_product(
  321. [["date", "A", "B"], ["mean", "std"]]
  322. )
  323. result = t.aggregate([np.mean, np.std])
  324. tm.assert_frame_equal(result, exp)
  325. else:
  326. result = t.aggregate([np.mean, np.std])
  327. tm.assert_frame_equal(result, expected)
  328. expected = pd.concat([a_mean, b_std], axis=1)
  329. for t in cases:
  330. result = t.aggregate({"A": np.mean, "B": np.std})
  331. tm.assert_frame_equal(result, expected, check_like=True)
  332. result = t.aggregate(A=("A", np.mean), B=("B", np.std))
  333. tm.assert_frame_equal(result, expected, check_like=True)
  334. result = t.aggregate(A=NamedAgg("A", np.mean), B=NamedAgg("B", np.std))
  335. tm.assert_frame_equal(result, expected, check_like=True)
  336. expected = pd.concat([a_mean, a_std], axis=1)
  337. expected.columns = pd.MultiIndex.from_tuples([("A", "mean"), ("A", "std")])
  338. for t in cases:
  339. result = t.aggregate({"A": ["mean", "std"]})
  340. tm.assert_frame_equal(result, expected)
  341. expected = pd.concat([a_mean, a_sum], axis=1)
  342. expected.columns = ["mean", "sum"]
  343. for t in cases:
  344. result = t["A"].aggregate(["mean", "sum"])
  345. tm.assert_frame_equal(result, expected)
  346. result = t["A"].aggregate(mean="mean", sum="sum")
  347. tm.assert_frame_equal(result, expected)
  348. msg = "nested renamer is not supported"
  349. for t in cases:
  350. with pytest.raises(pd.errors.SpecificationError, match=msg):
  351. t.aggregate({"A": {"mean": "mean", "sum": "sum"}})
  352. expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
  353. expected.columns = pd.MultiIndex.from_tuples(
  354. [("A", "mean"), ("A", "sum"), ("B", "mean2"), ("B", "sum2")]
  355. )
  356. for t in cases:
  357. with pytest.raises(pd.errors.SpecificationError, match=msg):
  358. t.aggregate(
  359. {
  360. "A": {"mean": "mean", "sum": "sum"},
  361. "B": {"mean2": "mean", "sum2": "sum"},
  362. }
  363. )
  364. expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
  365. expected.columns = pd.MultiIndex.from_tuples(
  366. [("A", "mean"), ("A", "std"), ("B", "mean"), ("B", "std")]
  367. )
  368. for t in cases:
  369. result = t.aggregate({"A": ["mean", "std"], "B": ["mean", "std"]})
  370. tm.assert_frame_equal(result, expected, check_like=True)
  371. expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
  372. expected.columns = pd.MultiIndex.from_tuples(
  373. [
  374. ("r1", "A", "mean"),
  375. ("r1", "A", "sum"),
  376. ("r2", "B", "mean"),
  377. ("r2", "B", "sum"),
  378. ]
  379. )
  380. def test_agg_misc():
  381. # test with all three Resampler apis and TimeGrouper
  382. np.random.seed(1234)
  383. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  384. index.name = "date"
  385. df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
  386. df_col = df.reset_index()
  387. df_mult = df_col.copy()
  388. df_mult.index = pd.MultiIndex.from_arrays(
  389. [range(10), df.index], names=["index", "date"]
  390. )
  391. r = df.resample("2D")
  392. cases = [
  393. r,
  394. df_col.resample("2D", on="date"),
  395. df_mult.resample("2D", level="date"),
  396. df.groupby(pd.Grouper(freq="2D")),
  397. ]
  398. # passed lambda
  399. for t in cases:
  400. result = t.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)})
  401. rcustom = t["B"].apply(lambda x: np.std(x, ddof=1))
  402. expected = pd.concat([r["A"].sum(), rcustom], axis=1)
  403. tm.assert_frame_equal(result, expected, check_like=True)
  404. result = t.agg(A=("A", np.sum), B=("B", lambda x: np.std(x, ddof=1)))
  405. tm.assert_frame_equal(result, expected, check_like=True)
  406. result = t.agg(
  407. A=NamedAgg("A", np.sum), B=NamedAgg("B", lambda x: np.std(x, ddof=1))
  408. )
  409. tm.assert_frame_equal(result, expected, check_like=True)
  410. # agg with renamers
  411. expected = pd.concat(
  412. [t["A"].sum(), t["B"].sum(), t["A"].mean(), t["B"].mean()], axis=1
  413. )
  414. expected.columns = pd.MultiIndex.from_tuples(
  415. [("result1", "A"), ("result1", "B"), ("result2", "A"), ("result2", "B")]
  416. )
  417. msg = r"Column\(s\) \['result1', 'result2'\] do not exist"
  418. for t in cases:
  419. with pytest.raises(KeyError, match=msg):
  420. t[["A", "B"]].agg({"result1": np.sum, "result2": np.mean})
  421. with pytest.raises(KeyError, match=msg):
  422. t[["A", "B"]].agg(A=("result1", np.sum), B=("result2", np.mean))
  423. with pytest.raises(KeyError, match=msg):
  424. t[["A", "B"]].agg(
  425. A=NamedAgg("result1", np.sum), B=NamedAgg("result2", np.mean)
  426. )
  427. # agg with different hows
  428. expected = pd.concat(
  429. [t["A"].sum(), t["A"].std(), t["B"].mean(), t["B"].std()], axis=1
  430. )
  431. expected.columns = pd.MultiIndex.from_tuples(
  432. [("A", "sum"), ("A", "std"), ("B", "mean"), ("B", "std")]
  433. )
  434. for t in cases:
  435. result = t.agg({"A": ["sum", "std"], "B": ["mean", "std"]})
  436. tm.assert_frame_equal(result, expected, check_like=True)
  437. # equivalent of using a selection list / or not
  438. for t in cases:
  439. result = t[["A", "B"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
  440. tm.assert_frame_equal(result, expected, check_like=True)
  441. msg = "nested renamer is not supported"
  442. # series like aggs
  443. for t in cases:
  444. with pytest.raises(pd.errors.SpecificationError, match=msg):
  445. t["A"].agg({"A": ["sum", "std"]})
  446. with pytest.raises(pd.errors.SpecificationError, match=msg):
  447. t["A"].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
  448. # errors
  449. # invalid names in the agg specification
  450. msg = r"Column\(s\) \['B'\] do not exist"
  451. for t in cases:
  452. with pytest.raises(KeyError, match=msg):
  453. t[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]})
  454. @pytest.mark.parametrize(
  455. "func", [["min"], ["mean", "max"], {"A": "sum"}, {"A": "prod", "B": "median"}]
  456. )
  457. def test_multi_agg_axis_1_raises(func):
  458. # GH#46904
  459. np.random.seed(1234)
  460. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  461. index.name = "date"
  462. df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index).T
  463. res = df.resample("M", axis=1)
  464. with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"):
  465. res.agg(func)
  466. def test_agg_nested_dicts():
  467. np.random.seed(1234)
  468. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  469. index.name = "date"
  470. df = DataFrame(np.random.rand(10, 2), columns=list("AB"), index=index)
  471. df_col = df.reset_index()
  472. df_mult = df_col.copy()
  473. df_mult.index = pd.MultiIndex.from_arrays(
  474. [range(10), df.index], names=["index", "date"]
  475. )
  476. r = df.resample("2D")
  477. cases = [
  478. r,
  479. df_col.resample("2D", on="date"),
  480. df_mult.resample("2D", level="date"),
  481. df.groupby(pd.Grouper(freq="2D")),
  482. ]
  483. msg = "nested renamer is not supported"
  484. for t in cases:
  485. with pytest.raises(pd.errors.SpecificationError, match=msg):
  486. t.aggregate({"r1": {"A": ["mean", "sum"]}, "r2": {"B": ["mean", "sum"]}})
  487. for t in cases:
  488. with pytest.raises(pd.errors.SpecificationError, match=msg):
  489. t[["A", "B"]].agg(
  490. {"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}}
  491. )
  492. with pytest.raises(pd.errors.SpecificationError, match=msg):
  493. t.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})
  494. def test_try_aggregate_non_existing_column():
  495. # GH 16766
  496. data = [
  497. {"dt": datetime(2017, 6, 1, 0), "x": 1.0, "y": 2.0},
  498. {"dt": datetime(2017, 6, 1, 1), "x": 2.0, "y": 2.0},
  499. {"dt": datetime(2017, 6, 1, 2), "x": 3.0, "y": 1.5},
  500. ]
  501. df = DataFrame(data).set_index("dt")
  502. # Error as we don't have 'z' column
  503. msg = r"Column\(s\) \['z'\] do not exist"
  504. with pytest.raises(KeyError, match=msg):
  505. df.resample("30T").agg({"x": ["mean"], "y": ["median"], "z": ["sum"]})
  506. def test_agg_list_like_func_with_args():
  507. # 50624
  508. df = DataFrame(
  509. {"x": [1, 2, 3]}, index=date_range("2020-01-01", periods=3, freq="D")
  510. )
  511. def foo1(x, a=1, c=0):
  512. return x + a + c
  513. def foo2(x, b=2, c=0):
  514. return x + b + c
  515. msg = r"foo1\(\) got an unexpected keyword argument 'b'"
  516. with pytest.raises(TypeError, match=msg):
  517. df.resample("D").agg([foo1, foo2], 3, b=3, c=4)
  518. result = df.resample("D").agg([foo1, foo2], 3, c=4)
  519. expected = DataFrame(
  520. [[8, 8], [9, 9], [10, 10]],
  521. index=date_range("2020-01-01", periods=3, freq="D"),
  522. columns=pd.MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
  523. )
  524. tm.assert_frame_equal(result, expected)
  525. def test_selection_api_validation():
  526. # GH 13500
  527. index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D")
  528. rng = np.arange(len(index), dtype=np.int64)
  529. df = DataFrame(
  530. {"date": index, "a": rng},
  531. index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
  532. )
  533. df_exp = DataFrame({"a": rng}, index=index)
  534. # non DatetimeIndex
  535. msg = (
  536. "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
  537. "but got an instance of 'Index'"
  538. )
  539. with pytest.raises(TypeError, match=msg):
  540. df.resample("2D", level="v")
  541. msg = "The Grouper cannot specify both a key and a level!"
  542. with pytest.raises(ValueError, match=msg):
  543. df.resample("2D", on="date", level="d")
  544. msg = "unhashable type: 'list'"
  545. with pytest.raises(TypeError, match=msg):
  546. df.resample("2D", on=["a", "date"])
  547. msg = r"\"Level \['a', 'date'\] not found\""
  548. with pytest.raises(KeyError, match=msg):
  549. df.resample("2D", level=["a", "date"])
  550. # upsampling not allowed
  551. msg = (
  552. "Upsampling from level= or on= selection is not supported, use "
  553. r"\.set_index\(\.\.\.\) to explicitly set index to datetime-like"
  554. )
  555. with pytest.raises(ValueError, match=msg):
  556. df.resample("2D", level="d").asfreq()
  557. with pytest.raises(ValueError, match=msg):
  558. df.resample("2D", on="date").asfreq()
  559. exp = df_exp.resample("2D").sum()
  560. exp.index.name = "date"
  561. result = df.resample("2D", on="date").sum()
  562. tm.assert_frame_equal(exp, result)
  563. exp.index.name = "d"
  564. with pytest.raises(TypeError, match="datetime64 type does not support sum"):
  565. df.resample("2D", level="d").sum()
  566. result = df.resample("2D", level="d").sum(numeric_only=True)
  567. tm.assert_frame_equal(exp, result)
  568. @pytest.mark.parametrize(
  569. "col_name", ["t2", "t2x", "t2q", "T_2M", "t2p", "t2m", "t2m1", "T2M"]
  570. )
  571. def test_agg_with_datetime_index_list_agg_func(col_name):
  572. # GH 22660
  573. # The parametrized column names would get converted to dates by our
  574. # date parser. Some would result in OutOfBoundsError (ValueError) while
  575. # others would result in OverflowError when passed into Timestamp.
  576. # We catch these errors and move on to the correct branch.
  577. df = DataFrame(
  578. list(range(200)),
  579. index=date_range(
  580. start="2017-01-01", freq="15min", periods=200, tz="Europe/Berlin"
  581. ),
  582. columns=[col_name],
  583. )
  584. result = df.resample("1d").aggregate(["mean"])
  585. expected = DataFrame(
  586. [47.5, 143.5, 195.5],
  587. index=date_range(start="2017-01-01", freq="D", periods=3, tz="Europe/Berlin"),
  588. columns=pd.MultiIndex(levels=[[col_name], ["mean"]], codes=[[0], [0]]),
  589. )
  590. tm.assert_frame_equal(result, expected)
  591. def test_resample_agg_readonly():
  592. # GH#31710 cython needs to allow readonly data
  593. index = date_range("2020-01-01", "2020-01-02", freq="1h")
  594. arr = np.zeros_like(index)
  595. arr.setflags(write=False)
  596. ser = Series(arr, index=index)
  597. rs = ser.resample("1D")
  598. expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24])
  599. result = rs.agg("last")
  600. tm.assert_series_equal(result, expected)
  601. result = rs.agg("first")
  602. tm.assert_series_equal(result, expected)
  603. result = rs.agg("max")
  604. tm.assert_series_equal(result, expected)
  605. result = rs.agg("min")
  606. tm.assert_series_equal(result, expected)
  607. @pytest.mark.parametrize(
  608. "start,end,freq,data,resample_freq,origin,closed,exp_data,exp_end,exp_periods",
  609. [
  610. (
  611. "2000-10-01 23:30:00",
  612. "2000-10-02 00:26:00",
  613. "7min",
  614. [0, 3, 6, 9, 12, 15, 18, 21, 24],
  615. "17min",
  616. "end",
  617. None,
  618. [0, 18, 27, 63],
  619. "20001002 00:26:00",
  620. 4,
  621. ),
  622. (
  623. "20200101 8:26:35",
  624. "20200101 9:31:58",
  625. "77s",
  626. [1] * 51,
  627. "7min",
  628. "end",
  629. "right",
  630. [1, 6, 5, 6, 5, 6, 5, 6, 5, 6],
  631. "2020-01-01 09:30:45",
  632. 10,
  633. ),
  634. (
  635. "2000-10-01 23:30:00",
  636. "2000-10-02 00:26:00",
  637. "7min",
  638. [0, 3, 6, 9, 12, 15, 18, 21, 24],
  639. "17min",
  640. "end",
  641. "left",
  642. [0, 18, 27, 39, 24],
  643. "20001002 00:43:00",
  644. 5,
  645. ),
  646. (
  647. "2000-10-01 23:30:00",
  648. "2000-10-02 00:26:00",
  649. "7min",
  650. [0, 3, 6, 9, 12, 15, 18, 21, 24],
  651. "17min",
  652. "end_day",
  653. None,
  654. [3, 15, 45, 45],
  655. "2000-10-02 00:29:00",
  656. 4,
  657. ),
  658. ],
  659. )
  660. def test_end_and_end_day_origin(
  661. start,
  662. end,
  663. freq,
  664. data,
  665. resample_freq,
  666. origin,
  667. closed,
  668. exp_data,
  669. exp_end,
  670. exp_periods,
  671. ):
  672. rng = date_range(start, end, freq=freq)
  673. ts = Series(data, index=rng)
  674. res = ts.resample(resample_freq, origin=origin, closed=closed).sum()
  675. expected = Series(
  676. exp_data,
  677. index=date_range(end=exp_end, freq=resample_freq, periods=exp_periods),
  678. )
  679. tm.assert_series_equal(res, expected)
  680. @pytest.mark.parametrize(
  681. # expected_data is a string when op raises a ValueError
  682. "method, numeric_only, expected_data",
  683. [
  684. ("sum", True, {"num": [25]}),
  685. ("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}),
  686. ("sum", lib.no_default, {"cat": ["cat_1cat_2"], "num": [25]}),
  687. ("prod", True, {"num": [100]}),
  688. ("prod", False, "can't multiply sequence"),
  689. ("prod", lib.no_default, "can't multiply sequence"),
  690. ("min", True, {"num": [5]}),
  691. ("min", False, {"cat": ["cat_1"], "num": [5]}),
  692. ("min", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
  693. ("max", True, {"num": [20]}),
  694. ("max", False, {"cat": ["cat_2"], "num": [20]}),
  695. ("max", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
  696. ("first", True, {"num": [5]}),
  697. ("first", False, {"cat": ["cat_1"], "num": [5]}),
  698. ("first", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
  699. ("last", True, {"num": [20]}),
  700. ("last", False, {"cat": ["cat_2"], "num": [20]}),
  701. ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
  702. ("mean", True, {"num": [12.5]}),
  703. ("mean", False, "Could not convert"),
  704. ("mean", lib.no_default, "Could not convert"),
  705. ("median", True, {"num": [12.5]}),
  706. ("median", False, "could not convert"),
  707. ("median", lib.no_default, "could not convert"),
  708. ("std", True, {"num": [10.606601717798213]}),
  709. ("std", False, "could not convert string to float"),
  710. ("std", lib.no_default, "could not convert string to float"),
  711. ("var", True, {"num": [112.5]}),
  712. ("var", False, "could not convert string to float"),
  713. ("var", lib.no_default, "could not convert string to float"),
  714. ("sem", True, {"num": [7.5]}),
  715. ("sem", False, "could not convert string to float"),
  716. ("sem", lib.no_default, "could not convert string to float"),
  717. ],
  718. )
  719. def test_frame_downsample_method(method, numeric_only, expected_data):
  720. # GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy
  721. index = date_range("2018-01-01", periods=2, freq="D")
  722. expected_index = date_range("2018-12-31", periods=1, freq="Y")
  723. df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, index=index)
  724. resampled = df.resample("Y")
  725. if numeric_only is lib.no_default:
  726. kwargs = {}
  727. else:
  728. kwargs = {"numeric_only": numeric_only}
  729. func = getattr(resampled, method)
  730. if isinstance(expected_data, str):
  731. klass = TypeError if method in ("var", "mean", "median", "prod") else ValueError
  732. with pytest.raises(klass, match=expected_data):
  733. _ = func(**kwargs)
  734. else:
  735. result = func(**kwargs)
  736. expected = DataFrame(expected_data, index=expected_index)
  737. tm.assert_frame_equal(result, expected)
  738. @pytest.mark.parametrize(
  739. "method, numeric_only, expected_data",
  740. [
  741. ("sum", True, ()),
  742. ("sum", False, ["cat_1cat_2"]),
  743. ("sum", lib.no_default, ["cat_1cat_2"]),
  744. ("prod", True, ()),
  745. ("prod", False, ()),
  746. ("prod", lib.no_default, ()),
  747. ("min", True, ()),
  748. ("min", False, ["cat_1"]),
  749. ("min", lib.no_default, ["cat_1"]),
  750. ("max", True, ()),
  751. ("max", False, ["cat_2"]),
  752. ("max", lib.no_default, ["cat_2"]),
  753. ("first", True, ()),
  754. ("first", False, ["cat_1"]),
  755. ("first", lib.no_default, ["cat_1"]),
  756. ("last", True, ()),
  757. ("last", False, ["cat_2"]),
  758. ("last", lib.no_default, ["cat_2"]),
  759. ],
  760. )
  761. def test_series_downsample_method(method, numeric_only, expected_data):
  762. # GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy
  763. index = date_range("2018-01-01", periods=2, freq="D")
  764. expected_index = date_range("2018-12-31", periods=1, freq="Y")
  765. df = Series(["cat_1", "cat_2"], index=index)
  766. resampled = df.resample("Y")
  767. kwargs = {} if numeric_only is lib.no_default else {"numeric_only": numeric_only}
  768. func = getattr(resampled, method)
  769. if numeric_only and numeric_only is not lib.no_default:
  770. msg = rf"Cannot use numeric_only=True with SeriesGroupBy\.{method}"
  771. with pytest.raises(TypeError, match=msg):
  772. func(**kwargs)
  773. elif method == "prod":
  774. with pytest.raises(TypeError, match="can't multiply sequence by non-int"):
  775. func(**kwargs)
  776. else:
  777. result = func(**kwargs)
  778. expected = Series(expected_data, index=expected_index)
  779. tm.assert_series_equal(result, expected)
  780. @pytest.mark.parametrize(
  781. "method, raises",
  782. [
  783. ("sum", True),
  784. ("prod", True),
  785. ("min", True),
  786. ("max", True),
  787. ("first", False),
  788. ("last", False),
  789. ("median", False),
  790. ("mean", True),
  791. ("std", True),
  792. ("var", True),
  793. ("sem", False),
  794. ("ohlc", False),
  795. ("nunique", False),
  796. ],
  797. )
  798. def test_args_kwargs_depr(method, raises):
  799. index = date_range("20180101", periods=3, freq="h")
  800. df = Series([2, 4, 6], index=index)
  801. resampled = df.resample("30min")
  802. args = ()
  803. func = getattr(resampled, method)
  804. error_msg = "numpy operations are not valid with resample."
  805. error_msg_type = "too many arguments passed in"
  806. warn_msg = f"Passing additional args to DatetimeIndexResampler.{method}"
  807. if raises:
  808. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  809. with pytest.raises(UnsupportedFunctionCall, match=error_msg):
  810. func(*args, 1, 2, 3)
  811. else:
  812. with tm.assert_produces_warning(FutureWarning, match=warn_msg):
  813. with pytest.raises(TypeError, match=error_msg_type):
  814. func(*args, 1, 2, 3)
  815. def test_resample_empty():
  816. # GH#52484
  817. df = DataFrame(
  818. index=pd.to_datetime(
  819. ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
  820. )
  821. )
  822. expected = DataFrame(
  823. index=pd.to_datetime(
  824. [
  825. "2018-01-01 00:00:00",
  826. "2018-01-01 08:00:00",
  827. "2018-01-01 16:00:00",
  828. "2018-01-02 00:00:00",
  829. ]
  830. )
  831. )
  832. result = df.resample("8H").mean()
  833. tm.assert_frame_equal(result, expected)