# test_period_index.py
  1. from datetime import datetime
  2. import dateutil
  3. import numpy as np
  4. import pytest
  5. import pytz
  6. from pandas._libs.tslibs.ccalendar import (
  7. DAYS,
  8. MONTHS,
  9. )
  10. from pandas._libs.tslibs.period import IncompatibleFrequency
  11. from pandas.errors import InvalidIndexError
  12. import pandas as pd
  13. from pandas import (
  14. DataFrame,
  15. Series,
  16. Timestamp,
  17. )
  18. import pandas._testing as tm
  19. from pandas.core.indexes.datetimes import date_range
  20. from pandas.core.indexes.period import (
  21. Period,
  22. PeriodIndex,
  23. period_range,
  24. )
  25. from pandas.core.resample import _get_period_range_edges
  26. from pandas.tseries import offsets
  27. @pytest.fixture()
  28. def _index_factory():
  29. return period_range
  30. @pytest.fixture
  31. def _series_name():
  32. return "pi"
  33. class TestPeriodIndex:
  34. @pytest.mark.parametrize("freq", ["2D", "1H", "2H"])
  35. @pytest.mark.parametrize("kind", ["period", None, "timestamp"])
  36. def test_asfreq(self, series_and_frame, freq, kind):
  37. # GH 12884, 15944
  38. # make sure .asfreq() returns PeriodIndex (except kind='timestamp')
  39. obj = series_and_frame
  40. if kind == "timestamp":
  41. expected = obj.to_timestamp().resample(freq).asfreq()
  42. else:
  43. start = obj.index[0].to_timestamp(how="start")
  44. end = (obj.index[-1] + obj.index.freq).to_timestamp(how="start")
  45. new_index = date_range(start=start, end=end, freq=freq, inclusive="left")
  46. expected = obj.to_timestamp().reindex(new_index).to_period(freq)
  47. result = obj.resample(freq, kind=kind).asfreq()
  48. tm.assert_almost_equal(result, expected)
  49. def test_asfreq_fill_value(self, series):
  50. # test for fill value during resampling, issue 3715
  51. s = series
  52. new_index = date_range(
  53. s.index[0].to_timestamp(how="start"),
  54. (s.index[-1]).to_timestamp(how="start"),
  55. freq="1H",
  56. )
  57. expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
  58. result = s.resample("1H", kind="timestamp").asfreq(fill_value=4.0)
  59. tm.assert_series_equal(result, expected)
  60. frame = s.to_frame("value")
  61. new_index = date_range(
  62. frame.index[0].to_timestamp(how="start"),
  63. (frame.index[-1]).to_timestamp(how="start"),
  64. freq="1H",
  65. )
  66. expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
  67. result = frame.resample("1H", kind="timestamp").asfreq(fill_value=3.0)
  68. tm.assert_frame_equal(result, expected)
  69. @pytest.mark.parametrize("freq", ["H", "12H", "2D", "W"])
  70. @pytest.mark.parametrize("kind", [None, "period", "timestamp"])
  71. @pytest.mark.parametrize("kwargs", [{"on": "date"}, {"level": "d"}])
  72. def test_selection(self, index, freq, kind, kwargs):
  73. # This is a bug, these should be implemented
  74. # GH 14008
  75. rng = np.arange(len(index), dtype=np.int64)
  76. df = DataFrame(
  77. {"date": index, "a": rng},
  78. index=pd.MultiIndex.from_arrays([rng, index], names=["v", "d"]),
  79. )
  80. msg = (
  81. "Resampling from level= or on= selection with a PeriodIndex is "
  82. r"not currently supported, use \.set_index\(\.\.\.\) to "
  83. "explicitly set index"
  84. )
  85. with pytest.raises(NotImplementedError, match=msg):
  86. df.resample(freq, kind=kind, **kwargs)
  87. @pytest.mark.parametrize("month", MONTHS)
  88. @pytest.mark.parametrize("meth", ["ffill", "bfill"])
  89. @pytest.mark.parametrize("conv", ["start", "end"])
  90. @pytest.mark.parametrize("targ", ["D", "B", "M"])
  91. def test_annual_upsample_cases(
  92. self, targ, conv, meth, month, simple_period_range_series
  93. ):
  94. ts = simple_period_range_series("1/1/1990", "12/31/1991", freq=f"A-{month}")
  95. result = getattr(ts.resample(targ, convention=conv), meth)()
  96. expected = result.to_timestamp(targ, how=conv)
  97. expected = expected.asfreq(targ, meth).to_period()
  98. tm.assert_series_equal(result, expected)
  99. def test_basic_downsample(self, simple_period_range_series):
  100. ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
  101. result = ts.resample("a-dec").mean()
  102. expected = ts.groupby(ts.index.year).mean()
  103. expected.index = period_range("1/1/1990", "6/30/1995", freq="a-dec")
  104. tm.assert_series_equal(result, expected)
  105. # this is ok
  106. tm.assert_series_equal(ts.resample("a-dec").mean(), result)
  107. tm.assert_series_equal(ts.resample("a").mean(), result)
  108. @pytest.mark.parametrize(
  109. "rule,expected_error_msg",
  110. [
  111. ("a-dec", "<YearEnd: month=12>"),
  112. ("q-mar", "<QuarterEnd: startingMonth=3>"),
  113. ("M", "<MonthEnd>"),
  114. ("w-thu", "<Week: weekday=3>"),
  115. ],
  116. )
  117. def test_not_subperiod(self, simple_period_range_series, rule, expected_error_msg):
  118. # These are incompatible period rules for resampling
  119. ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="w-wed")
  120. msg = (
  121. "Frequency <Week: weekday=2> cannot be resampled to "
  122. f"{expected_error_msg}, as they are not sub or super periods"
  123. )
  124. with pytest.raises(IncompatibleFrequency, match=msg):
  125. ts.resample(rule).mean()
  126. @pytest.mark.parametrize("freq", ["D", "2D"])
  127. def test_basic_upsample(self, freq, simple_period_range_series):
  128. ts = simple_period_range_series("1/1/1990", "6/30/1995", freq="M")
  129. result = ts.resample("a-dec").mean()
  130. resampled = result.resample(freq, convention="end").ffill()
  131. expected = result.to_timestamp(freq, how="end")
  132. expected = expected.asfreq(freq, "ffill").to_period(freq)
  133. tm.assert_series_equal(resampled, expected)
  134. def test_upsample_with_limit(self):
  135. rng = period_range("1/1/2000", periods=5, freq="A")
  136. ts = Series(np.random.randn(len(rng)), rng)
  137. result = ts.resample("M", convention="end").ffill(limit=2)
  138. expected = ts.asfreq("M").reindex(result.index, method="ffill", limit=2)
  139. tm.assert_series_equal(result, expected)
  140. def test_annual_upsample(self, simple_period_range_series):
  141. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="A-DEC")
  142. df = DataFrame({"a": ts})
  143. rdf = df.resample("D").ffill()
  144. exp = df["a"].resample("D").ffill()
  145. tm.assert_series_equal(rdf["a"], exp)
  146. rng = period_range("2000", "2003", freq="A-DEC")
  147. ts = Series([1, 2, 3, 4], index=rng)
  148. result = ts.resample("M").ffill()
  149. ex_index = period_range("2000-01", "2003-12", freq="M")
  150. expected = ts.asfreq("M", how="start").reindex(ex_index, method="ffill")
  151. tm.assert_series_equal(result, expected)
  152. @pytest.mark.parametrize("month", MONTHS)
  153. @pytest.mark.parametrize("target", ["D", "B", "M"])
  154. @pytest.mark.parametrize("convention", ["start", "end"])
  155. def test_quarterly_upsample(
  156. self, month, target, convention, simple_period_range_series
  157. ):
  158. freq = f"Q-{month}"
  159. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
  160. result = ts.resample(target, convention=convention).ffill()
  161. expected = result.to_timestamp(target, how=convention)
  162. expected = expected.asfreq(target, "ffill").to_period()
  163. tm.assert_series_equal(result, expected)
  164. @pytest.mark.parametrize("target", ["D", "B"])
  165. @pytest.mark.parametrize("convention", ["start", "end"])
  166. def test_monthly_upsample(self, target, convention, simple_period_range_series):
  167. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
  168. result = ts.resample(target, convention=convention).ffill()
  169. expected = result.to_timestamp(target, how=convention)
  170. expected = expected.asfreq(target, "ffill").to_period()
  171. tm.assert_series_equal(result, expected)
  172. def test_resample_basic(self):
  173. # GH3609
  174. s = Series(
  175. range(100),
  176. index=date_range("20130101", freq="s", periods=100, name="idx"),
  177. dtype="float",
  178. )
  179. s[10:30] = np.nan
  180. index = PeriodIndex(
  181. [Period("2013-01-01 00:00", "T"), Period("2013-01-01 00:01", "T")],
  182. name="idx",
  183. )
  184. expected = Series([34.5, 79.5], index=index)
  185. result = s.to_period().resample("T", kind="period").mean()
  186. tm.assert_series_equal(result, expected)
  187. result2 = s.resample("T", kind="period").mean()
  188. tm.assert_series_equal(result2, expected)
  189. @pytest.mark.parametrize(
  190. "freq,expected_vals", [("M", [31, 29, 31, 9]), ("2M", [31 + 29, 31 + 9])]
  191. )
  192. def test_resample_count(self, freq, expected_vals):
  193. # GH12774
  194. series = Series(1, index=period_range(start="2000", periods=100))
  195. result = series.resample(freq).count()
  196. expected_index = period_range(
  197. start="2000", freq=freq, periods=len(expected_vals)
  198. )
  199. expected = Series(expected_vals, index=expected_index)
  200. tm.assert_series_equal(result, expected)
  201. def test_resample_same_freq(self, resample_method):
  202. # GH12770
  203. series = Series(range(3), index=period_range(start="2000", periods=3, freq="M"))
  204. expected = series
  205. result = getattr(series.resample("M"), resample_method)()
  206. tm.assert_series_equal(result, expected)
  207. def test_resample_incompat_freq(self):
  208. msg = (
  209. "Frequency <MonthEnd> cannot be resampled to <Week: weekday=6>, "
  210. "as they are not sub or super periods"
  211. )
  212. with pytest.raises(IncompatibleFrequency, match=msg):
  213. Series(
  214. range(3), index=period_range(start="2000", periods=3, freq="M")
  215. ).resample("W").mean()
  216. def test_with_local_timezone_pytz(self):
  217. # see gh-5430
  218. local_timezone = pytz.timezone("America/Los_Angeles")
  219. start = datetime(year=2013, month=11, day=1, hour=0, minute=0, tzinfo=pytz.utc)
  220. # 1 day later
  221. end = datetime(year=2013, month=11, day=2, hour=0, minute=0, tzinfo=pytz.utc)
  222. index = date_range(start, end, freq="H")
  223. series = Series(1, index=index)
  224. series = series.tz_convert(local_timezone)
  225. result = series.resample("D", kind="period").mean()
  226. # Create the expected series
  227. # Index is moved back a day with the timezone conversion from UTC to
  228. # Pacific
  229. expected_index = period_range(start=start, end=end, freq="D") - offsets.Day()
  230. expected = Series(1.0, index=expected_index)
  231. tm.assert_series_equal(result, expected)
  232. def test_resample_with_pytz(self):
  233. # GH 13238
  234. s = Series(
  235. 2, index=date_range("2017-01-01", periods=48, freq="H", tz="US/Eastern")
  236. )
  237. result = s.resample("D").mean()
  238. expected = Series(
  239. 2.0,
  240. index=pd.DatetimeIndex(
  241. ["2017-01-01", "2017-01-02"], tz="US/Eastern", freq="D"
  242. ),
  243. )
  244. tm.assert_series_equal(result, expected)
  245. # Especially assert that the timezone is LMT for pytz
  246. assert result.index.tz == pytz.timezone("US/Eastern")
  247. def test_with_local_timezone_dateutil(self):
  248. # see gh-5430
  249. local_timezone = "dateutil/America/Los_Angeles"
  250. start = datetime(
  251. year=2013, month=11, day=1, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
  252. )
  253. # 1 day later
  254. end = datetime(
  255. year=2013, month=11, day=2, hour=0, minute=0, tzinfo=dateutil.tz.tzutc()
  256. )
  257. index = date_range(start, end, freq="H", name="idx")
  258. series = Series(1, index=index)
  259. series = series.tz_convert(local_timezone)
  260. result = series.resample("D", kind="period").mean()
  261. # Create the expected series
  262. # Index is moved back a day with the timezone conversion from UTC to
  263. # Pacific
  264. expected_index = (
  265. period_range(start=start, end=end, freq="D", name="idx") - offsets.Day()
  266. )
  267. expected = Series(1.0, index=expected_index)
  268. tm.assert_series_equal(result, expected)
  269. def test_resample_nonexistent_time_bin_edge(self):
  270. # GH 19375
  271. index = date_range("2017-03-12", "2017-03-12 1:45:00", freq="15T")
  272. s = Series(np.zeros(len(index)), index=index)
  273. expected = s.tz_localize("US/Pacific")
  274. expected.index = pd.DatetimeIndex(expected.index, freq="900S")
  275. result = expected.resample("900S").mean()
  276. tm.assert_series_equal(result, expected)
  277. # GH 23742
  278. index = date_range(start="2017-10-10", end="2017-10-20", freq="1H")
  279. index = index.tz_localize("UTC").tz_convert("America/Sao_Paulo")
  280. df = DataFrame(data=list(range(len(index))), index=index)
  281. result = df.groupby(pd.Grouper(freq="1D")).count()
  282. expected = date_range(
  283. start="2017-10-09",
  284. end="2017-10-20",
  285. freq="D",
  286. tz="America/Sao_Paulo",
  287. nonexistent="shift_forward",
  288. inclusive="left",
  289. )
  290. tm.assert_index_equal(result.index, expected)
  291. def test_resample_ambiguous_time_bin_edge(self):
  292. # GH 10117
  293. idx = date_range(
  294. "2014-10-25 22:00:00", "2014-10-26 00:30:00", freq="30T", tz="Europe/London"
  295. )
  296. expected = Series(np.zeros(len(idx)), index=idx)
  297. result = expected.resample("30T").mean()
  298. tm.assert_series_equal(result, expected)
  299. def test_fill_method_and_how_upsample(self):
  300. # GH2073
  301. s = Series(
  302. np.arange(9, dtype="int64"),
  303. index=date_range("2010-01-01", periods=9, freq="Q"),
  304. )
  305. last = s.resample("M").ffill()
  306. both = s.resample("M").ffill().resample("M").last().astype("int64")
  307. tm.assert_series_equal(last, both)
  308. @pytest.mark.parametrize("day", DAYS)
  309. @pytest.mark.parametrize("target", ["D", "B"])
  310. @pytest.mark.parametrize("convention", ["start", "end"])
  311. def test_weekly_upsample(self, day, target, convention, simple_period_range_series):
  312. freq = f"W-{day}"
  313. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq=freq)
  314. result = ts.resample(target, convention=convention).ffill()
  315. expected = result.to_timestamp(target, how=convention)
  316. expected = expected.asfreq(target, "ffill").to_period()
  317. tm.assert_series_equal(result, expected)
  318. def test_resample_to_timestamps(self, simple_period_range_series):
  319. ts = simple_period_range_series("1/1/1990", "12/31/1995", freq="M")
  320. result = ts.resample("A-DEC", kind="timestamp").mean()
  321. expected = ts.to_timestamp(how="start").resample("A-DEC").mean()
  322. tm.assert_series_equal(result, expected)
  323. @pytest.mark.parametrize("month", MONTHS)
  324. def test_resample_to_quarterly(self, simple_period_range_series, month):
  325. ts = simple_period_range_series("1990", "1992", freq=f"A-{month}")
  326. quar_ts = ts.resample(f"Q-{month}").ffill()
  327. stamps = ts.to_timestamp("D", how="start")
  328. qdates = period_range(
  329. ts.index[0].asfreq("D", "start"),
  330. ts.index[-1].asfreq("D", "end"),
  331. freq=f"Q-{month}",
  332. )
  333. expected = stamps.reindex(qdates.to_timestamp("D", "s"), method="ffill")
  334. expected.index = qdates
  335. tm.assert_series_equal(quar_ts, expected)
  336. @pytest.mark.parametrize("how", ["start", "end"])
  337. def test_resample_to_quarterly_start_end(self, simple_period_range_series, how):
  338. # conforms, but different month
  339. ts = simple_period_range_series("1990", "1992", freq="A-JUN")
  340. result = ts.resample("Q-MAR", convention=how).ffill()
  341. expected = ts.asfreq("Q-MAR", how=how)
  342. expected = expected.reindex(result.index, method="ffill")
  343. # .to_timestamp('D')
  344. # expected = expected.resample('Q-MAR').ffill()
  345. tm.assert_series_equal(result, expected)
  346. def test_resample_fill_missing(self):
  347. rng = PeriodIndex([2000, 2005, 2007, 2009], freq="A")
  348. s = Series(np.random.randn(4), index=rng)
  349. stamps = s.to_timestamp()
  350. filled = s.resample("A").ffill()
  351. expected = stamps.resample("A").ffill().to_period("A")
  352. tm.assert_series_equal(filled, expected)
  353. def test_cant_fill_missing_dups(self):
  354. rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq="A")
  355. s = Series(np.random.randn(5), index=rng)
  356. msg = "Reindexing only valid with uniquely valued Index objects"
  357. with pytest.raises(InvalidIndexError, match=msg):
  358. s.resample("A").ffill()
  359. @pytest.mark.parametrize("freq", ["5min"])
  360. @pytest.mark.parametrize("kind", ["period", None, "timestamp"])
  361. def test_resample_5minute(self, freq, kind):
  362. rng = period_range("1/1/2000", "1/5/2000", freq="T")
  363. ts = Series(np.random.randn(len(rng)), index=rng)
  364. expected = ts.to_timestamp().resample(freq).mean()
  365. if kind != "timestamp":
  366. expected = expected.to_period(freq)
  367. result = ts.resample(freq, kind=kind).mean()
  368. tm.assert_series_equal(result, expected)
  369. def test_upsample_daily_business_daily(self, simple_period_range_series):
  370. ts = simple_period_range_series("1/1/2000", "2/1/2000", freq="B")
  371. result = ts.resample("D").asfreq()
  372. expected = ts.asfreq("D").reindex(period_range("1/3/2000", "2/1/2000"))
  373. tm.assert_series_equal(result, expected)
  374. ts = simple_period_range_series("1/1/2000", "2/1/2000")
  375. result = ts.resample("H", convention="s").asfreq()
  376. exp_rng = period_range("1/1/2000", "2/1/2000 23:00", freq="H")
  377. expected = ts.asfreq("H", how="s").reindex(exp_rng)
  378. tm.assert_series_equal(result, expected)
  379. def test_resample_irregular_sparse(self):
  380. dr = date_range(start="1/1/2012", freq="5min", periods=1000)
  381. s = Series(np.array(100), index=dr)
  382. # subset the data.
  383. subset = s[:"2012-01-04 06:55"]
  384. result = subset.resample("10min").apply(len)
  385. expected = s.resample("10min").apply(len).loc[result.index]
  386. tm.assert_series_equal(result, expected)
  387. def test_resample_weekly_all_na(self):
  388. rng = date_range("1/1/2000", periods=10, freq="W-WED")
  389. ts = Series(np.random.randn(len(rng)), index=rng)
  390. result = ts.resample("W-THU").asfreq()
  391. assert result.isna().all()
  392. result = ts.resample("W-THU").asfreq().ffill()[:-1]
  393. expected = ts.asfreq("W-THU").ffill()
  394. tm.assert_series_equal(result, expected)
  395. def test_resample_tz_localized(self):
  396. dr = date_range(start="2012-4-13", end="2012-5-1")
  397. ts = Series(range(len(dr)), index=dr)
  398. ts_utc = ts.tz_localize("UTC")
  399. ts_local = ts_utc.tz_convert("America/Los_Angeles")
  400. result = ts_local.resample("W").mean()
  401. ts_local_naive = ts_local.copy()
  402. ts_local_naive.index = [
  403. x.replace(tzinfo=None) for x in ts_local_naive.index.to_pydatetime()
  404. ]
  405. exp = ts_local_naive.resample("W").mean().tz_localize("America/Los_Angeles")
  406. exp.index = pd.DatetimeIndex(exp.index, freq="W")
  407. tm.assert_series_equal(result, exp)
  408. # it works
  409. result = ts_local.resample("D").mean()
  410. # #2245
  411. idx = date_range(
  412. "2001-09-20 15:59", "2001-09-20 16:00", freq="T", tz="Australia/Sydney"
  413. )
  414. s = Series([1, 2], index=idx)
  415. result = s.resample("D", closed="right", label="right").mean()
  416. ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney")
  417. expected = Series([1.5], index=ex_index)
  418. tm.assert_series_equal(result, expected)
  419. # for good measure
  420. result = s.resample("D", kind="period").mean()
  421. ex_index = period_range("2001-09-20", periods=1, freq="D")
  422. expected = Series([1.5], index=ex_index)
  423. tm.assert_series_equal(result, expected)
  424. # GH 6397
  425. # comparing an offset that doesn't propagate tz's
  426. rng = date_range("1/1/2011", periods=20000, freq="H")
  427. rng = rng.tz_localize("EST")
  428. ts = DataFrame(index=rng)
  429. ts["first"] = np.random.randn(len(rng))
  430. ts["second"] = np.cumsum(np.random.randn(len(rng)))
  431. expected = DataFrame(
  432. {
  433. "first": ts.resample("A").sum()["first"],
  434. "second": ts.resample("A").mean()["second"],
  435. },
  436. columns=["first", "second"],
  437. )
  438. result = (
  439. ts.resample("A")
  440. .agg({"first": np.sum, "second": np.mean})
  441. .reindex(columns=["first", "second"])
  442. )
  443. tm.assert_frame_equal(result, expected)
  444. def test_closed_left_corner(self):
  445. # #1465
  446. s = Series(
  447. np.random.randn(21),
  448. index=date_range(start="1/1/2012 9:30", freq="1min", periods=21),
  449. )
  450. s[0] = np.nan
  451. result = s.resample("10min", closed="left", label="right").mean()
  452. exp = s[1:].resample("10min", closed="left", label="right").mean()
  453. tm.assert_series_equal(result, exp)
  454. result = s.resample("10min", closed="left", label="left").mean()
  455. exp = s[1:].resample("10min", closed="left", label="left").mean()
  456. ex_index = date_range(start="1/1/2012 9:30", freq="10min", periods=3)
  457. tm.assert_index_equal(result.index, ex_index)
  458. tm.assert_series_equal(result, exp)
  459. def test_quarterly_resampling(self):
  460. rng = period_range("2000Q1", periods=10, freq="Q-DEC")
  461. ts = Series(np.arange(10), index=rng)
  462. result = ts.resample("A").mean()
  463. exp = ts.to_timestamp().resample("A").mean().to_period()
  464. tm.assert_series_equal(result, exp)
  465. def test_resample_weekly_bug_1726(self):
  466. # 8/6/12 is a Monday
  467. ind = date_range(start="8/6/2012", end="8/26/2012", freq="D")
  468. n = len(ind)
  469. data = [[x] * 5 for x in range(n)]
  470. df = DataFrame(data, columns=["open", "high", "low", "close", "vol"], index=ind)
  471. # it works!
  472. df.resample("W-MON", closed="left", label="left").first()
  473. def test_resample_with_dst_time_change(self):
  474. # GH 15549
  475. index = (
  476. pd.DatetimeIndex([1457537600000000000, 1458059600000000000])
  477. .tz_localize("UTC")
  478. .tz_convert("America/Chicago")
  479. )
  480. df = DataFrame([1, 2], index=index)
  481. result = df.resample("12h", closed="right", label="right").last().ffill()
  482. expected_index_values = [
  483. "2016-03-09 12:00:00-06:00",
  484. "2016-03-10 00:00:00-06:00",
  485. "2016-03-10 12:00:00-06:00",
  486. "2016-03-11 00:00:00-06:00",
  487. "2016-03-11 12:00:00-06:00",
  488. "2016-03-12 00:00:00-06:00",
  489. "2016-03-12 12:00:00-06:00",
  490. "2016-03-13 00:00:00-06:00",
  491. "2016-03-13 13:00:00-05:00",
  492. "2016-03-14 01:00:00-05:00",
  493. "2016-03-14 13:00:00-05:00",
  494. "2016-03-15 01:00:00-05:00",
  495. "2016-03-15 13:00:00-05:00",
  496. ]
  497. index = pd.to_datetime(expected_index_values, utc=True).tz_convert(
  498. "America/Chicago"
  499. )
  500. index = pd.DatetimeIndex(index, freq="12h")
  501. expected = DataFrame(
  502. [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0],
  503. index=index,
  504. )
  505. tm.assert_frame_equal(result, expected)
  506. def test_resample_bms_2752(self):
  507. # GH2753
  508. timeseries = Series(
  509. index=pd.bdate_range("20000101", "20000201"), dtype=np.float64
  510. )
  511. res1 = timeseries.resample("BMS").mean()
  512. res2 = timeseries.resample("BMS").mean().resample("B").mean()
  513. assert res1.index[0] == Timestamp("20000103")
  514. assert res1.index[0] == res2.index[0]
  515. @pytest.mark.xfail(reason="Commented out for more than 3 years. Should this work?")
  516. def test_monthly_convention_span(self):
  517. rng = period_range("2000-01", periods=3, freq="M")
  518. ts = Series(np.arange(3), index=rng)
  519. # hacky way to get same thing
  520. exp_index = period_range("2000-01-01", "2000-03-31", freq="D")
  521. expected = ts.asfreq("D", how="end").reindex(exp_index)
  522. expected = expected.fillna(method="bfill")
  523. result = ts.resample("D").mean()
  524. tm.assert_series_equal(result, expected)
  525. @pytest.mark.parametrize(
  526. "from_freq, to_freq", [("D", "M"), ("Q", "A"), ("M", "Q"), ("D", "W")]
  527. )
  528. def test_default_right_closed_label(self, from_freq, to_freq):
  529. idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
  530. df = DataFrame(np.random.randn(len(idx), 2), idx)
  531. resampled = df.resample(to_freq).mean()
  532. tm.assert_frame_equal(
  533. resampled, df.resample(to_freq, closed="right", label="right").mean()
  534. )
  535. @pytest.mark.parametrize(
  536. "from_freq, to_freq",
  537. [("D", "MS"), ("Q", "AS"), ("M", "QS"), ("H", "D"), ("T", "H")],
  538. )
  539. def test_default_left_closed_label(self, from_freq, to_freq):
  540. idx = date_range(start="8/15/2012", periods=100, freq=from_freq)
  541. df = DataFrame(np.random.randn(len(idx), 2), idx)
  542. resampled = df.resample(to_freq).mean()
  543. tm.assert_frame_equal(
  544. resampled, df.resample(to_freq, closed="left", label="left").mean()
  545. )
  546. def test_all_values_single_bin(self):
  547. # 2070
  548. index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
  549. s = Series(np.random.randn(len(index)), index=index)
  550. result = s.resample("A").mean()
  551. tm.assert_almost_equal(result[0], s.mean())
  552. def test_evenly_divisible_with_no_extra_bins(self):
  553. # 4076
  554. # when the frequency is evenly divisible, sometimes extra bins
  555. df = DataFrame(np.random.randn(9, 3), index=date_range("2000-1-1", periods=9))
  556. result = df.resample("5D").mean()
  557. expected = pd.concat([df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
  558. expected.index = pd.DatetimeIndex(
  559. [Timestamp("2000-1-1"), Timestamp("2000-1-6")], freq="5D"
  560. )
  561. tm.assert_frame_equal(result, expected)
  562. index = date_range(start="2001-5-4", periods=28)
  563. df = DataFrame(
  564. [
  565. {
  566. "REST_KEY": 1,
  567. "DLY_TRN_QT": 80,
  568. "DLY_SLS_AMT": 90,
  569. "COOP_DLY_TRN_QT": 30,
  570. "COOP_DLY_SLS_AMT": 20,
  571. }
  572. ]
  573. * 28
  574. + [
  575. {
  576. "REST_KEY": 2,
  577. "DLY_TRN_QT": 70,
  578. "DLY_SLS_AMT": 10,
  579. "COOP_DLY_TRN_QT": 50,
  580. "COOP_DLY_SLS_AMT": 20,
  581. }
  582. ]
  583. * 28,
  584. index=index.append(index),
  585. ).sort_index()
  586. index = date_range("2001-5-4", periods=4, freq="7D")
  587. expected = DataFrame(
  588. [
  589. {
  590. "REST_KEY": 14,
  591. "DLY_TRN_QT": 14,
  592. "DLY_SLS_AMT": 14,
  593. "COOP_DLY_TRN_QT": 14,
  594. "COOP_DLY_SLS_AMT": 14,
  595. }
  596. ]
  597. * 4,
  598. index=index,
  599. )
  600. result = df.resample("7D").count()
  601. tm.assert_frame_equal(result, expected)
  602. expected = DataFrame(
  603. [
  604. {
  605. "REST_KEY": 21,
  606. "DLY_TRN_QT": 1050,
  607. "DLY_SLS_AMT": 700,
  608. "COOP_DLY_TRN_QT": 560,
  609. "COOP_DLY_SLS_AMT": 280,
  610. }
  611. ]
  612. * 4,
  613. index=index,
  614. )
  615. result = df.resample("7D").sum()
  616. tm.assert_frame_equal(result, expected)
  617. @pytest.mark.parametrize("freq, period_mult", [("H", 24), ("12H", 2)])
  618. @pytest.mark.parametrize("kind", [None, "period"])
  619. def test_upsampling_ohlc(self, freq, period_mult, kind):
  620. # GH 13083
  621. pi = period_range(start="2000", freq="D", periods=10)
  622. s = Series(range(len(pi)), index=pi)
  623. expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
  624. # timestamp-based resampling doesn't include all sub-periods
  625. # of the last original period, so extend accordingly:
  626. new_index = period_range(start="2000", freq=freq, periods=period_mult * len(pi))
  627. expected = expected.reindex(new_index)
  628. result = s.resample(freq, kind=kind).ohlc()
  629. tm.assert_frame_equal(result, expected)
  630. @pytest.mark.parametrize(
  631. "periods, values",
  632. [
  633. (
  634. [
  635. pd.NaT,
  636. "1970-01-01 00:00:00",
  637. pd.NaT,
  638. "1970-01-01 00:00:02",
  639. "1970-01-01 00:00:03",
  640. ],
  641. [2, 3, 5, 7, 11],
  642. ),
  643. (
  644. [
  645. pd.NaT,
  646. pd.NaT,
  647. "1970-01-01 00:00:00",
  648. pd.NaT,
  649. pd.NaT,
  650. pd.NaT,
  651. "1970-01-01 00:00:02",
  652. "1970-01-01 00:00:03",
  653. pd.NaT,
  654. pd.NaT,
  655. ],
  656. [1, 2, 3, 5, 6, 8, 7, 11, 12, 13],
  657. ),
  658. ],
  659. )
  660. @pytest.mark.parametrize(
  661. "freq, expected_values",
  662. [
  663. ("1s", [3, np.NaN, 7, 11]),
  664. ("2s", [3, (7 + 11) / 2]),
  665. ("3s", [(3 + 7) / 2, 11]),
  666. ],
  667. )
  668. def test_resample_with_nat(self, periods, values, freq, expected_values):
  669. # GH 13224
  670. index = PeriodIndex(periods, freq="S")
  671. frame = DataFrame(values, index=index)
  672. expected_index = period_range(
  673. "1970-01-01 00:00:00", periods=len(expected_values), freq=freq
  674. )
  675. expected = DataFrame(expected_values, index=expected_index)
  676. result = frame.resample(freq).mean()
  677. tm.assert_frame_equal(result, expected)
  678. def test_resample_with_only_nat(self):
  679. # GH 13224
  680. pi = PeriodIndex([pd.NaT] * 3, freq="S")
  681. frame = DataFrame([2, 3, 5], index=pi, columns=["a"])
  682. expected_index = PeriodIndex(data=[], freq=pi.freq)
  683. expected = DataFrame(index=expected_index, columns=["a"], dtype="float64")
  684. result = frame.resample("1s").mean()
  685. tm.assert_frame_equal(result, expected)
  686. @pytest.mark.parametrize(
  687. "start,end,start_freq,end_freq,offset",
  688. [
  689. ("19910905", "19910909 03:00", "H", "24H", "10H"),
  690. ("19910905", "19910909 12:00", "H", "24H", "10H"),
  691. ("19910905", "19910909 23:00", "H", "24H", "10H"),
  692. ("19910905 10:00", "19910909", "H", "24H", "10H"),
  693. ("19910905 10:00", "19910909 10:00", "H", "24H", "10H"),
  694. ("19910905", "19910909 10:00", "H", "24H", "10H"),
  695. ("19910905 12:00", "19910909", "H", "24H", "10H"),
  696. ("19910905 12:00", "19910909 03:00", "H", "24H", "10H"),
  697. ("19910905 12:00", "19910909 12:00", "H", "24H", "10H"),
  698. ("19910905 12:00", "19910909 12:00", "H", "24H", "34H"),
  699. ("19910905 12:00", "19910909 12:00", "H", "17H", "10H"),
  700. ("19910905 12:00", "19910909 12:00", "H", "17H", "3H"),
  701. ("19910905 12:00", "19910909 1:00", "H", "M", "3H"),
  702. ("19910905", "19910913 06:00", "2H", "24H", "10H"),
  703. ("19910905", "19910905 01:39", "Min", "5Min", "3Min"),
  704. ("19910905", "19910905 03:18", "2Min", "5Min", "3Min"),
  705. ],
  706. )
  707. def test_resample_with_offset(self, start, end, start_freq, end_freq, offset):
  708. # GH 23882 & 31809
  709. s = Series(0, index=period_range(start, end, freq=start_freq))
  710. s = s + np.arange(len(s))
  711. result = s.resample(end_freq, offset=offset).mean()
  712. result = result.to_timestamp(end_freq)
  713. expected = s.to_timestamp().resample(end_freq, offset=offset).mean()
  714. if end_freq == "M":
  715. # TODO: is non-tick the relevant characteristic? (GH 33815)
  716. expected.index = expected.index._with_freq(None)
  717. tm.assert_series_equal(result, expected)
  718. @pytest.mark.parametrize(
  719. "first,last,freq,exp_first,exp_last",
  720. [
  721. ("19910905", "19920406", "D", "19910905", "19920406"),
  722. ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920406"),
  723. (
  724. "19910905 06:00",
  725. "19920406 06:00",
  726. "H",
  727. "19910905 06:00",
  728. "19920406 06:00",
  729. ),
  730. ("19910906", "19920406", "M", "1991-09", "1992-04"),
  731. ("19910831", "19920430", "M", "1991-08", "1992-04"),
  732. ("1991-08", "1992-04", "M", "1991-08", "1992-04"),
  733. ],
  734. )
  735. def test_get_period_range_edges(self, first, last, freq, exp_first, exp_last):
  736. first = Period(first)
  737. last = Period(last)
  738. exp_first = Period(exp_first, freq=freq)
  739. exp_last = Period(exp_last, freq=freq)
  740. freq = pd.tseries.frequencies.to_offset(freq)
  741. result = _get_period_range_edges(first, last, freq)
  742. expected = (exp_first, exp_last)
  743. assert result == expected
  744. def test_sum_min_count(self):
  745. # GH 19974
  746. index = date_range(start="2018", freq="M", periods=6)
  747. data = np.ones(6)
  748. data[3:6] = np.nan
  749. s = Series(data, index).to_period()
  750. result = s.resample("Q").sum(min_count=1)
  751. expected = Series(
  752. [3.0, np.nan], index=PeriodIndex(["2018Q1", "2018Q2"], freq="Q-DEC")
  753. )
  754. tm.assert_series_equal(result, expected)