test_datetime_index.py 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966
  1. from datetime import datetime
  2. from functools import partial
  3. from io import StringIO
  4. from typing import List
  5. import numpy as np
  6. import pytest
  7. import pytz
  8. from pandas._libs import lib
  9. from pandas._typing import DatetimeNaTType
  10. import pandas as pd
  11. from pandas import (
  12. DataFrame,
  13. Series,
  14. Timedelta,
  15. Timestamp,
  16. isna,
  17. notna,
  18. )
  19. import pandas._testing as tm
  20. from pandas.core.groupby.grouper import Grouper
  21. from pandas.core.indexes.datetimes import date_range
  22. from pandas.core.indexes.period import (
  23. Period,
  24. period_range,
  25. )
  26. from pandas.core.resample import (
  27. DatetimeIndex,
  28. _get_timestamp_range_edges,
  29. )
  30. from pandas.tseries import offsets
  31. from pandas.tseries.offsets import Minute
  32. @pytest.fixture()
  33. def _index_factory():
  34. return date_range
  35. @pytest.fixture
  36. def _index_freq():
  37. return "Min"
  38. @pytest.fixture
  39. def _static_values(index):
  40. return np.random.rand(len(index))
  41. @pytest.fixture(params=["s", "ms", "us", "ns"])
  42. def unit(request):
  43. return request.param
  44. def test_custom_grouper(index, unit):
  45. dti = index.as_unit(unit)
  46. s = Series(np.array([1] * len(dti)), index=dti, dtype="int64")
  47. b = Grouper(freq=Minute(5))
  48. g = s.groupby(b)
  49. # check all cython functions work
  50. g.ohlc() # doesn't use _cython_agg_general
  51. funcs = ["sum", "mean", "prod", "min", "max", "var"]
  52. for f in funcs:
  53. g._cython_agg_general(f, alt=None, numeric_only=True)
  54. b = Grouper(freq=Minute(5), closed="right", label="right")
  55. g = s.groupby(b)
  56. # check all cython functions work
  57. g.ohlc() # doesn't use _cython_agg_general
  58. funcs = ["sum", "mean", "prod", "min", "max", "var"]
  59. for f in funcs:
  60. g._cython_agg_general(f, alt=None, numeric_only=True)
  61. assert g.ngroups == 2593
  62. assert notna(g.mean()).all()
  63. # construct expected val
  64. arr = [1] + [5] * 2592
  65. idx = dti[0:-1:5]
  66. idx = idx.append(dti[-1:])
  67. idx = DatetimeIndex(idx, freq="5T").as_unit(unit)
  68. expect = Series(arr, index=idx)
  69. # GH2763 - return input dtype if we can
  70. result = g.agg(np.sum)
  71. tm.assert_series_equal(result, expect)
  72. def test_custom_grouper_df(index, unit):
  73. b = Grouper(freq=Minute(5), closed="right", label="right")
  74. dti = index.as_unit(unit)
  75. df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype="float64")
  76. r = df.groupby(b).agg(np.sum)
  77. assert len(r.columns) == 10
  78. assert len(r.index) == 2593
@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
@pytest.mark.parametrize(
    "closed, expected",
    [
        (
            # closed="right": the first point is its own bin, then 5-point bins
            "right",
            lambda s: Series(
                [s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
                index=date_range("1/1/2000", periods=4, freq="5min", name="index"),
            ),
        ),
        (
            # closed="left": three 5-point bins, labelled on the right edge
            "left",
            lambda s: Series(
                [s[:5].mean(), s[5:10].mean(), s[10:].mean()],
                index=date_range(
                    "1/1/2000 00:05", periods=3, freq="5min", name="index"
                ),
            ),
        ),
    ],
)
def test_resample_basic(series, closed, expected, unit):
    """5-minute mean resample matches manually computed bins for both closures."""
    s = series
    s.index = s.index.as_unit(unit)
    # build the expectation from the fixture data, converted to the same unit
    expected = expected(s)
    expected.index = expected.index.as_unit(unit)
    result = s.resample("5min", closed=closed, label="right").mean()
    tm.assert_series_equal(result, expected)
  111. def test_resample_integerarray(unit):
  112. # GH 25580, resample on IntegerArray
  113. ts = Series(
  114. range(9),
  115. index=date_range("1/1/2000", periods=9, freq="T").as_unit(unit),
  116. dtype="Int64",
  117. )
  118. result = ts.resample("3T").sum()
  119. expected = Series(
  120. [3, 12, 21],
  121. index=date_range("1/1/2000", periods=3, freq="3T").as_unit(unit),
  122. dtype="Int64",
  123. )
  124. tm.assert_series_equal(result, expected)
  125. result = ts.resample("3T").mean()
  126. expected = Series(
  127. [1, 4, 7],
  128. index=date_range("1/1/2000", periods=3, freq="3T").as_unit(unit),
  129. dtype="Float64",
  130. )
  131. tm.assert_series_equal(result, expected)
  132. def test_resample_basic_grouper(series, unit):
  133. s = series
  134. s.index = s.index.as_unit(unit)
  135. result = s.resample("5Min").last()
  136. grouper = Grouper(freq=Minute(5), closed="left", label="left")
  137. expected = s.groupby(grouper).agg(lambda x: x[-1])
  138. tm.assert_series_equal(result, expected)
  139. @pytest.mark.parametrize(
  140. "_index_start,_index_end,_index_name",
  141. [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
  142. )
  143. @pytest.mark.parametrize(
  144. "keyword,value",
  145. [("label", "righttt"), ("closed", "righttt"), ("convention", "starttt")],
  146. )
  147. def test_resample_string_kwargs(series, keyword, value, unit):
  148. # see gh-19303
  149. # Check that wrong keyword argument strings raise an error
  150. series.index = series.index.as_unit(unit)
  151. msg = f"Unsupported value {value} for `{keyword}`"
  152. with pytest.raises(ValueError, match=msg):
  153. series.resample("5min", **({keyword: value}))
@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
def test_resample_how(series, downsample_method, unit):
    """Each downsample method matches a groupby over hand-built 5-minute bins."""
    if downsample_method == "ohlc":
        pytest.skip("covered by test_resample_how_ohlc")
    s = series
    s.index = s.index.as_unit(unit)
    # group labels replicating right-closed 5-min bins: {0} | 1-5 | 6-10 | 11-end
    grouplist = np.ones_like(s)
    grouplist[0] = 0
    grouplist[1:6] = 1
    grouplist[6:11] = 2
    grouplist[11:] = 3
    expected = s.groupby(grouplist).agg(downsample_method)
    expected.index = date_range(
        "1/1/2000", periods=4, freq="5min", name="index"
    ).as_unit(unit)
    result = getattr(
        s.resample("5min", closed="right", label="right"), downsample_method
    )()
    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize(
    "_index_start,_index_end,_index_name",
    [("1/1/2000 00:00:00", "1/1/2000 00:13:00", "index")],
)
def test_resample_how_ohlc(series, unit):
    """ohlc() matches a groupby computing open/high/low/close by hand."""
    s = series
    s.index = s.index.as_unit(unit)
    # group labels replicating right-closed 5-min bins: {0} | 1-5 | 6-10 | 11-end
    grouplist = np.ones_like(s)
    grouplist[0] = 0
    grouplist[1:6] = 1
    grouplist[6:11] = 2
    grouplist[11:] = 3

    def _ohlc(group):
        # an all-NaN group yields four NaNs
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    expected = DataFrame(
        s.groupby(grouplist).agg(_ohlc).values.tolist(),
        index=date_range("1/1/2000", periods=4, freq="5min", name="index").as_unit(
            unit
        ),
        columns=["open", "high", "low", "close"],
    )
    result = s.resample("5min", closed="right", label="right").ohlc()
    tm.assert_frame_equal(result, expected)
  201. def test_resample_how_callables(unit):
  202. # GH#7929
  203. data = np.arange(5, dtype=np.int64)
  204. ind = date_range(start="2014-01-01", periods=len(data), freq="d").as_unit(unit)
  205. df = DataFrame({"A": data, "B": data}, index=ind)
  206. def fn(x, a=1):
  207. return str(type(x))
  208. class FnClass:
  209. def __call__(self, x):
  210. return str(type(x))
  211. df_standard = df.resample("M").apply(fn)
  212. df_lambda = df.resample("M").apply(lambda x: str(type(x)))
  213. df_partial = df.resample("M").apply(partial(fn))
  214. df_partial2 = df.resample("M").apply(partial(fn, a=2))
  215. df_class = df.resample("M").apply(FnClass())
  216. tm.assert_frame_equal(df_standard, df_lambda)
  217. tm.assert_frame_equal(df_standard, df_partial)
  218. tm.assert_frame_equal(df_standard, df_partial2)
  219. tm.assert_frame_equal(df_standard, df_class)
def test_resample_rounding(unit):
    """GH 8371: odd results when rounding is needed.

    Sub-second timestamps must land in the right bucket for several bin
    widths (6s/7s/11s/13s/17s); each expected count was computed by hand.
    """
    data = """date,time,value
11-08-2014,00:00:01.093,1
11-08-2014,00:00:02.159,1
11-08-2014,00:00:02.667,1
11-08-2014,00:00:03.175,1
11-08-2014,00:00:07.058,1
11-08-2014,00:00:07.362,1
11-08-2014,00:00:08.324,1
11-08-2014,00:00:08.830,1
11-08-2014,00:00:08.982,1
11-08-2014,00:00:09.815,1
11-08-2014,00:00:10.540,1
11-08-2014,00:00:11.061,1
11-08-2014,00:00:11.617,1
11-08-2014,00:00:13.607,1
11-08-2014,00:00:14.535,1
11-08-2014,00:00:15.525,1
11-08-2014,00:00:17.960,1
11-08-2014,00:00:20.674,1
11-08-2014,00:00:21.191,1"""
    df = pd.read_csv(
        StringIO(data),
        parse_dates={"timestamp": ["date", "time"]},
        index_col="timestamp",
    )
    df.index = df.index.as_unit(unit)
    df.index.name = None
    result = df.resample("6s").sum()
    expected = DataFrame(
        {"value": [4, 9, 4, 2]},
        index=date_range("2014-11-08", freq="6s", periods=4).as_unit(unit),
    )
    tm.assert_frame_equal(result, expected)
    result = df.resample("7s").sum()
    expected = DataFrame(
        {"value": [4, 10, 4, 1]},
        index=date_range("2014-11-08", freq="7s", periods=4).as_unit(unit),
    )
    tm.assert_frame_equal(result, expected)
    result = df.resample("11s").sum()
    expected = DataFrame(
        {"value": [11, 8]},
        index=date_range("2014-11-08", freq="11s", periods=2).as_unit(unit),
    )
    tm.assert_frame_equal(result, expected)
    result = df.resample("13s").sum()
    expected = DataFrame(
        {"value": [13, 6]},
        index=date_range("2014-11-08", freq="13s", periods=2).as_unit(unit),
    )
    tm.assert_frame_equal(result, expected)
    result = df.resample("17s").sum()
    expected = DataFrame(
        {"value": [16, 3]},
        index=date_range("2014-11-08", freq="17s", periods=2).as_unit(unit),
    )
    tm.assert_frame_equal(result, expected)
  280. def test_resample_basic_from_daily(unit):
  281. # from daily
  282. dti = date_range(
  283. start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index"
  284. ).as_unit(unit)
  285. s = Series(np.random.rand(len(dti)), dti)
  286. # to weekly
  287. result = s.resample("w-sun").last()
  288. assert len(result) == 3
  289. assert (result.index.dayofweek == [6, 6, 6]).all()
  290. assert result.iloc[0] == s["1/2/2005"]
  291. assert result.iloc[1] == s["1/9/2005"]
  292. assert result.iloc[2] == s.iloc[-1]
  293. result = s.resample("W-MON").last()
  294. assert len(result) == 2
  295. assert (result.index.dayofweek == [0, 0]).all()
  296. assert result.iloc[0] == s["1/3/2005"]
  297. assert result.iloc[1] == s["1/10/2005"]
  298. result = s.resample("W-TUE").last()
  299. assert len(result) == 2
  300. assert (result.index.dayofweek == [1, 1]).all()
  301. assert result.iloc[0] == s["1/4/2005"]
  302. assert result.iloc[1] == s["1/10/2005"]
  303. result = s.resample("W-WED").last()
  304. assert len(result) == 2
  305. assert (result.index.dayofweek == [2, 2]).all()
  306. assert result.iloc[0] == s["1/5/2005"]
  307. assert result.iloc[1] == s["1/10/2005"]
  308. result = s.resample("W-THU").last()
  309. assert len(result) == 2
  310. assert (result.index.dayofweek == [3, 3]).all()
  311. assert result.iloc[0] == s["1/6/2005"]
  312. assert result.iloc[1] == s["1/10/2005"]
  313. result = s.resample("W-FRI").last()
  314. assert len(result) == 2
  315. assert (result.index.dayofweek == [4, 4]).all()
  316. assert result.iloc[0] == s["1/7/2005"]
  317. assert result.iloc[1] == s["1/10/2005"]
  318. # to biz day
  319. result = s.resample("B").last()
  320. assert len(result) == 7
  321. assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all()
  322. assert result.iloc[0] == s["1/2/2005"]
  323. assert result.iloc[1] == s["1/3/2005"]
  324. assert result.iloc[5] == s["1/9/2005"]
  325. assert result.index.name == "index"
  326. def test_resample_upsampling_picked_but_not_correct(unit):
  327. # Test for issue #3020
  328. dates = date_range("01-Jan-2014", "05-Jan-2014", freq="D").as_unit(unit)
  329. series = Series(1, index=dates)
  330. result = series.resample("D").mean()
  331. assert result.index[0] == dates[0]
  332. # GH 5955
  333. # incorrect deciding to upsample when the axis frequency matches the
  334. # resample frequency
  335. s = Series(
  336. np.arange(1.0, 6), index=[datetime(1975, 1, i, 12, 0) for i in range(1, 6)]
  337. )
  338. s.index = s.index.as_unit(unit)
  339. expected = Series(
  340. np.arange(1.0, 6),
  341. index=date_range("19750101", periods=5, freq="D").as_unit(unit),
  342. )
  343. result = s.resample("D").count()
  344. tm.assert_series_equal(result, Series(1, index=expected.index))
  345. result1 = s.resample("D").sum()
  346. result2 = s.resample("D").mean()
  347. tm.assert_series_equal(result1, expected)
  348. tm.assert_series_equal(result2, expected)
@pytest.mark.parametrize("f", ["sum", "mean", "prod", "min", "max", "var"])
def test_resample_frame_basic_cy_funcs(f, unit):
    """Cython aggregation paths work when grouping a frame with a monthly Grouper."""
    df = tm.makeTimeDataFrame()
    df.index = df.index.as_unit(unit)
    b = Grouper(freq="M")
    g = df.groupby(b)
    # check all cython functions work
    g._cython_agg_general(f, alt=None, numeric_only=True)
  357. @pytest.mark.parametrize("freq", ["A", "M"])
  358. def test_resample_frame_basic_M_A(freq, unit):
  359. df = tm.makeTimeDataFrame()
  360. df.index = df.index.as_unit(unit)
  361. result = df.resample(freq).mean()
  362. tm.assert_series_equal(result["A"], df["A"].resample(freq).mean())
  363. @pytest.mark.parametrize("freq", ["W-WED", "M"])
  364. def test_resample_frame_basic_kind(freq, unit):
  365. df = tm.makeTimeDataFrame()
  366. df.index = df.index.as_unit(unit)
  367. df.resample(freq, kind="period").mean()
  368. def test_resample_upsample(unit):
  369. # from daily
  370. dti = date_range(
  371. start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D", name="index"
  372. ).as_unit(unit)
  373. s = Series(np.random.rand(len(dti)), dti)
  374. # to minutely, by padding
  375. result = s.resample("Min").ffill()
  376. assert len(result) == 12961
  377. assert result[0] == s[0]
  378. assert result[-1] == s[-1]
  379. assert result.index.name == "index"
def test_resample_how_method(unit):
    """GH9915: 10-second mean yields NaN for the empty bins between two points."""
    s = Series(
        [11, 22],
        index=[
            Timestamp("2015-03-31 21:48:52.672000"),
            Timestamp("2015-03-31 21:49:52.739000"),
        ],
    )
    s.index = s.index.as_unit(unit)
    # the two observations are ~60s apart, so five empty 10s bins sit between
    expected = Series(
        [11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22],
        index=DatetimeIndex(
            [
                Timestamp("2015-03-31 21:48:50"),
                Timestamp("2015-03-31 21:49:00"),
                Timestamp("2015-03-31 21:49:10"),
                Timestamp("2015-03-31 21:49:20"),
                Timestamp("2015-03-31 21:49:30"),
                Timestamp("2015-03-31 21:49:40"),
                Timestamp("2015-03-31 21:49:50"),
            ],
            freq="10s",
        ),
    )
    expected.index = expected.index.as_unit(unit)
    tm.assert_series_equal(s.resample("10S").mean(), expected)
  407. def test_resample_extra_index_point(unit):
  408. # GH#9756
  409. index = date_range(start="20150101", end="20150331", freq="BM").as_unit(unit)
  410. expected = DataFrame({"A": Series([21, 41, 63], index=index)})
  411. index = date_range(start="20150101", end="20150331", freq="B").as_unit(unit)
  412. df = DataFrame({"A": Series(range(len(index)), index=index)}, dtype="int64")
  413. result = df.resample("BM").last()
  414. tm.assert_frame_equal(result, expected)
  415. def test_upsample_with_limit(unit):
  416. rng = date_range("1/1/2000", periods=3, freq="5t").as_unit(unit)
  417. ts = Series(np.random.randn(len(rng)), rng)
  418. result = ts.resample("t").ffill(limit=2)
  419. expected = ts.reindex(result.index, method="ffill", limit=2)
  420. tm.assert_series_equal(result, expected)
  421. @pytest.mark.parametrize("freq", ["5D", "10H", "5Min", "10S"])
  422. @pytest.mark.parametrize("rule", ["Y", "3M", "15D", "30H", "15Min", "30S"])
  423. def test_nearest_upsample_with_limit(tz_aware_fixture, freq, rule, unit):
  424. # GH 33939
  425. rng = date_range("1/1/2000", periods=3, freq=freq, tz=tz_aware_fixture).as_unit(
  426. unit
  427. )
  428. ts = Series(np.random.randn(len(rng)), rng)
  429. result = ts.resample(rule).nearest(limit=2)
  430. expected = ts.reindex(result.index, method="nearest", limit=2)
  431. tm.assert_series_equal(result, expected)
  432. def test_resample_ohlc(series, unit):
  433. s = series
  434. s.index = s.index.as_unit(unit)
  435. grouper = Grouper(freq=Minute(5))
  436. expect = s.groupby(grouper).agg(lambda x: x[-1])
  437. result = s.resample("5Min").ohlc()
  438. assert len(result) == len(expect)
  439. assert len(result.columns) == 4
  440. xs = result.iloc[-2]
  441. assert xs["open"] == s[-6]
  442. assert xs["high"] == s[-6:-1].max()
  443. assert xs["low"] == s[-6:-1].min()
  444. assert xs["close"] == s[-2]
  445. xs = result.iloc[0]
  446. assert xs["open"] == s[0]
  447. assert xs["high"] == s[:5].max()
  448. assert xs["low"] == s[:5].min()
  449. assert xs["close"] == s[4]
  450. def test_resample_ohlc_result(unit):
  451. # GH 12332
  452. index = date_range("1-1-2000", "2-15-2000", freq="h").as_unit(unit)
  453. index = index.union(date_range("4-15-2000", "5-15-2000", freq="h").as_unit(unit))
  454. s = Series(range(len(index)), index=index)
  455. a = s.loc[:"4-15-2000"].resample("30T").ohlc()
  456. assert isinstance(a, DataFrame)
  457. b = s.loc[:"4-14-2000"].resample("30T").ohlc()
  458. assert isinstance(b, DataFrame)
  459. def test_resample_ohlc_result_odd_period(unit):
  460. # GH12348
  461. # raising on odd period
  462. rng = date_range("2013-12-30", "2014-01-07").as_unit(unit)
  463. index = rng.drop(
  464. [
  465. Timestamp("2014-01-01"),
  466. Timestamp("2013-12-31"),
  467. Timestamp("2014-01-04"),
  468. Timestamp("2014-01-05"),
  469. ]
  470. )
  471. df = DataFrame(data=np.arange(len(index)), index=index)
  472. result = df.resample("B").mean()
  473. expected = df.reindex(index=date_range(rng[0], rng[-1], freq="B").as_unit(unit))
  474. tm.assert_frame_equal(result, expected)
def test_resample_ohlc_dataframe(unit):
    """Frame-level ohlc concatenates the per-column ohlc results.

    Also checks that a MultiIndex on the columns gains an open/high/low/close
    level instead of being replaced.
    """
    df = (
        DataFrame(
            {
                "PRICE": {
                    Timestamp("2011-01-06 10:59:05", tz=None): 24990,
                    Timestamp("2011-01-06 12:43:33", tz=None): 25499,
                    Timestamp("2011-01-06 12:54:09", tz=None): 25499,
                },
                "VOLUME": {
                    Timestamp("2011-01-06 10:59:05", tz=None): 1500000000,
                    Timestamp("2011-01-06 12:43:33", tz=None): 5000000000,
                    Timestamp("2011-01-06 12:54:09", tz=None): 100000000,
                },
            }
        )
    ).reindex(["VOLUME", "PRICE"], axis=1)
    df.index = df.index.as_unit(unit)
    df.columns.name = "Cols"
    res = df.resample("H").ohlc()
    exp = pd.concat(
        [df["VOLUME"].resample("H").ohlc(), df["PRICE"].resample("H").ohlc()],
        axis=1,
        keys=df.columns,
    )
    # the columns' name must survive the ohlc expansion
    assert exp.columns.names[0] == "Cols"
    tm.assert_frame_equal(exp, res)
    # a 2-level column MultiIndex gains a third (ohlc) level
    df.columns = [["a", "b"], ["c", "d"]]
    res = df.resample("H").ohlc()
    exp.columns = pd.MultiIndex.from_tuples(
        [
            ("a", "c", "open"),
            ("a", "c", "high"),
            ("a", "c", "low"),
            ("a", "c", "close"),
            ("b", "d", "open"),
            ("b", "d", "high"),
            ("b", "d", "low"),
            ("b", "d", "close"),
        ]
    )
    tm.assert_frame_equal(exp, res)
    # dupe columns fail atm
    # df.columns = ['PRICE', 'PRICE']
def test_resample_dup_index():
    """GH 4812: resampling along axis=1 works despite duplicate row labels."""
    # dup columns with resample raising
    df = DataFrame(
        np.random.randn(4, 12),
        index=[2000, 2000, 2000, 2000],
        columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)],
    )
    df.iloc[3, :] = np.nan
    result = df.resample("Q", axis=1).mean()
    # quarterly bucketing by hand: month -> zero-based quarter ordinal
    expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean()
    expected.columns = [Period(year=2000, quarter=i + 1, freq="Q") for i in range(4)]
    tm.assert_frame_equal(result, expected)
  532. def test_resample_reresample(unit):
  533. dti = date_range(
  534. start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D"
  535. ).as_unit(unit)
  536. s = Series(np.random.rand(len(dti)), dti)
  537. bs = s.resample("B", closed="right", label="right").mean()
  538. result = bs.resample("8H").mean()
  539. assert len(result) == 22
  540. assert isinstance(result.index.freq, offsets.DateOffset)
  541. assert result.index.freq == offsets.Hour(8)
@pytest.mark.parametrize(
    "freq, expected_kwargs",
    [
        ["A-DEC", {"start": "1990", "end": "2000", "freq": "a-dec"}],
        ["A-JUN", {"start": "1990", "end": "2000", "freq": "a-jun"}],
        ["M", {"start": "1990-01", "end": "2000-01", "freq": "M"}],
    ],
)
def test_resample_timestamp_to_period(
    simple_date_range_series, freq, expected_kwargs, unit
):
    """kind="period" resampling matches the timestamp result with a period index."""
    ts = simple_date_range_series("1/1/1990", "1/1/2000")
    ts.index = ts.index.as_unit(unit)
    result = ts.resample(freq, kind="period").mean()
    # same values as a plain resample, but indexed by the equivalent periods
    expected = ts.resample(freq).mean()
    expected.index = period_range(**expected_kwargs)
    tm.assert_series_equal(result, expected)
  559. def test_ohlc_5min(unit):
  560. def _ohlc(group):
  561. if isna(group).all():
  562. return np.repeat(np.nan, 4)
  563. return [group[0], group.max(), group.min(), group[-1]]
  564. rng = date_range("1/1/2000 00:00:00", "1/1/2000 5:59:50", freq="10s").as_unit(unit)
  565. ts = Series(np.random.randn(len(rng)), index=rng)
  566. resampled = ts.resample("5min", closed="right", label="right").ohlc()
  567. assert (resampled.loc["1/1/2000 00:00"] == ts[0]).all()
  568. exp = _ohlc(ts[1:31])
  569. assert (resampled.loc["1/1/2000 00:05"] == exp).all()
  570. exp = _ohlc(ts["1/1/2000 5:55:01":])
  571. assert (resampled.loc["1/1/2000 6:00:00"] == exp).all()
  572. def test_downsample_non_unique(unit):
  573. rng = date_range("1/1/2000", "2/29/2000").as_unit(unit)
  574. rng2 = rng.repeat(5).values
  575. ts = Series(np.random.randn(len(rng2)), index=rng2)
  576. result = ts.resample("M").mean()
  577. expected = ts.groupby(lambda x: x.month).mean()
  578. assert len(result) == 2
  579. tm.assert_almost_equal(result[0], expected[1])
  580. tm.assert_almost_equal(result[1], expected[2])
  581. def test_asfreq_non_unique(unit):
  582. # GH #1077
  583. rng = date_range("1/1/2000", "2/29/2000").as_unit(unit)
  584. rng2 = rng.repeat(2).values
  585. ts = Series(np.random.randn(len(rng2)), index=rng2)
  586. msg = "cannot reindex on an axis with duplicate labels"
  587. with pytest.raises(ValueError, match=msg):
  588. ts.asfreq("B")
def test_resample_axis1(unit):
    """Resampling along axis=1 matches transpose -> resample -> transpose back."""
    rng = date_range("1/1/2000", "2/29/2000").as_unit(unit)
    df = DataFrame(np.random.randn(3, len(rng)), columns=rng, index=["a", "b", "c"])
    result = df.resample("M", axis=1).mean()
    expected = df.T.resample("M").mean().T
    tm.assert_frame_equal(result, expected)
  595. @pytest.mark.parametrize("freq", ["t", "5t", "15t", "30t", "4h", "12h"])
  596. def test_resample_anchored_ticks(freq, unit):
  597. # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
  598. # "anchor" the origin at midnight so we get regular intervals rather
  599. # than starting from the first timestamp which might start in the
  600. # middle of a desired interval
  601. rng = date_range("1/1/2000 04:00:00", periods=86400, freq="s").as_unit(unit)
  602. ts = Series(np.random.randn(len(rng)), index=rng)
  603. ts[:2] = np.nan # so results are the same
  604. result = ts[2:].resample(freq, closed="left", label="left").mean()
  605. expected = ts.resample(freq, closed="left", label="left").mean()
  606. tm.assert_series_equal(result, expected)
  607. @pytest.mark.parametrize("end", [1, 2])
  608. def test_resample_single_group(end, unit):
  609. mysum = lambda x: x.sum()
  610. rng = date_range("2000-1-1", f"2000-{end}-10", freq="D").as_unit(unit)
  611. ts = Series(np.random.randn(len(rng)), index=rng)
  612. tm.assert_series_equal(ts.resample("M").sum(), ts.resample("M").apply(mysum))
  613. def test_resample_single_group_std(unit):
  614. # GH 3849
  615. s = Series(
  616. [30.1, 31.6],
  617. index=[Timestamp("20070915 15:30:00"), Timestamp("20070915 15:40:00")],
  618. )
  619. s.index = s.index.as_unit(unit)
  620. expected = Series(
  621. [0.75], index=DatetimeIndex([Timestamp("20070915")], freq="D").as_unit(unit)
  622. )
  623. result = s.resample("D").apply(lambda x: np.std(x))
  624. tm.assert_series_equal(result, expected)
  625. def test_resample_offset(unit):
  626. # GH 31809
  627. rng = date_range("1/1/2000 00:00:00", "1/1/2000 02:00", freq="s").as_unit(unit)
  628. ts = Series(np.random.randn(len(rng)), index=rng)
  629. resampled = ts.resample("5min", offset="2min").mean()
  630. exp_rng = date_range("12/31/1999 23:57:00", "1/1/2000 01:57", freq="5min").as_unit(
  631. unit
  632. )
  633. tm.assert_index_equal(resampled.index, exp_rng)
  634. @pytest.mark.parametrize(
  635. "kwargs",
  636. [
  637. {"origin": "1999-12-31 23:57:00"},
  638. {"origin": Timestamp("1970-01-01 00:02:00")},
  639. {"origin": "epoch", "offset": "2m"},
  640. # origin of '1999-31-12 12:02:00' should be equivalent for this case
  641. {"origin": "1999-12-31 12:02:00"},
  642. {"offset": "-3m"},
  643. ],
  644. )
  645. def test_resample_origin(kwargs, unit):
  646. # GH 31809
  647. rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit)
  648. ts = Series(np.random.randn(len(rng)), index=rng)
  649. exp_rng = date_range(
  650. "1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min"
  651. ).as_unit(unit)
  652. resampled = ts.resample("5min", **kwargs).mean()
  653. tm.assert_index_equal(resampled.index, exp_rng)
  654. @pytest.mark.parametrize(
  655. "origin", ["invalid_value", "epch", "startday", "startt", "2000-30-30", object()]
  656. )
  657. def test_resample_bad_origin(origin, unit):
  658. rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit)
  659. ts = Series(np.random.randn(len(rng)), index=rng)
  660. msg = (
  661. "'origin' should be equal to 'epoch', 'start', 'start_day', "
  662. "'end', 'end_day' or should be a Timestamp convertible type. Got "
  663. f"'{origin}' instead."
  664. )
  665. with pytest.raises(ValueError, match=msg):
  666. ts.resample("5min", origin=origin)
  667. @pytest.mark.parametrize("offset", ["invalid_value", "12dayys", "2000-30-30", object()])
  668. def test_resample_bad_offset(offset, unit):
  669. rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit)
  670. ts = Series(np.random.randn(len(rng)), index=rng)
  671. msg = f"'offset' should be a Timedelta convertible type. Got '{offset}' instead."
  672. with pytest.raises(ValueError, match=msg):
  673. ts.resample("5min", offset=offset)
  674. def test_resample_origin_prime_freq(unit):
  675. # GH 31809
  676. start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00"
  677. rng = date_range(start, end, freq="7min").as_unit(unit)
  678. ts = Series(np.random.randn(len(rng)), index=rng)
  679. exp_rng = date_range(
  680. "2000-10-01 23:14:00", "2000-10-02 00:22:00", freq="17min"
  681. ).as_unit(unit)
  682. resampled = ts.resample("17min").mean()
  683. tm.assert_index_equal(resampled.index, exp_rng)
  684. resampled = ts.resample("17min", origin="start_day").mean()
  685. tm.assert_index_equal(resampled.index, exp_rng)
  686. exp_rng = date_range(
  687. "2000-10-01 23:30:00", "2000-10-02 00:21:00", freq="17min"
  688. ).as_unit(unit)
  689. resampled = ts.resample("17min", origin="start").mean()
  690. tm.assert_index_equal(resampled.index, exp_rng)
  691. resampled = ts.resample("17min", offset="23h30min").mean()
  692. tm.assert_index_equal(resampled.index, exp_rng)
  693. resampled = ts.resample("17min", origin="start_day", offset="23h30min").mean()
  694. tm.assert_index_equal(resampled.index, exp_rng)
  695. exp_rng = date_range(
  696. "2000-10-01 23:18:00", "2000-10-02 00:26:00", freq="17min"
  697. ).as_unit(unit)
  698. resampled = ts.resample("17min", origin="epoch").mean()
  699. tm.assert_index_equal(resampled.index, exp_rng)
  700. exp_rng = date_range(
  701. "2000-10-01 23:24:00", "2000-10-02 00:15:00", freq="17min"
  702. ).as_unit(unit)
  703. resampled = ts.resample("17min", origin="2000-01-01").mean()
  704. tm.assert_index_equal(resampled.index, exp_rng)
  705. def test_resample_origin_with_tz(unit):
  706. # GH 31809
  707. msg = "The origin must have the same timezone as the index."
  708. tz = "Europe/Paris"
  709. rng = date_range(
  710. "2000-01-01 00:00:00", "2000-01-01 02:00", freq="s", tz=tz
  711. ).as_unit(unit)
  712. ts = Series(np.random.randn(len(rng)), index=rng)
  713. exp_rng = date_range(
  714. "1999-12-31 23:57:00", "2000-01-01 01:57", freq="5min", tz=tz
  715. ).as_unit(unit)
  716. resampled = ts.resample("5min", origin="1999-12-31 23:57:00+00:00").mean()
  717. tm.assert_index_equal(resampled.index, exp_rng)
  718. # origin of '1999-31-12 12:02:00+03:00' should be equivalent for this case
  719. resampled = ts.resample("5min", origin="1999-12-31 12:02:00+03:00").mean()
  720. tm.assert_index_equal(resampled.index, exp_rng)
  721. resampled = ts.resample("5min", origin="epoch", offset="2m").mean()
  722. tm.assert_index_equal(resampled.index, exp_rng)
  723. with pytest.raises(ValueError, match=msg):
  724. ts.resample("5min", origin="12/31/1999 23:57:00").mean()
  725. # if the series is not tz aware, origin should not be tz aware
  726. rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s").as_unit(unit)
  727. ts = Series(np.random.randn(len(rng)), index=rng)
  728. with pytest.raises(ValueError, match=msg):
  729. ts.resample("5min", origin="12/31/1999 23:57:00+03:00").mean()
  730. def test_resample_origin_epoch_with_tz_day_vs_24h(unit):
  731. # GH 34474
  732. start, end = "2000-10-01 23:30:00+0500", "2000-12-02 00:30:00+0500"
  733. rng = date_range(start, end, freq="7min").as_unit(unit)
  734. random_values = np.random.randn(len(rng))
  735. ts_1 = Series(random_values, index=rng)
  736. result_1 = ts_1.resample("D", origin="epoch").mean()
  737. result_2 = ts_1.resample("24H", origin="epoch").mean()
  738. tm.assert_series_equal(result_1, result_2)
  739. # check that we have the same behavior with epoch even if we are not timezone aware
  740. ts_no_tz = ts_1.tz_localize(None)
  741. result_3 = ts_no_tz.resample("D", origin="epoch").mean()
  742. result_4 = ts_no_tz.resample("24H", origin="epoch").mean()
  743. tm.assert_series_equal(result_1, result_3.tz_localize(rng.tz), check_freq=False)
  744. tm.assert_series_equal(result_1, result_4.tz_localize(rng.tz), check_freq=False)
  745. # check that we have the similar results with two different timezones (+2H and +5H)
  746. start, end = "2000-10-01 23:30:00+0200", "2000-12-02 00:30:00+0200"
  747. rng = date_range(start, end, freq="7min").as_unit(unit)
  748. ts_2 = Series(random_values, index=rng)
  749. result_5 = ts_2.resample("D", origin="epoch").mean()
  750. result_6 = ts_2.resample("24H", origin="epoch").mean()
  751. tm.assert_series_equal(result_1.tz_localize(None), result_5.tz_localize(None))
  752. tm.assert_series_equal(result_1.tz_localize(None), result_6.tz_localize(None))
  753. def test_resample_origin_with_day_freq_on_dst(unit):
  754. # GH 31809
  755. tz = "America/Chicago"
  756. def _create_series(values, timestamps, freq="D"):
  757. return Series(
  758. values,
  759. index=DatetimeIndex(
  760. [Timestamp(t, tz=tz) for t in timestamps], freq=freq, ambiguous=True
  761. ).as_unit(unit),
  762. )
  763. # test classical behavior of origin in a DST context
  764. start = Timestamp("2013-11-02", tz=tz)
  765. end = Timestamp("2013-11-03 23:59", tz=tz)
  766. rng = date_range(start, end, freq="1h").as_unit(unit)
  767. ts = Series(np.ones(len(rng)), index=rng)
  768. expected = _create_series([24.0, 25.0], ["2013-11-02", "2013-11-03"])
  769. for origin in ["epoch", "start", "start_day", start, None]:
  770. result = ts.resample("D", origin=origin).sum()
  771. tm.assert_series_equal(result, expected)
  772. # test complex behavior of origin/offset in a DST context
  773. start = Timestamp("2013-11-03", tz=tz)
  774. end = Timestamp("2013-11-03 23:59", tz=tz)
  775. rng = date_range(start, end, freq="1h").as_unit(unit)
  776. ts = Series(np.ones(len(rng)), index=rng)
  777. expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]
  778. expected = _create_series([23.0, 2.0], expected_ts)
  779. result = ts.resample("D", origin="start", offset="-2H").sum()
  780. tm.assert_series_equal(result, expected)
  781. expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 21:00-06:00"]
  782. expected = _create_series([22.0, 3.0], expected_ts, freq="24H")
  783. result = ts.resample("24H", origin="start", offset="-2H").sum()
  784. tm.assert_series_equal(result, expected)
  785. expected_ts = ["2013-11-02 02:00-05:00", "2013-11-03 02:00-06:00"]
  786. expected = _create_series([3.0, 22.0], expected_ts)
  787. result = ts.resample("D", origin="start", offset="2H").sum()
  788. tm.assert_series_equal(result, expected)
  789. expected_ts = ["2013-11-02 23:00-05:00", "2013-11-03 23:00-06:00"]
  790. expected = _create_series([24.0, 1.0], expected_ts)
  791. result = ts.resample("D", origin="start", offset="-1H").sum()
  792. tm.assert_series_equal(result, expected)
  793. expected_ts = ["2013-11-02 01:00-05:00", "2013-11-03 01:00:00-0500"]
  794. expected = _create_series([1.0, 24.0], expected_ts)
  795. result = ts.resample("D", origin="start", offset="1H").sum()
  796. tm.assert_series_equal(result, expected)
  797. def test_resample_daily_anchored(unit):
  798. rng = date_range("1/1/2000 0:00:00", periods=10000, freq="T").as_unit(unit)
  799. ts = Series(np.random.randn(len(rng)), index=rng)
  800. ts[:2] = np.nan # so results are the same
  801. result = ts[2:].resample("D", closed="left", label="left").mean()
  802. expected = ts.resample("D", closed="left", label="left").mean()
  803. tm.assert_series_equal(result, expected)
  804. def test_resample_to_period_monthly_buglet(unit):
  805. # GH #1259
  806. rng = date_range("1/1/2000", "12/31/2000").as_unit(unit)
  807. ts = Series(np.random.randn(len(rng)), index=rng)
  808. result = ts.resample("M", kind="period").mean()
  809. exp_index = period_range("Jan-2000", "Dec-2000", freq="M")
  810. tm.assert_index_equal(result.index, exp_index)
  811. def test_period_with_agg():
  812. # aggregate a period resampler with a lambda
  813. s2 = Series(
  814. np.random.randint(0, 5, 50),
  815. index=period_range("2012-01-01", freq="H", periods=50),
  816. dtype="float64",
  817. )
  818. expected = s2.to_timestamp().resample("D").mean().to_period()
  819. result = s2.resample("D").agg(lambda x: x.mean())
  820. tm.assert_series_equal(result, expected)
  821. def test_resample_segfault(unit):
  822. # GH 8573
  823. # segfaulting in older versions
  824. all_wins_and_wagers = [
  825. (1, datetime(2013, 10, 1, 16, 20), 1, 0),
  826. (2, datetime(2013, 10, 1, 16, 10), 1, 0),
  827. (2, datetime(2013, 10, 1, 18, 15), 1, 0),
  828. (2, datetime(2013, 10, 1, 16, 10, 31), 1, 0),
  829. ]
  830. df = DataFrame.from_records(
  831. all_wins_and_wagers, columns=("ID", "timestamp", "A", "B")
  832. ).set_index("timestamp")
  833. df.index = df.index.as_unit(unit)
  834. result = df.groupby("ID").resample("5min").sum()
  835. expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum())
  836. tm.assert_frame_equal(result, expected)
  837. def test_resample_dtype_preservation(unit):
  838. # GH 12202
  839. # validation tests for dtype preservation
  840. df = DataFrame(
  841. {
  842. "date": date_range(start="2016-01-01", periods=4, freq="W").as_unit(unit),
  843. "group": [1, 1, 2, 2],
  844. "val": Series([5, 6, 7, 8], dtype="int32"),
  845. }
  846. ).set_index("date")
  847. result = df.resample("1D").ffill()
  848. assert result.val.dtype == np.int32
  849. result = df.groupby("group").resample("1D").ffill()
  850. assert result.val.dtype == np.int32
  851. def test_resample_dtype_coercion(unit):
  852. pytest.importorskip("scipy.interpolate")
  853. # GH 16361
  854. df = {"a": [1, 3, 1, 4]}
  855. df = DataFrame(df, index=date_range("2017-01-01", "2017-01-04").as_unit(unit))
  856. expected = df.astype("float64").resample("H").mean()["a"].interpolate("cubic")
  857. result = df.resample("H")["a"].mean().interpolate("cubic")
  858. tm.assert_series_equal(result, expected)
  859. result = df.resample("H").mean()["a"].interpolate("cubic")
  860. tm.assert_series_equal(result, expected)
  861. def test_weekly_resample_buglet(unit):
  862. # #1327
  863. rng = date_range("1/1/2000", freq="B", periods=20).as_unit(unit)
  864. ts = Series(np.random.randn(len(rng)), index=rng)
  865. resampled = ts.resample("W").mean()
  866. expected = ts.resample("W-SUN").mean()
  867. tm.assert_series_equal(resampled, expected)
  868. def test_monthly_resample_error(unit):
  869. # #1451
  870. dates = date_range("4/16/2012 20:00", periods=5000, freq="h").as_unit(unit)
  871. ts = Series(np.random.randn(len(dates)), index=dates)
  872. # it works!
  873. ts.resample("M")
  874. def test_nanosecond_resample_error():
  875. # GH 12307 - Values falls after last bin when
  876. # Resampling using pd.tseries.offsets.Nano as period
  877. start = 1443707890427
  878. exp_start = 1443707890400
  879. indx = date_range(start=pd.to_datetime(start), periods=10, freq="100n")
  880. ts = Series(range(len(indx)), index=indx)
  881. r = ts.resample(pd.tseries.offsets.Nano(100))
  882. result = r.agg("mean")
  883. exp_indx = date_range(start=pd.to_datetime(exp_start), periods=10, freq="100n")
  884. exp = Series(range(len(exp_indx)), index=exp_indx, dtype=float)
  885. tm.assert_series_equal(result, exp)
  886. def test_resample_anchored_intraday(simple_date_range_series, unit):
  887. # #1471, #1458
  888. rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit)
  889. df = DataFrame(rng.month, index=rng)
  890. result = df.resample("M").mean()
  891. expected = df.resample("M", kind="period").mean().to_timestamp(how="end")
  892. expected.index += Timedelta(1, "ns") - Timedelta(1, "D")
  893. expected.index = expected.index.as_unit(unit)._with_freq("infer")
  894. assert expected.index.freq == "M"
  895. tm.assert_frame_equal(result, expected)
  896. result = df.resample("M", closed="left").mean()
  897. exp = df.shift(1, freq="D").resample("M", kind="period").mean()
  898. exp = exp.to_timestamp(how="end")
  899. exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D")
  900. exp.index = exp.index.as_unit(unit)._with_freq("infer")
  901. assert exp.index.freq == "M"
  902. tm.assert_frame_equal(result, exp)
  903. rng = date_range("1/1/2012", "4/1/2012", freq="100min").as_unit(unit)
  904. df = DataFrame(rng.month, index=rng)
  905. result = df.resample("Q").mean()
  906. expected = df.resample("Q", kind="period").mean().to_timestamp(how="end")
  907. expected.index += Timedelta(1, "ns") - Timedelta(1, "D")
  908. expected.index._data.freq = "Q"
  909. expected.index._freq = lib.no_default
  910. expected.index = expected.index.as_unit(unit)
  911. tm.assert_frame_equal(result, expected)
  912. result = df.resample("Q", closed="left").mean()
  913. expected = df.shift(1, freq="D").resample("Q", kind="period", closed="left").mean()
  914. expected = expected.to_timestamp(how="end")
  915. expected.index += Timedelta(1, "ns") - Timedelta(1, "D")
  916. expected.index._data.freq = "Q"
  917. expected.index._freq = lib.no_default
  918. expected.index = expected.index.as_unit(unit)
  919. tm.assert_frame_equal(result, expected)
  920. ts = simple_date_range_series("2012-04-29 23:00", "2012-04-30 5:00", freq="h")
  921. ts.index = ts.index.as_unit(unit)
  922. resampled = ts.resample("M").mean()
  923. assert len(resampled) == 1
  924. @pytest.mark.parametrize("freq", ["MS", "BMS", "QS-MAR", "AS-DEC", "AS-JUN"])
  925. def test_resample_anchored_monthstart(simple_date_range_series, freq, unit):
  926. ts = simple_date_range_series("1/1/2000", "12/31/2002")
  927. ts.index = ts.index.as_unit(unit)
  928. ts.resample(freq).mean()
  929. @pytest.mark.parametrize("label, sec", [[None, 2.0], ["right", "4.2"]])
  930. def test_resample_anchored_multiday(label, sec):
  931. # When resampling a range spanning multiple days, ensure that the
  932. # start date gets used to determine the offset. Fixes issue where
  933. # a one day period is not a multiple of the frequency.
  934. #
  935. # See: https://github.com/pandas-dev/pandas/issues/8683
  936. index1 = date_range("2014-10-14 23:06:23.206", periods=3, freq="400L")
  937. index2 = date_range("2014-10-15 23:00:00", periods=2, freq="2200L")
  938. index = index1.union(index2)
  939. s = Series(np.random.randn(5), index=index)
  940. # Ensure left closing works
  941. result = s.resample("2200L", label=label).mean()
  942. assert result.index[-1] == Timestamp(f"2014-10-15 23:00:{sec}00")
  943. def test_corner_cases(unit):
  944. # miscellaneous test coverage
  945. rng = date_range("1/1/2000", periods=12, freq="t").as_unit(unit)
  946. ts = Series(np.random.randn(len(rng)), index=rng)
  947. result = ts.resample("5t", closed="right", label="left").mean()
  948. ex_index = date_range("1999-12-31 23:55", periods=4, freq="5t").as_unit(unit)
  949. tm.assert_index_equal(result.index, ex_index)
  950. def test_corner_cases_period(simple_period_range_series):
  951. # miscellaneous test coverage
  952. len0pts = simple_period_range_series("2007-01", "2010-05", freq="M")[:0]
  953. # it works
  954. result = len0pts.resample("A-DEC").mean()
  955. assert len(result) == 0
  956. def test_corner_cases_date(simple_date_range_series, unit):
  957. # resample to periods
  958. ts = simple_date_range_series("2000-04-28", "2000-04-30 11:00", freq="h")
  959. ts.index = ts.index.as_unit(unit)
  960. result = ts.resample("M", kind="period").mean()
  961. assert len(result) == 1
  962. assert result.index[0] == Period("2000-04", freq="M")
  963. def test_anchored_lowercase_buglet(unit):
  964. dates = date_range("4/16/2012 20:00", periods=50000, freq="s").as_unit(unit)
  965. ts = Series(np.random.randn(len(dates)), index=dates)
  966. # it works!
  967. ts.resample("d").mean()
  968. def test_upsample_apply_functions(unit):
  969. # #1596
  970. rng = date_range("2012-06-12", periods=4, freq="h").as_unit(unit)
  971. ts = Series(np.random.randn(len(rng)), index=rng)
  972. result = ts.resample("20min").aggregate(["mean", "sum"])
  973. assert isinstance(result, DataFrame)
  974. def test_resample_not_monotonic(unit):
  975. rng = date_range("2012-06-12", periods=200, freq="h").as_unit(unit)
  976. ts = Series(np.random.randn(len(rng)), index=rng)
  977. ts = ts.take(np.random.permutation(len(ts)))
  978. result = ts.resample("D").sum()
  979. exp = ts.sort_index().resample("D").sum()
  980. tm.assert_series_equal(result, exp)
  981. @pytest.mark.parametrize(
  982. "dtype",
  983. [
  984. "int64",
  985. "int32",
  986. "float64",
  987. pytest.param(
  988. "float32",
  989. marks=pytest.mark.xfail(
  990. reason="Empty groups cause x.mean() to return float64"
  991. ),
  992. ),
  993. ],
  994. )
  995. def test_resample_median_bug_1688(dtype):
  996. df = DataFrame(
  997. [1, 2],
  998. index=[datetime(2012, 1, 1, 0, 0, 0), datetime(2012, 1, 1, 0, 5, 0)],
  999. dtype=dtype,
  1000. )
  1001. result = df.resample("T").apply(lambda x: x.mean())
  1002. exp = df.asfreq("T")
  1003. tm.assert_frame_equal(result, exp)
  1004. result = df.resample("T").median()
  1005. exp = df.asfreq("T")
  1006. tm.assert_frame_equal(result, exp)
  1007. def test_how_lambda_functions(simple_date_range_series, unit):
  1008. ts = simple_date_range_series("1/1/2000", "4/1/2000")
  1009. ts.index = ts.index.as_unit(unit)
  1010. result = ts.resample("M").apply(lambda x: x.mean())
  1011. exp = ts.resample("M").mean()
  1012. tm.assert_series_equal(result, exp)
  1013. foo_exp = ts.resample("M").mean()
  1014. foo_exp.name = "foo"
  1015. bar_exp = ts.resample("M").std()
  1016. bar_exp.name = "bar"
  1017. result = ts.resample("M").apply([lambda x: x.mean(), lambda x: x.std(ddof=1)])
  1018. result.columns = ["foo", "bar"]
  1019. tm.assert_series_equal(result["foo"], foo_exp)
  1020. tm.assert_series_equal(result["bar"], bar_exp)
  1021. # this is a MI Series, so comparing the names of the results
  1022. # doesn't make sense
  1023. result = ts.resample("M").aggregate(
  1024. {"foo": lambda x: x.mean(), "bar": lambda x: x.std(ddof=1)}
  1025. )
  1026. tm.assert_series_equal(result["foo"], foo_exp, check_names=False)
  1027. tm.assert_series_equal(result["bar"], bar_exp, check_names=False)
  1028. def test_resample_unequal_times(unit):
  1029. # #1772
  1030. start = datetime(1999, 3, 1, 5)
  1031. # end hour is less than start
  1032. end = datetime(2012, 7, 31, 4)
  1033. bad_ind = date_range(start, end, freq="30min").as_unit(unit)
  1034. df = DataFrame({"close": 1}, index=bad_ind)
  1035. # it works!
  1036. df.resample("AS").sum()
  1037. def test_resample_consistency(unit):
  1038. # GH 6418
  1039. # resample with bfill / limit / reindex consistency
  1040. i30 = date_range("2002-02-02", periods=4, freq="30T").as_unit(unit)
  1041. s = Series(np.arange(4.0), index=i30)
  1042. s[2] = np.NaN
  1043. # Upsample by factor 3 with reindex() and resample() methods:
  1044. i10 = date_range(i30[0], i30[-1], freq="10T").as_unit(unit)
  1045. s10 = s.reindex(index=i10, method="bfill")
  1046. s10_2 = s.reindex(index=i10, method="bfill", limit=2)
  1047. rl = s.reindex_like(s10, method="bfill", limit=2)
  1048. r10_2 = s.resample("10Min").bfill(limit=2)
  1049. r10 = s.resample("10Min").bfill()
  1050. # s10_2, r10, r10_2, rl should all be equal
  1051. tm.assert_series_equal(s10_2, r10)
  1052. tm.assert_series_equal(s10_2, r10_2)
  1053. tm.assert_series_equal(s10_2, rl)
  1054. dates1: List[DatetimeNaTType] = [
  1055. datetime(2014, 10, 1),
  1056. datetime(2014, 9, 3),
  1057. datetime(2014, 11, 5),
  1058. datetime(2014, 9, 5),
  1059. datetime(2014, 10, 8),
  1060. datetime(2014, 7, 15),
  1061. ]
  1062. dates2: List[DatetimeNaTType] = (
  1063. dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:]
  1064. )
  1065. dates3 = [pd.NaT] + dates1 + [pd.NaT]
  1066. @pytest.mark.parametrize("dates", [dates1, dates2, dates3])
  1067. def test_resample_timegrouper(dates):
  1068. # GH 7227
  1069. df = DataFrame({"A": dates, "B": np.arange(len(dates))})
  1070. result = df.set_index("A").resample("M").count()
  1071. exp_idx = DatetimeIndex(
  1072. ["2014-07-31", "2014-08-31", "2014-09-30", "2014-10-31", "2014-11-30"],
  1073. freq="M",
  1074. name="A",
  1075. )
  1076. expected = DataFrame({"B": [1, 0, 2, 2, 1]}, index=exp_idx)
  1077. if df["A"].isna().any():
  1078. expected.index = expected.index._with_freq(None)
  1079. tm.assert_frame_equal(result, expected)
  1080. result = df.groupby(Grouper(freq="M", key="A")).count()
  1081. tm.assert_frame_equal(result, expected)
  1082. df = DataFrame({"A": dates, "B": np.arange(len(dates)), "C": np.arange(len(dates))})
  1083. result = df.set_index("A").resample("M").count()
  1084. expected = DataFrame(
  1085. {"B": [1, 0, 2, 2, 1], "C": [1, 0, 2, 2, 1]},
  1086. index=exp_idx,
  1087. columns=["B", "C"],
  1088. )
  1089. if df["A"].isna().any():
  1090. expected.index = expected.index._with_freq(None)
  1091. tm.assert_frame_equal(result, expected)
  1092. result = df.groupby(Grouper(freq="M", key="A")).count()
  1093. tm.assert_frame_equal(result, expected)
  1094. def test_resample_nunique(unit):
  1095. # GH 12352
  1096. df = DataFrame(
  1097. {
  1098. "ID": {
  1099. Timestamp("2015-06-05 00:00:00"): "0010100903",
  1100. Timestamp("2015-06-08 00:00:00"): "0010150847",
  1101. },
  1102. "DATE": {
  1103. Timestamp("2015-06-05 00:00:00"): "2015-06-05",
  1104. Timestamp("2015-06-08 00:00:00"): "2015-06-08",
  1105. },
  1106. }
  1107. )
  1108. df.index = df.index.as_unit(unit)
  1109. r = df.resample("D")
  1110. g = df.groupby(Grouper(freq="D"))
  1111. expected = df.groupby(Grouper(freq="D")).ID.apply(lambda x: x.nunique())
  1112. assert expected.name == "ID"
  1113. for t in [r, g]:
  1114. result = t.ID.nunique()
  1115. tm.assert_series_equal(result, expected)
  1116. result = df.ID.resample("D").nunique()
  1117. tm.assert_series_equal(result, expected)
  1118. result = df.ID.groupby(Grouper(freq="D")).nunique()
  1119. tm.assert_series_equal(result, expected)
  1120. def test_resample_nunique_preserves_column_level_names(unit):
  1121. # see gh-23222
  1122. df = tm.makeTimeDataFrame(freq="1D").abs()
  1123. df.index = df.index.as_unit(unit)
  1124. df.columns = pd.MultiIndex.from_arrays(
  1125. [df.columns.tolist()] * 2, names=["lev0", "lev1"]
  1126. )
  1127. result = df.resample("1h").nunique()
  1128. tm.assert_index_equal(df.columns, result.columns)
  1129. @pytest.mark.parametrize(
  1130. "func",
  1131. [
  1132. lambda x: x.nunique(),
  1133. lambda x: x.agg(Series.nunique),
  1134. lambda x: x.agg("nunique"),
  1135. ],
  1136. ids=["nunique", "series_nunique", "nunique_str"],
  1137. )
  1138. def test_resample_nunique_with_date_gap(func, unit):
  1139. # GH 13453
  1140. # Since all elements are unique, these should all be the same
  1141. index = date_range("1-1-2000", "2-15-2000", freq="h").as_unit(unit)
  1142. index2 = date_range("4-15-2000", "5-15-2000", freq="h").as_unit(unit)
  1143. index3 = index.append(index2)
  1144. s = Series(range(len(index3)), index=index3, dtype="int64")
  1145. r = s.resample("M")
  1146. result = r.count()
  1147. expected = func(r)
  1148. tm.assert_series_equal(result, expected)
  1149. @pytest.mark.parametrize("n", [10000, 100000])
  1150. @pytest.mark.parametrize("k", [10, 100, 1000])
  1151. def test_resample_group_info(n, k, unit):
  1152. # GH10914
  1153. # use a fixed seed to always have the same uniques
  1154. prng = np.random.RandomState(1234)
  1155. dr = date_range(start="2015-08-27", periods=n // 10, freq="T").as_unit(unit)
  1156. ts = Series(prng.randint(0, n // k, n).astype("int64"), index=prng.choice(dr, n))
  1157. left = ts.resample("30T").nunique()
  1158. ix = date_range(start=ts.index.min(), end=ts.index.max(), freq="30T").as_unit(unit)
  1159. vals = ts.values
  1160. bins = np.searchsorted(ix.values, ts.index, side="right")
  1161. sorter = np.lexsort((vals, bins))
  1162. vals, bins = vals[sorter], bins[sorter]
  1163. mask = np.r_[True, vals[1:] != vals[:-1]]
  1164. mask |= np.r_[True, bins[1:] != bins[:-1]]
  1165. arr = np.bincount(bins[mask] - 1, minlength=len(ix)).astype("int64", copy=False)
  1166. right = Series(arr, index=ix)
  1167. tm.assert_series_equal(left, right)
  1168. def test_resample_size(unit):
  1169. n = 10000
  1170. dr = date_range("2015-09-19", periods=n, freq="T").as_unit(unit)
  1171. ts = Series(np.random.randn(n), index=np.random.choice(dr, n))
  1172. left = ts.resample("7T").size()
  1173. ix = date_range(start=left.index.min(), end=ts.index.max(), freq="7T").as_unit(unit)
  1174. bins = np.searchsorted(ix.values, ts.index.values, side="right")
  1175. val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype("int64", copy=False)
  1176. right = Series(val, index=ix)
  1177. tm.assert_series_equal(left, right)
  1178. def test_resample_across_dst():
  1179. # The test resamples a DatetimeIndex with values before and after a
  1180. # DST change
  1181. # Issue: 14682
  1182. # The DatetimeIndex we will start with
  1183. # (note that DST happens at 03:00+02:00 -> 02:00+01:00)
  1184. # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00
  1185. df1 = DataFrame([1477786980, 1477790580], columns=["ts"])
  1186. dti1 = DatetimeIndex(
  1187. pd.to_datetime(df1.ts, unit="s")
  1188. .dt.tz_localize("UTC")
  1189. .dt.tz_convert("Europe/Madrid")
  1190. )
  1191. # The expected DatetimeIndex after resampling.
  1192. # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00
  1193. df2 = DataFrame([1477785600, 1477789200], columns=["ts"])
  1194. dti2 = DatetimeIndex(
  1195. pd.to_datetime(df2.ts, unit="s")
  1196. .dt.tz_localize("UTC")
  1197. .dt.tz_convert("Europe/Madrid"),
  1198. freq="H",
  1199. )
  1200. df = DataFrame([5, 5], index=dti1)
  1201. result = df.resample(rule="H").sum()
  1202. expected = DataFrame([5, 5], index=dti2)
  1203. tm.assert_frame_equal(result, expected)
  1204. def test_groupby_with_dst_time_change(unit):
  1205. # GH 24972
  1206. index = (
  1207. DatetimeIndex([1478064900001000000, 1480037118776792000], tz="UTC")
  1208. .tz_convert("America/Chicago")
  1209. .as_unit(unit)
  1210. )
  1211. df = DataFrame([1, 2], index=index)
  1212. result = df.groupby(Grouper(freq="1d")).last()
  1213. expected_index_values = date_range(
  1214. "2016-11-02", "2016-11-24", freq="d", tz="America/Chicago"
  1215. ).as_unit(unit)
  1216. index = DatetimeIndex(expected_index_values)
  1217. expected = DataFrame([1.0] + ([np.nan] * 21) + [2.0], index=index)
  1218. tm.assert_frame_equal(result, expected)
  1219. def test_resample_dst_anchor(unit):
  1220. # 5172
  1221. dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz="US/Eastern").as_unit(unit)
  1222. df = DataFrame([5], index=dti)
  1223. dti = DatetimeIndex(df.index.normalize(), freq="D").as_unit(unit)
  1224. expected = DataFrame([5], index=dti)
  1225. tm.assert_frame_equal(df.resample(rule="D").sum(), expected)
  1226. df.resample(rule="MS").sum()
  1227. tm.assert_frame_equal(
  1228. df.resample(rule="MS").sum(),
  1229. DataFrame(
  1230. [5],
  1231. index=DatetimeIndex(
  1232. [datetime(2012, 11, 1)], tz="US/Eastern", freq="MS"
  1233. ).as_unit(unit),
  1234. ),
  1235. )
  1236. dti = date_range(
  1237. "2013-09-30", "2013-11-02", freq="30Min", tz="Europe/Paris"
  1238. ).as_unit(unit)
  1239. values = range(dti.size)
  1240. df = DataFrame({"a": values, "b": values, "c": values}, index=dti, dtype="int64")
  1241. how = {"a": "min", "b": "max", "c": "count"}
  1242. tm.assert_frame_equal(
  1243. df.resample("W-MON").agg(how)[["a", "b", "c"]],
  1244. DataFrame(
  1245. {
  1246. "a": [0, 48, 384, 720, 1056, 1394],
  1247. "b": [47, 383, 719, 1055, 1393, 1586],
  1248. "c": [48, 336, 336, 336, 338, 193],
  1249. },
  1250. index=date_range(
  1251. "9/30/2013", "11/4/2013", freq="W-MON", tz="Europe/Paris"
  1252. ).as_unit(unit),
  1253. ),
  1254. "W-MON Frequency",
  1255. )
  1256. tm.assert_frame_equal(
  1257. df.resample("2W-MON").agg(how)[["a", "b", "c"]],
  1258. DataFrame(
  1259. {
  1260. "a": [0, 48, 720, 1394],
  1261. "b": [47, 719, 1393, 1586],
  1262. "c": [48, 672, 674, 193],
  1263. },
  1264. index=date_range(
  1265. "9/30/2013", "11/11/2013", freq="2W-MON", tz="Europe/Paris"
  1266. ).as_unit(unit),
  1267. ),
  1268. "2W-MON Frequency",
  1269. )
  1270. tm.assert_frame_equal(
  1271. df.resample("MS").agg(how)[["a", "b", "c"]],
  1272. DataFrame(
  1273. {"a": [0, 48, 1538], "b": [47, 1537, 1586], "c": [48, 1490, 49]},
  1274. index=date_range(
  1275. "9/1/2013", "11/1/2013", freq="MS", tz="Europe/Paris"
  1276. ).as_unit(unit),
  1277. ),
  1278. "MS Frequency",
  1279. )
  1280. tm.assert_frame_equal(
  1281. df.resample("2MS").agg(how)[["a", "b", "c"]],
  1282. DataFrame(
  1283. {"a": [0, 1538], "b": [1537, 1586], "c": [1538, 49]},
  1284. index=date_range(
  1285. "9/1/2013", "11/1/2013", freq="2MS", tz="Europe/Paris"
  1286. ).as_unit(unit),
  1287. ),
  1288. "2MS Frequency",
  1289. )
  1290. df_daily = df["10/26/2013":"10/29/2013"]
  1291. tm.assert_frame_equal(
  1292. df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})[
  1293. ["a", "b", "c"]
  1294. ],
  1295. DataFrame(
  1296. {
  1297. "a": [1248, 1296, 1346, 1394],
  1298. "b": [1295, 1345, 1393, 1441],
  1299. "c": [48, 50, 48, 48],
  1300. },
  1301. index=date_range(
  1302. "10/26/2013", "10/29/2013", freq="D", tz="Europe/Paris"
  1303. ).as_unit(unit),
  1304. ),
  1305. "D Frequency",
  1306. )
  1307. def test_downsample_across_dst(unit):
  1308. # GH 8531
  1309. tz = pytz.timezone("Europe/Berlin")
  1310. dt = datetime(2014, 10, 26)
  1311. dates = date_range(tz.localize(dt), periods=4, freq="2H").as_unit(unit)
  1312. result = Series(5, index=dates).resample("H").mean()
  1313. expected = Series(
  1314. [5.0, np.nan] * 3 + [5.0],
  1315. index=date_range(tz.localize(dt), periods=7, freq="H").as_unit(unit),
  1316. )
  1317. tm.assert_series_equal(result, expected)
  1318. def test_downsample_across_dst_weekly(unit):
  1319. # GH 9119, GH 21459
  1320. df = DataFrame(
  1321. index=DatetimeIndex(
  1322. ["2017-03-25", "2017-03-26", "2017-03-27", "2017-03-28", "2017-03-29"],
  1323. tz="Europe/Amsterdam",
  1324. ).as_unit(unit),
  1325. data=[11, 12, 13, 14, 15],
  1326. )
  1327. result = df.resample("1W").sum()
  1328. expected = DataFrame(
  1329. [23, 42],
  1330. index=DatetimeIndex(
  1331. ["2017-03-26", "2017-04-02"], tz="Europe/Amsterdam", freq="W"
  1332. ).as_unit(unit),
  1333. )
  1334. tm.assert_frame_equal(result, expected)
  1335. def test_downsample_across_dst_weekly_2(unit):
  1336. # GH 9119, GH 21459
  1337. idx = date_range("2013-04-01", "2013-05-01", tz="Europe/London", freq="H").as_unit(
  1338. unit
  1339. )
  1340. s = Series(index=idx, dtype=np.float64)
  1341. result = s.resample("W").mean()
  1342. expected = Series(
  1343. index=date_range("2013-04-07", freq="W", periods=5, tz="Europe/London").as_unit(
  1344. unit
  1345. ),
  1346. dtype=np.float64,
  1347. )
  1348. tm.assert_series_equal(result, expected)
  1349. def test_downsample_dst_at_midnight(unit):
  1350. # GH 25758
  1351. start = datetime(2018, 11, 3, 12)
  1352. end = datetime(2018, 11, 5, 12)
  1353. index = date_range(start, end, freq="1H").as_unit(unit)
  1354. index = index.tz_localize("UTC").tz_convert("America/Havana")
  1355. data = list(range(len(index)))
  1356. dataframe = DataFrame(data, index=index)
  1357. result = dataframe.groupby(Grouper(freq="1D")).mean()
  1358. dti = date_range("2018-11-03", periods=3).tz_localize(
  1359. "America/Havana", ambiguous=True
  1360. )
  1361. dti = DatetimeIndex(dti, freq="D").as_unit(unit)
  1362. expected = DataFrame([7.5, 28.0, 44.5], index=dti)
  1363. tm.assert_frame_equal(result, expected)
  1364. def test_resample_with_nat(unit):
  1365. # GH 13020
  1366. index = DatetimeIndex(
  1367. [
  1368. pd.NaT,
  1369. "1970-01-01 00:00:00",
  1370. pd.NaT,
  1371. "1970-01-01 00:00:01",
  1372. "1970-01-01 00:00:02",
  1373. ]
  1374. )
  1375. frame = DataFrame([2, 3, 5, 7, 11], index=index)
  1376. frame.index = frame.index.as_unit(unit)
  1377. index_1s = DatetimeIndex(
  1378. ["1970-01-01 00:00:00", "1970-01-01 00:00:01", "1970-01-01 00:00:02"]
  1379. ).as_unit(unit)
  1380. frame_1s = DataFrame([3.0, 7.0, 11.0], index=index_1s)
  1381. tm.assert_frame_equal(frame.resample("1s").mean(), frame_1s)
  1382. index_2s = DatetimeIndex(["1970-01-01 00:00:00", "1970-01-01 00:00:02"]).as_unit(
  1383. unit
  1384. )
  1385. frame_2s = DataFrame([5.0, 11.0], index=index_2s)
  1386. tm.assert_frame_equal(frame.resample("2s").mean(), frame_2s)
  1387. index_3s = DatetimeIndex(["1970-01-01 00:00:00"]).as_unit(unit)
  1388. frame_3s = DataFrame([7.0], index=index_3s)
  1389. tm.assert_frame_equal(frame.resample("3s").mean(), frame_3s)
  1390. tm.assert_frame_equal(frame.resample("60s").mean(), frame_3s)
  1391. def test_resample_datetime_values(unit):
  1392. # GH 13119
  1393. # check that datetime dtype is preserved when NaT values are
  1394. # introduced by the resampling
  1395. dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)]
  1396. df = DataFrame({"timestamp": dates}, index=dates)
  1397. df.index = df.index.as_unit(unit)
  1398. exp = Series(
  1399. [datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)],
  1400. index=date_range("2016-01-15", periods=3, freq="2D").as_unit(unit),
  1401. name="timestamp",
  1402. )
  1403. res = df.resample("2D").first()["timestamp"]
  1404. tm.assert_series_equal(res, exp)
  1405. res = df["timestamp"].resample("2D").first()
  1406. tm.assert_series_equal(res, exp)
  1407. def test_resample_apply_with_additional_args(series, unit):
  1408. # GH 14615
  1409. def f(data, add_arg):
  1410. return np.mean(data) * add_arg
  1411. series.index = series.index.as_unit(unit)
  1412. multiplier = 10
  1413. result = series.resample("D").apply(f, multiplier)
  1414. expected = series.resample("D").mean().multiply(multiplier)
  1415. tm.assert_series_equal(result, expected)
  1416. # Testing as kwarg
  1417. result = series.resample("D").apply(f, add_arg=multiplier)
  1418. expected = series.resample("D").mean().multiply(multiplier)
  1419. tm.assert_series_equal(result, expected)
  1420. # Testing dataframe
  1421. df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10))
  1422. result = df.groupby("A").resample("D").agg(f, multiplier).astype(float)
  1423. expected = df.groupby("A").resample("D").mean().multiply(multiplier)
  1424. tm.assert_frame_equal(result, expected)
  1425. @pytest.mark.parametrize("k", [1, 2, 3])
  1426. @pytest.mark.parametrize(
  1427. "n1, freq1, n2, freq2",
  1428. [
  1429. (30, "S", 0.5, "Min"),
  1430. (60, "S", 1, "Min"),
  1431. (3600, "S", 1, "H"),
  1432. (60, "Min", 1, "H"),
  1433. (21600, "S", 0.25, "D"),
  1434. (86400, "S", 1, "D"),
  1435. (43200, "S", 0.5, "D"),
  1436. (1440, "Min", 1, "D"),
  1437. (12, "H", 0.5, "D"),
  1438. (24, "H", 1, "D"),
  1439. ],
  1440. )
  1441. def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit):
  1442. # GH 24127
  1443. n1_ = n1 * k
  1444. n2_ = n2 * k
  1445. s = Series(
  1446. 0,
  1447. index=date_range("19910905 13:00", "19911005 07:00", freq=freq1).as_unit(unit),
  1448. )
  1449. s = s + range(len(s))
  1450. result1 = s.resample(str(n1_) + freq1).mean()
  1451. result2 = s.resample(str(n2_) + freq2).mean()
  1452. tm.assert_series_equal(result1, result2)
  1453. @pytest.mark.parametrize(
  1454. "first,last,freq,exp_first,exp_last",
  1455. [
  1456. ("19910905", "19920406", "D", "19910905", "19920407"),
  1457. ("19910905 00:00", "19920406 06:00", "D", "19910905", "19920407"),
  1458. ("19910905 06:00", "19920406 06:00", "H", "19910905 06:00", "19920406 07:00"),
  1459. ("19910906", "19920406", "M", "19910831", "19920430"),
  1460. ("19910831", "19920430", "M", "19910831", "19920531"),
  1461. ("1991-08", "1992-04", "M", "19910831", "19920531"),
  1462. ],
  1463. )
  1464. def test_get_timestamp_range_edges(first, last, freq, exp_first, exp_last, unit):
  1465. first = Period(first)
  1466. first = first.to_timestamp(first.freq).as_unit(unit)
  1467. last = Period(last)
  1468. last = last.to_timestamp(last.freq).as_unit(unit)
  1469. exp_first = Timestamp(exp_first)
  1470. exp_last = Timestamp(exp_last)
  1471. freq = pd.tseries.frequencies.to_offset(freq)
  1472. result = _get_timestamp_range_edges(first, last, freq, unit="ns")
  1473. expected = (exp_first, exp_last)
  1474. assert result == expected
  1475. @pytest.mark.parametrize("duplicates", [True, False])
  1476. def test_resample_apply_product(duplicates, unit):
  1477. # GH 5586
  1478. index = date_range(start="2012-01-31", freq="M", periods=12).as_unit(unit)
  1479. ts = Series(range(12), index=index)
  1480. df = DataFrame({"A": ts, "B": ts + 2})
  1481. if duplicates:
  1482. df.columns = ["A", "A"]
  1483. result = df.resample("Q").apply(np.prod)
  1484. expected = DataFrame(
  1485. np.array([[0, 24], [60, 210], [336, 720], [990, 1716]], dtype=np.int64),
  1486. index=DatetimeIndex(
  1487. ["2012-03-31", "2012-06-30", "2012-09-30", "2012-12-31"], freq="Q-DEC"
  1488. ).as_unit(unit),
  1489. columns=df.columns,
  1490. )
  1491. tm.assert_frame_equal(result, expected)
  1492. @pytest.mark.parametrize(
  1493. "first,last,freq_in,freq_out,exp_last",
  1494. [
  1495. (
  1496. "2020-03-28",
  1497. "2020-03-31",
  1498. "D",
  1499. "24H",
  1500. "2020-03-30 01:00",
  1501. ), # includes transition into DST
  1502. (
  1503. "2020-03-28",
  1504. "2020-10-27",
  1505. "D",
  1506. "24H",
  1507. "2020-10-27 00:00",
  1508. ), # includes transition into and out of DST
  1509. (
  1510. "2020-10-25",
  1511. "2020-10-27",
  1512. "D",
  1513. "24H",
  1514. "2020-10-26 23:00",
  1515. ), # includes transition out of DST
  1516. (
  1517. "2020-03-28",
  1518. "2020-03-31",
  1519. "24H",
  1520. "D",
  1521. "2020-03-30 00:00",
  1522. ), # same as above, but from 24H to D
  1523. ("2020-03-28", "2020-10-27", "24H", "D", "2020-10-27 00:00"),
  1524. ("2020-10-25", "2020-10-27", "24H", "D", "2020-10-26 00:00"),
  1525. ],
  1526. )
  1527. def test_resample_calendar_day_with_dst(
  1528. first: str, last: str, freq_in: str, freq_out: str, exp_last: str, unit
  1529. ):
  1530. # GH 35219
  1531. ts = Series(
  1532. 1.0, date_range(first, last, freq=freq_in, tz="Europe/Amsterdam").as_unit(unit)
  1533. )
  1534. result = ts.resample(freq_out).ffill()
  1535. expected = Series(
  1536. 1.0,
  1537. date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam").as_unit(unit),
  1538. )
  1539. tm.assert_series_equal(result, expected)
  1540. @pytest.mark.parametrize("func", ["min", "max", "first", "last"])
  1541. def test_resample_aggregate_functions_min_count(func, unit):
  1542. # GH#37768
  1543. index = date_range(start="2020", freq="M", periods=3).as_unit(unit)
  1544. ser = Series([1, np.nan, np.nan], index)
  1545. result = getattr(ser.resample("Q"), func)(min_count=2)
  1546. expected = Series(
  1547. [np.nan],
  1548. index=DatetimeIndex(["2020-03-31"], freq="Q-DEC").as_unit(unit),
  1549. )
  1550. tm.assert_series_equal(result, expected)
  1551. def test_resample_unsigned_int(any_unsigned_int_numpy_dtype, unit):
  1552. # gh-43329
  1553. df = DataFrame(
  1554. index=date_range(start="2000-01-01", end="2000-01-03 23", freq="12H").as_unit(
  1555. unit
  1556. ),
  1557. columns=["x"],
  1558. data=[0, 1, 0] * 2,
  1559. dtype=any_unsigned_int_numpy_dtype,
  1560. )
  1561. df = df.loc[(df.index < "2000-01-02") | (df.index > "2000-01-03"), :]
  1562. result = df.resample("D").max()
  1563. expected = DataFrame(
  1564. [1, np.nan, 0],
  1565. columns=["x"],
  1566. index=date_range(start="2000-01-01", end="2000-01-03 23", freq="D").as_unit(
  1567. unit
  1568. ),
  1569. )
  1570. tm.assert_frame_equal(result, expected)
  1571. def test_long_rule_non_nano():
  1572. # https://github.com/pandas-dev/pandas/issues/51024
  1573. idx = date_range("0300-01-01", "2000-01-01", unit="s", freq="100Y")
  1574. ser = Series([1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5, 7, 1, 4, 2, 8, 5], index=idx)
  1575. result = ser.resample("200Y").mean()
  1576. expected_idx = DatetimeIndex(
  1577. np.array(
  1578. [
  1579. "0300-12-31",
  1580. "0500-12-31",
  1581. "0700-12-31",
  1582. "0900-12-31",
  1583. "1100-12-31",
  1584. "1300-12-31",
  1585. "1500-12-31",
  1586. "1700-12-31",
  1587. "1900-12-31",
  1588. ]
  1589. ).astype("datetime64[s]"),
  1590. freq="200A-DEC",
  1591. )
  1592. expected = Series([1.0, 3.0, 6.5, 4.0, 3.0, 6.5, 4.0, 3.0, 6.5], index=expected_idx)
  1593. tm.assert_series_equal(result, expected)