test_timeseries_window.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. Index,
  6. MultiIndex,
  7. NaT,
  8. Series,
  9. Timestamp,
  10. date_range,
  11. )
  12. import pandas._testing as tm
  13. from pandas.tseries import offsets
  14. @pytest.fixture
  15. def regular():
  16. return DataFrame(
  17. {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
  18. ).set_index("A")
  19. @pytest.fixture
  20. def ragged():
  21. df = DataFrame({"B": range(5)})
  22. df.index = [
  23. Timestamp("20130101 09:00:00"),
  24. Timestamp("20130101 09:00:02"),
  25. Timestamp("20130101 09:00:03"),
  26. Timestamp("20130101 09:00:05"),
  27. Timestamp("20130101 09:00:06"),
  28. ]
  29. return df
  30. class TestRollingTS:
  31. # rolling time-series friendly
  32. # xref GH13327
  33. def test_doc_string(self):
  34. df = DataFrame(
  35. {"B": [0, 1, 2, np.nan, 4]},
  36. index=[
  37. Timestamp("20130101 09:00:00"),
  38. Timestamp("20130101 09:00:02"),
  39. Timestamp("20130101 09:00:03"),
  40. Timestamp("20130101 09:00:05"),
  41. Timestamp("20130101 09:00:06"),
  42. ],
  43. )
  44. df
  45. df.rolling("2s").sum()
  46. def test_invalid_window_non_int(self, regular):
  47. # not a valid freq
  48. msg = "passed window foobar is not compatible with a datetimelike index"
  49. with pytest.raises(ValueError, match=msg):
  50. regular.rolling(window="foobar")
  51. # not a datetimelike index
  52. msg = "window must be an integer"
  53. with pytest.raises(ValueError, match=msg):
  54. regular.reset_index().rolling(window="foobar")
  55. @pytest.mark.parametrize("freq", ["2MS", offsets.MonthBegin(2)])
  56. def test_invalid_window_nonfixed(self, freq, regular):
  57. # non-fixed freqs
  58. msg = "\\<2 \\* MonthBegins\\> is a non-fixed frequency"
  59. with pytest.raises(ValueError, match=msg):
  60. regular.rolling(window=freq)
  61. @pytest.mark.parametrize("freq", ["1D", offsets.Day(2), "2ms"])
  62. def test_valid_window(self, freq, regular):
  63. regular.rolling(window=freq)
  64. @pytest.mark.parametrize("minp", [1.0, "foo", np.array([1, 2, 3])])
  65. def test_invalid_minp(self, minp, regular):
  66. # non-integer min_periods
  67. msg = (
  68. r"local variable 'minp' referenced before assignment|"
  69. "min_periods must be an integer"
  70. )
  71. with pytest.raises(ValueError, match=msg):
  72. regular.rolling(window="1D", min_periods=minp)
  73. def test_on(self, regular):
  74. df = regular
  75. # not a valid column
  76. msg = (
  77. r"invalid on specified as foobar, must be a column "
  78. "\\(of DataFrame\\), an Index or None"
  79. )
  80. with pytest.raises(ValueError, match=msg):
  81. df.rolling(window="2s", on="foobar")
  82. # column is valid
  83. df = df.copy()
  84. df["C"] = date_range("20130101", periods=len(df))
  85. df.rolling(window="2d", on="C").sum()
  86. # invalid columns
  87. msg = "window must be an integer"
  88. with pytest.raises(ValueError, match=msg):
  89. df.rolling(window="2d", on="B")
  90. # ok even though on non-selected
  91. df.rolling(window="2d", on="C").B.sum()
  92. def test_monotonic_on(self):
  93. # on/index must be monotonic
  94. df = DataFrame(
  95. {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
  96. )
  97. assert df.A.is_monotonic_increasing
  98. df.rolling("2s", on="A").sum()
  99. df = df.set_index("A")
  100. assert df.index.is_monotonic_increasing
  101. df.rolling("2s").sum()
  102. def test_non_monotonic_on(self):
  103. # GH 19248
  104. df = DataFrame(
  105. {"A": date_range("20130101", periods=5, freq="s"), "B": range(5)}
  106. )
  107. df = df.set_index("A")
  108. non_monotonic_index = df.index.to_list()
  109. non_monotonic_index[0] = non_monotonic_index[3]
  110. df.index = non_monotonic_index
  111. assert not df.index.is_monotonic_increasing
  112. msg = "index values must be monotonic"
  113. with pytest.raises(ValueError, match=msg):
  114. df.rolling("2s").sum()
  115. df = df.reset_index()
  116. msg = (
  117. r"invalid on specified as A, must be a column "
  118. "\\(of DataFrame\\), an Index or None"
  119. )
  120. with pytest.raises(ValueError, match=msg):
  121. df.rolling("2s", on="A").sum()
  122. def test_frame_on(self):
  123. df = DataFrame(
  124. {"B": range(5), "C": date_range("20130101 09:00:00", periods=5, freq="3s")}
  125. )
  126. df["A"] = [
  127. Timestamp("20130101 09:00:00"),
  128. Timestamp("20130101 09:00:02"),
  129. Timestamp("20130101 09:00:03"),
  130. Timestamp("20130101 09:00:05"),
  131. Timestamp("20130101 09:00:06"),
  132. ]
  133. # we are doing simulating using 'on'
  134. expected = df.set_index("A").rolling("2s").B.sum().reset_index(drop=True)
  135. result = df.rolling("2s", on="A").B.sum()
  136. tm.assert_series_equal(result, expected)
  137. # test as a frame
  138. # we should be ignoring the 'on' as an aggregation column
  139. # note that the expected is setting, computing, and resetting
  140. # so the columns need to be switched compared
  141. # to the actual result where they are ordered as in the
  142. # original
  143. expected = (
  144. df.set_index("A").rolling("2s")[["B"]].sum().reset_index()[["B", "A"]]
  145. )
  146. result = df.rolling("2s", on="A")[["B"]].sum()
  147. tm.assert_frame_equal(result, expected)
  148. def test_frame_on2(self):
  149. # using multiple aggregation columns
  150. df = DataFrame(
  151. {
  152. "A": [0, 1, 2, 3, 4],
  153. "B": [0, 1, 2, np.nan, 4],
  154. "C": Index(
  155. [
  156. Timestamp("20130101 09:00:00"),
  157. Timestamp("20130101 09:00:02"),
  158. Timestamp("20130101 09:00:03"),
  159. Timestamp("20130101 09:00:05"),
  160. Timestamp("20130101 09:00:06"),
  161. ]
  162. ),
  163. },
  164. columns=["A", "C", "B"],
  165. )
  166. expected1 = DataFrame(
  167. {"A": [0.0, 1, 3, 3, 7], "B": [0, 1, 3, np.nan, 4], "C": df["C"]},
  168. columns=["A", "C", "B"],
  169. )
  170. result = df.rolling("2s", on="C").sum()
  171. expected = expected1
  172. tm.assert_frame_equal(result, expected)
  173. expected = Series([0, 1, 3, np.nan, 4], name="B")
  174. result = df.rolling("2s", on="C").B.sum()
  175. tm.assert_series_equal(result, expected)
  176. expected = expected1[["A", "B", "C"]]
  177. result = df.rolling("2s", on="C")[["A", "B", "C"]].sum()
  178. tm.assert_frame_equal(result, expected)
  179. def test_basic_regular(self, regular):
  180. df = regular.copy()
  181. df.index = date_range("20130101", periods=5, freq="D")
  182. expected = df.rolling(window=1, min_periods=1).sum()
  183. result = df.rolling(window="1D").sum()
  184. tm.assert_frame_equal(result, expected)
  185. df.index = date_range("20130101", periods=5, freq="2D")
  186. expected = df.rolling(window=1, min_periods=1).sum()
  187. result = df.rolling(window="2D", min_periods=1).sum()
  188. tm.assert_frame_equal(result, expected)
  189. expected = df.rolling(window=1, min_periods=1).sum()
  190. result = df.rolling(window="2D", min_periods=1).sum()
  191. tm.assert_frame_equal(result, expected)
  192. expected = df.rolling(window=1).sum()
  193. result = df.rolling(window="2D").sum()
  194. tm.assert_frame_equal(result, expected)
  195. def test_min_periods(self, regular):
  196. # compare for min_periods
  197. df = regular
  198. # these slightly different
  199. expected = df.rolling(2, min_periods=1).sum()
  200. result = df.rolling("2s").sum()
  201. tm.assert_frame_equal(result, expected)
  202. expected = df.rolling(2, min_periods=1).sum()
  203. result = df.rolling("2s", min_periods=1).sum()
  204. tm.assert_frame_equal(result, expected)
  205. def test_closed(self, regular):
  206. # xref GH13965
  207. df = DataFrame(
  208. {"A": [1] * 5},
  209. index=[
  210. Timestamp("20130101 09:00:01"),
  211. Timestamp("20130101 09:00:02"),
  212. Timestamp("20130101 09:00:03"),
  213. Timestamp("20130101 09:00:04"),
  214. Timestamp("20130101 09:00:06"),
  215. ],
  216. )
  217. # closed must be 'right', 'left', 'both', 'neither'
  218. msg = "closed must be 'right', 'left', 'both' or 'neither'"
  219. with pytest.raises(ValueError, match=msg):
  220. regular.rolling(window="2s", closed="blabla")
  221. expected = df.copy()
  222. expected["A"] = [1.0, 2, 2, 2, 1]
  223. result = df.rolling("2s", closed="right").sum()
  224. tm.assert_frame_equal(result, expected)
  225. # default should be 'right'
  226. result = df.rolling("2s").sum()
  227. tm.assert_frame_equal(result, expected)
  228. expected = df.copy()
  229. expected["A"] = [1.0, 2, 3, 3, 2]
  230. result = df.rolling("2s", closed="both").sum()
  231. tm.assert_frame_equal(result, expected)
  232. expected = df.copy()
  233. expected["A"] = [np.nan, 1.0, 2, 2, 1]
  234. result = df.rolling("2s", closed="left").sum()
  235. tm.assert_frame_equal(result, expected)
  236. expected = df.copy()
  237. expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
  238. result = df.rolling("2s", closed="neither").sum()
  239. tm.assert_frame_equal(result, expected)
  240. def test_ragged_sum(self, ragged):
  241. df = ragged
  242. result = df.rolling(window="1s", min_periods=1).sum()
  243. expected = df.copy()
  244. expected["B"] = [0.0, 1, 2, 3, 4]
  245. tm.assert_frame_equal(result, expected)
  246. result = df.rolling(window="2s", min_periods=1).sum()
  247. expected = df.copy()
  248. expected["B"] = [0.0, 1, 3, 3, 7]
  249. tm.assert_frame_equal(result, expected)
  250. result = df.rolling(window="2s", min_periods=2).sum()
  251. expected = df.copy()
  252. expected["B"] = [np.nan, np.nan, 3, np.nan, 7]
  253. tm.assert_frame_equal(result, expected)
  254. result = df.rolling(window="3s", min_periods=1).sum()
  255. expected = df.copy()
  256. expected["B"] = [0.0, 1, 3, 5, 7]
  257. tm.assert_frame_equal(result, expected)
  258. result = df.rolling(window="3s").sum()
  259. expected = df.copy()
  260. expected["B"] = [0.0, 1, 3, 5, 7]
  261. tm.assert_frame_equal(result, expected)
  262. result = df.rolling(window="4s", min_periods=1).sum()
  263. expected = df.copy()
  264. expected["B"] = [0.0, 1, 3, 6, 9]
  265. tm.assert_frame_equal(result, expected)
  266. result = df.rolling(window="4s", min_periods=3).sum()
  267. expected = df.copy()
  268. expected["B"] = [np.nan, np.nan, 3, 6, 9]
  269. tm.assert_frame_equal(result, expected)
  270. result = df.rolling(window="5s", min_periods=1).sum()
  271. expected = df.copy()
  272. expected["B"] = [0.0, 1, 3, 6, 10]
  273. tm.assert_frame_equal(result, expected)
  274. def test_ragged_mean(self, ragged):
  275. df = ragged
  276. result = df.rolling(window="1s", min_periods=1).mean()
  277. expected = df.copy()
  278. expected["B"] = [0.0, 1, 2, 3, 4]
  279. tm.assert_frame_equal(result, expected)
  280. result = df.rolling(window="2s", min_periods=1).mean()
  281. expected = df.copy()
  282. expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
  283. tm.assert_frame_equal(result, expected)
  284. def test_ragged_median(self, ragged):
  285. df = ragged
  286. result = df.rolling(window="1s", min_periods=1).median()
  287. expected = df.copy()
  288. expected["B"] = [0.0, 1, 2, 3, 4]
  289. tm.assert_frame_equal(result, expected)
  290. result = df.rolling(window="2s", min_periods=1).median()
  291. expected = df.copy()
  292. expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
  293. tm.assert_frame_equal(result, expected)
  294. def test_ragged_quantile(self, ragged):
  295. df = ragged
  296. result = df.rolling(window="1s", min_periods=1).quantile(0.5)
  297. expected = df.copy()
  298. expected["B"] = [0.0, 1, 2, 3, 4]
  299. tm.assert_frame_equal(result, expected)
  300. result = df.rolling(window="2s", min_periods=1).quantile(0.5)
  301. expected = df.copy()
  302. expected["B"] = [0.0, 1, 1.5, 3.0, 3.5]
  303. tm.assert_frame_equal(result, expected)
  304. def test_ragged_std(self, ragged):
  305. df = ragged
  306. result = df.rolling(window="1s", min_periods=1).std(ddof=0)
  307. expected = df.copy()
  308. expected["B"] = [0.0] * 5
  309. tm.assert_frame_equal(result, expected)
  310. result = df.rolling(window="1s", min_periods=1).std(ddof=1)
  311. expected = df.copy()
  312. expected["B"] = [np.nan] * 5
  313. tm.assert_frame_equal(result, expected)
  314. result = df.rolling(window="3s", min_periods=1).std(ddof=0)
  315. expected = df.copy()
  316. expected["B"] = [0.0] + [0.5] * 4
  317. tm.assert_frame_equal(result, expected)
  318. result = df.rolling(window="5s", min_periods=1).std(ddof=1)
  319. expected = df.copy()
  320. expected["B"] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
  321. tm.assert_frame_equal(result, expected)
  322. def test_ragged_var(self, ragged):
  323. df = ragged
  324. result = df.rolling(window="1s", min_periods=1).var(ddof=0)
  325. expected = df.copy()
  326. expected["B"] = [0.0] * 5
  327. tm.assert_frame_equal(result, expected)
  328. result = df.rolling(window="1s", min_periods=1).var(ddof=1)
  329. expected = df.copy()
  330. expected["B"] = [np.nan] * 5
  331. tm.assert_frame_equal(result, expected)
  332. result = df.rolling(window="3s", min_periods=1).var(ddof=0)
  333. expected = df.copy()
  334. expected["B"] = [0.0] + [0.25] * 4
  335. tm.assert_frame_equal(result, expected)
  336. result = df.rolling(window="5s", min_periods=1).var(ddof=1)
  337. expected = df.copy()
  338. expected["B"] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.0]
  339. tm.assert_frame_equal(result, expected)
  340. def test_ragged_skew(self, ragged):
  341. df = ragged
  342. result = df.rolling(window="3s", min_periods=1).skew()
  343. expected = df.copy()
  344. expected["B"] = [np.nan] * 5
  345. tm.assert_frame_equal(result, expected)
  346. result = df.rolling(window="5s", min_periods=1).skew()
  347. expected = df.copy()
  348. expected["B"] = [np.nan] * 2 + [0.0, 0.0, 0.0]
  349. tm.assert_frame_equal(result, expected)
  350. def test_ragged_kurt(self, ragged):
  351. df = ragged
  352. result = df.rolling(window="3s", min_periods=1).kurt()
  353. expected = df.copy()
  354. expected["B"] = [np.nan] * 5
  355. tm.assert_frame_equal(result, expected)
  356. result = df.rolling(window="5s", min_periods=1).kurt()
  357. expected = df.copy()
  358. expected["B"] = [np.nan] * 4 + [-1.2]
  359. tm.assert_frame_equal(result, expected)
  360. def test_ragged_count(self, ragged):
  361. df = ragged
  362. result = df.rolling(window="1s", min_periods=1).count()
  363. expected = df.copy()
  364. expected["B"] = [1.0, 1, 1, 1, 1]
  365. tm.assert_frame_equal(result, expected)
  366. df = ragged
  367. result = df.rolling(window="1s").count()
  368. tm.assert_frame_equal(result, expected)
  369. result = df.rolling(window="2s", min_periods=1).count()
  370. expected = df.copy()
  371. expected["B"] = [1.0, 1, 2, 1, 2]
  372. tm.assert_frame_equal(result, expected)
  373. result = df.rolling(window="2s", min_periods=2).count()
  374. expected = df.copy()
  375. expected["B"] = [np.nan, np.nan, 2, np.nan, 2]
  376. tm.assert_frame_equal(result, expected)
  377. def test_regular_min(self):
  378. df = DataFrame(
  379. {"A": date_range("20130101", periods=5, freq="s"), "B": [0.0, 1, 2, 3, 4]}
  380. ).set_index("A")
  381. result = df.rolling("1s").min()
  382. expected = df.copy()
  383. expected["B"] = [0.0, 1, 2, 3, 4]
  384. tm.assert_frame_equal(result, expected)
  385. df = DataFrame(
  386. {"A": date_range("20130101", periods=5, freq="s"), "B": [5, 4, 3, 4, 5]}
  387. ).set_index("A")
  388. tm.assert_frame_equal(result, expected)
  389. result = df.rolling("2s").min()
  390. expected = df.copy()
  391. expected["B"] = [5.0, 4, 3, 3, 4]
  392. tm.assert_frame_equal(result, expected)
  393. result = df.rolling("5s").min()
  394. expected = df.copy()
  395. expected["B"] = [5.0, 4, 3, 3, 3]
  396. tm.assert_frame_equal(result, expected)
  397. def test_ragged_min(self, ragged):
  398. df = ragged
  399. result = df.rolling(window="1s", min_periods=1).min()
  400. expected = df.copy()
  401. expected["B"] = [0.0, 1, 2, 3, 4]
  402. tm.assert_frame_equal(result, expected)
  403. result = df.rolling(window="2s", min_periods=1).min()
  404. expected = df.copy()
  405. expected["B"] = [0.0, 1, 1, 3, 3]
  406. tm.assert_frame_equal(result, expected)
  407. result = df.rolling(window="5s", min_periods=1).min()
  408. expected = df.copy()
  409. expected["B"] = [0.0, 0, 0, 1, 1]
  410. tm.assert_frame_equal(result, expected)
  411. def test_perf_min(self):
  412. N = 10000
  413. dfp = DataFrame(
  414. {"B": np.random.randn(N)}, index=date_range("20130101", periods=N, freq="s")
  415. )
  416. expected = dfp.rolling(2, min_periods=1).min()
  417. result = dfp.rolling("2s").min()
  418. assert ((result - expected) < 0.01).all().bool()
  419. expected = dfp.rolling(200, min_periods=1).min()
  420. result = dfp.rolling("200s").min()
  421. assert ((result - expected) < 0.01).all().bool()
  422. def test_ragged_max(self, ragged):
  423. df = ragged
  424. result = df.rolling(window="1s", min_periods=1).max()
  425. expected = df.copy()
  426. expected["B"] = [0.0, 1, 2, 3, 4]
  427. tm.assert_frame_equal(result, expected)
  428. result = df.rolling(window="2s", min_periods=1).max()
  429. expected = df.copy()
  430. expected["B"] = [0.0, 1, 2, 3, 4]
  431. tm.assert_frame_equal(result, expected)
  432. result = df.rolling(window="5s", min_periods=1).max()
  433. expected = df.copy()
  434. expected["B"] = [0.0, 1, 2, 3, 4]
  435. tm.assert_frame_equal(result, expected)
  436. @pytest.mark.parametrize(
  437. "freq, op, result_data",
  438. [
  439. ("ms", "min", [0.0] * 10),
  440. ("ms", "mean", [0.0] * 9 + [2.0 / 9]),
  441. ("ms", "max", [0.0] * 9 + [2.0]),
  442. ("s", "min", [0.0] * 10),
  443. ("s", "mean", [0.0] * 9 + [2.0 / 9]),
  444. ("s", "max", [0.0] * 9 + [2.0]),
  445. ("min", "min", [0.0] * 10),
  446. ("min", "mean", [0.0] * 9 + [2.0 / 9]),
  447. ("min", "max", [0.0] * 9 + [2.0]),
  448. ("h", "min", [0.0] * 10),
  449. ("h", "mean", [0.0] * 9 + [2.0 / 9]),
  450. ("h", "max", [0.0] * 9 + [2.0]),
  451. ("D", "min", [0.0] * 10),
  452. ("D", "mean", [0.0] * 9 + [2.0 / 9]),
  453. ("D", "max", [0.0] * 9 + [2.0]),
  454. ],
  455. )
  456. def test_freqs_ops(self, freq, op, result_data):
  457. # GH 21096
  458. index = date_range(start="2018-1-1 01:00:00", freq=f"1{freq}", periods=10)
  459. # Explicit cast to float to avoid implicit cast when setting nan
  460. s = Series(data=0, index=index, dtype="float")
  461. s.iloc[1] = np.nan
  462. s.iloc[-1] = 2
  463. result = getattr(s.rolling(window=f"10{freq}"), op)()
  464. expected = Series(data=result_data, index=index)
  465. tm.assert_series_equal(result, expected)
  466. @pytest.mark.parametrize(
  467. "f",
  468. [
  469. "sum",
  470. "mean",
  471. "count",
  472. "median",
  473. "std",
  474. "var",
  475. "kurt",
  476. "skew",
  477. "min",
  478. "max",
  479. ],
  480. )
  481. def test_all(self, f, regular):
  482. # simple comparison of integer vs time-based windowing
  483. df = regular * 2
  484. er = df.rolling(window=1)
  485. r = df.rolling(window="1s")
  486. result = getattr(r, f)()
  487. expected = getattr(er, f)()
  488. tm.assert_frame_equal(result, expected)
  489. result = r.quantile(0.5)
  490. expected = er.quantile(0.5)
  491. tm.assert_frame_equal(result, expected)
  492. def test_all2(self, arithmetic_win_operators):
  493. f = arithmetic_win_operators
  494. # more sophisticated comparison of integer vs.
  495. # time-based windowing
  496. df = DataFrame(
  497. {"B": np.arange(50)}, index=date_range("20130101", periods=50, freq="H")
  498. )
  499. # in-range data
  500. dft = df.between_time("09:00", "16:00")
  501. r = dft.rolling(window="5H")
  502. result = getattr(r, f)()
  503. # we need to roll the days separately
  504. # to compare with a time-based roll
  505. # finally groupby-apply will return a multi-index
  506. # so we need to drop the day
  507. def agg_by_day(x):
  508. x = x.between_time("09:00", "16:00")
  509. return getattr(x.rolling(5, min_periods=1), f)()
  510. expected = (
  511. df.groupby(df.index.day).apply(agg_by_day).reset_index(level=0, drop=True)
  512. )
  513. tm.assert_frame_equal(result, expected)
  514. def test_rolling_cov_offset(self):
  515. # GH16058
  516. idx = date_range("2017-01-01", periods=24, freq="1h")
  517. ss = Series(np.arange(len(idx)), index=idx)
  518. result = ss.rolling("2h").cov()
  519. expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
  520. tm.assert_series_equal(result, expected)
  521. expected2 = ss.rolling(2, min_periods=1).cov()
  522. tm.assert_series_equal(result, expected2)
  523. result = ss.rolling("3h").cov()
  524. expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
  525. tm.assert_series_equal(result, expected)
  526. expected2 = ss.rolling(3, min_periods=1).cov()
  527. tm.assert_series_equal(result, expected2)
  528. def test_rolling_on_decreasing_index(self):
  529. # GH-19248, GH-32385
  530. index = [
  531. Timestamp("20190101 09:00:30"),
  532. Timestamp("20190101 09:00:27"),
  533. Timestamp("20190101 09:00:20"),
  534. Timestamp("20190101 09:00:18"),
  535. Timestamp("20190101 09:00:10"),
  536. ]
  537. df = DataFrame({"column": [3, 4, 4, 5, 6]}, index=index)
  538. result = df.rolling("5s").min()
  539. expected = DataFrame({"column": [3.0, 3.0, 4.0, 4.0, 6.0]}, index=index)
  540. tm.assert_frame_equal(result, expected)
  541. def test_rolling_on_empty(self):
  542. # GH-32385
  543. df = DataFrame({"column": []}, index=[])
  544. result = df.rolling("5s").min()
  545. expected = DataFrame({"column": []}, index=[])
  546. tm.assert_frame_equal(result, expected)
  547. def test_rolling_on_multi_index_level(self):
  548. # GH-15584
  549. df = DataFrame(
  550. {"column": range(6)},
  551. index=MultiIndex.from_product(
  552. [date_range("20190101", periods=3), range(2)], names=["date", "seq"]
  553. ),
  554. )
  555. result = df.rolling("10d", on=df.index.get_level_values("date")).sum()
  556. expected = DataFrame(
  557. {"column": [0.0, 1.0, 3.0, 6.0, 10.0, 15.0]}, index=df.index
  558. )
  559. tm.assert_frame_equal(result, expected)
  560. @pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]])
  561. def test_nat_axis_error(msg, axis):
  562. idx = [Timestamp("2020"), NaT]
  563. kwargs = {"columns" if axis == 1 else "index": idx}
  564. df = DataFrame(np.eye(2), **kwargs)
  565. with pytest.raises(ValueError, match=f"{msg} values must not have NaT"):
  566. df.rolling("D", axis=axis).mean()