import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    Series,
    Timestamp,
    date_range,
    to_datetime,
)
import pandas._testing as tm
from pandas.api.indexers import BaseIndexer
from pandas.core.groupby.groupby import get_groupby


@pytest.fixture
def times_frame():
    """Frame for testing times argument in EWM groupby."""
    return DataFrame(
        {
            "A": ["a", "b", "c", "a", "b", "c", "a", "b", "c", "a"],
            "B": [0, 0, 0, 1, 1, 1, 2, 2, 2, 3],
            "C": to_datetime(
                [
                    "2020-01-01",
                    "2020-01-01",
                    "2020-01-01",
                    "2020-01-02",
                    "2020-01-10",
                    "2020-01-22",
                    "2020-01-03",
                    "2020-01-23",
                    "2020-01-23",
                    "2020-01-04",
                ]
            ),
        }
    )


@pytest.fixture
def roll_frame():
    return DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
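

# Illustrative sketch (not part of the original suite): most tests below assert the
# same result shape, namely that ``df.groupby(key).rolling(...)`` aggregations are
# indexed by a MultiIndex of (group key, original row label) and that the grouped-by
# column is dropped from the values (GH 32262). A minimal, hedged example:
#
#     df = DataFrame({"A": [1, 1, 2], "B": [0.0, 1.0, 2.0]})
#     out = df.groupby("A").rolling(2).mean()
#     # out.index   -> MultiIndex [(1, 0), (1, 1), (2, 2)], names ["A", None]
#     # out.columns -> ["B"] only; "A" does not reappear as a value column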


class TestRolling:
    def test_groupby_unsupported_argument(self, roll_frame):
        msg = r"groupby\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            roll_frame.groupby("A", foo=1)

    def test_getitem(self, roll_frame):
        g = roll_frame.groupby("A")
        g_mutated = get_groupby(roll_frame, by="A")

        expected = g_mutated.B.apply(lambda x: x.rolling(2).mean())

        result = g.rolling(2).mean().B
        tm.assert_series_equal(result, expected)

        result = g.rolling(2).B.mean()
        tm.assert_series_equal(result, expected)

        result = g.B.rolling(2).mean()
        tm.assert_series_equal(result, expected)

        result = roll_frame.B.groupby(roll_frame.A).rolling(2).mean()
        tm.assert_series_equal(result, expected)

    def test_getitem_multiple(self, roll_frame):
        # GH 13174
        g = roll_frame.groupby("A")
        r = g.rolling(2, min_periods=0)
        g_mutated = get_groupby(roll_frame, by="A")
        expected = g_mutated.B.apply(lambda x: x.rolling(2, min_periods=0).count())

        result = r.B.count()
        tm.assert_series_equal(result, expected)

        result = r.B.count()
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "f",
        [
            "sum",
            "mean",
            "min",
            "max",
            "count",
            "kurt",
            "skew",
        ],
    )
    def test_rolling(self, f, roll_frame):
        g = roll_frame.groupby("A", group_keys=False)
        r = g.rolling(window=4)

        result = getattr(r, f)()
        expected = g.apply(lambda x: getattr(x.rolling(4), f)())
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([roll_frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("f", ["std", "var"])
    def test_rolling_ddof(self, f, roll_frame):
        g = roll_frame.groupby("A", group_keys=False)
        r = g.rolling(window=4)

        result = getattr(r, f)(ddof=1)
        expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([roll_frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"]
    )
    def test_rolling_quantile(self, interpolation, roll_frame):
        g = roll_frame.groupby("A", group_keys=False)
        r = g.rolling(window=4)

        result = r.quantile(0.4, interpolation=interpolation)
        expected = g.apply(
            lambda x: x.rolling(4).quantile(0.4, interpolation=interpolation)
        )
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([roll_frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("f, expected_val", [["corr", 1], ["cov", 0.5]])
    def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val):
        # GH 42915
        df = DataFrame(
            {"value": range(10), "idx1": [1] * 5 + [2] * 5, "idx2": [1, 2, 3, 4, 5] * 2}
        ).set_index(["idx1", "idx2"])
        other = DataFrame({"value": range(5), "idx2": [1, 2, 3, 4, 5]}).set_index(
            "idx2"
        )
        result = getattr(df.groupby(level=0).rolling(2), f)(other)
        expected_data = ([np.nan] + [expected_val] * 4) * 2
        expected = DataFrame(
            expected_data,
            columns=["value"],
            index=MultiIndex.from_arrays(
                [
                    [1] * 5 + [2] * 5,
                    [1] * 5 + [2] * 5,
                    list(range(1, 6)) * 2,
                ],
                names=["idx1", "idx1", "idx2"],
            ),
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("f", ["corr", "cov"])
    def test_rolling_corr_cov_other_diff_size_as_groups(self, f, roll_frame):
        g = roll_frame.groupby("A")
        r = g.rolling(window=4)

        result = getattr(r, f)(roll_frame)

        def func(x):
            return getattr(x.rolling(4), f)(roll_frame)

        expected = g.apply(func)
        # GH 39591: The grouped column should be all np.nan
        # (groupby.apply inserts 0s for cov)
        expected["A"] = np.nan
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("f", ["corr", "cov"])
    def test_rolling_corr_cov_pairwise(self, f, roll_frame):
        g = roll_frame.groupby("A")
        r = g.rolling(window=4)

        result = getattr(r.B, f)(pairwise=True)

        def func(x):
            return getattr(x.B.rolling(4), f)(pairwise=True)

        expected = g.apply(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "func, expected_values",
        [("cov", [[1.0, 1.0], [1.0, 4.0]]), ("corr", [[1.0, 0.5], [0.5, 1.0]])],
    )
    def test_rolling_corr_cov_unordered(self, func, expected_values):
        # GH 43386
        df = DataFrame(
            {
                "a": ["g1", "g2", "g1", "g1"],
                "b": [0, 0, 1, 2],
                "c": [2, 0, 6, 4],
            }
        )
        rol = df.groupby("a").rolling(3)
        result = getattr(rol, func)()
        expected = DataFrame(
            {
                "b": 4 * [np.nan] + expected_values[0] + 2 * [np.nan],
                "c": 4 * [np.nan] + expected_values[1] + 2 * [np.nan],
            },
            index=MultiIndex.from_tuples(
                [
                    ("g1", 0, "b"),
                    ("g1", 0, "c"),
                    ("g1", 2, "b"),
                    ("g1", 2, "c"),
                    ("g1", 3, "b"),
                    ("g1", 3, "c"),
                    ("g2", 1, "b"),
                    ("g2", 1, "c"),
                ],
                names=["a", None, None],
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_rolling_apply(self, raw, roll_frame):
        g = roll_frame.groupby("A", group_keys=False)
        r = g.rolling(window=4)

        # reduction
        result = r.apply(lambda x: x.sum(), raw=raw)
        expected = g.apply(lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([roll_frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    def test_rolling_apply_mutability(self):
        # GH 14013
        df = DataFrame({"A": ["foo"] * 3 + ["bar"] * 3, "B": [1] * 6})
        g = df.groupby("A")

        mi = MultiIndex.from_tuples(
            [("bar", 3), ("bar", 4), ("bar", 5), ("foo", 0), ("foo", 1), ("foo", 2)]
        )
        mi.names = ["A", None]

        # Grouped column should not be a part of the output
        expected = DataFrame([np.nan, 2.0, 2.0] * 2, columns=["B"], index=mi)

        result = g.rolling(window=2).sum()
        tm.assert_frame_equal(result, expected)

        # Call an arbitrary function on the groupby
        g.sum()

        # Make sure nothing has been mutated
        result = g.rolling(window=2).sum()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("expected_value,raw_value", [[1.0, True], [0.0, False]])
    def test_groupby_rolling(self, expected_value, raw_value):
        # GH 31754

        def isnumpyarray(x):
            return int(isinstance(x, np.ndarray))

        df = DataFrame({"id": [1, 1, 1], "value": [1, 2, 3]})
        result = df.groupby("id").value.rolling(1).apply(isnumpyarray, raw=raw_value)
        expected = Series(
            [expected_value] * 3,
            index=MultiIndex.from_tuples(((1, 0), (1, 1), (1, 2)), names=["id", None]),
            name="value",
        )
        tm.assert_series_equal(result, expected)

    def test_groupby_rolling_center_center(self):
        # GH 35552
        series = Series(range(1, 6))
        result = series.groupby(series).rolling(center=True, window=3).mean()
        expected = Series(
            [np.nan] * 5,
            index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))),
        )
        tm.assert_series_equal(result, expected)

        series = Series(range(1, 5))
        result = series.groupby(series).rolling(center=True, window=3).mean()
        expected = Series(
            [np.nan] * 4,
            index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))),
        )
        tm.assert_series_equal(result, expected)

        df = DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)})
        result = df.groupby("a").rolling(center=True, window=3).mean()
        expected = DataFrame(
            [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan],
            index=MultiIndex.from_tuples(
                (
                    ("a", 0),
                    ("a", 1),
                    ("a", 2),
                    ("a", 3),
                    ("a", 4),
                    ("b", 5),
                    ("b", 6),
                    ("b", 7),
                    ("b", 8),
                    ("b", 9),
                    ("b", 10),
                ),
                names=["a", None],
            ),
            columns=["b"],
        )
        tm.assert_frame_equal(result, expected)

        df = DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)})
        result = df.groupby("a").rolling(center=True, window=3).mean()
        expected = DataFrame(
            [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan],
            index=MultiIndex.from_tuples(
                (
                    ("a", 0),
                    ("a", 1),
                    ("a", 2),
                    ("a", 3),
                    ("a", 4),
                    ("b", 5),
                    ("b", 6),
                    ("b", 7),
                    ("b", 8),
                    ("b", 9),
                ),
                names=["a", None],
            ),
            columns=["b"],
        )
        tm.assert_frame_equal(result, expected)

    def test_groupby_rolling_center_on(self):
        # GH 37141
        df = DataFrame(
            data={
                "Date": date_range("2020-01-01", "2020-01-10"),
                "gb": ["group_1"] * 6 + ["group_2"] * 4,
                "value": range(10),
            }
        )
        result = (
            df.groupby("gb")
            .rolling(6, on="Date", center=True, min_periods=1)
            .value.mean()
        )
        expected = Series(
            [1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 7.0, 7.5, 7.5, 7.5],
            name="value",
            index=MultiIndex.from_tuples(
                (
                    ("group_1", Timestamp("2020-01-01")),
                    ("group_1", Timestamp("2020-01-02")),
                    ("group_1", Timestamp("2020-01-03")),
                    ("group_1", Timestamp("2020-01-04")),
                    ("group_1", Timestamp("2020-01-05")),
                    ("group_1", Timestamp("2020-01-06")),
                    ("group_2", Timestamp("2020-01-07")),
                    ("group_2", Timestamp("2020-01-08")),
                    ("group_2", Timestamp("2020-01-09")),
                    ("group_2", Timestamp("2020-01-10")),
                ),
                names=["gb", "Date"],
            ),
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("min_periods", [5, 4, 3])
    def test_groupby_rolling_center_min_periods(self, min_periods):
        # GH 36040
        df = DataFrame({"group": ["A"] * 10 + ["B"] * 10, "data": range(20)})

        window_size = 5
        result = (
            df.groupby("group")
            .rolling(window_size, center=True, min_periods=min_periods)
            .mean()
        )
        result = result.reset_index()[["group", "data"]]

        grp_A_mean = [1.0, 1.5, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 7.5, 8.0]
        grp_B_mean = [x + 10.0 for x in grp_A_mean]

        num_nans = max(0, min_periods - 3)  # For window_size of 5
        nans = [np.nan] * num_nans
        grp_A_expected = nans + grp_A_mean[num_nans : 10 - num_nans] + nans
        grp_B_expected = nans + grp_B_mean[num_nans : 10 - num_nans] + nans

        expected = DataFrame(
            {"group": ["A"] * 10 + ["B"] * 10, "data": grp_A_expected + grp_B_expected}
        )
        tm.assert_frame_equal(result, expected)

    def test_groupby_subselect_rolling(self):
        # GH 35486
        df = DataFrame(
            {"a": [1, 2, 3, 2], "b": [4.0, 2.0, 3.0, 1.0], "c": [10, 20, 30, 20]}
        )
        result = df.groupby("a")[["b"]].rolling(2).max()
        expected = DataFrame(
            [np.nan, np.nan, 2.0, np.nan],
            columns=["b"],
            index=MultiIndex.from_tuples(
                ((1, 0), (2, 1), (2, 3), (3, 2)), names=["a", None]
            ),
        )
        tm.assert_frame_equal(result, expected)

        result = df.groupby("a")["b"].rolling(2).max()
        expected = Series(
            [np.nan, np.nan, 2.0, np.nan],
            index=MultiIndex.from_tuples(
                ((1, 0), (2, 1), (2, 3), (3, 2)), names=["a", None]
            ),
            name="b",
        )
        tm.assert_series_equal(result, expected)

    def test_groupby_rolling_custom_indexer(self):
        # GH 35557
        class SimpleIndexer(BaseIndexer):
            def get_window_bounds(
                self,
                num_values=0,
                min_periods=None,
                center=None,
                closed=None,
                step=None,
            ):
                min_periods = self.window_size if min_periods is None else 0
                end = np.arange(num_values, dtype=np.int64) + 1
                start = end.copy() - self.window_size
                start[start < 0] = min_periods
                return start, end

        df = DataFrame(
            {"a": [1.0, 2.0, 3.0, 4.0, 5.0] * 3}, index=[0] * 5 + [1] * 5 + [2] * 5
        )
        result = (
            df.groupby(df.index)
            .rolling(SimpleIndexer(window_size=3), min_periods=1)
            .sum()
        )
        expected = df.groupby(df.index).rolling(window=3, min_periods=1).sum()
        tm.assert_frame_equal(result, expected)
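
    # Illustrative note (not part of the original suite): a ``BaseIndexer`` subclass
    # such as ``SimpleIndexer`` above supplies window bounds directly;
    # ``get_window_bounds`` returns two equal-length int64 arrays ``(start, end)``
    # where row i is aggregated over positions ``start[i]:end[i]``. With
    # ``window_size=3``, ``min_periods=1`` and the 5 rows of each group this yields,
    # roughly:
    #
    #     start = [0, 0, 0, 1, 2]   # negative starts clipped to 0 for the first rows
    #     end   = [1, 2, 3, 4, 5]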

    def test_groupby_rolling_subset_with_closed(self):
        # GH 35549
        df = DataFrame(
            {
                "column1": range(6),
                "column2": range(6),
                "group": 3 * ["A", "B"],
                "date": [Timestamp("2019-01-01")] * 6,
            }
        )
        result = (
            df.groupby("group").rolling("1D", on="date", closed="left")["column1"].sum()
        )
        expected = Series(
            [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
            index=MultiIndex.from_tuples(
                [("A", Timestamp("2019-01-01"))] * 3
                + [("B", Timestamp("2019-01-01"))] * 3,
                names=["group", "date"],
            ),
            name="column1",
        )
        tm.assert_series_equal(result, expected)

    def test_groupby_subset_rolling_subset_with_closed(self):
        # GH 35549
        df = DataFrame(
            {
                "column1": range(6),
                "column2": range(6),
                "group": 3 * ["A", "B"],
                "date": [Timestamp("2019-01-01")] * 6,
            }
        )
        result = (
            df.groupby("group")[["column1", "date"]]
            .rolling("1D", on="date", closed="left")["column1"]
            .sum()
        )
        expected = Series(
            [np.nan, 0.0, 2.0, np.nan, 1.0, 4.0],
            index=MultiIndex.from_tuples(
                [("A", Timestamp("2019-01-01"))] * 3
                + [("B", Timestamp("2019-01-01"))] * 3,
                names=["group", "date"],
            ),
            name="column1",
        )
        tm.assert_series_equal(result, expected)
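
    # Illustrative note (not part of the original suite): ``closed="left"`` leaves the
    # right edge of the time window open, so the current row is excluded from its own
    # window. Because all six rows above share one "date", each row's window is
    # effectively the *earlier* rows of its group, which is why the expected values
    # start at NaN (empty window) and then accumulate 0.0, 0+2=2.0 for group "A" and
    # 1.0, 1+3=4.0 for group "B".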

    @pytest.mark.parametrize("func", ["max", "min"])
    def test_groupby_rolling_index_changed(self, func):
        # GH: #36018 nlevels of MultiIndex changed
        ds = Series(
            [1, 2, 2],
            index=MultiIndex.from_tuples(
                [("a", "x"), ("a", "y"), ("c", "z")], names=["1", "2"]
            ),
            name="a",
        )

        result = getattr(ds.groupby(ds).rolling(2), func)()
        expected = Series(
            [np.nan, np.nan, 2.0],
            index=MultiIndex.from_tuples(
                [(1, "a", "x"), (2, "a", "y"), (2, "c", "z")], names=["a", "1", "2"]
            ),
            name="a",
        )
        tm.assert_series_equal(result, expected)

    def test_groupby_rolling_empty_frame(self):
        # GH 36197
        expected = DataFrame({"s1": []})
        result = expected.groupby("s1").rolling(window=1).sum()
        # GH 32262
        expected = expected.drop(columns="s1")
        # GH-38057 from_tuples gives empty object dtype, we now get float/int levels
        # expected.index = MultiIndex.from_tuples([], names=["s1", None])
        expected.index = MultiIndex.from_product(
            [Index([], dtype="float64"), Index([], dtype="int64")], names=["s1", None]
        )
        tm.assert_frame_equal(result, expected)

        expected = DataFrame({"s1": [], "s2": []})
        result = expected.groupby(["s1", "s2"]).rolling(window=1).sum()
        # GH 32262
        expected = expected.drop(columns=["s1", "s2"])
        expected.index = MultiIndex.from_product(
            [
                Index([], dtype="float64"),
                Index([], dtype="float64"),
                Index([], dtype="int64"),
            ],
            names=["s1", "s2", None],
        )
        tm.assert_frame_equal(result, expected)

    def test_groupby_rolling_string_index(self):
        # GH: 36727
        df = DataFrame(
            [
                ["A", "group_1", Timestamp(2019, 1, 1, 9)],
                ["B", "group_1", Timestamp(2019, 1, 2, 9)],
                ["Z", "group_2", Timestamp(2019, 1, 3, 9)],
                ["H", "group_1", Timestamp(2019, 1, 6, 9)],
                ["E", "group_2", Timestamp(2019, 1, 20, 9)],
            ],
            columns=["index", "group", "eventTime"],
        ).set_index("index")

        groups = df.groupby("group")
        df["count_to_date"] = groups.cumcount()
        rolling_groups = groups.rolling("10d", on="eventTime")
        result = rolling_groups.apply(lambda df: df.shape[0])
        expected = DataFrame(
            [
                ["A", "group_1", Timestamp(2019, 1, 1, 9), 1.0],
                ["B", "group_1", Timestamp(2019, 1, 2, 9), 2.0],
                ["H", "group_1", Timestamp(2019, 1, 6, 9), 3.0],
                ["Z", "group_2", Timestamp(2019, 1, 3, 9), 1.0],
                ["E", "group_2", Timestamp(2019, 1, 20, 9), 1.0],
            ],
            columns=["index", "group", "eventTime", "count_to_date"],
        ).set_index(["group", "index"])
        tm.assert_frame_equal(result, expected)

    def test_groupby_rolling_no_sort(self):
        # GH 36889
        result = (
            DataFrame({"foo": [2, 1], "bar": [2, 1]})
            .groupby("foo", sort=False)
            .rolling(1)
            .min()
        )
        expected = DataFrame(
            np.array([[2.0, 2.0], [1.0, 1.0]]),
            columns=["foo", "bar"],
            index=MultiIndex.from_tuples([(2, 0), (1, 1)], names=["foo", None]),
        )
        # GH 32262
        expected = expected.drop(columns="foo")
        tm.assert_frame_equal(result, expected)

    def test_groupby_rolling_count_closed_on(self):
        # GH 35869
        df = DataFrame(
            {
                "column1": range(6),
                "column2": range(6),
                "group": 3 * ["A", "B"],
                "date": date_range(end="20190101", periods=6),
            }
        )
        result = (
            df.groupby("group")
            .rolling("3d", on="date", closed="left")["column1"]
            .count()
        )
        expected = Series(
            [np.nan, 1.0, 1.0, np.nan, 1.0, 1.0],
            name="column1",
            index=MultiIndex.from_tuples(
                [
                    ("A", Timestamp("2018-12-27")),
                    ("A", Timestamp("2018-12-29")),
                    ("A", Timestamp("2018-12-31")),
                    ("B", Timestamp("2018-12-28")),
                    ("B", Timestamp("2018-12-30")),
                    ("B", Timestamp("2019-01-01")),
                ],
                names=["group", "date"],
            ),
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        ("func", "kwargs"),
        [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})],
    )
    def test_groupby_rolling_sem(self, func, kwargs):
        # GH: 26476
        df = DataFrame(
            [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"]
        )
        result = getattr(df.groupby("a"), func)(**kwargs).sem()
        expected = DataFrame(
            {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]},
            index=MultiIndex.from_tuples(
                [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None]
            ),
        )
        # GH 32262
        expected = expected.drop(columns="a")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        ("rollings", "key"), [({"on": "a"}, "a"), ({"on": None}, "index")]
    )
    def test_groupby_rolling_nans_in_index(self, rollings, key):
        # GH: 34617
        df = DataFrame(
            {
                "a": to_datetime(["2020-06-01 12:00", "2020-06-01 14:00", np.nan]),
                "b": [1, 2, 3],
                "c": [1, 1, 1],
            }
        )
        if key == "index":
            df = df.set_index("a")
        with pytest.raises(ValueError, match=f"{key} values must not have NaT"):
            df.groupby("c").rolling("60min", **rollings)

    @pytest.mark.parametrize("group_keys", [True, False])
    def test_groupby_rolling_group_keys(self, group_keys):
        # GH 37641
        # GH 38523: GH 37641 actually was not a bug.
        # group_keys only applies to groupby.apply directly
        arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
        index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))

        s = Series([1, 2, 3], index=index)
        result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean()
        expected = Series(
            [1.0, 2.0, 3.0],
            index=MultiIndex.from_tuples(
                [
                    ("val1", "val1", "val1", "val1"),
                    ("val1", "val1", "val1", "val1"),
                    ("val2", "val2", "val2", "val2"),
                ],
                names=["idx1", "idx2", "idx1", "idx2"],
            ),
        )
        tm.assert_series_equal(result, expected)

    def test_groupby_rolling_index_level_and_column_label(self):
        # The groupby keys should not appear as a resulting column
        arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
        index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))

        df = DataFrame({"A": [1, 1, 2], "B": range(3)}, index=index)
        result = df.groupby(["idx1", "A"]).rolling(1).mean()
        expected = DataFrame(
            {"B": [0.0, 1.0, 2.0]},
            index=MultiIndex.from_tuples(
                [
                    ("val1", 1, "val1", "val1"),
                    ("val1", 1, "val1", "val1"),
                    ("val2", 2, "val2", "val2"),
                ],
                names=["idx1", "A", "idx1", "idx2"],
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_groupby_rolling_resulting_multiindex(self):
        # a few different cases checking the created MultiIndex of the result
        # https://github.com/pandas-dev/pandas/pull/38057
        # grouping by 1 column -> 2-level MI as result
        df = DataFrame({"a": np.arange(8.0), "b": [1, 2] * 4})
        result = df.groupby("b").rolling(3).mean()
        expected_index = MultiIndex.from_tuples(
            [(1, 0), (1, 2), (1, 4), (1, 6), (2, 1), (2, 3), (2, 5), (2, 7)],
            names=["b", None],
        )
        tm.assert_index_equal(result.index, expected_index)

    def test_groupby_rolling_resulting_multiindex2(self):
        # grouping by 2 columns -> 3-level MI as result
        df = DataFrame({"a": np.arange(12.0), "b": [1, 2] * 6, "c": [1, 2, 3, 4] * 3})
        result = df.groupby(["b", "c"]).rolling(2).sum()
        expected_index = MultiIndex.from_tuples(
            [
                (1, 1, 0),
                (1, 1, 4),
                (1, 1, 8),
                (1, 3, 2),
                (1, 3, 6),
                (1, 3, 10),
                (2, 2, 1),
                (2, 2, 5),
                (2, 2, 9),
                (2, 4, 3),
                (2, 4, 7),
                (2, 4, 11),
            ],
            names=["b", "c", None],
        )
        tm.assert_index_equal(result.index, expected_index)

    def test_groupby_rolling_resulting_multiindex3(self):
        # grouping with 1 level on dataframe with 2-level MI -> 3-level MI as result
        df = DataFrame({"a": np.arange(8.0), "b": [1, 2] * 4, "c": [1, 2, 3, 4] * 2})
        df = df.set_index("c", append=True)
        result = df.groupby("b").rolling(3).mean()
        expected_index = MultiIndex.from_tuples(
            [
                (1, 0, 1),
                (1, 2, 3),
                (1, 4, 1),
                (1, 6, 3),
                (2, 1, 2),
                (2, 3, 4),
                (2, 5, 2),
                (2, 7, 4),
            ],
            names=["b", None, "c"],
        )
        tm.assert_index_equal(result.index, expected_index, exact="equiv")

    def test_groupby_rolling_object_doesnt_affect_groupby_apply(self, roll_frame):
        # GH 39732
        g = roll_frame.groupby("A", group_keys=False)
        expected = g.apply(lambda x: x.rolling(4).sum()).index
        _ = g.rolling(window=4)
        result = g.apply(lambda x: x.rolling(4).sum()).index
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        ("window", "min_periods", "closed", "expected"),
        [
            (2, 0, "left", [None, 0.0, 1.0, 1.0, None, 0.0, 1.0, 1.0]),
            (2, 2, "left", [None, None, 1.0, 1.0, None, None, 1.0, 1.0]),
            (4, 4, "left", [None, None, None, None, None, None, None, None]),
            (4, 4, "right", [None, None, None, 5.0, None, None, None, 5.0]),
        ],
    )
    def test_groupby_rolling_var(self, window, min_periods, closed, expected):
        df = DataFrame([1, 2, 3, 4, 5, 6, 7, 8])
        result = (
            df.groupby([1, 2, 1, 2, 1, 2, 1, 2])
            .rolling(window=window, min_periods=min_periods, closed=closed)
            .var(0)
        )
        expected_result = DataFrame(
            np.array(expected, dtype="float64"),
            index=MultiIndex(
                levels=[np.array([1, 2]), [0, 1, 2, 3, 4, 5, 6, 7]],
                codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 2, 4, 6, 1, 3, 5, 7]],
            ),
        )
        tm.assert_frame_equal(result, expected_result)

    @pytest.mark.parametrize(
        "columns", [MultiIndex.from_tuples([("A", ""), ("B", "C")]), ["A", "B"]]
    )
    def test_by_column_not_in_values(self, columns):
        # GH 32262
        df = DataFrame([[1, 0]] * 20 + [[2, 0]] * 12 + [[3, 0]] * 8, columns=columns)
        g = df.groupby("A")
        original_obj = g.obj.copy(deep=True)
        r = g.rolling(4)
        result = r.sum()
        assert "A" not in result.columns
        tm.assert_frame_equal(g.obj, original_obj)

    def test_groupby_level(self):
        # GH 38523, 38787
        arrays = [
            ["Falcon", "Falcon", "Parrot", "Parrot"],
            ["Captive", "Wild", "Captive", "Wild"],
        ]
        index = MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
        df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
        result = df.groupby(level=0)["Max Speed"].rolling(2).sum()
        expected = Series(
            [np.nan, 740.0, np.nan, 50.0],
            index=MultiIndex.from_tuples(
                [
                    ("Falcon", "Falcon", "Captive"),
                    ("Falcon", "Falcon", "Wild"),
                    ("Parrot", "Parrot", "Captive"),
                    ("Parrot", "Parrot", "Wild"),
                ],
                names=["Animal", "Animal", "Type"],
            ),
            name="Max Speed",
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "by, expected_data",
        [
            [["id"], {"num": [100.0, 150.0, 150.0, 200.0]}],
            [
                ["id", "index"],
                {
                    "date": [
                        Timestamp("2018-01-01"),
                        Timestamp("2018-01-02"),
                        Timestamp("2018-01-01"),
                        Timestamp("2018-01-02"),
                    ],
                    "num": [100.0, 200.0, 150.0, 250.0],
                },
            ],
        ],
    )
    def test_as_index_false(self, by, expected_data):
        # GH 39433
        data = [
            ["A", "2018-01-01", 100.0],
            ["A", "2018-01-02", 200.0],
            ["B", "2018-01-01", 150.0],
            ["B", "2018-01-02", 250.0],
        ]
        df = DataFrame(data, columns=["id", "date", "num"])
        df["date"] = to_datetime(df["date"])
        df = df.set_index(["date"])

        gp_by = [getattr(df, attr) for attr in by]
        result = (
            df.groupby(gp_by, as_index=False).rolling(window=2, min_periods=1).mean()
        )

        expected = {"id": ["A", "A", "B", "B"]}
        expected.update(expected_data)
        expected = DataFrame(
            expected,
            index=df.index,
        )
        tm.assert_frame_equal(result, expected)
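
    # Illustrative note (not part of the original suite): with ``as_index=False`` the
    # rolling result keeps the frame's original index instead of prepending the group
    # keys as extra MultiIndex levels; the keys come back as regular columns ("id"
    # above, plus "date" when the index is one of the grouping keys).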

    def test_nan_and_zero_endpoints(self, any_int_numpy_dtype):
        # https://github.com/twosigma/pandas/issues/53
        typ = np.dtype(any_int_numpy_dtype).type
        size = 1000
        idx = np.repeat(typ(0), size)
        idx[-1] = 1

        val = 5e25
        arr = np.repeat(val, size)
        arr[0] = np.nan
        arr[-1] = 0

        df = DataFrame(
            {
                "index": idx,
                "adl2": arr,
            }
        ).set_index("index")

        result = df.groupby("index")["adl2"].rolling(window=10, min_periods=1).mean()
        expected = Series(
            arr,
            name="adl2",
            index=MultiIndex.from_arrays(
                [
                    Index([0] * 999 + [1], dtype=typ, name="index"),
                    Index([0] * 999 + [1], dtype=typ, name="index"),
                ],
            ),
        )
        tm.assert_series_equal(result, expected)

    def test_groupby_rolling_non_monotonic(self):
        # GH 43909
        shuffled = [3, 0, 1, 2]
        sec = 1_000
        df = DataFrame(
            [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
        )
        with pytest.raises(ValueError, match=r".* must be monotonic"):
            df.groupby("c").rolling(on="t", window="3s")

    def test_groupby_monotonic(self):
        # GH 15130
        # we don't need to validate monotonicity when grouping

        # GH 43909 we should raise an error here to match
        # behaviour of non-groupby rolling.
        data = [
            ["David", "1/1/2015", 100],
            ["David", "1/5/2015", 500],
            ["David", "5/30/2015", 50],
            ["David", "7/25/2015", 50],
            ["Ryan", "1/4/2014", 100],
            ["Ryan", "1/19/2015", 500],
            ["Ryan", "3/31/2016", 50],
            ["Joe", "7/1/2015", 100],
            ["Joe", "9/9/2015", 500],
            ["Joe", "10/15/2015", 50],
        ]
        df = DataFrame(data=data, columns=["name", "date", "amount"])
        df["date"] = to_datetime(df["date"])
        df = df.sort_values("date")

        expected = (
            df.set_index("date")
            .groupby("name")
            .apply(lambda x: x.rolling("180D")["amount"].sum())
        )
        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
        tm.assert_series_equal(result, expected)

    def test_datelike_on_monotonic_within_each_group(self):
        # GH 13966 (similar to #15130, closed by #15175)
        # superseded by 43909
        # GH 46061: OK if the on is monotonic relative to each group
        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
        df = DataFrame(
            {
                "A": [1] * 20 + [2] * 12 + [3] * 8,
                "B": np.concatenate((dates, dates)),
                "C": np.arange(40),
            }
        )

        expected = (
            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
        )
        result = df.groupby("A").rolling("4s", on="B").C.mean()
        tm.assert_series_equal(result, expected)

    def test_datelike_on_not_monotonic_within_each_group(self):
        # GH 46061
        df = DataFrame(
            {
                "A": [1] * 3 + [2] * 3,
                "B": [Timestamp(year, 1, 1) for year in [2020, 2021, 2019]] * 2,
                "C": range(6),
            }
        )
        with pytest.raises(ValueError, match="Each group within B must be monotonic."):
            df.groupby("A").rolling("365D", on="B")
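

# Illustrative sketch (not part of the original suite): the two tests above exercise
# the monotonicity rule for time-based windows. The "on" column (or the index) only
# has to be monotonic *within each group*, not globally; a group whose "on" values go
# backwards raises. A minimal, hedged example:
#
#     df = DataFrame(
#         {
#             "g": [1, 1, 2, 2],
#             "t": to_datetime(["2020-01-01", "2020-01-02", "2020-01-03", "2020-01-01"]),
#             "v": range(4),
#         }
#     )
#     df.groupby("g").rolling("2D", on="t")   # raises ValueError:
#                                             # "Each group within t must be monotonic."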


class TestExpanding:
    @pytest.fixture
    def frame(self):
        return DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})

    @pytest.mark.parametrize(
        "f", ["sum", "mean", "min", "max", "count", "kurt", "skew"]
    )
    def test_expanding(self, f, frame):
        g = frame.groupby("A", group_keys=False)
        r = g.expanding()

        result = getattr(r, f)()
        expected = g.apply(lambda x: getattr(x.expanding(), f)())
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("f", ["std", "var"])
    def test_expanding_ddof(self, f, frame):
        g = frame.groupby("A", group_keys=False)
        r = g.expanding()

        result = getattr(r, f)(ddof=0)
        expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "interpolation", ["linear", "lower", "higher", "midpoint", "nearest"]
    )
    def test_expanding_quantile(self, interpolation, frame):
        g = frame.groupby("A", group_keys=False)
        r = g.expanding()

        result = r.quantile(0.4, interpolation=interpolation)
        expected = g.apply(
            lambda x: x.expanding().quantile(0.4, interpolation=interpolation)
        )
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("f", ["corr", "cov"])
    def test_expanding_corr_cov(self, f, frame):
        g = frame.groupby("A")
        r = g.expanding()

        result = getattr(r, f)(frame)

        def func_0(x):
            return getattr(x.expanding(), f)(frame)

        expected = g.apply(func_0)
        # GH 39591: groupby.apply returns 1 instead of nan for windows
        # with all nan values
        null_idx = list(range(20, 61)) + list(range(72, 113))
        expected.iloc[null_idx, 1] = np.nan
        # GH 39591: The grouped column should be all np.nan
        # (groupby.apply inserts 0s for cov)
        expected["A"] = np.nan
        tm.assert_frame_equal(result, expected)

        result = getattr(r.B, f)(pairwise=True)

        def func_1(x):
            return getattr(x.B.expanding(), f)(pairwise=True)

        expected = g.apply(func_1)
        tm.assert_series_equal(result, expected)

    def test_expanding_apply(self, raw, frame):
        g = frame.groupby("A", group_keys=False)
        r = g.expanding()

        # reduction
        result = r.apply(lambda x: x.sum(), raw=raw)
        expected = g.apply(lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
        # groupby.apply doesn't drop the grouped-by column
        expected = expected.drop("A", axis=1)
        # GH 39732
        expected_index = MultiIndex.from_arrays([frame["A"], range(40)])
        expected.index = expected_index
        tm.assert_frame_equal(result, expected)


class TestEWM:
    @pytest.mark.parametrize(
        "method, expected_data",
        [
            ["mean", [0.0, 0.6666666666666666, 1.4285714285714286, 2.2666666666666666]],
            ["std", [np.nan, 0.707107, 0.963624, 1.177164]],
            ["var", [np.nan, 0.5, 0.9285714285714286, 1.3857142857142857]],
        ],
    )
    def test_methods(self, method, expected_data):
        # GH 16037
        df = DataFrame({"A": ["a"] * 4, "B": range(4)})
        result = getattr(df.groupby("A").ewm(com=1.0), method)()
        expected = DataFrame(
            {"B": expected_data},
            index=MultiIndex.from_tuples(
                [
                    ("a", 0),
                    ("a", 1),
                    ("a", 2),
                    ("a", 3),
                ],
                names=["A", None],
            ),
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "method, expected_data",
        [["corr", [np.nan, 1.0, 1.0, 1]], ["cov", [np.nan, 0.5, 0.928571, 1.385714]]],
    )
    def test_pairwise_methods(self, method, expected_data):
        # GH 16037
        df = DataFrame({"A": ["a"] * 4, "B": range(4)})
        result = getattr(df.groupby("A").ewm(com=1.0), method)()
        expected = DataFrame(
            {"B": expected_data},
            index=MultiIndex.from_tuples(
                [
                    ("a", 0, "B"),
                    ("a", 1, "B"),
                    ("a", 2, "B"),
                    ("a", 3, "B"),
                ],
                names=["A", None, None],
            ),
        )
        tm.assert_frame_equal(result, expected)

        expected = df.groupby("A").apply(lambda x: getattr(x.ewm(com=1.0), method)())
        tm.assert_frame_equal(result, expected)

    def test_times(self, times_frame):
        # GH 40951
        halflife = "23 days"
        # GH#42738
        times = times_frame.pop("C")
        result = times_frame.groupby("A").ewm(halflife=halflife, times=times).mean()
        expected = DataFrame(
            {
                "B": [
                    0.0,
                    0.507534,
                    1.020088,
                    1.537661,
                    0.0,
                    0.567395,
                    1.221209,
                    0.0,
                    0.653141,
                    1.195003,
                ]
            },
            index=MultiIndex.from_tuples(
                [
                    ("a", 0),
                    ("a", 3),
                    ("a", 6),
                    ("a", 9),
                    ("b", 1),
                    ("b", 4),
                    ("b", 7),
                    ("c", 2),
                    ("c", 5),
                    ("c", 8),
                ],
                names=["A", None],
            ),
        )
        tm.assert_frame_equal(result, expected)

    def test_times_array(self, times_frame):
        # GH 40951
        halflife = "23 days"
        times = times_frame.pop("C")
        gb = times_frame.groupby("A")
        result = gb.ewm(halflife=halflife, times=times).mean()
        expected = gb.ewm(halflife=halflife, times=times.values).mean()
        tm.assert_frame_equal(result, expected)
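
    # Illustrative note (not part of the original suite): in the two ``times`` tests
    # above, ``halflife`` is a timedelta-like string, which pandas only accepts when
    # ``times`` is also supplied; decay is then based on elapsed time between
    # observations rather than on observation count. Roughly:
    #
    #     gb.ewm(halflife="23 days", times=times).mean()
    #     # an observation dt behind the current one is weighted ~ 0.5 ** (dt / 23 days)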

    def test_dont_mutate_obj_after_slicing(self):
        # GH 43355
        df = DataFrame(
            {
                "id": ["a", "a", "b", "b", "b"],
                "timestamp": date_range("2021-9-1", periods=5, freq="H"),
                "y": range(5),
            }
        )
        grp = df.groupby("id").rolling("1H", on="timestamp")
        result = grp.count()
        expected_df = DataFrame(
            {
                "timestamp": date_range("2021-9-1", periods=5, freq="H"),
                "y": [1.0] * 5,
            },
            index=MultiIndex.from_arrays(
                [["a", "a", "b", "b", "b"], list(range(5))], names=["id", None]
            ),
        )
        tm.assert_frame_equal(result, expected_df)

        result = grp["y"].count()
        expected_series = Series(
            [1.0] * 5,
            index=MultiIndex.from_arrays(
                [
                    ["a", "a", "b", "b", "b"],
                    date_range("2021-9-1", periods=5, freq="H"),
                ],
                names=["id", "timestamp"],
            ),
            name="y",
        )
        tm.assert_series_equal(result, expected_series)
        # This is the key test
        result = grp.count()
        tm.assert_frame_equal(result, expected_df)