# test_resampler_grouper.py
# Tests for the interaction between groupby and resample.
  1. from textwrap import dedent
  2. import numpy as np
  3. import pytest
  4. from pandas.compat import is_platform_windows
  5. from pandas.util._test_decorators import async_mark
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. Index,
  10. Series,
  11. TimedeltaIndex,
  12. Timestamp,
  13. )
  14. import pandas._testing as tm
  15. from pandas.core.indexes.datetimes import date_range
  16. test_frame = DataFrame(
  17. {"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)},
  18. index=date_range("1/1/2000", freq="s", periods=40),
  19. )
  20. @async_mark()
  21. async def test_tab_complete_ipython6_warning(ip):
  22. from IPython.core.completer import provisionalcompleter
  23. code = dedent(
  24. """\
  25. import pandas._testing as tm
  26. s = tm.makeTimeSeries()
  27. rs = s.resample("D")
  28. """
  29. )
  30. await ip.run_code(code)
  31. # GH 31324 newer jedi version raises Deprecation warning;
  32. # appears resolved 2021-02-02
  33. with tm.assert_produces_warning(None):
  34. with provisionalcompleter("ignore"):
  35. list(ip.Completer.completions("rs.", 1))
  36. def test_deferred_with_groupby():
  37. # GH 12486
  38. # support deferred resample ops with groupby
  39. data = [
  40. ["2010-01-01", "A", 2],
  41. ["2010-01-02", "A", 3],
  42. ["2010-01-05", "A", 8],
  43. ["2010-01-10", "A", 7],
  44. ["2010-01-13", "A", 3],
  45. ["2010-01-01", "B", 5],
  46. ["2010-01-03", "B", 2],
  47. ["2010-01-04", "B", 1],
  48. ["2010-01-11", "B", 7],
  49. ["2010-01-14", "B", 3],
  50. ]
  51. df = DataFrame(data, columns=["date", "id", "score"])
  52. df.date = pd.to_datetime(df.date)
  53. def f_0(x):
  54. return x.set_index("date").resample("D").asfreq()
  55. expected = df.groupby("id").apply(f_0)
  56. result = df.set_index("date").groupby("id").resample("D").asfreq()
  57. tm.assert_frame_equal(result, expected)
  58. df = DataFrame(
  59. {
  60. "date": date_range(start="2016-01-01", periods=4, freq="W"),
  61. "group": [1, 1, 2, 2],
  62. "val": [5, 6, 7, 8],
  63. }
  64. ).set_index("date")
  65. def f_1(x):
  66. return x.resample("1D").ffill()
  67. expected = df.groupby("group").apply(f_1)
  68. result = df.groupby("group").resample("1D").ffill()
  69. tm.assert_frame_equal(result, expected)
  70. def test_getitem():
  71. g = test_frame.groupby("A")
  72. expected = g.B.apply(lambda x: x.resample("2s").mean())
  73. result = g.resample("2s").B.mean()
  74. tm.assert_series_equal(result, expected)
  75. result = g.B.resample("2s").mean()
  76. tm.assert_series_equal(result, expected)
  77. result = g.resample("2s").mean().B
  78. tm.assert_series_equal(result, expected)
  79. def test_getitem_multiple():
  80. # GH 13174
  81. # multiple calls after selection causing an issue with aliasing
  82. data = [{"id": 1, "buyer": "A"}, {"id": 2, "buyer": "B"}]
  83. df = DataFrame(data, index=date_range("2016-01-01", periods=2))
  84. r = df.groupby("id").resample("1D")
  85. result = r["buyer"].count()
  86. expected = Series(
  87. [1, 1],
  88. index=pd.MultiIndex.from_tuples(
  89. [(1, Timestamp("2016-01-01")), (2, Timestamp("2016-01-02"))],
  90. names=["id", None],
  91. ),
  92. name="buyer",
  93. )
  94. tm.assert_series_equal(result, expected)
  95. result = r["buyer"].count()
  96. tm.assert_series_equal(result, expected)
  97. def test_groupby_resample_on_api_with_getitem():
  98. # GH 17813
  99. df = DataFrame(
  100. {"id": list("aabbb"), "date": date_range("1-1-2016", periods=5), "data": 1}
  101. )
  102. exp = df.set_index("date").groupby("id").resample("2D")["data"].sum()
  103. result = df.groupby("id").resample("2D", on="date")["data"].sum()
  104. tm.assert_series_equal(result, exp)
  105. def test_groupby_with_origin():
  106. # GH 31809
  107. freq = "1399min" # prime number that is smaller than 24h
  108. start, end = "1/1/2000 00:00:00", "1/31/2000 00:00"
  109. middle = "1/15/2000 00:00:00"
  110. rng = date_range(start, end, freq="1231min") # prime number
  111. ts = Series(np.random.randn(len(rng)), index=rng)
  112. ts2 = ts[middle:end]
  113. # proves that grouper without a fixed origin does not work
  114. # when dealing with unusual frequencies
  115. simple_grouper = pd.Grouper(freq=freq)
  116. count_ts = ts.groupby(simple_grouper).agg("count")
  117. count_ts = count_ts[middle:end]
  118. count_ts2 = ts2.groupby(simple_grouper).agg("count")
  119. with pytest.raises(AssertionError, match="Index are different"):
  120. tm.assert_index_equal(count_ts.index, count_ts2.index)
  121. # test origin on 1970-01-01 00:00:00
  122. origin = Timestamp(0)
  123. adjusted_grouper = pd.Grouper(freq=freq, origin=origin)
  124. adjusted_count_ts = ts.groupby(adjusted_grouper).agg("count")
  125. adjusted_count_ts = adjusted_count_ts[middle:end]
  126. adjusted_count_ts2 = ts2.groupby(adjusted_grouper).agg("count")
  127. tm.assert_series_equal(adjusted_count_ts, adjusted_count_ts2)
  128. # test origin on 2049-10-18 20:00:00
  129. origin_future = Timestamp(0) + pd.Timedelta("1399min") * 30_000
  130. adjusted_grouper2 = pd.Grouper(freq=freq, origin=origin_future)
  131. adjusted2_count_ts = ts.groupby(adjusted_grouper2).agg("count")
  132. adjusted2_count_ts = adjusted2_count_ts[middle:end]
  133. adjusted2_count_ts2 = ts2.groupby(adjusted_grouper2).agg("count")
  134. tm.assert_series_equal(adjusted2_count_ts, adjusted2_count_ts2)
  135. # both grouper use an adjusted timestamp that is a multiple of 1399 min
  136. # they should be equals even if the adjusted_timestamp is in the future
  137. tm.assert_series_equal(adjusted_count_ts, adjusted2_count_ts2)
  138. def test_nearest():
  139. # GH 17496
  140. # Resample nearest
  141. index = date_range("1/1/2000", periods=3, freq="T")
  142. result = Series(range(3), index=index).resample("20s").nearest()
  143. expected = Series(
  144. [0, 0, 1, 1, 1, 2, 2],
  145. index=pd.DatetimeIndex(
  146. [
  147. "2000-01-01 00:00:00",
  148. "2000-01-01 00:00:20",
  149. "2000-01-01 00:00:40",
  150. "2000-01-01 00:01:00",
  151. "2000-01-01 00:01:20",
  152. "2000-01-01 00:01:40",
  153. "2000-01-01 00:02:00",
  154. ],
  155. dtype="datetime64[ns]",
  156. freq="20S",
  157. ),
  158. )
  159. tm.assert_series_equal(result, expected)
  160. @pytest.mark.parametrize(
  161. "f",
  162. [
  163. "first",
  164. "last",
  165. "median",
  166. "sem",
  167. "sum",
  168. "mean",
  169. "min",
  170. "max",
  171. "size",
  172. "count",
  173. "nearest",
  174. "bfill",
  175. "ffill",
  176. "asfreq",
  177. "ohlc",
  178. ],
  179. )
  180. def test_methods(f):
  181. g = test_frame.groupby("A")
  182. r = g.resample("2s")
  183. result = getattr(r, f)()
  184. expected = g.apply(lambda x: getattr(x.resample("2s"), f)())
  185. tm.assert_equal(result, expected)
  186. def test_methods_nunique():
  187. # series only
  188. g = test_frame.groupby("A")
  189. r = g.resample("2s")
  190. result = r.B.nunique()
  191. expected = g.B.apply(lambda x: x.resample("2s").nunique())
  192. tm.assert_series_equal(result, expected)
  193. @pytest.mark.parametrize("f", ["std", "var"])
  194. def test_methods_std_var(f):
  195. g = test_frame.groupby("A")
  196. r = g.resample("2s")
  197. result = getattr(r, f)(ddof=1)
  198. expected = g.apply(lambda x: getattr(x.resample("2s"), f)(ddof=1))
  199. tm.assert_frame_equal(result, expected)
  200. def test_apply():
  201. g = test_frame.groupby("A")
  202. r = g.resample("2s")
  203. # reduction
  204. expected = g.resample("2s").sum()
  205. def f_0(x):
  206. return x.resample("2s").sum()
  207. result = r.apply(f_0)
  208. tm.assert_frame_equal(result, expected)
  209. def f_1(x):
  210. return x.resample("2s").apply(lambda y: y.sum())
  211. result = g.apply(f_1)
  212. # y.sum() results in int64 instead of int32 on 32-bit architectures
  213. expected = expected.astype("int64")
  214. tm.assert_frame_equal(result, expected)
  215. def test_apply_with_mutated_index():
  216. # GH 15169
  217. index = date_range("1-1-2015", "12-31-15", freq="D")
  218. df = DataFrame(data={"col1": np.random.rand(len(index))}, index=index)
  219. def f(x):
  220. s = Series([1, 2], index=["a", "b"])
  221. return s
  222. expected = df.groupby(pd.Grouper(freq="M")).apply(f)
  223. result = df.resample("M").apply(f)
  224. tm.assert_frame_equal(result, expected)
  225. # A case for series
  226. expected = df["col1"].groupby(pd.Grouper(freq="M"), group_keys=False).apply(f)
  227. result = df["col1"].resample("M").apply(f)
  228. tm.assert_series_equal(result, expected)
  229. def test_apply_columns_multilevel():
  230. # GH 16231
  231. cols = pd.MultiIndex.from_tuples([("A", "a", "", "one"), ("B", "b", "i", "two")])
  232. ind = date_range(start="2017-01-01", freq="15Min", periods=8)
  233. df = DataFrame(np.array([0] * 16).reshape(8, 2), index=ind, columns=cols)
  234. agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns}
  235. result = df.resample("H").apply(lambda x: agg_dict[x.name](x))
  236. expected = DataFrame(
  237. 2 * [[0, 0.0]],
  238. index=date_range(start="2017-01-01", freq="1H", periods=2),
  239. columns=pd.MultiIndex.from_tuples(
  240. [("A", "a", "", "one"), ("B", "b", "i", "two")]
  241. ),
  242. )
  243. tm.assert_frame_equal(result, expected)
  244. def test_resample_groupby_with_label():
  245. # GH 13235
  246. index = date_range("2000-01-01", freq="2D", periods=5)
  247. df = DataFrame(index=index, data={"col0": [0, 0, 1, 1, 2], "col1": [1, 1, 1, 1, 1]})
  248. result = df.groupby("col0").resample("1W", label="left").sum()
  249. mi = [
  250. np.array([0, 0, 1, 2], dtype=np.int64),
  251. pd.to_datetime(
  252. np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"])
  253. ),
  254. ]
  255. mindex = pd.MultiIndex.from_arrays(mi, names=["col0", None])
  256. expected = DataFrame(
  257. data={"col0": [0, 0, 2, 2], "col1": [1, 1, 2, 1]}, index=mindex
  258. )
  259. tm.assert_frame_equal(result, expected)
  260. def test_consistency_with_window():
  261. # consistent return values with window
  262. df = test_frame
  263. expected = Index([1, 2, 3], name="A")
  264. result = df.groupby("A").resample("2s").mean()
  265. assert result.index.nlevels == 2
  266. tm.assert_index_equal(result.index.levels[0], expected)
  267. result = df.groupby("A").rolling(20).mean()
  268. assert result.index.nlevels == 2
  269. tm.assert_index_equal(result.index.levels[0], expected)
  270. def test_median_duplicate_columns():
  271. # GH 14233
  272. df = DataFrame(
  273. np.random.randn(20, 3),
  274. columns=list("aaa"),
  275. index=date_range("2012-01-01", periods=20, freq="s"),
  276. )
  277. df2 = df.copy()
  278. df2.columns = ["a", "b", "c"]
  279. expected = df2.resample("5s").median()
  280. result = df.resample("5s").median()
  281. expected.columns = result.columns
  282. tm.assert_frame_equal(result, expected)
  283. def test_apply_to_one_column_of_df():
  284. # GH: 36951
  285. df = DataFrame(
  286. {"col": range(10), "col1": range(10, 20)},
  287. index=date_range("2012-01-01", periods=10, freq="20min"),
  288. )
  289. # access "col" via getattr -> make sure we handle AttributeError
  290. result = df.resample("H").apply(lambda group: group.col.sum())
  291. expected = Series(
  292. [3, 12, 21, 9], index=date_range("2012-01-01", periods=4, freq="H")
  293. )
  294. tm.assert_series_equal(result, expected)
  295. # access "col" via _getitem__ -> make sure we handle KeyErrpr
  296. result = df.resample("H").apply(lambda group: group["col"].sum())
  297. tm.assert_series_equal(result, expected)
  298. def test_resample_groupby_agg():
  299. # GH: 33548
  300. df = DataFrame(
  301. {
  302. "cat": [
  303. "cat_1",
  304. "cat_1",
  305. "cat_2",
  306. "cat_1",
  307. "cat_2",
  308. "cat_1",
  309. "cat_2",
  310. "cat_1",
  311. ],
  312. "num": [5, 20, 22, 3, 4, 30, 10, 50],
  313. "date": [
  314. "2019-2-1",
  315. "2018-02-03",
  316. "2020-3-11",
  317. "2019-2-2",
  318. "2019-2-2",
  319. "2018-12-4",
  320. "2020-3-11",
  321. "2020-12-12",
  322. ],
  323. }
  324. )
  325. df["date"] = pd.to_datetime(df["date"])
  326. resampled = df.groupby("cat").resample("Y", on="date")
  327. expected = resampled[["num"]].sum()
  328. result = resampled.agg({"num": "sum"})
  329. tm.assert_frame_equal(result, expected)
  330. def test_resample_groupby_agg_listlike():
  331. # GH 42905
  332. ts = Timestamp("2021-02-28 00:00:00")
  333. df = DataFrame({"class": ["beta"], "value": [69]}, index=Index([ts], name="date"))
  334. resampled = df.groupby("class").resample("M")["value"]
  335. result = resampled.agg(["sum", "size"])
  336. expected = DataFrame(
  337. [[69, 1]],
  338. index=pd.MultiIndex.from_tuples([("beta", ts)], names=["class", "date"]),
  339. columns=["sum", "size"],
  340. )
  341. tm.assert_frame_equal(result, expected)
  342. @pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
  343. def test_empty(keys):
  344. # GH 26411
  345. df = DataFrame([], columns=["a", "b"], index=TimedeltaIndex([]))
  346. result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
  347. expected = (
  348. DataFrame(columns=["a", "b"])
  349. .set_index(keys, drop=False)
  350. .set_index(TimedeltaIndex([]), append=True)
  351. )
  352. if len(keys) == 1:
  353. expected.index.name = keys[0]
  354. tm.assert_frame_equal(result, expected)
  355. @pytest.mark.parametrize("consolidate", [True, False])
  356. def test_resample_groupby_agg_object_dtype_all_nan(consolidate):
  357. # https://github.com/pandas-dev/pandas/issues/39329
  358. dates = date_range("2020-01-01", periods=15, freq="D")
  359. df1 = DataFrame({"key": "A", "date": dates, "col1": range(15), "col_object": "val"})
  360. df2 = DataFrame({"key": "B", "date": dates, "col1": range(15)})
  361. df = pd.concat([df1, df2], ignore_index=True)
  362. if consolidate:
  363. df = df._consolidate()
  364. result = df.groupby(["key"]).resample("W", on="date").min()
  365. idx = pd.MultiIndex.from_arrays(
  366. [
  367. ["A"] * 3 + ["B"] * 3,
  368. pd.to_datetime(["2020-01-05", "2020-01-12", "2020-01-19"] * 2),
  369. ],
  370. names=["key", "date"],
  371. )
  372. expected = DataFrame(
  373. {
  374. "key": ["A"] * 3 + ["B"] * 3,
  375. "col1": [0, 5, 12] * 2,
  376. "col_object": ["val"] * 3 + [np.nan] * 3,
  377. },
  378. index=idx,
  379. )
  380. tm.assert_frame_equal(result, expected)
  381. def test_groupby_resample_with_list_of_keys():
  382. # GH 47362
  383. df = DataFrame(
  384. data={
  385. "date": date_range(start="2016-01-01", periods=8),
  386. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  387. "val": [1, 7, 5, 2, 3, 10, 5, 1],
  388. }
  389. )
  390. result = df.groupby("group").resample("2D", on="date")[["val"]].mean()
  391. expected = DataFrame(
  392. data={
  393. "val": [4.0, 3.5, 6.5, 3.0],
  394. },
  395. index=Index(
  396. data=[
  397. (0, Timestamp("2016-01-01")),
  398. (0, Timestamp("2016-01-03")),
  399. (1, Timestamp("2016-01-05")),
  400. (1, Timestamp("2016-01-07")),
  401. ],
  402. name=("group", "date"),
  403. ),
  404. )
  405. tm.assert_frame_equal(result, expected)
  406. @pytest.mark.parametrize("keys", [["a"], ["a", "b"]])
  407. def test_resample_no_index(keys):
  408. # GH 47705
  409. df = DataFrame([], columns=["a", "b", "date"])
  410. df["date"] = pd.to_datetime(df["date"])
  411. df = df.set_index("date")
  412. result = df.groupby(keys).resample(rule=pd.to_timedelta("00:00:01")).mean()
  413. expected = DataFrame(columns=["a", "b", "date"]).set_index(keys, drop=False)
  414. expected["date"] = pd.to_datetime(expected["date"])
  415. expected = expected.set_index("date", append=True, drop=True)
  416. if len(keys) == 1:
  417. expected.index.name = keys[0]
  418. tm.assert_frame_equal(result, expected)
  419. def test_resample_no_columns():
  420. # GH#52484
  421. df = DataFrame(
  422. index=Index(
  423. pd.to_datetime(
  424. ["2018-01-01 00:00:00", "2018-01-01 12:00:00", "2018-01-02 00:00:00"]
  425. ),
  426. name="date",
  427. )
  428. )
  429. result = df.groupby([0, 0, 1]).resample(rule=pd.to_timedelta("06:00:00")).mean()
  430. index = pd.to_datetime(
  431. [
  432. "2018-01-01 00:00:00",
  433. "2018-01-01 06:00:00",
  434. "2018-01-01 12:00:00",
  435. "2018-01-02 00:00:00",
  436. ]
  437. )
  438. expected = DataFrame(
  439. index=pd.MultiIndex(
  440. levels=[np.array([0, 1], dtype=np.intp), index],
  441. codes=[[0, 0, 0, 1], [0, 1, 2, 3]],
  442. names=[None, "date"],
  443. )
  444. )
  445. # GH#52710 - Index comes out as 32-bit on 64-bit Windows
  446. tm.assert_frame_equal(result, expected, check_index_type=not is_platform_windows())
  447. def test_groupby_resample_size_all_index_same():
  448. # GH 46826
  449. df = DataFrame(
  450. {"A": [1] * 3 + [2] * 3 + [1] * 3 + [2] * 3, "B": np.arange(12)},
  451. index=date_range("31/12/2000 18:00", freq="H", periods=12),
  452. )
  453. result = df.groupby("A").resample("D").size()
  454. expected = Series(
  455. 3,
  456. index=pd.MultiIndex.from_tuples(
  457. [
  458. (1, Timestamp("2000-12-31")),
  459. (1, Timestamp("2001-01-01")),
  460. (2, Timestamp("2000-12-31")),
  461. (2, Timestamp("2001-01-01")),
  462. ],
  463. names=["A", None],
  464. ),
  465. )
  466. tm.assert_series_equal(result, expected)
  467. def test_groupby_resample_on_index_with_list_of_keys():
  468. # GH 50840
  469. df = DataFrame(
  470. data={
  471. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  472. "val": [3, 1, 4, 1, 5, 9, 2, 6],
  473. },
  474. index=Series(
  475. date_range(start="2016-01-01", periods=8),
  476. name="date",
  477. ),
  478. )
  479. result = df.groupby("group").resample("2D")[["val"]].mean()
  480. expected = DataFrame(
  481. data={
  482. "val": [2.0, 2.5, 7.0, 4.0],
  483. },
  484. index=Index(
  485. data=[
  486. (0, Timestamp("2016-01-01")),
  487. (0, Timestamp("2016-01-03")),
  488. (1, Timestamp("2016-01-05")),
  489. (1, Timestamp("2016-01-07")),
  490. ],
  491. name=("group", "date"),
  492. ),
  493. )
  494. tm.assert_frame_equal(result, expected)
  495. def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
  496. # GH 50876
  497. df = DataFrame(
  498. data={
  499. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  500. "first_val": [3, 1, 4, 1, 5, 9, 2, 6],
  501. "second_val": [2, 7, 1, 8, 2, 8, 1, 8],
  502. "third_val": [1, 4, 1, 4, 2, 1, 3, 5],
  503. },
  504. index=Series(
  505. date_range(start="2016-01-01", periods=8),
  506. name="date",
  507. ),
  508. )
  509. result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean()
  510. expected = DataFrame(
  511. data={
  512. "first_val": [2.0, 2.5, 7.0, 4.0],
  513. "second_val": [4.5, 4.5, 5.0, 4.5],
  514. },
  515. index=Index(
  516. data=[
  517. (0, Timestamp("2016-01-01")),
  518. (0, Timestamp("2016-01-03")),
  519. (1, Timestamp("2016-01-05")),
  520. (1, Timestamp("2016-01-07")),
  521. ],
  522. name=("group", "date"),
  523. ),
  524. )
  525. tm.assert_frame_equal(result, expected)
  526. def test_groupby_resample_on_index_with_list_of_keys_missing_column():
  527. # GH 50876
  528. df = DataFrame(
  529. data={
  530. "group": [0, 0, 0, 0, 1, 1, 1, 1],
  531. "val": [3, 1, 4, 1, 5, 9, 2, 6],
  532. },
  533. index=Series(
  534. date_range(start="2016-01-01", periods=8),
  535. name="date",
  536. ),
  537. )
  538. with pytest.raises(KeyError, match="Columns not found"):
  539. df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean()