# test_expanding.py - tests for the pandas expanding-window API
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. DatetimeIndex,
  6. Index,
  7. MultiIndex,
  8. Series,
  9. isna,
  10. notna,
  11. )
  12. import pandas._testing as tm
  13. def test_doc_string():
  14. df = DataFrame({"B": [0, 1, 2, np.nan, 4]})
  15. df
  16. df.expanding(2).sum()
  17. def test_constructor(frame_or_series):
  18. # GH 12669
  19. c = frame_or_series(range(5)).expanding
  20. # valid
  21. c(min_periods=1)
  22. @pytest.mark.parametrize("w", [2.0, "foo", np.array([2])])
  23. def test_constructor_invalid(frame_or_series, w):
  24. # not valid
  25. c = frame_or_series(range(5)).expanding
  26. msg = "min_periods must be an integer"
  27. with pytest.raises(ValueError, match=msg):
  28. c(min_periods=w)
  29. @pytest.mark.parametrize(
  30. "expander",
  31. [
  32. 1,
  33. pytest.param(
  34. "ls",
  35. marks=pytest.mark.xfail(
  36. reason="GH#16425 expanding with offset not supported"
  37. ),
  38. ),
  39. ],
  40. )
  41. def test_empty_df_expanding(expander):
  42. # GH 15819 Verifies that datetime and integer expanding windows can be
  43. # applied to empty DataFrames
  44. expected = DataFrame()
  45. result = DataFrame().expanding(expander).sum()
  46. tm.assert_frame_equal(result, expected)
  47. # Verifies that datetime and integer expanding windows can be applied
  48. # to empty DataFrames with datetime index
  49. expected = DataFrame(index=DatetimeIndex([]))
  50. result = DataFrame(index=DatetimeIndex([])).expanding(expander).sum()
  51. tm.assert_frame_equal(result, expected)
  52. def test_missing_minp_zero():
  53. # https://github.com/pandas-dev/pandas/pull/18921
  54. # minp=0
  55. x = Series([np.nan])
  56. result = x.expanding(min_periods=0).sum()
  57. expected = Series([0.0])
  58. tm.assert_series_equal(result, expected)
  59. # minp=1
  60. result = x.expanding(min_periods=1).sum()
  61. expected = Series([np.nan])
  62. tm.assert_series_equal(result, expected)
  63. def test_expanding_axis(axis_frame):
  64. # see gh-23372.
  65. df = DataFrame(np.ones((10, 20)))
  66. axis = df._get_axis_number(axis_frame)
  67. if axis == 0:
  68. expected = DataFrame(
  69. {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)}
  70. )
  71. else:
  72. # axis == 1
  73. expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10)
  74. result = df.expanding(3, axis=axis_frame).sum()
  75. tm.assert_frame_equal(result, expected)
  76. def test_expanding_count_with_min_periods(frame_or_series):
  77. # GH 26996
  78. result = frame_or_series(range(5)).expanding(min_periods=3).count()
  79. expected = frame_or_series([np.nan, np.nan, 3.0, 4.0, 5.0])
  80. tm.assert_equal(result, expected)
  81. def test_expanding_count_default_min_periods_with_null_values(frame_or_series):
  82. # GH 26996
  83. values = [1, 2, 3, np.nan, 4, 5, 6]
  84. expected_counts = [1.0, 2.0, 3.0, 3.0, 4.0, 5.0, 6.0]
  85. result = frame_or_series(values).expanding().count()
  86. expected = frame_or_series(expected_counts)
  87. tm.assert_equal(result, expected)
  88. def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_series):
  89. # GH 25857
  90. result = frame_or_series(range(5)).expanding(min_periods=6).count()
  91. expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan])
  92. tm.assert_equal(result, expected)
  93. @pytest.mark.parametrize(
  94. "df,expected,min_periods",
  95. [
  96. (
  97. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  98. [
  99. ({"A": [1], "B": [4]}, [0]),
  100. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  101. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  102. ],
  103. 3,
  104. ),
  105. (
  106. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  107. [
  108. ({"A": [1], "B": [4]}, [0]),
  109. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  110. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  111. ],
  112. 2,
  113. ),
  114. (
  115. DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}),
  116. [
  117. ({"A": [1], "B": [4]}, [0]),
  118. ({"A": [1, 2], "B": [4, 5]}, [0, 1]),
  119. ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]),
  120. ],
  121. 1,
  122. ),
  123. (DataFrame({"A": [1], "B": [4]}), [], 2),
  124. (DataFrame(), [({}, [])], 1),
  125. (
  126. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  127. [
  128. ({"A": [1.0], "B": [np.nan]}, [0]),
  129. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  130. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  131. ],
  132. 3,
  133. ),
  134. (
  135. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  136. [
  137. ({"A": [1.0], "B": [np.nan]}, [0]),
  138. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  139. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  140. ],
  141. 2,
  142. ),
  143. (
  144. DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}),
  145. [
  146. ({"A": [1.0], "B": [np.nan]}, [0]),
  147. ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]),
  148. ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]),
  149. ],
  150. 1,
  151. ),
  152. ],
  153. )
  154. def test_iter_expanding_dataframe(df, expected, min_periods):
  155. # GH 11704
  156. expected = [DataFrame(values, index=index) for (values, index) in expected]
  157. for expected, actual in zip(expected, df.expanding(min_periods)):
  158. tm.assert_frame_equal(actual, expected)
  159. @pytest.mark.parametrize(
  160. "ser,expected,min_periods",
  161. [
  162. (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3),
  163. (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2),
  164. (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1),
  165. (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2),
  166. (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2),
  167. (Series([], dtype="int64"), [], 2),
  168. ],
  169. )
  170. def test_iter_expanding_series(ser, expected, min_periods):
  171. # GH 11704
  172. expected = [Series(values, index=index) for (values, index) in expected]
  173. for expected, actual in zip(expected, ser.expanding(min_periods)):
  174. tm.assert_series_equal(actual, expected)
  175. def test_center_invalid():
  176. # GH 20647
  177. df = DataFrame()
  178. with pytest.raises(TypeError, match=".* got an unexpected keyword"):
  179. df.expanding(center=True)
  180. def test_expanding_sem(frame_or_series):
  181. # GH: 26476
  182. obj = frame_or_series([0, 1, 2])
  183. result = obj.expanding().sem()
  184. if isinstance(result, DataFrame):
  185. result = Series(result[0].values)
  186. expected = Series([np.nan] + [0.707107] * 2)
  187. tm.assert_series_equal(result, expected)
  188. @pytest.mark.parametrize("method", ["skew", "kurt"])
  189. def test_expanding_skew_kurt_numerical_stability(method):
  190. # GH: 6929
  191. s = Series(np.random.rand(10))
  192. expected = getattr(s.expanding(3), method)()
  193. s = s + 5000
  194. result = getattr(s.expanding(3), method)()
  195. tm.assert_series_equal(result, expected)
  196. @pytest.mark.parametrize("window", [1, 3, 10, 20])
  197. @pytest.mark.parametrize("method", ["min", "max", "average"])
  198. @pytest.mark.parametrize("pct", [True, False])
  199. @pytest.mark.parametrize("ascending", [True, False])
  200. @pytest.mark.parametrize("test_data", ["default", "duplicates", "nans"])
  201. def test_rank(window, method, pct, ascending, test_data):
  202. length = 20
  203. if test_data == "default":
  204. ser = Series(data=np.random.rand(length))
  205. elif test_data == "duplicates":
  206. ser = Series(data=np.random.choice(3, length))
  207. elif test_data == "nans":
  208. ser = Series(
  209. data=np.random.choice([1.0, 0.25, 0.75, np.nan, np.inf, -np.inf], length)
  210. )
  211. expected = ser.expanding(window).apply(
  212. lambda x: x.rank(method=method, pct=pct, ascending=ascending).iloc[-1]
  213. )
  214. result = ser.expanding(window).rank(method=method, pct=pct, ascending=ascending)
  215. tm.assert_series_equal(result, expected)
  216. def test_expanding_corr(series):
  217. A = series.dropna()
  218. B = (A + np.random.randn(len(A)))[:-5]
  219. result = A.expanding().corr(B)
  220. rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
  221. tm.assert_almost_equal(rolling_result, result)
  222. def test_expanding_count(series):
  223. result = series.expanding(min_periods=0).count()
  224. tm.assert_almost_equal(
  225. result, series.rolling(window=len(series), min_periods=0).count()
  226. )
  227. def test_expanding_quantile(series):
  228. result = series.expanding().quantile(0.5)
  229. rolling_result = series.rolling(window=len(series), min_periods=1).quantile(0.5)
  230. tm.assert_almost_equal(result, rolling_result)
  231. def test_expanding_cov(series):
  232. A = series
  233. B = (A + np.random.randn(len(A)))[:-5]
  234. result = A.expanding().cov(B)
  235. rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
  236. tm.assert_almost_equal(rolling_result, result)
  237. def test_expanding_cov_pairwise(frame):
  238. result = frame.expanding().cov()
  239. rolling_result = frame.rolling(window=len(frame), min_periods=1).cov()
  240. tm.assert_frame_equal(result, rolling_result)
  241. def test_expanding_corr_pairwise(frame):
  242. result = frame.expanding().corr()
  243. rolling_result = frame.rolling(window=len(frame), min_periods=1).corr()
  244. tm.assert_frame_equal(result, rolling_result)
  245. @pytest.mark.parametrize(
  246. "func,static_comp",
  247. [
  248. ("sum", np.sum),
  249. ("mean", lambda x: np.mean(x, axis=0)),
  250. ("max", lambda x: np.max(x, axis=0)),
  251. ("min", lambda x: np.min(x, axis=0)),
  252. ],
  253. ids=["sum", "mean", "max", "min"],
  254. )
  255. def test_expanding_func(func, static_comp, frame_or_series):
  256. data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
  257. result = getattr(data.expanding(min_periods=1, axis=0), func)()
  258. assert isinstance(result, frame_or_series)
  259. expected = static_comp(data[:11])
  260. if frame_or_series is Series:
  261. tm.assert_almost_equal(result[10], expected)
  262. else:
  263. tm.assert_series_equal(result.iloc[10], expected, check_names=False)
  264. @pytest.mark.parametrize(
  265. "func,static_comp",
  266. [("sum", np.sum), ("mean", np.mean), ("max", np.max), ("min", np.min)],
  267. ids=["sum", "mean", "max", "min"],
  268. )
  269. def test_expanding_min_periods(func, static_comp):
  270. ser = Series(np.random.randn(50))
  271. result = getattr(ser.expanding(min_periods=30, axis=0), func)()
  272. assert result[:29].isna().all()
  273. tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
  274. # min_periods is working correctly
  275. result = getattr(ser.expanding(min_periods=15, axis=0), func)()
  276. assert isna(result.iloc[13])
  277. assert notna(result.iloc[14])
  278. ser2 = Series(np.random.randn(20))
  279. result = getattr(ser2.expanding(min_periods=5, axis=0), func)()
  280. assert isna(result[3])
  281. assert notna(result[4])
  282. # min_periods=0
  283. result0 = getattr(ser.expanding(min_periods=0, axis=0), func)()
  284. result1 = getattr(ser.expanding(min_periods=1, axis=0), func)()
  285. tm.assert_almost_equal(result0, result1)
  286. result = getattr(ser.expanding(min_periods=1, axis=0), func)()
  287. tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
  288. def test_expanding_apply(engine_and_raw, frame_or_series):
  289. engine, raw = engine_and_raw
  290. data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10))
  291. result = data.expanding(min_periods=1).apply(
  292. lambda x: x.mean(), raw=raw, engine=engine
  293. )
  294. assert isinstance(result, frame_or_series)
  295. if frame_or_series is Series:
  296. tm.assert_almost_equal(result[9], np.mean(data[:11], axis=0))
  297. else:
  298. tm.assert_series_equal(
  299. result.iloc[9], np.mean(data[:11], axis=0), check_names=False
  300. )
  301. def test_expanding_min_periods_apply(engine_and_raw):
  302. engine, raw = engine_and_raw
  303. ser = Series(np.random.randn(50))
  304. result = ser.expanding(min_periods=30).apply(
  305. lambda x: x.mean(), raw=raw, engine=engine
  306. )
  307. assert result[:29].isna().all()
  308. tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
  309. # min_periods is working correctly
  310. result = ser.expanding(min_periods=15).apply(
  311. lambda x: x.mean(), raw=raw, engine=engine
  312. )
  313. assert isna(result.iloc[13])
  314. assert notna(result.iloc[14])
  315. ser2 = Series(np.random.randn(20))
  316. result = ser2.expanding(min_periods=5).apply(
  317. lambda x: x.mean(), raw=raw, engine=engine
  318. )
  319. assert isna(result[3])
  320. assert notna(result[4])
  321. # min_periods=0
  322. result0 = ser.expanding(min_periods=0).apply(
  323. lambda x: x.mean(), raw=raw, engine=engine
  324. )
  325. result1 = ser.expanding(min_periods=1).apply(
  326. lambda x: x.mean(), raw=raw, engine=engine
  327. )
  328. tm.assert_almost_equal(result0, result1)
  329. result = ser.expanding(min_periods=1).apply(
  330. lambda x: x.mean(), raw=raw, engine=engine
  331. )
  332. tm.assert_almost_equal(result.iloc[-1], np.mean(ser[:50]))
  333. @pytest.mark.parametrize(
  334. "f",
  335. [
  336. lambda x: (x.expanding(min_periods=5).cov(x, pairwise=True)),
  337. lambda x: (x.expanding(min_periods=5).corr(x, pairwise=True)),
  338. ],
  339. )
  340. def test_moment_functions_zero_length_pairwise(f):
  341. df1 = DataFrame()
  342. df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar"))
  343. df2["a"] = df2["a"].astype("float64")
  344. df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns]))
  345. df2_expected = DataFrame(
  346. index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]),
  347. columns=Index(["a"], name="foo"),
  348. dtype="float64",
  349. )
  350. df1_result = f(df1)
  351. tm.assert_frame_equal(df1_result, df1_expected)
  352. df2_result = f(df2)
  353. tm.assert_frame_equal(df2_result, df2_expected)
  354. @pytest.mark.parametrize(
  355. "f",
  356. [
  357. lambda x: x.expanding().count(),
  358. lambda x: x.expanding(min_periods=5).cov(x, pairwise=False),
  359. lambda x: x.expanding(min_periods=5).corr(x, pairwise=False),
  360. lambda x: x.expanding(min_periods=5).max(),
  361. lambda x: x.expanding(min_periods=5).min(),
  362. lambda x: x.expanding(min_periods=5).sum(),
  363. lambda x: x.expanding(min_periods=5).mean(),
  364. lambda x: x.expanding(min_periods=5).std(),
  365. lambda x: x.expanding(min_periods=5).var(),
  366. lambda x: x.expanding(min_periods=5).skew(),
  367. lambda x: x.expanding(min_periods=5).kurt(),
  368. lambda x: x.expanding(min_periods=5).quantile(0.5),
  369. lambda x: x.expanding(min_periods=5).median(),
  370. lambda x: x.expanding(min_periods=5).apply(sum, raw=False),
  371. lambda x: x.expanding(min_periods=5).apply(sum, raw=True),
  372. ],
  373. )
  374. def test_moment_functions_zero_length(f):
  375. # GH 8056
  376. s = Series(dtype=np.float64)
  377. s_expected = s
  378. df1 = DataFrame()
  379. df1_expected = df1
  380. df2 = DataFrame(columns=["a"])
  381. df2["a"] = df2["a"].astype("float64")
  382. df2_expected = df2
  383. s_result = f(s)
  384. tm.assert_series_equal(s_result, s_expected)
  385. df1_result = f(df1)
  386. tm.assert_frame_equal(df1_result, df1_expected)
  387. df2_result = f(df2)
  388. tm.assert_frame_equal(df2_result, df2_expected)
  389. def test_expanding_apply_empty_series(engine_and_raw):
  390. engine, raw = engine_and_raw
  391. ser = Series([], dtype=np.float64)
  392. tm.assert_series_equal(
  393. ser, ser.expanding().apply(lambda x: x.mean(), raw=raw, engine=engine)
  394. )
  395. def test_expanding_apply_min_periods_0(engine_and_raw):
  396. # GH 8080
  397. engine, raw = engine_and_raw
  398. s = Series([None, None, None])
  399. result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw, engine=engine)
  400. expected = Series([1.0, 2.0, 3.0])
  401. tm.assert_series_equal(result, expected)
  402. def test_expanding_cov_diff_index():
  403. # GH 7512
  404. s1 = Series([1, 2, 3], index=[0, 1, 2])
  405. s2 = Series([1, 3], index=[0, 2])
  406. result = s1.expanding().cov(s2)
  407. expected = Series([None, None, 2.0])
  408. tm.assert_series_equal(result, expected)
  409. s2a = Series([1, None, 3], index=[0, 1, 2])
  410. result = s1.expanding().cov(s2a)
  411. tm.assert_series_equal(result, expected)
  412. s1 = Series([7, 8, 10], index=[0, 1, 3])
  413. s2 = Series([7, 9, 10], index=[0, 2, 3])
  414. result = s1.expanding().cov(s2)
  415. expected = Series([None, None, None, 4.5])
  416. tm.assert_series_equal(result, expected)
  417. def test_expanding_corr_diff_index():
  418. # GH 7512
  419. s1 = Series([1, 2, 3], index=[0, 1, 2])
  420. s2 = Series([1, 3], index=[0, 2])
  421. result = s1.expanding().corr(s2)
  422. expected = Series([None, None, 1.0])
  423. tm.assert_series_equal(result, expected)
  424. s2a = Series([1, None, 3], index=[0, 1, 2])
  425. result = s1.expanding().corr(s2a)
  426. tm.assert_series_equal(result, expected)
  427. s1 = Series([7, 8, 10], index=[0, 1, 3])
  428. s2 = Series([7, 9, 10], index=[0, 2, 3])
  429. result = s1.expanding().corr(s2)
  430. expected = Series([None, None, None, 1.0])
  431. tm.assert_series_equal(result, expected)
  432. def test_expanding_cov_pairwise_diff_length():
  433. # GH 7512
  434. df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=Index(["A", "B"], name="foo"))
  435. df1a = DataFrame(
  436. [[1, 5], [3, 9]], index=[0, 2], columns=Index(["A", "B"], name="foo")
  437. )
  438. df2 = DataFrame(
  439. [[5, 6], [None, None], [2, 1]], columns=Index(["X", "Y"], name="foo")
  440. )
  441. df2a = DataFrame(
  442. [[5, 6], [2, 1]], index=[0, 2], columns=Index(["X", "Y"], name="foo")
  443. )
  444. # TODO: xref gh-15826
  445. # .loc is not preserving the names
  446. result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
  447. result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
  448. result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
  449. result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
  450. expected = DataFrame(
  451. [[-3.0, -6.0], [-5.0, -10.0]],
  452. columns=Index(["A", "B"], name="foo"),
  453. index=Index(["X", "Y"], name="foo"),
  454. )
  455. tm.assert_frame_equal(result1, expected)
  456. tm.assert_frame_equal(result2, expected)
  457. tm.assert_frame_equal(result3, expected)
  458. tm.assert_frame_equal(result4, expected)
  459. def test_expanding_corr_pairwise_diff_length():
  460. # GH 7512
  461. df1 = DataFrame(
  462. [[1, 2], [3, 2], [3, 4]], columns=["A", "B"], index=Index(range(3), name="bar")
  463. )
  464. df1a = DataFrame(
  465. [[1, 2], [3, 4]], index=Index([0, 2], name="bar"), columns=["A", "B"]
  466. )
  467. df2 = DataFrame(
  468. [[5, 6], [None, None], [2, 1]],
  469. columns=["X", "Y"],
  470. index=Index(range(3), name="bar"),
  471. )
  472. df2a = DataFrame(
  473. [[5, 6], [2, 1]], index=Index([0, 2], name="bar"), columns=["X", "Y"]
  474. )
  475. result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
  476. result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
  477. result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
  478. result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
  479. expected = DataFrame(
  480. [[-1.0, -1.0], [-1.0, -1.0]], columns=["A", "B"], index=Index(["X", "Y"])
  481. )
  482. tm.assert_frame_equal(result1, expected)
  483. tm.assert_frame_equal(result2, expected)
  484. tm.assert_frame_equal(result3, expected)
  485. tm.assert_frame_equal(result4, expected)
  486. def test_expanding_apply_args_kwargs(engine_and_raw):
  487. def mean_w_arg(x, const):
  488. return np.mean(x) + const
  489. engine, raw = engine_and_raw
  490. df = DataFrame(np.random.rand(20, 3))
  491. expected = df.expanding().apply(np.mean, engine=engine, raw=raw) + 20.0
  492. result = df.expanding().apply(mean_w_arg, engine=engine, raw=raw, args=(20,))
  493. tm.assert_frame_equal(result, expected)
  494. result = df.expanding().apply(mean_w_arg, raw=raw, kwargs={"const": 20})
  495. tm.assert_frame_equal(result, expected)
  496. def test_numeric_only_frame(arithmetic_win_operators, numeric_only):
  497. # GH#46560
  498. kernel = arithmetic_win_operators
  499. df = DataFrame({"a": [1], "b": 2, "c": 3})
  500. df["c"] = df["c"].astype(object)
  501. expanding = df.expanding()
  502. op = getattr(expanding, kernel, None)
  503. if op is not None:
  504. result = op(numeric_only=numeric_only)
  505. columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
  506. expected = df[columns].agg([kernel]).reset_index(drop=True).astype(float)
  507. assert list(expected.columns) == columns
  508. tm.assert_frame_equal(result, expected)
  509. @pytest.mark.parametrize("kernel", ["corr", "cov"])
  510. @pytest.mark.parametrize("use_arg", [True, False])
  511. def test_numeric_only_corr_cov_frame(kernel, numeric_only, use_arg):
  512. # GH#46560
  513. df = DataFrame({"a": [1, 2, 3], "b": 2, "c": 3})
  514. df["c"] = df["c"].astype(object)
  515. arg = (df,) if use_arg else ()
  516. expanding = df.expanding()
  517. op = getattr(expanding, kernel)
  518. result = op(*arg, numeric_only=numeric_only)
  519. # Compare result to op using float dtypes, dropping c when numeric_only is True
  520. columns = ["a", "b"] if numeric_only else ["a", "b", "c"]
  521. df2 = df[columns].astype(float)
  522. arg2 = (df2,) if use_arg else ()
  523. expanding2 = df2.expanding()
  524. op2 = getattr(expanding2, kernel)
  525. expected = op2(*arg2, numeric_only=numeric_only)
  526. tm.assert_frame_equal(result, expected)
  527. @pytest.mark.parametrize("dtype", [int, object])
  528. def test_numeric_only_series(arithmetic_win_operators, numeric_only, dtype):
  529. # GH#46560
  530. kernel = arithmetic_win_operators
  531. ser = Series([1], dtype=dtype)
  532. expanding = ser.expanding()
  533. op = getattr(expanding, kernel)
  534. if numeric_only and dtype is object:
  535. msg = f"Expanding.{kernel} does not implement numeric_only"
  536. with pytest.raises(NotImplementedError, match=msg):
  537. op(numeric_only=numeric_only)
  538. else:
  539. result = op(numeric_only=numeric_only)
  540. expected = ser.agg([kernel]).reset_index(drop=True).astype(float)
  541. tm.assert_series_equal(result, expected)
  542. @pytest.mark.parametrize("kernel", ["corr", "cov"])
  543. @pytest.mark.parametrize("use_arg", [True, False])
  544. @pytest.mark.parametrize("dtype", [int, object])
  545. def test_numeric_only_corr_cov_series(kernel, use_arg, numeric_only, dtype):
  546. # GH#46560
  547. ser = Series([1, 2, 3], dtype=dtype)
  548. arg = (ser,) if use_arg else ()
  549. expanding = ser.expanding()
  550. op = getattr(expanding, kernel)
  551. if numeric_only and dtype is object:
  552. msg = f"Expanding.{kernel} does not implement numeric_only"
  553. with pytest.raises(NotImplementedError, match=msg):
  554. op(*arg, numeric_only=numeric_only)
  555. else:
  556. result = op(*arg, numeric_only=numeric_only)
  557. ser2 = ser.astype(float)
  558. arg2 = (ser2,) if use_arg else ()
  559. expanding2 = ser2.expanding()
  560. op2 = getattr(expanding2, kernel)
  561. expected = op2(*arg2, numeric_only=numeric_only)
  562. tm.assert_series_equal(result, expected)