test_rolling_skew_kurt.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. from functools import partial
  2. import numpy as np
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. from pandas import (
  6. DataFrame,
  7. Series,
  8. concat,
  9. isna,
  10. notna,
  11. )
  12. import pandas._testing as tm
  13. from pandas.tseries import offsets
  14. @td.skip_if_no_scipy
  15. @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
  16. def test_series(series, sp_func, roll_func):
  17. import scipy.stats
  18. compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
  19. result = getattr(series.rolling(50), roll_func)()
  20. assert isinstance(result, Series)
  21. tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))
  22. @td.skip_if_no_scipy
  23. @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
  24. def test_frame(raw, frame, sp_func, roll_func):
  25. import scipy.stats
  26. compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
  27. result = getattr(frame.rolling(50), roll_func)()
  28. assert isinstance(result, DataFrame)
  29. tm.assert_series_equal(
  30. result.iloc[-1, :],
  31. frame.iloc[-50:, :].apply(compare_func, axis=0, raw=raw),
  32. check_names=False,
  33. )
  34. @td.skip_if_no_scipy
  35. @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
  36. def test_time_rule_series(series, sp_func, roll_func):
  37. import scipy.stats
  38. compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
  39. win = 25
  40. ser = series[::2].resample("B").mean()
  41. series_result = getattr(ser.rolling(window=win, min_periods=10), roll_func)()
  42. last_date = series_result.index[-1]
  43. prev_date = last_date - 24 * offsets.BDay()
  44. trunc_series = series[::2].truncate(prev_date, last_date)
  45. tm.assert_almost_equal(series_result[-1], compare_func(trunc_series))
  46. @td.skip_if_no_scipy
  47. @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
  48. def test_time_rule_frame(raw, frame, sp_func, roll_func):
  49. import scipy.stats
  50. compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
  51. win = 25
  52. frm = frame[::2].resample("B").mean()
  53. frame_result = getattr(frm.rolling(window=win, min_periods=10), roll_func)()
  54. last_date = frame_result.index[-1]
  55. prev_date = last_date - 24 * offsets.BDay()
  56. trunc_frame = frame[::2].truncate(prev_date, last_date)
  57. tm.assert_series_equal(
  58. frame_result.xs(last_date),
  59. trunc_frame.apply(compare_func, raw=raw),
  60. check_names=False,
  61. )
  62. @td.skip_if_no_scipy
  63. @pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
  64. def test_nans(sp_func, roll_func):
  65. import scipy.stats
  66. compare_func = partial(getattr(scipy.stats, sp_func), bias=False)
  67. obj = Series(np.random.randn(50))
  68. obj[:10] = np.NaN
  69. obj[-10:] = np.NaN
  70. result = getattr(obj.rolling(50, min_periods=30), roll_func)()
  71. tm.assert_almost_equal(result.iloc[-1], compare_func(obj[10:-10]))
  72. # min_periods is working correctly
  73. result = getattr(obj.rolling(20, min_periods=15), roll_func)()
  74. assert isna(result.iloc[23])
  75. assert not isna(result.iloc[24])
  76. assert not isna(result.iloc[-6])
  77. assert isna(result.iloc[-5])
  78. obj2 = Series(np.random.randn(20))
  79. result = getattr(obj2.rolling(10, min_periods=5), roll_func)()
  80. assert isna(result.iloc[3])
  81. assert notna(result.iloc[4])
  82. result0 = getattr(obj.rolling(20, min_periods=0), roll_func)()
  83. result1 = getattr(obj.rolling(20, min_periods=1), roll_func)()
  84. tm.assert_almost_equal(result0, result1)
  85. @pytest.mark.parametrize("minp", [0, 99, 100])
  86. @pytest.mark.parametrize("roll_func", ["kurt", "skew"])
  87. def test_min_periods(series, minp, roll_func, step):
  88. result = getattr(
  89. series.rolling(len(series) + 1, min_periods=minp, step=step), roll_func
  90. )()
  91. expected = getattr(
  92. series.rolling(len(series), min_periods=minp, step=step), roll_func
  93. )()
  94. nan_mask = isna(result)
  95. tm.assert_series_equal(nan_mask, isna(expected))
  96. nan_mask = ~nan_mask
  97. tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
  98. @pytest.mark.parametrize("roll_func", ["kurt", "skew"])
  99. def test_center(roll_func):
  100. obj = Series(np.random.randn(50))
  101. obj[:10] = np.NaN
  102. obj[-10:] = np.NaN
  103. result = getattr(obj.rolling(20, center=True), roll_func)()
  104. expected = (
  105. getattr(concat([obj, Series([np.NaN] * 9)]).rolling(20), roll_func)()
  106. .iloc[9:]
  107. .reset_index(drop=True)
  108. )
  109. tm.assert_series_equal(result, expected)
  110. @pytest.mark.parametrize("roll_func", ["kurt", "skew"])
  111. def test_center_reindex_series(series, roll_func):
  112. # shifter index
  113. s = [f"x{x:d}" for x in range(12)]
  114. series_xp = (
  115. getattr(
  116. series.reindex(list(series.index) + s).rolling(window=25),
  117. roll_func,
  118. )()
  119. .shift(-12)
  120. .reindex(series.index)
  121. )
  122. series_rs = getattr(series.rolling(window=25, center=True), roll_func)()
  123. tm.assert_series_equal(series_xp, series_rs)
  124. @pytest.mark.slow
  125. @pytest.mark.parametrize("roll_func", ["kurt", "skew"])
  126. def test_center_reindex_frame(frame, roll_func):
  127. # shifter index
  128. s = [f"x{x:d}" for x in range(12)]
  129. frame_xp = (
  130. getattr(
  131. frame.reindex(list(frame.index) + s).rolling(window=25),
  132. roll_func,
  133. )()
  134. .shift(-12)
  135. .reindex(frame.index)
  136. )
  137. frame_rs = getattr(frame.rolling(window=25, center=True), roll_func)()
  138. tm.assert_frame_equal(frame_xp, frame_rs)
  139. def test_rolling_skew_edge_cases(step):
  140. expected = Series([np.NaN] * 4 + [0.0])[::step]
  141. # yields all NaN (0 variance)
  142. d = Series([1] * 5)
  143. x = d.rolling(window=5, step=step).skew()
  144. # index 4 should be 0 as it contains 5 same obs
  145. tm.assert_series_equal(expected, x)
  146. expected = Series([np.NaN] * 5)[::step]
  147. # yields all NaN (window too small)
  148. d = Series(np.random.randn(5))
  149. x = d.rolling(window=2, step=step).skew()
  150. tm.assert_series_equal(expected, x)
  151. # yields [NaN, NaN, NaN, 0.177994, 1.548824]
  152. d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
  153. expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])[::step]
  154. x = d.rolling(window=4, step=step).skew()
  155. tm.assert_series_equal(expected, x)
  156. def test_rolling_kurt_edge_cases(step):
  157. expected = Series([np.NaN] * 4 + [-3.0])[::step]
  158. # yields all NaN (0 variance)
  159. d = Series([1] * 5)
  160. x = d.rolling(window=5, step=step).kurt()
  161. tm.assert_series_equal(expected, x)
  162. # yields all NaN (window too small)
  163. expected = Series([np.NaN] * 5)[::step]
  164. d = Series(np.random.randn(5))
  165. x = d.rolling(window=3, step=step).kurt()
  166. tm.assert_series_equal(expected, x)
  167. # yields [NaN, NaN, NaN, 1.224307, 2.671499]
  168. d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401])
  169. expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])[::step]
  170. x = d.rolling(window=4, step=step).kurt()
  171. tm.assert_series_equal(expected, x)
  172. def test_rolling_skew_eq_value_fperr(step):
  173. # #18804 all rolling skew for all equal values should return Nan
  174. # #46717 update: all equal values should return 0 instead of NaN
  175. a = Series([1.1] * 15).rolling(window=10, step=step).skew()
  176. assert (a[a.index >= 9] == 0).all()
  177. assert a[a.index < 9].isna().all()
  178. def test_rolling_kurt_eq_value_fperr(step):
  179. # #18804 all rolling kurt for all equal values should return Nan
  180. # #46717 update: all equal values should return -3 instead of NaN
  181. a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
  182. assert (a[a.index >= 9] == -3).all()
  183. assert a[a.index < 9].isna().all()