test_base.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. from datetime import datetime
  2. import numpy as np
  3. import pytest
  4. from pandas import (
  5. DataFrame,
  6. NaT,
  7. PeriodIndex,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. from pandas.core.groupby.groupby import DataError
  12. from pandas.core.groupby.grouper import Grouper
  13. from pandas.core.indexes.datetimes import date_range
  14. from pandas.core.indexes.period import period_range
  15. from pandas.core.indexes.timedeltas import timedelta_range
  16. from pandas.core.resample import _asfreq_compat
  # pytest lets a direct test parametrization override a fixture value, and the
  # override applies even when the test never names that fixture in its own
  # signature.
  # see https://docs.pytest.org/en/latest/fixture.html#override-a-fixture-with-direct-test-parametrization # noqa:E501
  # This module uses that mechanism to override the fixture values defined in
  # conftest.py. Each tuple below is ordered as
  # '_index_factory,_series_name,_index_start,_index_end'.
  DATE_RANGE = (
      date_range,
      "dti",
      datetime(2005, 1, 1),
      datetime(2005, 1, 10),
  )
  PERIOD_RANGE = (
      period_range,
      "pi",
      datetime(2005, 1, 1),
      datetime(2005, 1, 10),
  )
  TIMEDELTA_RANGE = (
      timedelta_range,
      "tdi",
      "1 day",
      "10 day",
  )

  # Decorator that runs a test once per index flavour listed above
  all_ts = pytest.mark.parametrize(
      "_index_factory,_series_name,_index_start,_index_end",
      [DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE],
  )
  @pytest.fixture
  def create_index(_index_factory):
      """Provide a callable that builds an index through ``_index_factory``."""

      def _build(*args, **kwargs):
          """return whatever _index_factory produces for the given args, kwargs"""
          return _index_factory(*args, **kwargs)

      return _build
  @pytest.mark.parametrize("freq", ["2D", "1H"])
  @pytest.mark.parametrize(
      "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
  )
  def test_asfreq(series_and_frame, freq, create_index):
      # resample(freq).asfreq() is compared against reindexing onto an index
      # built from the first and last original labels at the same frequency
      first, last = series_and_frame.index[0], series_and_frame.index[-1]
      target_index = create_index(first, last, freq=freq)
      expected = series_and_frame.reindex(target_index)

      result = series_and_frame.resample(freq).asfreq()
      tm.assert_almost_equal(result, expected)
  @pytest.mark.parametrize(
      "_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE]
  )
  def test_asfreq_fill_value(series, create_index):
      # test for fill value during resampling, issue 3715
      hourly = "1H"

      # Without a fill value, asfreq() is compared against a plain reindex
      plain_index = create_index(series.index[0], series.index[-1], freq=hourly)
      tm.assert_series_equal(
          series.resample(hourly).asfreq(), series.reindex(plain_index)
      )

      # Explicit cast to float to avoid implicit cast when setting None
      frame = series.astype("float").to_frame("value")
      frame.iloc[1] = None

      # With a fill value, asfreq() is compared against reindex(fill_value=...)
      filled_index = create_index(frame.index[0], frame.index[-1], freq=hourly)
      expected = frame.reindex(filled_index, fill_value=4.0)
      result = frame.resample(hourly).asfreq(fill_value=4.0)
      tm.assert_frame_equal(result, expected)
  @all_ts
  def test_resample_interpolate(frame):
      # 12925: interpolating after asfreq() is compared with interpolating the
      # resampler directly
      via_asfreq = frame.resample("1T").asfreq().interpolate()
      direct = frame.resample("1T").interpolate()
      tm.assert_frame_equal(via_asfreq, direct)
  def test_raises_on_non_datetimelike_index():
      # a default-constructed DataFrame carries a non datetimelike index,
      # so resampling it is expected to raise
      msg = (
          "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, "
          "but got an instance of 'RangeIndex'"
      )
      empty = DataFrame()
      with pytest.raises(TypeError, match=msg):
          empty.resample("A").mean()
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  def test_resample_empty_series(freq, empty_series_dti, resample_method, request):
      # GH12771 & GH12868
      ser = empty_series_dti

      # ohlc on a PeriodIndex is a known failure, so mark it rather than skip it
      on_period_index = isinstance(ser.index, PeriodIndex)
      if on_period_index and resample_method == "ohlc":
          known_failure = pytest.mark.xfail(
              reason=f"GH13083: {resample_method} fails for PeriodIndex"
          )
          request.node.add_marker(known_failure)

      resampler = ser.resample(freq)
      result = getattr(resampler, resample_method)()

      # Expected: the same empty series with its index converted to ``freq``
      expected = ser.copy()
      expected.index = _asfreq_compat(ser.index, freq)

      tm.assert_index_equal(result.index, expected.index)
      assert result.index.freq == expected.index.freq
      tm.assert_series_equal(result, expected, check_dtype=False)
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  def test_resample_nat_index_series(request, freq, series, resample_method):
      # GH39227
      if freq == "M":
          marker = pytest.mark.xfail(reason="Don't know why this fails")
          request.node.add_marker(marker)

      # Replace every index label with NaT before resampling
      ser = series.copy()
      ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
      result = getattr(ser.resample(freq), resample_method)()

      if resample_method != "ohlc":
          expected = ser[:0].copy()
          tm.assert_series_equal(result, expected, check_dtype=False)
      else:
          # ohlc returns a frame with the four price columns
          expected = DataFrame(
              [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
          )
          tm.assert_frame_equal(result, expected, check_dtype=False)

      tm.assert_index_equal(result.index, expected.index)
      assert result.index.freq == expected.index.freq
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  @pytest.mark.parametrize("resample_method", ["count", "size"])
  def test_resample_count_empty_series(freq, empty_series_dti, resample_method):
      # GH28427
      ser = empty_series_dti

      # Expected: an empty int64 series on the frequency-converted index
      expected = Series(
          [],
          dtype="int64",
          index=_asfreq_compat(ser.index, freq),
          name=ser.name,
      )

      result = getattr(ser.resample(freq), resample_method)()
      tm.assert_series_equal(result, expected)
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method):
      # GH13212
      # count retains dimensions too
      resampler = empty_frame_dti.resample(freq, group_keys=False)
      result = getattr(resampler, resample_method)()

      if resample_method == "size":
          # GH14962
          expected = Series([], dtype=np.int64)
      else:
          expected = empty_frame_dti.copy()
      expected.index = _asfreq_compat(empty_frame_dti.index, freq)

      tm.assert_index_equal(result.index, expected.index)
      assert result.index.freq == expected.index.freq
      tm.assert_almost_equal(result, expected)

      # test size for GH13212 (currently stays as df)
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  def test_resample_count_empty_dataframe(freq, empty_frame_dti):
      # GH28427
      frame = empty_frame_dti
      # Add an empty column "a" to the fixture frame before counting
      frame["a"] = []

      result = frame.resample(freq).count()

      expected = DataFrame(
          {"a": []}, dtype="int64", index=_asfreq_compat(frame.index, freq)
      )
      tm.assert_frame_equal(result, expected)
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  def test_resample_size_empty_dataframe(freq, empty_frame_dti):
      # GH28427
      frame = empty_frame_dti
      # Add an empty column "a" to the fixture frame before taking the size
      frame["a"] = []

      result = frame.resample(freq).size()

      expected = Series(
          [], dtype="int64", index=_asfreq_compat(frame.index, freq)
      )
      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
  @pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"])
  def test_resample_empty_dtypes(index, dtype, resample_method):
      # (GH #10228) Empty series used to trigger either a segfault (in the
      # functions with Cython bounds-checking disabled) or an IndexError.
      # The call is only executed here to make sure that no longer happens.
      ser = Series([], index=index, dtype=dtype)
      try:
          resampler = ser.resample("d", group_keys=False)
          getattr(resampler, resample_method)()
      except DataError:
          # Some dtype/method combinations are invalid and are ignored
          # (ex: doing mean with dtype of np.object_)
          pass
  @all_ts
  @pytest.mark.parametrize("freq", ["M", "D", "H"])
  def test_apply_to_empty_series(empty_series_dti, freq):
      # GH 14313
      # apply() with an arbitrary callable is compared against apply(np.sum)
      expected = empty_series_dti.resample(freq).apply(np.sum)
      result = empty_series_dti.resample(freq, group_keys=False).apply(
          lambda _group: 1
      )

      tm.assert_series_equal(result, expected, check_dtype=False)
  @all_ts
  def test_resampler_is_iterable(series):
      # GH 15314
      freq = "H"
      resampled = series.resample(freq)
      grouped = series.groupby(Grouper(freq=freq, convention="start"))

      # Walk both iterables in lockstep and compare each (key, values) pair
      for resampled_item, grouped_item in zip(resampled, grouped):
          res_key, res_values = resampled_item
          grp_key, grp_values = grouped_item
          assert res_key == grp_key
          tm.assert_series_equal(res_values, grp_values)
  @all_ts
  def test_resample_quantile(series):
      # GH 15023
      q = 0.75
      freq = "H"

      # Reference: quantile computed per group through agg()
      per_group = series.resample(freq).agg(lambda grp: grp.quantile(q))
      expected = per_group.rename(series.name)

      result = series.resample(freq).quantile(q)
      tm.assert_series_equal(result, expected)