123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
- from datetime import timedelta
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- DataFrame,
- Series,
- )
- import pandas._testing as tm
- from pandas.core.indexes.timedeltas import timedelta_range
def test_asfreq_bug():
    """Check that ``asfreq`` upsampling of a timedelta-indexed frame pads with NaN.

    Two rows three minutes apart are resampled onto a one-minute grid; the
    two grid points without a source row are expected to be NaN.
    """
    df = DataFrame(data=[1, 3], index=[timedelta(), timedelta(minutes=3)])
    # "min" is the supported minute alias; the single-letter "T" is deprecated.
    result = df.resample("1min").asfreq()
    expected = DataFrame(
        data=[1, np.nan, np.nan, 3],
        index=timedelta_range("0 day", periods=4, freq="1min"),
    )
    tm.assert_frame_equal(result, expected)
def test_resample_with_nat():
    """Check the per-second mean of a frame whose timedelta index contains NaT."""
    # GH 13223
    index = pd.to_timedelta(["0s", pd.NaT, "2s"])
    result = DataFrame({"value": [2, 3, 5]}, index).resample("1s").mean()
    expected = DataFrame(
        {"value": [2.5, np.nan, 5.0]},
        # lowercase "s" is the supported second alias; uppercase "S" is deprecated
        index=timedelta_range("0 day", periods=3, freq="1s"),
    )
    tm.assert_frame_equal(result, expected)
def test_resample_as_freq_with_subperiod():
    """Check ``asfreq`` when the target period does not divide the source period.

    The source has rows at 0, 5 and 10 minutes; the two-minute target grid
    only coincides with it at 0 and 10 minutes, so every other slot is NaN.
    """
    # GH 13022
    # "min" replaces the deprecated "T" minute alias throughout.
    index = timedelta_range("00:00:00", "00:10:00", freq="5min")
    df = DataFrame(data={"value": [1, 5, 10]}, index=index)
    result = df.resample("2min").asfreq()
    expected_data = {"value": [1, np.nan, np.nan, np.nan, np.nan, 10]}
    expected = DataFrame(
        data=expected_data,
        index=timedelta_range("00:00:00", "00:10:00", freq="2min"),
    )
    tm.assert_frame_equal(result, expected)
def test_resample_with_timedeltas():
    """Check 30-minute sums over a minute-spaced timedelta index.

    The expected frame is built independently by integer-dividing the row
    position by 30, then compared against ``resample(...).sum()`` on both
    the frame and its single column.
    """
    n_rows = 1480  # one row per minute; 1480 rows span 50 thirty-minute bins

    expected = DataFrame({"A": np.arange(n_rows)})
    expected = expected.groupby(expected.index // 30).sum()
    # "min" replaces the deprecated "T" minute alias (freq and unit alike).
    expected.index = timedelta_range("0 days", freq="30min", periods=50)

    df = DataFrame(
        {"A": np.arange(n_rows)},
        index=pd.to_timedelta(np.arange(n_rows), unit="min"),
    )
    result = df.resample("30min").sum()
    tm.assert_frame_equal(result, expected)

    s = df["A"]
    result = s.resample("30min").sum()
    tm.assert_series_equal(result, expected["A"])
def test_resample_single_period_timedelta():
    """Sum a five-point, one-second-spaced series into two-second bins."""
    origin = "1 day"
    source_index = timedelta_range(origin, freq="s", periods=5)
    ser = Series(list(range(5)), index=source_index)

    # Bins hold (0, 1), (2, 3) and the lone trailing 4.
    expected_index = timedelta_range(origin, freq="2s", periods=3)
    expected = Series([1, 5, 4], index=expected_index)

    result = ser.resample("2s").sum()
    tm.assert_series_equal(result, expected)
def test_resample_timedelta_idempotency():
    """Check that resampling at the index's own frequency leaves values unchanged.

    The only expected difference is the dtype: ``mean`` yields floats.
    """
    # GH 12072
    # "ms" replaces the deprecated "L" millisecond alias.
    index = timedelta_range("0", periods=9, freq="10ms")
    series = Series(range(9), index=index)
    result = series.resample("10ms").mean()
    expected = series.astype(float)
    tm.assert_series_equal(result, expected)
def test_resample_offset_with_timedeltaindex():
    """Check the bin labels produced with and without ``offset`` on a TimedeltaIndex.

    Only the resulting indexes are compared; the series values are filler.
    """
    # GH 10530 & 31809
    rng = timedelta_range(start="0s", periods=25, freq="s")
    # Seeded generator keeps the filler data reproducible between runs.
    ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)

    with_offset = ts.resample("2s", offset="5s").mean()
    without_offset = ts.resample("2s").mean()

    exp_without_offset = timedelta_range(start="0s", end="25s", freq="2s")
    exp_with_offset = timedelta_range(start="5s", end="29s", freq="2s")

    tm.assert_index_equal(without_offset.index, exp_without_offset)
    tm.assert_index_equal(with_offset.index, exp_with_offset)
def test_resample_categorical_data_with_timedeltaindex():
    """Check a custom aggregation over object and categorical columns.

    Twenty one-second rows are grouped into two ten-second bins, and each
    bin is reduced to its first ``value_counts`` label.
    """
    # GH #12169
    df = DataFrame(
        {"Group_obj": "A"},
        index=pd.to_timedelta(list(range(20)), unit="s"),
    )
    df["Group"] = df["Group_obj"].astype("category")
    result = df.resample("10s").agg(lambda x: x.value_counts().index[0])

    expected = DataFrame(
        {"Group_obj": ["A", "A"], "Group": ["A", "A"]},
        # Same labels (0s, 10s; freq 10s) as the former
        # TimedeltaIndex([0, 10], unit="s", freq="10s"), without the
        # deprecated ``unit`` keyword of the TimedeltaIndex constructor.
        index=timedelta_range("0s", periods=2, freq="10s"),
    )
    expected = expected.reindex(["Group_obj", "Group"], axis=1)
    # The expected "Group" column is a copy of "Group_obj", not a categorical.
    expected["Group"] = expected["Group_obj"]
    tm.assert_frame_equal(result, expected)
def test_resample_timedelta_values():
    """Check that timedelta *values* keep their dtype when resampling adds NaT."""
    # GH 13119
    # The source has rows every four days; resampling to two days creates
    # an empty middle bin whose value must be NaT rather than a float NaN.
    coarse = timedelta_range("1 day", "6 day", freq="4D")
    fine = timedelta_range("1 day", "6 day", freq="2D")

    frame = DataFrame({"time": coarse}, index=coarse)

    expected = Series(fine, index=fine, name="time")
    expected.iloc[1] = pd.NaT

    # Frame-level resample, then column selection.
    from_frame = frame.resample("2D").first()["time"]
    tm.assert_series_equal(from_frame, expected)

    # Column selection, then series-level resample.
    from_column = frame["time"].resample("2D").first()
    tm.assert_series_equal(from_column, expected)
@pytest.mark.parametrize(
    "start, end, freq, resample_freq",
    [
        # Lowercase "h"/"s" replace the deprecated "H"/"S" aliases.
        ("8h", "21h59min50s", "10s", "3h"),  # GH 30353 example
        ("3h", "22h", "1h", "5h"),
        ("527D", "5006D", "3D", "10D"),
        ("1D", "10D", "1D", "2D"),  # GH 13022 example
        # tests that worked before GH 33498:
        ("8h", "21h59min50s", "10s", "2h"),
        ("0h", "21h59min50s", "10s", "3h"),
        ("10D", "85D", "D", "2D"),
    ],
)
def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
    """Check that resampling a timedelta range does not append a trailing bin.

    Parameters
    ----------
    start, end : str
        Bounds of the source ``timedelta_range``.
    freq : str
        Spacing of the source index.
    resample_freq : str
        Rule the series is resampled to.
    """
    # GH 33498
    # check that the timedelta bins do not contain an extra bin
    idx = timedelta_range(start=start, end=end, freq=freq)
    s = Series(np.arange(len(idx)), index=idx)
    result = s.resample(resample_freq).min()
    expected_index = timedelta_range(freq=resample_freq, start=start, end=end)
    tm.assert_index_equal(result.index, expected_index)
    assert result.index.freq == expected_index.freq
    # Positional access must be explicit: the index holds timedeltas, so
    # ``result[-1]`` would rely on the deprecated integer-position fallback.
    assert not np.isnan(result.iloc[-1])
@pytest.mark.parametrize("duplicates", [True, False])
def test_resample_with_timedelta_yields_no_empty_groups(duplicates):
    """Check the per-bin row counts when resampling a sliced, high-rate frame.

    Parameters
    ----------
    duplicates : bool
        When True, the frame is given non-unique column labels.
    """
    # GH 10603
    df = DataFrame(
        # Seeded generator keeps the filler data reproducible between runs.
        np.random.default_rng(2).normal(size=(10000, 4)),
        # 3906250 ns is 1/256 s, i.e. 256 rows per second; "ns" is the
        # supported spelling of the nanosecond alias.
        index=timedelta_range(start="0s", periods=10000, freq="3906250ns"),
    )
    if duplicates:
        # case with non-unique columns
        df.columns = ["A", "B", "A", "C"]

    result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))

    # Slicing from 1s leaves 10000 - 256 = 9744 rows: twelve full 3s bins
    # of 768 rows each plus a final partial bin of 528 rows.
    expected = DataFrame(
        [[768] * 4] * 12 + [[528] * 4],
        index=timedelta_range(start="1s", periods=13, freq="3s"),
    )
    expected.columns = df.columns
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
def test_resample_quantile_timedelta(unit):
    """Check ``quantile`` on a timedelta column for each timedelta64 resolution."""
    # GH: 29485
    dtype = np.dtype(f"m8[{unit}]")

    # Four daily rows holding 0s..3s, stored at the resolution under test.
    values = pd.to_timedelta(np.arange(4), unit="s").astype(dtype)
    daily_index = pd.date_range("20200101", periods=4, tz="UTC")
    df = DataFrame({"value": values}, index=daily_index)

    result = df.resample("2D").quantile(0.99)

    expected_values = [
        pd.Timedelta("0 days 00:00:00.990000"),
        pd.Timedelta("0 days 00:00:02.990000"),
    ]
    expected_index = pd.date_range("20200101", periods=2, tz="UTC", freq="2D")
    expected = DataFrame({"value": expected_values}, index=expected_index)
    expected = expected.astype(dtype)

    tm.assert_frame_equal(result, expected)
def test_resample_closed_right():
    """Check right-closed, right-labelled one-minute sums on a timedelta index.

    Values 0..9 sit at 120s, 150s, ..., 390s. The first bin, labelled 120s,
    holds only the value 0; each later bin sums the values falling in the
    preceding minute, and the last bin holds only the value 9.
    """
    # GH#45414
    idx = pd.Index([pd.Timedelta(seconds=120 + i * 30) for i in range(10)])
    ser = Series(range(10), index=idx)
    # "min" replaces the deprecated "T" minute alias.
    result = ser.resample("min", closed="right", label="right").sum()
    expected = Series(
        [0, 3, 7, 11, 15, 9],
        index=pd.TimedeltaIndex(
            [pd.Timedelta(seconds=120 + i * 60) for i in range(6)], freq="min"
        ),
    )
    tm.assert_series_equal(result, expected)
|