123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197 |
- import numpy as np
- import pytest
- from pandas._libs.tslibs import IncompatibleFrequency
- from pandas import (
- DataFrame,
- Period,
- Series,
- Timestamp,
- date_range,
- period_range,
- to_datetime,
- )
- import pandas._testing as tm
- @pytest.fixture
- def date_range_frame():
- """
- Fixture for DataFrame of ints with date_range index
- Columns are ['A', 'B'].
- """
- N = 50
- rng = date_range("1/1/1990", periods=N, freq="53s")
- return DataFrame({"A": np.arange(N), "B": np.arange(N)}, index=rng)
- class TestFrameAsof:
- def test_basic(self, date_range_frame):
- # Explicitly cast to float to avoid implicit cast when setting np.nan
- df = date_range_frame.astype({"A": "float"})
- N = 50
- df.loc[df.index[15:30], "A"] = np.nan
- dates = date_range("1/1/1990", periods=N * 3, freq="25s")
- result = df.asof(dates)
- assert result.notna().all(1).all()
- lb = df.index[14]
- ub = df.index[30]
- dates = list(dates)
- result = df.asof(dates)
- assert result.notna().all(1).all()
- mask = (result.index >= lb) & (result.index < ub)
- rs = result[mask]
- assert (rs == 14).all(1).all()
- def test_subset(self, date_range_frame):
- N = 10
- # explicitly cast to float to avoid implicit upcast when setting to np.nan
- df = date_range_frame.iloc[:N].copy().astype({"A": "float"})
- df.loc[df.index[4:8], "A"] = np.nan
- dates = date_range("1/1/1990", periods=N * 3, freq="25s")
- # with a subset of A should be the same
- result = df.asof(dates, subset="A")
- expected = df.asof(dates)
- tm.assert_frame_equal(result, expected)
- # same with A/B
- result = df.asof(dates, subset=["A", "B"])
- expected = df.asof(dates)
- tm.assert_frame_equal(result, expected)
- # B gives df.asof
- result = df.asof(dates, subset="B")
- expected = df.resample("25s", closed="right").ffill().reindex(dates)
- expected.iloc[20:] = 9
- # no "missing", so "B" can retain int dtype (df["A"].dtype platform-dependent)
- expected["B"] = expected["B"].astype(df["B"].dtype)
- tm.assert_frame_equal(result, expected)
- def test_missing(self, date_range_frame):
- # GH 15118
- # no match found - `where` value before earliest date in index
- N = 10
- df = date_range_frame.iloc[:N].copy()
- result = df.asof("1989-12-31")
- expected = Series(
- index=["A", "B"], name=Timestamp("1989-12-31"), dtype=np.float64
- )
- tm.assert_series_equal(result, expected)
- result = df.asof(to_datetime(["1989-12-31"]))
- expected = DataFrame(
- index=to_datetime(["1989-12-31"]), columns=["A", "B"], dtype="float64"
- )
- tm.assert_frame_equal(result, expected)
- # Check that we handle PeriodIndex correctly, dont end up with
- # period.ordinal for series name
- df = df.to_period("D")
- result = df.asof("1989-12-31")
- assert isinstance(result.name, Period)
- def test_asof_all_nans(self, frame_or_series):
- # GH 15713
- # DataFrame/Series is all nans
- result = frame_or_series([np.nan]).asof([0])
- expected = frame_or_series([np.nan])
- tm.assert_equal(result, expected)
- def test_all_nans(self, date_range_frame):
- # GH 15713
- # DataFrame is all nans
- # testing non-default indexes, multiple inputs
- N = 150
- rng = date_range_frame.index
- dates = date_range("1/1/1990", periods=N, freq="25s")
- result = DataFrame(np.nan, index=rng, columns=["A"]).asof(dates)
- expected = DataFrame(np.nan, index=dates, columns=["A"])
- tm.assert_frame_equal(result, expected)
- # testing multiple columns
- dates = date_range("1/1/1990", periods=N, freq="25s")
- result = DataFrame(np.nan, index=rng, columns=["A", "B", "C"]).asof(dates)
- expected = DataFrame(np.nan, index=dates, columns=["A", "B", "C"])
- tm.assert_frame_equal(result, expected)
- # testing scalar input
- result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof([3])
- expected = DataFrame(np.nan, index=[3], columns=["A", "B"])
- tm.assert_frame_equal(result, expected)
- result = DataFrame(np.nan, index=[1, 2], columns=["A", "B"]).asof(3)
- expected = Series(np.nan, index=["A", "B"], name=3)
- tm.assert_series_equal(result, expected)
- @pytest.mark.parametrize(
- "stamp,expected",
- [
- (
- Timestamp("2018-01-01 23:22:43.325+00:00"),
- Series(2, name=Timestamp("2018-01-01 23:22:43.325+00:00")),
- ),
- (
- Timestamp("2018-01-01 22:33:20.682+01:00"),
- Series(1, name=Timestamp("2018-01-01 22:33:20.682+01:00")),
- ),
- ],
- )
- def test_time_zone_aware_index(self, stamp, expected):
- # GH21194
- # Testing awareness of DataFrame index considering different
- # UTC and timezone
- df = DataFrame(
- data=[1, 2],
- index=[
- Timestamp("2018-01-01 21:00:05.001+00:00"),
- Timestamp("2018-01-01 22:35:10.550+00:00"),
- ],
- )
- result = df.asof(stamp)
- tm.assert_series_equal(result, expected)
- def test_is_copy(self, date_range_frame):
- # GH-27357, GH-30784: ensure the result of asof is an actual copy and
- # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings
- df = date_range_frame.astype({"A": "float"})
- N = 50
- df.loc[df.index[15:30], "A"] = np.nan
- dates = date_range("1/1/1990", periods=N * 3, freq="25s")
- result = df.asof(dates)
- with tm.assert_produces_warning(None):
- result["C"] = 1
- def test_asof_periodindex_mismatched_freq(self):
- N = 50
- rng = period_range("1/1/1990", periods=N, freq="H")
- df = DataFrame(np.random.randn(N), index=rng)
- # Mismatched freq
- msg = "Input has different freq"
- with pytest.raises(IncompatibleFrequency, match=msg):
- df.asof(rng.asfreq("D"))
- def test_asof_preserves_bool_dtype(self):
- # GH#16063 was casting bools to floats
- dti = date_range("2017-01-01", freq="MS", periods=4)
- ser = Series([True, False, True], index=dti[:-1])
- ts = dti[-1]
- res = ser.asof([ts])
- expected = Series([True], index=[ts])
- tm.assert_series_equal(res, expected)
|