123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- import numpy as np
- import pytest
- from pandas._libs.tslibs import IncompatibleFrequency
- from pandas import (
- DatetimeIndex,
- Series,
- Timestamp,
- date_range,
- isna,
- notna,
- offsets,
- )
- import pandas._testing as tm
- class TestSeriesAsof:
- def test_asof_nanosecond_index_access(self):
- ts = Timestamp("20130101").as_unit("ns")._value
- dti = DatetimeIndex([ts + 50 + i for i in range(100)])
- ser = Series(np.random.randn(100), index=dti)
- first_value = ser.asof(ser.index[0])
- # GH#46903 previously incorrectly was "day"
- assert dti.resolution == "nanosecond"
- # this used to not work bc parsing was done by dateutil that didn't
- # handle nanoseconds
- assert first_value == ser["2013-01-01 00:00:00.000000050"]
- expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
- assert first_value == ser[Timestamp(expected_ts)]
- def test_basic(self):
- # array or list or dates
- N = 50
- rng = date_range("1/1/1990", periods=N, freq="53s")
- ts = Series(np.random.randn(N), index=rng)
- ts.iloc[15:30] = np.nan
- dates = date_range("1/1/1990", periods=N * 3, freq="25s")
- result = ts.asof(dates)
- assert notna(result).all()
- lb = ts.index[14]
- ub = ts.index[30]
- result = ts.asof(list(dates))
- assert notna(result).all()
- lb = ts.index[14]
- ub = ts.index[30]
- mask = (result.index >= lb) & (result.index < ub)
- rs = result[mask]
- assert (rs == ts[lb]).all()
- val = result[result.index[result.index >= ub][0]]
- assert ts[ub] == val
- def test_scalar(self):
- N = 30
- rng = date_range("1/1/1990", periods=N, freq="53s")
- # Explicit cast to float avoid implicit cast when setting nan
- ts = Series(np.arange(N), index=rng, dtype="float")
- ts.iloc[5:10] = np.NaN
- ts.iloc[15:20] = np.NaN
- val1 = ts.asof(ts.index[7])
- val2 = ts.asof(ts.index[19])
- assert val1 == ts[4]
- assert val2 == ts[14]
- # accepts strings
- val1 = ts.asof(str(ts.index[7]))
- assert val1 == ts[4]
- # in there
- result = ts.asof(ts.index[3])
- assert result == ts[3]
- # no as of value
- d = ts.index[0] - offsets.BDay()
- assert np.isnan(ts.asof(d))
- def test_with_nan(self):
- # basic asof test
- rng = date_range("1/1/2000", "1/2/2000", freq="4h")
- s = Series(np.arange(len(rng)), index=rng)
- r = s.resample("2h").mean()
- result = r.asof(r.index)
- expected = Series(
- [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
- index=date_range("1/1/2000", "1/2/2000", freq="2h"),
- )
- tm.assert_series_equal(result, expected)
- r.iloc[3:5] = np.nan
- result = r.asof(r.index)
- expected = Series(
- [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
- index=date_range("1/1/2000", "1/2/2000", freq="2h"),
- )
- tm.assert_series_equal(result, expected)
- r.iloc[-3:] = np.nan
- result = r.asof(r.index)
- expected = Series(
- [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
- index=date_range("1/1/2000", "1/2/2000", freq="2h"),
- )
- tm.assert_series_equal(result, expected)
- def test_periodindex(self):
- from pandas import (
- PeriodIndex,
- period_range,
- )
- # array or list or dates
- N = 50
- rng = period_range("1/1/1990", periods=N, freq="H")
- ts = Series(np.random.randn(N), index=rng)
- ts.iloc[15:30] = np.nan
- dates = date_range("1/1/1990", periods=N * 3, freq="37min")
- result = ts.asof(dates)
- assert notna(result).all()
- lb = ts.index[14]
- ub = ts.index[30]
- result = ts.asof(list(dates))
- assert notna(result).all()
- lb = ts.index[14]
- ub = ts.index[30]
- pix = PeriodIndex(result.index.values, freq="H")
- mask = (pix >= lb) & (pix < ub)
- rs = result[mask]
- assert (rs == ts[lb]).all()
- ts.iloc[5:10] = np.nan
- ts.iloc[15:20] = np.nan
- val1 = ts.asof(ts.index[7])
- val2 = ts.asof(ts.index[19])
- assert val1 == ts[4]
- assert val2 == ts[14]
- # accepts strings
- val1 = ts.asof(str(ts.index[7]))
- assert val1 == ts[4]
- # in there
- assert ts.asof(ts.index[3]) == ts[3]
- # no as of value
- d = ts.index[0].to_timestamp() - offsets.BDay()
- assert isna(ts.asof(d))
- # Mismatched freq
- msg = "Input has different freq"
- with pytest.raises(IncompatibleFrequency, match=msg):
- ts.asof(rng.asfreq("D"))
- def test_errors(self):
- s = Series(
- [1, 2, 3],
- index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
- )
- # non-monotonic
- assert not s.index.is_monotonic_increasing
- with pytest.raises(ValueError, match="requires a sorted index"):
- s.asof(s.index[0])
- # subset with Series
- N = 10
- rng = date_range("1/1/1990", periods=N, freq="53s")
- s = Series(np.random.randn(N), index=rng)
- with pytest.raises(ValueError, match="not valid for Series"):
- s.asof(s.index[0], subset="foo")
- def test_all_nans(self):
- # GH 15713
- # series is all nans
- # testing non-default indexes
- N = 50
- rng = date_range("1/1/1990", periods=N, freq="53s")
- dates = date_range("1/1/1990", periods=N * 3, freq="25s")
- result = Series(np.nan, index=rng).asof(dates)
- expected = Series(np.nan, index=dates)
- tm.assert_series_equal(result, expected)
- # testing scalar input
- date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
- result = Series(np.nan, index=rng).asof(date)
- assert isna(result)
- # test name is propagated
- result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
- expected = Series(np.nan, index=[4, 5], name="test")
- tm.assert_series_equal(result, expected)
|