# test_asof.py -- tests for Series.asof
import numpy as np
import pytest

from pandas._libs.tslibs import IncompatibleFrequency

from pandas import (
    DatetimeIndex,
    Series,
    Timestamp,
    date_range,
    isna,
    notna,
    offsets,
)
import pandas._testing as tm

  14. class TestSeriesAsof:
  15. def test_asof_nanosecond_index_access(self):
  16. ts = Timestamp("20130101").as_unit("ns")._value
  17. dti = DatetimeIndex([ts + 50 + i for i in range(100)])
  18. ser = Series(np.random.randn(100), index=dti)
  19. first_value = ser.asof(ser.index[0])
  20. # GH#46903 previously incorrectly was "day"
  21. assert dti.resolution == "nanosecond"
  22. # this used to not work bc parsing was done by dateutil that didn't
  23. # handle nanoseconds
  24. assert first_value == ser["2013-01-01 00:00:00.000000050"]
  25. expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
  26. assert first_value == ser[Timestamp(expected_ts)]
  27. def test_basic(self):
  28. # array or list or dates
  29. N = 50
  30. rng = date_range("1/1/1990", periods=N, freq="53s")
  31. ts = Series(np.random.randn(N), index=rng)
  32. ts.iloc[15:30] = np.nan
  33. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  34. result = ts.asof(dates)
  35. assert notna(result).all()
  36. lb = ts.index[14]
  37. ub = ts.index[30]
  38. result = ts.asof(list(dates))
  39. assert notna(result).all()
  40. lb = ts.index[14]
  41. ub = ts.index[30]
  42. mask = (result.index >= lb) & (result.index < ub)
  43. rs = result[mask]
  44. assert (rs == ts[lb]).all()
  45. val = result[result.index[result.index >= ub][0]]
  46. assert ts[ub] == val
  47. def test_scalar(self):
  48. N = 30
  49. rng = date_range("1/1/1990", periods=N, freq="53s")
  50. # Explicit cast to float avoid implicit cast when setting nan
  51. ts = Series(np.arange(N), index=rng, dtype="float")
  52. ts.iloc[5:10] = np.NaN
  53. ts.iloc[15:20] = np.NaN
  54. val1 = ts.asof(ts.index[7])
  55. val2 = ts.asof(ts.index[19])
  56. assert val1 == ts[4]
  57. assert val2 == ts[14]
  58. # accepts strings
  59. val1 = ts.asof(str(ts.index[7]))
  60. assert val1 == ts[4]
  61. # in there
  62. result = ts.asof(ts.index[3])
  63. assert result == ts[3]
  64. # no as of value
  65. d = ts.index[0] - offsets.BDay()
  66. assert np.isnan(ts.asof(d))
  67. def test_with_nan(self):
  68. # basic asof test
  69. rng = date_range("1/1/2000", "1/2/2000", freq="4h")
  70. s = Series(np.arange(len(rng)), index=rng)
  71. r = s.resample("2h").mean()
  72. result = r.asof(r.index)
  73. expected = Series(
  74. [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
  75. index=date_range("1/1/2000", "1/2/2000", freq="2h"),
  76. )
  77. tm.assert_series_equal(result, expected)
  78. r.iloc[3:5] = np.nan
  79. result = r.asof(r.index)
  80. expected = Series(
  81. [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
  82. index=date_range("1/1/2000", "1/2/2000", freq="2h"),
  83. )
  84. tm.assert_series_equal(result, expected)
  85. r.iloc[-3:] = np.nan
  86. result = r.asof(r.index)
  87. expected = Series(
  88. [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
  89. index=date_range("1/1/2000", "1/2/2000", freq="2h"),
  90. )
  91. tm.assert_series_equal(result, expected)
  92. def test_periodindex(self):
  93. from pandas import (
  94. PeriodIndex,
  95. period_range,
  96. )
  97. # array or list or dates
  98. N = 50
  99. rng = period_range("1/1/1990", periods=N, freq="H")
  100. ts = Series(np.random.randn(N), index=rng)
  101. ts.iloc[15:30] = np.nan
  102. dates = date_range("1/1/1990", periods=N * 3, freq="37min")
  103. result = ts.asof(dates)
  104. assert notna(result).all()
  105. lb = ts.index[14]
  106. ub = ts.index[30]
  107. result = ts.asof(list(dates))
  108. assert notna(result).all()
  109. lb = ts.index[14]
  110. ub = ts.index[30]
  111. pix = PeriodIndex(result.index.values, freq="H")
  112. mask = (pix >= lb) & (pix < ub)
  113. rs = result[mask]
  114. assert (rs == ts[lb]).all()
  115. ts.iloc[5:10] = np.nan
  116. ts.iloc[15:20] = np.nan
  117. val1 = ts.asof(ts.index[7])
  118. val2 = ts.asof(ts.index[19])
  119. assert val1 == ts[4]
  120. assert val2 == ts[14]
  121. # accepts strings
  122. val1 = ts.asof(str(ts.index[7]))
  123. assert val1 == ts[4]
  124. # in there
  125. assert ts.asof(ts.index[3]) == ts[3]
  126. # no as of value
  127. d = ts.index[0].to_timestamp() - offsets.BDay()
  128. assert isna(ts.asof(d))
  129. # Mismatched freq
  130. msg = "Input has different freq"
  131. with pytest.raises(IncompatibleFrequency, match=msg):
  132. ts.asof(rng.asfreq("D"))
  133. def test_errors(self):
  134. s = Series(
  135. [1, 2, 3],
  136. index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
  137. )
  138. # non-monotonic
  139. assert not s.index.is_monotonic_increasing
  140. with pytest.raises(ValueError, match="requires a sorted index"):
  141. s.asof(s.index[0])
  142. # subset with Series
  143. N = 10
  144. rng = date_range("1/1/1990", periods=N, freq="53s")
  145. s = Series(np.random.randn(N), index=rng)
  146. with pytest.raises(ValueError, match="not valid for Series"):
  147. s.asof(s.index[0], subset="foo")
  148. def test_all_nans(self):
  149. # GH 15713
  150. # series is all nans
  151. # testing non-default indexes
  152. N = 50
  153. rng = date_range("1/1/1990", periods=N, freq="53s")
  154. dates = date_range("1/1/1990", periods=N * 3, freq="25s")
  155. result = Series(np.nan, index=rng).asof(dates)
  156. expected = Series(np.nan, index=dates)
  157. tm.assert_series_equal(result, expected)
  158. # testing scalar input
  159. date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
  160. result = Series(np.nan, index=rng).asof(date)
  161. assert isna(result)
  162. # test name is propagated
  163. result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
  164. expected = Series(np.nan, index=[4, 5], name="test")
  165. tm.assert_series_equal(result, expected)