# test_indexing.py (12 KB)
  1. from datetime import datetime
  2. import re
  3. import numpy as np
  4. import pytest
  5. from pandas import (
  6. Index,
  7. NaT,
  8. Timedelta,
  9. TimedeltaIndex,
  10. Timestamp,
  11. notna,
  12. offsets,
  13. timedelta_range,
  14. to_timedelta,
  15. )
  16. import pandas._testing as tm
  17. class TestGetItem:
  18. def test_getitem_slice_keeps_name(self):
  19. # GH#4226
  20. tdi = timedelta_range("1d", "5d", freq="H", name="timebucket")
  21. assert tdi[1:].name == tdi.name
  22. def test_getitem(self):
  23. idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx")
  24. for idx in [idx1]:
  25. result = idx[0]
  26. assert result == Timedelta("1 day")
  27. result = idx[0:5]
  28. expected = timedelta_range("1 day", "5 day", freq="D", name="idx")
  29. tm.assert_index_equal(result, expected)
  30. assert result.freq == expected.freq
  31. result = idx[0:10:2]
  32. expected = timedelta_range("1 day", "9 day", freq="2D", name="idx")
  33. tm.assert_index_equal(result, expected)
  34. assert result.freq == expected.freq
  35. result = idx[-20:-5:3]
  36. expected = timedelta_range("12 day", "24 day", freq="3D", name="idx")
  37. tm.assert_index_equal(result, expected)
  38. assert result.freq == expected.freq
  39. result = idx[4::-1]
  40. expected = TimedeltaIndex(
  41. ["5 day", "4 day", "3 day", "2 day", "1 day"], freq="-1D", name="idx"
  42. )
  43. tm.assert_index_equal(result, expected)
  44. assert result.freq == expected.freq
  45. @pytest.mark.parametrize(
  46. "key",
  47. [
  48. Timestamp("1970-01-01"),
  49. Timestamp("1970-01-02"),
  50. datetime(1970, 1, 1),
  51. Timestamp("1970-01-03").to_datetime64(),
  52. # non-matching NA values
  53. np.datetime64("NaT"),
  54. ],
  55. )
  56. def test_timestamp_invalid_key(self, key):
  57. # GH#20464
  58. tdi = timedelta_range(0, periods=10)
  59. with pytest.raises(KeyError, match=re.escape(repr(key))):
  60. tdi.get_loc(key)
  61. class TestGetLoc:
  62. def test_get_loc_key_unit_mismatch(self):
  63. idx = to_timedelta(["0 days", "1 days", "2 days"])
  64. key = idx[1].as_unit("ms")
  65. loc = idx.get_loc(key)
  66. assert loc == 1
  67. def test_get_loc_key_unit_mismatch_not_castable(self):
  68. tdi = to_timedelta(["0 days", "1 days", "2 days"]).astype("m8[s]")
  69. assert tdi.dtype == "m8[s]"
  70. key = tdi[0].as_unit("ns") + Timedelta(1)
  71. with pytest.raises(KeyError, match=r"Timedelta\('0 days 00:00:00.000000001'\)"):
  72. tdi.get_loc(key)
  73. assert key not in tdi
  74. def test_get_loc(self):
  75. idx = to_timedelta(["0 days", "1 days", "2 days"])
  76. # GH 16909
  77. assert idx.get_loc(idx[1].to_timedelta64()) == 1
  78. # GH 16896
  79. assert idx.get_loc("0 days") == 0
  80. def test_get_loc_nat(self):
  81. tidx = TimedeltaIndex(["1 days 01:00:00", "NaT", "2 days 01:00:00"])
  82. assert tidx.get_loc(NaT) == 1
  83. assert tidx.get_loc(None) == 1
  84. assert tidx.get_loc(float("nan")) == 1
  85. assert tidx.get_loc(np.nan) == 1
  86. class TestGetIndexer:
  87. def test_get_indexer(self):
  88. idx = to_timedelta(["0 days", "1 days", "2 days"])
  89. tm.assert_numpy_array_equal(
  90. idx.get_indexer(idx), np.array([0, 1, 2], dtype=np.intp)
  91. )
  92. target = to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
  93. tm.assert_numpy_array_equal(
  94. idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
  95. )
  96. tm.assert_numpy_array_equal(
  97. idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
  98. )
  99. tm.assert_numpy_array_equal(
  100. idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
  101. )
  102. res = idx.get_indexer(target, "nearest", tolerance=Timedelta("1 hour"))
  103. tm.assert_numpy_array_equal(res, np.array([0, -1, 1], dtype=np.intp))
  104. class TestWhere:
  105. def test_where_doesnt_retain_freq(self):
  106. tdi = timedelta_range("1 day", periods=3, freq="D", name="idx")
  107. cond = [True, True, False]
  108. expected = TimedeltaIndex([tdi[0], tdi[1], tdi[0]], freq=None, name="idx")
  109. result = tdi.where(cond, tdi[::-1])
  110. tm.assert_index_equal(result, expected)
  111. def test_where_invalid_dtypes(self, fixed_now_ts):
  112. tdi = timedelta_range("1 day", periods=3, freq="D", name="idx")
  113. tail = tdi[2:].tolist()
  114. i2 = Index([NaT, NaT] + tail)
  115. mask = notna(i2)
  116. expected = Index([NaT._value, NaT._value] + tail, dtype=object, name="idx")
  117. assert isinstance(expected[0], int)
  118. result = tdi.where(mask, i2.asi8)
  119. tm.assert_index_equal(result, expected)
  120. ts = i2 + fixed_now_ts
  121. expected = Index([ts[0], ts[1]] + tail, dtype=object, name="idx")
  122. result = tdi.where(mask, ts)
  123. tm.assert_index_equal(result, expected)
  124. per = (i2 + fixed_now_ts).to_period("D")
  125. expected = Index([per[0], per[1]] + tail, dtype=object, name="idx")
  126. result = tdi.where(mask, per)
  127. tm.assert_index_equal(result, expected)
  128. ts = fixed_now_ts
  129. expected = Index([ts, ts] + tail, dtype=object, name="idx")
  130. result = tdi.where(mask, ts)
  131. tm.assert_index_equal(result, expected)
  132. def test_where_mismatched_nat(self):
  133. tdi = timedelta_range("1 day", periods=3, freq="D", name="idx")
  134. cond = np.array([True, False, False])
  135. dtnat = np.datetime64("NaT", "ns")
  136. expected = Index([tdi[0], dtnat, dtnat], dtype=object, name="idx")
  137. assert expected[2] is dtnat
  138. result = tdi.where(cond, dtnat)
  139. tm.assert_index_equal(result, expected)
  140. class TestTake:
  141. def test_take(self):
  142. # GH 10295
  143. idx1 = timedelta_range("1 day", "31 day", freq="D", name="idx")
  144. for idx in [idx1]:
  145. result = idx.take([0])
  146. assert result == Timedelta("1 day")
  147. result = idx.take([-1])
  148. assert result == Timedelta("31 day")
  149. result = idx.take([0, 1, 2])
  150. expected = timedelta_range("1 day", "3 day", freq="D", name="idx")
  151. tm.assert_index_equal(result, expected)
  152. assert result.freq == expected.freq
  153. result = idx.take([0, 2, 4])
  154. expected = timedelta_range("1 day", "5 day", freq="2D", name="idx")
  155. tm.assert_index_equal(result, expected)
  156. assert result.freq == expected.freq
  157. result = idx.take([7, 4, 1])
  158. expected = timedelta_range("8 day", "2 day", freq="-3D", name="idx")
  159. tm.assert_index_equal(result, expected)
  160. assert result.freq == expected.freq
  161. result = idx.take([3, 2, 5])
  162. expected = TimedeltaIndex(["4 day", "3 day", "6 day"], name="idx")
  163. tm.assert_index_equal(result, expected)
  164. assert result.freq is None
  165. result = idx.take([-3, 2, 5])
  166. expected = TimedeltaIndex(["29 day", "3 day", "6 day"], name="idx")
  167. tm.assert_index_equal(result, expected)
  168. assert result.freq is None
  169. def test_take_invalid_kwargs(self):
  170. idx = timedelta_range("1 day", "31 day", freq="D", name="idx")
  171. indices = [1, 6, 5, 9, 10, 13, 15, 3]
  172. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  173. with pytest.raises(TypeError, match=msg):
  174. idx.take(indices, foo=2)
  175. msg = "the 'out' parameter is not supported"
  176. with pytest.raises(ValueError, match=msg):
  177. idx.take(indices, out=indices)
  178. msg = "the 'mode' parameter is not supported"
  179. with pytest.raises(ValueError, match=msg):
  180. idx.take(indices, mode="clip")
  181. def test_take_equiv_getitem(self):
  182. tds = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"]
  183. idx = timedelta_range(start="1d", end="2d", freq="H", name="idx")
  184. expected = TimedeltaIndex(tds, freq=None, name="idx")
  185. taken1 = idx.take([2, 4, 10])
  186. taken2 = idx[[2, 4, 10]]
  187. for taken in [taken1, taken2]:
  188. tm.assert_index_equal(taken, expected)
  189. assert isinstance(taken, TimedeltaIndex)
  190. assert taken.freq is None
  191. assert taken.name == expected.name
  192. def test_take_fill_value(self):
  193. # GH 12631
  194. idx = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx")
  195. result = idx.take(np.array([1, 0, -1]))
  196. expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx")
  197. tm.assert_index_equal(result, expected)
  198. # fill_value
  199. result = idx.take(np.array([1, 0, -1]), fill_value=True)
  200. expected = TimedeltaIndex(["2 days", "1 days", "NaT"], name="xxx")
  201. tm.assert_index_equal(result, expected)
  202. # allow_fill=False
  203. result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
  204. expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx")
  205. tm.assert_index_equal(result, expected)
  206. msg = (
  207. "When allow_fill=True and fill_value is not None, "
  208. "all indices must be >= -1"
  209. )
  210. with pytest.raises(ValueError, match=msg):
  211. idx.take(np.array([1, 0, -2]), fill_value=True)
  212. with pytest.raises(ValueError, match=msg):
  213. idx.take(np.array([1, 0, -5]), fill_value=True)
  214. msg = "index -5 is out of bounds for (axis 0 with )?size 3"
  215. with pytest.raises(IndexError, match=msg):
  216. idx.take(np.array([1, -5]))
  217. class TestMaybeCastSliceBound:
  218. @pytest.fixture(params=["increasing", "decreasing", None])
  219. def monotonic(self, request):
  220. return request.param
  221. @pytest.fixture
  222. def tdi(self, monotonic):
  223. tdi = timedelta_range("1 Day", periods=10)
  224. if monotonic == "decreasing":
  225. tdi = tdi[::-1]
  226. elif monotonic is None:
  227. taker = np.arange(10, dtype=np.intp)
  228. np.random.shuffle(taker)
  229. tdi = tdi.take(taker)
  230. return tdi
  231. def test_maybe_cast_slice_bound_invalid_str(self, tdi):
  232. # test the low-level _maybe_cast_slice_bound and that we get the
  233. # expected exception+message all the way up the stack
  234. msg = (
  235. "cannot do slice indexing on TimedeltaIndex with these "
  236. r"indexers \[foo\] of type str"
  237. )
  238. with pytest.raises(TypeError, match=msg):
  239. tdi._maybe_cast_slice_bound("foo", side="left")
  240. with pytest.raises(TypeError, match=msg):
  241. tdi.get_slice_bound("foo", side="left")
  242. with pytest.raises(TypeError, match=msg):
  243. tdi.slice_locs("foo", None, None)
  244. def test_slice_invalid_str_with_timedeltaindex(
  245. self, tdi, frame_or_series, indexer_sl
  246. ):
  247. obj = frame_or_series(range(10), index=tdi)
  248. msg = (
  249. "cannot do slice indexing on TimedeltaIndex with these "
  250. r"indexers \[foo\] of type str"
  251. )
  252. with pytest.raises(TypeError, match=msg):
  253. indexer_sl(obj)["foo":]
  254. with pytest.raises(TypeError, match=msg):
  255. indexer_sl(obj)["foo":-1]
  256. with pytest.raises(TypeError, match=msg):
  257. indexer_sl(obj)[:"foo"]
  258. with pytest.raises(TypeError, match=msg):
  259. indexer_sl(obj)[tdi[0] : "foo"]
  260. class TestContains:
  261. def test_contains_nonunique(self):
  262. # GH#9512
  263. for vals in (
  264. [0, 1, 0],
  265. [0, 0, -1],
  266. [0, -1, -1],
  267. ["00:01:00", "00:01:00", "00:02:00"],
  268. ["00:01:00", "00:01:00", "00:00:01"],
  269. ):
  270. idx = TimedeltaIndex(vals)
  271. assert idx[0] in idx
  272. def test_contains(self):
  273. # Checking for any NaT-like objects
  274. # GH#13603
  275. td = to_timedelta(range(5), unit="d") + offsets.Hour(1)
  276. for v in [NaT, None, float("nan"), np.nan]:
  277. assert v not in td
  278. td = to_timedelta([NaT])
  279. for v in [NaT, None, float("nan"), np.nan]:
  280. assert v in td