test_partial_slicing.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. PeriodIndex,
  6. Series,
  7. date_range,
  8. period_range,
  9. )
  10. import pandas._testing as tm
  11. class TestPeriodIndex:
  12. def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write):
  13. # monotonic
  14. idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
  15. ts = Series(np.random.randn(len(idx)), index=idx)
  16. original = ts.copy()
  17. result = ts["2007"]
  18. expected = ts[1:3]
  19. tm.assert_series_equal(result, expected)
  20. result[:] = 1
  21. if using_copy_on_write:
  22. tm.assert_series_equal(ts, original)
  23. else:
  24. assert (ts[1:3] == 1).all()
  25. # not monotonic
  26. idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
  27. ts = Series(np.random.randn(len(idx)), index=idx)
  28. result = ts["2007"]
  29. expected = ts[idx == "2007"]
  30. tm.assert_series_equal(result, expected)
  31. def test_getitem_periodindex_quarter_string(self):
  32. pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q")
  33. ser = Series(np.random.rand(len(pi)), index=pi).cumsum()
  34. # Todo: fix these accessors!
  35. assert ser["05Q4"] == ser[2]
  36. def test_pindex_slice_index(self):
  37. pi = period_range(start="1/1/10", end="12/31/12", freq="M")
  38. s = Series(np.random.rand(len(pi)), index=pi)
  39. res = s["2010"]
  40. exp = s[0:12]
  41. tm.assert_series_equal(res, exp)
  42. res = s["2011"]
  43. exp = s[12:24]
  44. tm.assert_series_equal(res, exp)
  45. @pytest.mark.parametrize("make_range", [date_range, period_range])
  46. def test_range_slice_day(self, make_range):
  47. # GH#6716
  48. idx = make_range(start="2013/01/01", freq="D", periods=400)
  49. msg = "slice indices must be integers or None or have an __index__ method"
  50. # slices against index should raise IndexError
  51. values = [
  52. "2014",
  53. "2013/02",
  54. "2013/01/02",
  55. "2013/02/01 9H",
  56. "2013/02/01 09:00",
  57. ]
  58. for v in values:
  59. with pytest.raises(TypeError, match=msg):
  60. idx[v:]
  61. s = Series(np.random.rand(len(idx)), index=idx)
  62. tm.assert_series_equal(s["2013/01/02":], s[1:])
  63. tm.assert_series_equal(s["2013/01/02":"2013/01/05"], s[1:5])
  64. tm.assert_series_equal(s["2013/02":], s[31:])
  65. tm.assert_series_equal(s["2014":], s[365:])
  66. invalid = ["2013/02/01 9H", "2013/02/01 09:00"]
  67. for v in invalid:
  68. with pytest.raises(TypeError, match=msg):
  69. idx[v:]
  70. @pytest.mark.parametrize("make_range", [date_range, period_range])
  71. def test_range_slice_seconds(self, make_range):
  72. # GH#6716
  73. idx = make_range(start="2013/01/01 09:00:00", freq="S", periods=4000)
  74. msg = "slice indices must be integers or None or have an __index__ method"
  75. # slices against index should raise IndexError
  76. values = [
  77. "2014",
  78. "2013/02",
  79. "2013/01/02",
  80. "2013/02/01 9H",
  81. "2013/02/01 09:00",
  82. ]
  83. for v in values:
  84. with pytest.raises(TypeError, match=msg):
  85. idx[v:]
  86. s = Series(np.random.rand(len(idx)), index=idx)
  87. tm.assert_series_equal(s["2013/01/01 09:05":"2013/01/01 09:10"], s[300:660])
  88. tm.assert_series_equal(s["2013/01/01 10:00":"2013/01/01 10:05"], s[3600:3960])
  89. tm.assert_series_equal(s["2013/01/01 10H":], s[3600:])
  90. tm.assert_series_equal(s[:"2013/01/01 09:30"], s[:1860])
  91. for d in ["2013/01/01", "2013/01", "2013"]:
  92. tm.assert_series_equal(s[d:], s)
  93. @pytest.mark.parametrize("make_range", [date_range, period_range])
  94. def test_range_slice_outofbounds(self, make_range):
  95. # GH#5407
  96. idx = make_range(start="2013/10/01", freq="D", periods=10)
  97. df = DataFrame({"units": [100 + i for i in range(10)]}, index=idx)
  98. empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"])
  99. empty["units"] = empty["units"].astype("int64")
  100. tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty)
  101. tm.assert_frame_equal(df["2013/09/30":"2013/10/02"], df.iloc[:2])
  102. tm.assert_frame_equal(df["2013/10/01":"2013/10/02"], df.iloc[:2])
  103. tm.assert_frame_equal(df["2013/10/02":"2013/09/30"], empty)
  104. tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty)
  105. tm.assert_frame_equal(df["2013-06":"2013-09"], empty)
  106. tm.assert_frame_equal(df["2013-11":"2013-12"], empty)
  107. @pytest.mark.parametrize("make_range", [date_range, period_range])
  108. def test_maybe_cast_slice_bound(self, make_range, frame_or_series):
  109. idx = make_range(start="2013/10/01", freq="D", periods=10)
  110. obj = DataFrame({"units": [100 + i for i in range(10)]}, index=idx)
  111. obj = tm.get_obj(obj, frame_or_series)
  112. msg = (
  113. f"cannot do slice indexing on {type(idx).__name__} with "
  114. r"these indexers \[foo\] of type str"
  115. )
  116. # Check the lower-level calls are raising where expected.
  117. with pytest.raises(TypeError, match=msg):
  118. idx._maybe_cast_slice_bound("foo", "left")
  119. with pytest.raises(TypeError, match=msg):
  120. idx.get_slice_bound("foo", "left")
  121. with pytest.raises(TypeError, match=msg):
  122. obj["2013/09/30":"foo"]
  123. with pytest.raises(TypeError, match=msg):
  124. obj["foo":"2013/09/30"]
  125. with pytest.raises(TypeError, match=msg):
  126. obj.loc["2013/09/30":"foo"]
  127. with pytest.raises(TypeError, match=msg):
  128. obj.loc["foo":"2013/09/30"]
  129. def test_partial_slice_doesnt_require_monotonicity(self):
  130. # See also: DatetimeIndex test ofm the same name
  131. dti = date_range("2014-01-01", periods=30, freq="30D")
  132. pi = dti.to_period("D")
  133. ser_montonic = Series(np.arange(30), index=pi)
  134. shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2))
  135. ser = ser_montonic[shuffler]
  136. nidx = ser.index
  137. # Manually identified locations of year==2014
  138. indexer_2014 = np.array(
  139. [0, 1, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20], dtype=np.intp
  140. )
  141. assert (nidx[indexer_2014].year == 2014).all()
  142. assert not (nidx[~indexer_2014].year == 2014).any()
  143. result = nidx.get_loc("2014")
  144. tm.assert_numpy_array_equal(result, indexer_2014)
  145. expected = ser[indexer_2014]
  146. result = ser.loc["2014"]
  147. tm.assert_series_equal(result, expected)
  148. result = ser["2014"]
  149. tm.assert_series_equal(result, expected)
  150. # Manually identified locations where ser.index is within Mat 2015
  151. indexer_may2015 = np.array([23], dtype=np.intp)
  152. assert nidx[23].year == 2015 and nidx[23].month == 5
  153. result = nidx.get_loc("May 2015")
  154. tm.assert_numpy_array_equal(result, indexer_may2015)
  155. expected = ser[indexer_may2015]
  156. result = ser.loc["May 2015"]
  157. tm.assert_series_equal(result, expected)
  158. result = ser["May 2015"]
  159. tm.assert_series_equal(result, expected)