test_partial_indexing.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. IndexSlice,
  6. MultiIndex,
  7. date_range,
  8. )
  9. import pandas._testing as tm
  10. @pytest.fixture
  11. def df():
  12. # c1
  13. # 2016-01-01 00:00:00 a 0
  14. # b 1
  15. # c 2
  16. # 2016-01-01 12:00:00 a 3
  17. # b 4
  18. # c 5
  19. # 2016-01-02 00:00:00 a 6
  20. # b 7
  21. # c 8
  22. # 2016-01-02 12:00:00 a 9
  23. # b 10
  24. # c 11
  25. # 2016-01-03 00:00:00 a 12
  26. # b 13
  27. # c 14
  28. dr = date_range("2016-01-01", "2016-01-03", freq="12H")
  29. abc = ["a", "b", "c"]
  30. mi = MultiIndex.from_product([dr, abc])
  31. frame = DataFrame({"c1": range(0, 15)}, index=mi)
  32. return frame
  33. def test_partial_string_matching_single_index(df):
  34. # partial string matching on a single index
  35. for df_swap in [df.swaplevel(), df.swaplevel(0), df.swaplevel(0, 1)]:
  36. df_swap = df_swap.sort_index()
  37. just_a = df_swap.loc["a"]
  38. result = just_a.loc["2016-01-01"]
  39. expected = df.loc[IndexSlice[:, "a"], :].iloc[0:2]
  40. expected.index = expected.index.droplevel(1)
  41. tm.assert_frame_equal(result, expected)
  42. def test_get_loc_partial_timestamp_multiindex(df):
  43. mi = df.index
  44. key = ("2016-01-01", "a")
  45. loc = mi.get_loc(key)
  46. expected = np.zeros(len(mi), dtype=bool)
  47. expected[[0, 3]] = True
  48. tm.assert_numpy_array_equal(loc, expected)
  49. key2 = ("2016-01-02", "a")
  50. loc2 = mi.get_loc(key2)
  51. expected2 = np.zeros(len(mi), dtype=bool)
  52. expected2[[6, 9]] = True
  53. tm.assert_numpy_array_equal(loc2, expected2)
  54. key3 = ("2016-01", "a")
  55. loc3 = mi.get_loc(key3)
  56. expected3 = np.zeros(len(mi), dtype=bool)
  57. expected3[mi.get_level_values(1).get_loc("a")] = True
  58. tm.assert_numpy_array_equal(loc3, expected3)
  59. key4 = ("2016", "a")
  60. loc4 = mi.get_loc(key4)
  61. expected4 = expected3
  62. tm.assert_numpy_array_equal(loc4, expected4)
  63. # non-monotonic
  64. taker = np.arange(len(mi), dtype=np.intp)
  65. taker[::2] = taker[::-2]
  66. mi2 = mi.take(taker)
  67. loc5 = mi2.get_loc(key)
  68. expected5 = np.zeros(len(mi2), dtype=bool)
  69. expected5[[3, 14]] = True
  70. tm.assert_numpy_array_equal(loc5, expected5)
  71. def test_partial_string_timestamp_multiindex(df):
  72. # GH10331
  73. df_swap = df.swaplevel(0, 1).sort_index()
  74. SLC = IndexSlice
  75. # indexing with IndexSlice
  76. result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
  77. expected = df
  78. tm.assert_frame_equal(result, expected)
  79. # match on secondary index
  80. result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
  81. expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
  82. tm.assert_frame_equal(result, expected)
  83. # partial string match on year only
  84. result = df.loc["2016"]
  85. expected = df
  86. tm.assert_frame_equal(result, expected)
  87. # partial string match on date
  88. result = df.loc["2016-01-01"]
  89. expected = df.iloc[0:6]
  90. tm.assert_frame_equal(result, expected)
  91. # partial string match on date and hour, from middle
  92. result = df.loc["2016-01-02 12"]
  93. # hourly resolution, same as index.levels[0], so we are _not_ slicing on
  94. # that level, so that level gets dropped
  95. expected = df.iloc[9:12].droplevel(0)
  96. tm.assert_frame_equal(result, expected)
  97. # partial string match on secondary index
  98. result = df_swap.loc[SLC[:, "2016-01-02"], :]
  99. expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
  100. tm.assert_frame_equal(result, expected)
  101. # tuple selector with partial string match on date
  102. # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
  103. result = df.loc[("2016-01-01", "a"), :]
  104. expected = df.iloc[[0, 3]]
  105. expected = df.iloc[[0, 3]].droplevel(1)
  106. tm.assert_frame_equal(result, expected)
  107. # Slicing date on first level should break (of course) bc the DTI is the
  108. # second level on df_swap
  109. with pytest.raises(KeyError, match="'2016-01-01'"):
  110. df_swap.loc["2016-01-01"]
  111. def test_partial_string_timestamp_multiindex_str_key_raises(df):
  112. # Even though this syntax works on a single index, this is somewhat
  113. # ambiguous and we don't want to extend this behavior forward to work
  114. # in multi-indexes. This would amount to selecting a scalar from a
  115. # column.
  116. with pytest.raises(KeyError, match="'2016-01-01'"):
  117. df["2016-01-01"]
  118. def test_partial_string_timestamp_multiindex_daily_resolution(df):
  119. # GH12685 (partial string with daily resolution or below)
  120. result = df.loc[IndexSlice["2013-03":"2013-03", :], :]
  121. expected = df.iloc[118:180]
  122. tm.assert_frame_equal(result, expected)