test_indexing.py 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. from decimal import Decimal
  2. import numpy as np
  3. import pytest
  4. from pandas._libs.missing import is_matching_na
  5. import pandas as pd
  6. from pandas import Index
  7. import pandas._testing as tm
  8. class TestGetIndexer:
  9. @pytest.mark.parametrize(
  10. "method,expected",
  11. [
  12. ("pad", np.array([-1, 0, 1, 1], dtype=np.intp)),
  13. ("backfill", np.array([0, 0, 1, -1], dtype=np.intp)),
  14. ],
  15. )
  16. def test_get_indexer_strings(self, method, expected):
  17. index = Index(["b", "c"])
  18. actual = index.get_indexer(["a", "b", "c", "d"], method=method)
  19. tm.assert_numpy_array_equal(actual, expected)
  20. def test_get_indexer_strings_raises(self):
  21. index = Index(["b", "c"])
  22. msg = r"unsupported operand type\(s\) for -: 'str' and 'str'"
  23. with pytest.raises(TypeError, match=msg):
  24. index.get_indexer(["a", "b", "c", "d"], method="nearest")
  25. with pytest.raises(TypeError, match=msg):
  26. index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
  27. with pytest.raises(TypeError, match=msg):
  28. index.get_indexer(
  29. ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
  30. )
  31. def test_get_indexer_with_NA_values(
  32. self, unique_nulls_fixture, unique_nulls_fixture2
  33. ):
  34. # GH#22332
  35. # check pairwise, that no pair of na values
  36. # is mangled
  37. if unique_nulls_fixture is unique_nulls_fixture2:
  38. return # skip it, values are not unique
  39. arr = np.array([unique_nulls_fixture, unique_nulls_fixture2], dtype=object)
  40. index = Index(arr, dtype=object)
  41. result = index.get_indexer(
  42. [unique_nulls_fixture, unique_nulls_fixture2, "Unknown"]
  43. )
  44. expected = np.array([0, 1, -1], dtype=np.intp)
  45. tm.assert_numpy_array_equal(result, expected)
  46. class TestGetIndexerNonUnique:
  47. def test_get_indexer_non_unique_nas(self, nulls_fixture):
  48. # even though this isn't non-unique, this should still work
  49. index = Index(["a", "b", nulls_fixture])
  50. indexer, missing = index.get_indexer_non_unique([nulls_fixture])
  51. expected_indexer = np.array([2], dtype=np.intp)
  52. expected_missing = np.array([], dtype=np.intp)
  53. tm.assert_numpy_array_equal(indexer, expected_indexer)
  54. tm.assert_numpy_array_equal(missing, expected_missing)
  55. # actually non-unique
  56. index = Index(["a", nulls_fixture, "b", nulls_fixture])
  57. indexer, missing = index.get_indexer_non_unique([nulls_fixture])
  58. expected_indexer = np.array([1, 3], dtype=np.intp)
  59. tm.assert_numpy_array_equal(indexer, expected_indexer)
  60. tm.assert_numpy_array_equal(missing, expected_missing)
  61. # matching-but-not-identical nans
  62. if is_matching_na(nulls_fixture, float("NaN")):
  63. index = Index(["a", float("NaN"), "b", float("NaN")])
  64. match_but_not_identical = True
  65. elif is_matching_na(nulls_fixture, Decimal("NaN")):
  66. index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
  67. match_but_not_identical = True
  68. else:
  69. match_but_not_identical = False
  70. if match_but_not_identical:
  71. indexer, missing = index.get_indexer_non_unique([nulls_fixture])
  72. expected_indexer = np.array([1, 3], dtype=np.intp)
  73. tm.assert_numpy_array_equal(indexer, expected_indexer)
  74. tm.assert_numpy_array_equal(missing, expected_missing)
  75. @pytest.mark.filterwarnings("ignore:elementwise comp:DeprecationWarning")
  76. def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
  77. expected_missing = np.array([], dtype=np.intp)
  78. # matching-but-not-identical nats
  79. if is_matching_na(np_nat_fixture, np_nat_fixture2):
  80. # ensure nats are different objects
  81. index = Index(
  82. np.array(
  83. ["2021-10-02", np_nat_fixture.copy(), np_nat_fixture2.copy()],
  84. dtype=object,
  85. ),
  86. dtype=object,
  87. )
  88. # pass as index to prevent target from being casted to DatetimeIndex
  89. indexer, missing = index.get_indexer_non_unique(
  90. Index([np_nat_fixture], dtype=object)
  91. )
  92. expected_indexer = np.array([1, 2], dtype=np.intp)
  93. tm.assert_numpy_array_equal(indexer, expected_indexer)
  94. tm.assert_numpy_array_equal(missing, expected_missing)
  95. # dt64nat vs td64nat
  96. else:
  97. try:
  98. np_nat_fixture == np_nat_fixture2
  99. except (TypeError, OverflowError):
  100. # Numpy will raise on uncomparable types, like
  101. # np.datetime64('NaT', 'Y') and np.datetime64('NaT', 'ps')
  102. # https://github.com/numpy/numpy/issues/22762
  103. return
  104. index = Index(
  105. np.array(
  106. [
  107. "2021-10-02",
  108. np_nat_fixture,
  109. np_nat_fixture2,
  110. np_nat_fixture,
  111. np_nat_fixture2,
  112. ],
  113. dtype=object,
  114. ),
  115. dtype=object,
  116. )
  117. # pass as index to prevent target from being casted to DatetimeIndex
  118. indexer, missing = index.get_indexer_non_unique(
  119. Index([np_nat_fixture], dtype=object)
  120. )
  121. expected_indexer = np.array([1, 3], dtype=np.intp)
  122. tm.assert_numpy_array_equal(indexer, expected_indexer)
  123. tm.assert_numpy_array_equal(missing, expected_missing)
  124. class TestSliceLocs:
  125. @pytest.mark.parametrize(
  126. "in_slice,expected",
  127. [
  128. # error: Slice index must be an integer or None
  129. (pd.IndexSlice[::-1], "yxdcb"),
  130. (pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
  131. (pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
  132. (pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
  133. (pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
  134. (pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
  135. (pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
  136. # absent labels
  137. (pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
  138. (pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
  139. (pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
  140. (pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
  141. (pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
  142. (pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
  143. (pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
  144. (pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
  145. (pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
  146. ],
  147. )
  148. def test_slice_locs_negative_step(self, in_slice, expected):
  149. index = Index(list("bcdxy"))
  150. s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
  151. result = index[s_start : s_stop : in_slice.step]
  152. expected = Index(list(expected))
  153. tm.assert_index_equal(result, expected)
  154. def test_slice_locs_dup(self):
  155. index = Index(["a", "a", "b", "c", "d", "d"])
  156. assert index.slice_locs("a", "d") == (0, 6)
  157. assert index.slice_locs(end="d") == (0, 6)
  158. assert index.slice_locs("a", "c") == (0, 4)
  159. assert index.slice_locs("b", "d") == (2, 6)
  160. index2 = index[::-1]
  161. assert index2.slice_locs("d", "a") == (0, 6)
  162. assert index2.slice_locs(end="a") == (0, 6)
  163. assert index2.slice_locs("d", "b") == (0, 4)
  164. assert index2.slice_locs("c", "a") == (2, 6)