# test_reindex.py
import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
)
import pandas._testing as tm
  9. def test_reindex(idx):
  10. result, indexer = idx.reindex(list(idx[:4]))
  11. assert isinstance(result, MultiIndex)
  12. assert result.names == ["first", "second"]
  13. assert [level.name for level in result.levels] == ["first", "second"]
  14. result, indexer = idx.reindex(list(idx))
  15. assert isinstance(result, MultiIndex)
  16. assert indexer is None
  17. assert result.names == ["first", "second"]
  18. assert [level.name for level in result.levels] == ["first", "second"]
  19. def test_reindex_level(idx):
  20. index = Index(["one"])
  21. target, indexer = idx.reindex(index, level="second")
  22. target2, indexer2 = index.reindex(idx, level="second")
  23. exp_index = idx.join(index, level="second", how="right")
  24. exp_index2 = idx.join(index, level="second", how="left")
  25. assert target.equals(exp_index)
  26. exp_indexer = np.array([0, 2, 4])
  27. tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)
  28. assert target2.equals(exp_index2)
  29. exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
  30. tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)
  31. with pytest.raises(TypeError, match="Fill method not supported"):
  32. idx.reindex(idx, method="pad", level="second")
  33. def test_reindex_preserves_names_when_target_is_list_or_ndarray(idx):
  34. # GH6552
  35. idx = idx.copy()
  36. target = idx.copy()
  37. idx.names = target.names = [None, None]
  38. other_dtype = MultiIndex.from_product([[1, 2], [3, 4]])
  39. # list & ndarray cases
  40. assert idx.reindex([])[0].names == [None, None]
  41. assert idx.reindex(np.array([]))[0].names == [None, None]
  42. assert idx.reindex(target.tolist())[0].names == [None, None]
  43. assert idx.reindex(target.values)[0].names == [None, None]
  44. assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
  45. assert idx.reindex(other_dtype.values)[0].names == [None, None]
  46. idx.names = ["foo", "bar"]
  47. assert idx.reindex([])[0].names == ["foo", "bar"]
  48. assert idx.reindex(np.array([]))[0].names == ["foo", "bar"]
  49. assert idx.reindex(target.tolist())[0].names == ["foo", "bar"]
  50. assert idx.reindex(target.values)[0].names == ["foo", "bar"]
  51. assert idx.reindex(other_dtype.tolist())[0].names == ["foo", "bar"]
  52. assert idx.reindex(other_dtype.values)[0].names == ["foo", "bar"]
  53. def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
  54. # GH7774
  55. idx = MultiIndex.from_product([[0, 1], ["a", "b"]], names=["foo", "bar"])
  56. assert idx.reindex([], level=0)[0].names == ["foo", "bar"]
  57. assert idx.reindex([], level=1)[0].names == ["foo", "bar"]
  58. def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
  59. # GH7774
  60. idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
  61. assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
  62. assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
  63. # case with EA levels
  64. cat = pd.Categorical(["foo", "bar"])
  65. dti = pd.date_range("2016-01-01", periods=2, tz="US/Pacific")
  66. mi = MultiIndex.from_product([cat, dti])
  67. assert mi.reindex([], level=0)[0].levels[0].dtype == cat.dtype
  68. assert mi.reindex([], level=1)[0].levels[1].dtype == dti.dtype
  69. def test_reindex_base(idx):
  70. expected = np.arange(idx.size, dtype=np.intp)
  71. actual = idx.get_indexer(idx)
  72. tm.assert_numpy_array_equal(expected, actual)
  73. with pytest.raises(ValueError, match="Invalid fill method"):
  74. idx.get_indexer(idx, method="invalid")
  75. def test_reindex_non_unique():
  76. idx = MultiIndex.from_tuples([(0, 0), (1, 1), (1, 1), (2, 2)])
  77. a = pd.Series(np.arange(4), index=idx)
  78. new_idx = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
  79. msg = "cannot handle a non-unique multi-index!"
  80. with pytest.raises(ValueError, match=msg):
  81. a.reindex(new_idx)
  82. @pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
  83. def test_reindex_empty_with_level(values):
  84. # GH41170
  85. idx = MultiIndex.from_arrays(values)
  86. result, result_indexer = idx.reindex(np.array(["b"]), level=0)
  87. expected = MultiIndex(levels=[["b"], values[1]], codes=[[], []])
  88. expected_indexer = np.array([], dtype=result_indexer.dtype)
  89. tm.assert_index_equal(result, expected)
  90. tm.assert_numpy_array_equal(result_indexer, expected_indexer)
  91. def test_reindex_not_all_tuples():
  92. keys = [("i", "i"), ("i", "j"), ("j", "i"), "j"]
  93. mi = MultiIndex.from_tuples(keys[:-1])
  94. idx = Index(keys)
  95. res, indexer = mi.reindex(idx)
  96. tm.assert_index_equal(res, idx)
  97. expected = np.array([0, 1, 2, -1], dtype=np.intp)
  98. tm.assert_numpy_array_equal(indexer, expected)
  99. def test_reindex_limit_arg_with_multiindex():
  100. # GH21247
  101. idx = MultiIndex.from_tuples([(3, "A"), (4, "A"), (4, "B")])
  102. df = pd.Series([0.02, 0.01, 0.012], index=idx)
  103. new_idx = MultiIndex.from_tuples(
  104. [
  105. (3, "A"),
  106. (3, "B"),
  107. (4, "A"),
  108. (4, "B"),
  109. (4, "C"),
  110. (5, "B"),
  111. (5, "C"),
  112. (6, "B"),
  113. (6, "C"),
  114. ]
  115. )
  116. with pytest.raises(
  117. ValueError,
  118. match="limit argument only valid if doing pad, backfill or nearest reindexing",
  119. ):
  120. df.reindex(new_idx, fill_value=0, limit=1)
  121. def test_reindex_with_none_in_nested_multiindex():
  122. # GH42883
  123. index = MultiIndex.from_tuples([(("a", None), 1), (("b", None), 2)])
  124. index2 = MultiIndex.from_tuples([(("b", None), 2), (("a", None), 1)])
  125. df1_dtype = pd.DataFrame([1, 2], index=index)
  126. df2_dtype = pd.DataFrame([2, 1], index=index2)
  127. result = df1_dtype.reindex_like(df2_dtype)
  128. expected = df2_dtype
  129. tm.assert_frame_equal(result, expected)