# test_join.py

import numpy as np

from pandas.core.dtypes.common import is_int64_dtype

from pandas import (
    Index,
    RangeIndex,
)
import pandas._testing as tm


  8. class TestJoin:
  9. def test_join_outer(self):
  10. # join with Index[int64]
  11. index = RangeIndex(start=0, stop=20, step=2)
  12. other = Index(np.arange(25, 14, -1, dtype=np.int64))
  13. res, lidx, ridx = index.join(other, how="outer", return_indexers=True)
  14. noidx_res = index.join(other, how="outer")
  15. tm.assert_index_equal(res, noidx_res)
  16. eres = Index(
  17. [0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
  18. )
  19. elidx = np.array(
  20. [0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1],
  21. dtype=np.intp,
  22. )
  23. eridx = np.array(
  24. [-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
  25. dtype=np.intp,
  26. )
  27. assert isinstance(res, Index) and is_int64_dtype(res.dtype)
  28. assert not isinstance(res, RangeIndex)
  29. tm.assert_index_equal(res, eres, exact=True)
  30. tm.assert_numpy_array_equal(lidx, elidx)
  31. tm.assert_numpy_array_equal(ridx, eridx)
  32. # join with RangeIndex
  33. other = RangeIndex(25, 14, -1)
  34. res, lidx, ridx = index.join(other, how="outer", return_indexers=True)
  35. noidx_res = index.join(other, how="outer")
  36. tm.assert_index_equal(res, noidx_res)
  37. assert isinstance(res, Index) and res.dtype == np.int64
  38. assert not isinstance(res, RangeIndex)
  39. tm.assert_index_equal(res, eres)
  40. tm.assert_numpy_array_equal(lidx, elidx)
  41. tm.assert_numpy_array_equal(ridx, eridx)
  42. def test_join_inner(self):
  43. # Join with non-RangeIndex
  44. index = RangeIndex(start=0, stop=20, step=2)
  45. other = Index(np.arange(25, 14, -1, dtype=np.int64))
  46. res, lidx, ridx = index.join(other, how="inner", return_indexers=True)
  47. # no guarantee of sortedness, so sort for comparison purposes
  48. ind = res.argsort()
  49. res = res.take(ind)
  50. lidx = lidx.take(ind)
  51. ridx = ridx.take(ind)
  52. eres = Index([16, 18])
  53. elidx = np.array([8, 9], dtype=np.intp)
  54. eridx = np.array([9, 7], dtype=np.intp)
  55. assert isinstance(res, Index) and res.dtype == np.int64
  56. tm.assert_index_equal(res, eres)
  57. tm.assert_numpy_array_equal(lidx, elidx)
  58. tm.assert_numpy_array_equal(ridx, eridx)
  59. # Join two RangeIndex
  60. other = RangeIndex(25, 14, -1)
  61. res, lidx, ridx = index.join(other, how="inner", return_indexers=True)
  62. assert isinstance(res, RangeIndex)
  63. tm.assert_index_equal(res, eres, exact="equiv")
  64. tm.assert_numpy_array_equal(lidx, elidx)
  65. tm.assert_numpy_array_equal(ridx, eridx)
  66. def test_join_left(self):
  67. # Join with Index[int64]
  68. index = RangeIndex(start=0, stop=20, step=2)
  69. other = Index(np.arange(25, 14, -1, dtype=np.int64))
  70. res, lidx, ridx = index.join(other, how="left", return_indexers=True)
  71. eres = index
  72. eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], dtype=np.intp)
  73. assert isinstance(res, RangeIndex)
  74. tm.assert_index_equal(res, eres)
  75. assert lidx is None
  76. tm.assert_numpy_array_equal(ridx, eridx)
  77. # Join withRangeIndex
  78. other = Index(np.arange(25, 14, -1, dtype=np.int64))
  79. res, lidx, ridx = index.join(other, how="left", return_indexers=True)
  80. assert isinstance(res, RangeIndex)
  81. tm.assert_index_equal(res, eres)
  82. assert lidx is None
  83. tm.assert_numpy_array_equal(ridx, eridx)
  84. def test_join_right(self):
  85. # Join with Index[int64]
  86. index = RangeIndex(start=0, stop=20, step=2)
  87. other = Index(np.arange(25, 14, -1, dtype=np.int64))
  88. res, lidx, ridx = index.join(other, how="right", return_indexers=True)
  89. eres = other
  90. elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], dtype=np.intp)
  91. assert isinstance(other, Index) and other.dtype == np.int64
  92. tm.assert_index_equal(res, eres)
  93. tm.assert_numpy_array_equal(lidx, elidx)
  94. assert ridx is None
  95. # Join withRangeIndex
  96. other = RangeIndex(25, 14, -1)
  97. res, lidx, ridx = index.join(other, how="right", return_indexers=True)
  98. eres = other
  99. assert isinstance(other, RangeIndex)
  100. tm.assert_index_equal(res, eres)
  101. tm.assert_numpy_array_equal(lidx, elidx)
  102. assert ridx is None
  103. def test_join_non_int_index(self):
  104. index = RangeIndex(start=0, stop=20, step=2)
  105. other = Index([3, 6, 7, 8, 10], dtype=object)
  106. outer = index.join(other, how="outer")
  107. outer2 = other.join(index, how="outer")
  108. expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18])
  109. tm.assert_index_equal(outer, outer2)
  110. tm.assert_index_equal(outer, expected)
  111. inner = index.join(other, how="inner")
  112. inner2 = other.join(index, how="inner")
  113. expected = Index([6, 8, 10])
  114. tm.assert_index_equal(inner, inner2)
  115. tm.assert_index_equal(inner, expected)
  116. left = index.join(other, how="left")
  117. tm.assert_index_equal(left, index.astype(object))
  118. left2 = other.join(index, how="left")
  119. tm.assert_index_equal(left2, other)
  120. right = index.join(other, how="right")
  121. tm.assert_index_equal(right, other)
  122. right2 = other.join(index, how="right")
  123. tm.assert_index_equal(right2, index.astype(object))
  124. def test_join_non_unique(self):
  125. index = RangeIndex(start=0, stop=20, step=2)
  126. other = Index([4, 4, 3, 3])
  127. res, lidx, ridx = index.join(other, return_indexers=True)
  128. eres = Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18])
  129. elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.intp)
  130. eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], dtype=np.intp)
  131. tm.assert_index_equal(res, eres)
  132. tm.assert_numpy_array_equal(lidx, elidx)
  133. tm.assert_numpy_array_equal(ridx, eridx)
  134. def test_join_self(self, join_type):
  135. index = RangeIndex(start=0, stop=20, step=2)
  136. joined = index.join(index, how=join_type)
  137. assert index is joined