test_equivalence.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. import numpy as np
  2. import pytest
  3. from pandas.core.dtypes.common import is_any_real_numeric_dtype
  4. import pandas as pd
  5. from pandas import (
  6. Index,
  7. MultiIndex,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. def test_equals(idx):
  12. assert idx.equals(idx)
  13. assert idx.equals(idx.copy())
  14. assert idx.equals(idx.astype(object))
  15. assert idx.equals(idx.to_flat_index())
  16. assert idx.equals(idx.to_flat_index().astype("category"))
  17. assert not idx.equals(list(idx))
  18. assert not idx.equals(np.array(idx))
  19. same_values = Index(idx, dtype=object)
  20. assert idx.equals(same_values)
  21. assert same_values.equals(idx)
  22. if idx.nlevels == 1:
  23. # do not test MultiIndex
  24. assert not idx.equals(Series(idx))
  25. def test_equals_op(idx):
  26. # GH9947, GH10637
  27. index_a = idx
  28. n = len(index_a)
  29. index_b = index_a[0:-1]
  30. index_c = index_a[0:-1].append(index_a[-2:-1])
  31. index_d = index_a[0:1]
  32. with pytest.raises(ValueError, match="Lengths must match"):
  33. index_a == index_b
  34. expected1 = np.array([True] * n)
  35. expected2 = np.array([True] * (n - 1) + [False])
  36. tm.assert_numpy_array_equal(index_a == index_a, expected1)
  37. tm.assert_numpy_array_equal(index_a == index_c, expected2)
  38. # test comparisons with numpy arrays
  39. array_a = np.array(index_a)
  40. array_b = np.array(index_a[0:-1])
  41. array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
  42. array_d = np.array(index_a[0:1])
  43. with pytest.raises(ValueError, match="Lengths must match"):
  44. index_a == array_b
  45. tm.assert_numpy_array_equal(index_a == array_a, expected1)
  46. tm.assert_numpy_array_equal(index_a == array_c, expected2)
  47. # test comparisons with Series
  48. series_a = Series(array_a)
  49. series_b = Series(array_b)
  50. series_c = Series(array_c)
  51. series_d = Series(array_d)
  52. with pytest.raises(ValueError, match="Lengths must match"):
  53. index_a == series_b
  54. tm.assert_numpy_array_equal(index_a == series_a, expected1)
  55. tm.assert_numpy_array_equal(index_a == series_c, expected2)
  56. # cases where length is 1 for one of them
  57. with pytest.raises(ValueError, match="Lengths must match"):
  58. index_a == index_d
  59. with pytest.raises(ValueError, match="Lengths must match"):
  60. index_a == series_d
  61. with pytest.raises(ValueError, match="Lengths must match"):
  62. index_a == array_d
  63. msg = "Can only compare identically-labeled Series objects"
  64. with pytest.raises(ValueError, match=msg):
  65. series_a == series_d
  66. with pytest.raises(ValueError, match="Lengths must match"):
  67. series_a == array_d
  68. # comparing with a scalar should broadcast; note that we are excluding
  69. # MultiIndex because in this case each item in the index is a tuple of
  70. # length 2, and therefore is considered an array of length 2 in the
  71. # comparison instead of a scalar
  72. if not isinstance(index_a, MultiIndex):
  73. expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
  74. # assuming the 2nd to last item is unique in the data
  75. item = index_a[-2]
  76. tm.assert_numpy_array_equal(index_a == item, expected3)
  77. tm.assert_series_equal(series_a == item, Series(expected3))
  78. def test_compare_tuple():
  79. # GH#21517
  80. mi = MultiIndex.from_product([[1, 2]] * 2)
  81. all_false = np.array([False, False, False, False])
  82. result = mi == mi[0]
  83. expected = np.array([True, False, False, False])
  84. tm.assert_numpy_array_equal(result, expected)
  85. result = mi != mi[0]
  86. tm.assert_numpy_array_equal(result, ~expected)
  87. result = mi < mi[0]
  88. tm.assert_numpy_array_equal(result, all_false)
  89. result = mi <= mi[0]
  90. tm.assert_numpy_array_equal(result, expected)
  91. result = mi > mi[0]
  92. tm.assert_numpy_array_equal(result, ~expected)
  93. result = mi >= mi[0]
  94. tm.assert_numpy_array_equal(result, ~all_false)
  95. def test_compare_tuple_strs():
  96. # GH#34180
  97. mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])
  98. result = mi == ("c", "a")
  99. expected = np.array([False, False, True])
  100. tm.assert_numpy_array_equal(result, expected)
  101. result = mi == ("c",)
  102. expected = np.array([False, False, False])
  103. tm.assert_numpy_array_equal(result, expected)
  104. def test_equals_multi(idx):
  105. assert idx.equals(idx)
  106. assert not idx.equals(idx.values)
  107. assert idx.equals(Index(idx.values))
  108. assert idx.equal_levels(idx)
  109. assert not idx.equals(idx[:-1])
  110. assert not idx.equals(idx[-1])
  111. # different number of levels
  112. index = MultiIndex(
  113. levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
  114. codes=[
  115. np.array([0, 0, 1, 2, 2, 2, 3, 3]),
  116. np.array([0, 1, 0, 0, 0, 1, 0, 1]),
  117. np.array([1, 0, 1, 1, 0, 0, 1, 0]),
  118. ],
  119. )
  120. index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
  121. assert not index.equals(index2)
  122. assert not index.equal_levels(index2)
  123. # levels are different
  124. major_axis = Index(list(range(4)))
  125. minor_axis = Index(list(range(2)))
  126. major_codes = np.array([0, 0, 1, 2, 2, 3])
  127. minor_codes = np.array([0, 1, 0, 0, 1, 0])
  128. index = MultiIndex(
  129. levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
  130. )
  131. assert not idx.equals(index)
  132. assert not idx.equal_levels(index)
  133. # some of the labels are different
  134. major_axis = Index(["foo", "bar", "baz", "qux"])
  135. minor_axis = Index(["one", "two"])
  136. major_codes = np.array([0, 0, 2, 2, 3, 3])
  137. minor_codes = np.array([0, 1, 0, 1, 0, 1])
  138. index = MultiIndex(
  139. levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
  140. )
  141. assert not idx.equals(index)
  142. def test_identical(idx):
  143. mi = idx.copy()
  144. mi2 = idx.copy()
  145. assert mi.identical(mi2)
  146. mi = mi.set_names(["new1", "new2"])
  147. assert mi.equals(mi2)
  148. assert not mi.identical(mi2)
  149. mi2 = mi2.set_names(["new1", "new2"])
  150. assert mi.identical(mi2)
  151. mi4 = Index(mi.tolist(), tupleize_cols=False)
  152. assert not mi.identical(mi4)
  153. assert mi.equals(mi4)
  154. def test_equals_operator(idx):
  155. # GH9785
  156. assert (idx == idx).all()
  157. def test_equals_missing_values():
  158. # make sure take is not using -1
  159. i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
  160. result = i[0:1].equals(i[0])
  161. assert not result
  162. result = i[1:2].equals(i[1])
  163. assert not result
  164. def test_equals_missing_values_differently_sorted():
  165. # GH#38439
  166. mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
  167. mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
  168. assert not mi1.equals(mi2)
  169. mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
  170. assert mi1.equals(mi2)
  171. def test_is_():
  172. mi = MultiIndex.from_tuples(zip(range(10), range(10)))
  173. assert mi.is_(mi)
  174. assert mi.is_(mi.view())
  175. assert mi.is_(mi.view().view().view().view())
  176. mi2 = mi.view()
  177. # names are metadata, they don't change id
  178. mi2.names = ["A", "B"]
  179. assert mi2.is_(mi)
  180. assert mi.is_(mi2)
  181. assert not mi.is_(mi.set_names(["C", "D"]))
  182. # levels are inherent properties, they change identity
  183. mi3 = mi2.set_levels([list(range(10)), list(range(10))])
  184. assert not mi3.is_(mi2)
  185. # shouldn't change
  186. assert mi2.is_(mi)
  187. mi4 = mi3.view()
  188. # GH 17464 - Remove duplicate MultiIndex levels
  189. mi4 = mi4.set_levels([list(range(10)), list(range(10))])
  190. assert not mi4.is_(mi3)
  191. mi5 = mi.view()
  192. mi5 = mi5.set_levels(mi5.levels)
  193. assert not mi5.is_(mi)
  194. def test_is_all_dates(idx):
  195. assert not idx._is_all_dates
  196. def test_is_numeric(idx):
  197. # MultiIndex is never numeric
  198. assert not is_any_real_numeric_dtype(idx)
  199. def test_multiindex_compare():
  200. # GH 21149
  201. # Ensure comparison operations for MultiIndex with nlevels == 1
  202. # behave consistently with those for MultiIndex with nlevels > 1
  203. midx = MultiIndex.from_product([[0, 1]])
  204. # Equality self-test: MultiIndex object vs self
  205. expected = Series([True, True])
  206. result = Series(midx == midx)
  207. tm.assert_series_equal(result, expected)
  208. # Greater than comparison: MultiIndex object vs self
  209. expected = Series([False, False])
  210. result = Series(midx > midx)
  211. tm.assert_series_equal(result, expected)
  212. def test_equals_ea_int_regular_int():
  213. # GH#46026
  214. mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
  215. mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]])
  216. assert not mi1.equals(mi2)
  217. assert not mi2.equals(mi1)