# test_assert_frame_equal.py
  1. import pytest
  2. import pandas as pd
  3. from pandas import DataFrame
  4. import pandas._testing as tm
  5. @pytest.fixture(params=[True, False])
  6. def by_blocks_fixture(request):
  7. return request.param
  8. @pytest.fixture(params=["DataFrame", "Series"])
  9. def obj_fixture(request):
  10. return request.param
  11. def _assert_frame_equal_both(a, b, **kwargs):
  12. """
  13. Check that two DataFrame equal.
  14. This check is performed commutatively.
  15. Parameters
  16. ----------
  17. a : DataFrame
  18. The first DataFrame to compare.
  19. b : DataFrame
  20. The second DataFrame to compare.
  21. kwargs : dict
  22. The arguments passed to `tm.assert_frame_equal`.
  23. """
  24. tm.assert_frame_equal(a, b, **kwargs)
  25. tm.assert_frame_equal(b, a, **kwargs)
  26. @pytest.mark.parametrize("check_like", [True, False])
  27. def test_frame_equal_row_order_mismatch(check_like, obj_fixture):
  28. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"])
  29. df2 = DataFrame({"A": [3, 2, 1], "B": [6, 5, 4]}, index=["c", "b", "a"])
  30. if not check_like: # Do not ignore row-column orderings.
  31. msg = f"{obj_fixture}.index are different"
  32. with pytest.raises(AssertionError, match=msg):
  33. tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture)
  34. else:
  35. _assert_frame_equal_both(df1, df2, check_like=check_like, obj=obj_fixture)
  36. @pytest.mark.parametrize(
  37. "df1,df2",
  38. [
  39. (DataFrame({"A": [1, 2, 3]}), DataFrame({"A": [1, 2, 3, 4]})),
  40. (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), DataFrame({"A": [1, 2, 3]})),
  41. ],
  42. )
  43. def test_frame_equal_shape_mismatch(df1, df2, obj_fixture):
  44. msg = f"{obj_fixture} are different"
  45. with pytest.raises(AssertionError, match=msg):
  46. tm.assert_frame_equal(df1, df2, obj=obj_fixture)
  47. @pytest.mark.parametrize(
  48. "df1,df2,msg",
  49. [
  50. # Index
  51. (
  52. DataFrame.from_records({"a": [1, 2], "c": ["l1", "l2"]}, index=["a"]),
  53. DataFrame.from_records({"a": [1.0, 2.0], "c": ["l1", "l2"]}, index=["a"]),
  54. "DataFrame\\.index are different",
  55. ),
  56. # MultiIndex
  57. (
  58. DataFrame.from_records(
  59. {"a": [1, 2], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"]
  60. ),
  61. DataFrame.from_records(
  62. {"a": [1.0, 2.0], "b": [2.1, 1.5], "c": ["l1", "l2"]}, index=["a", "b"]
  63. ),
  64. "MultiIndex level \\[0\\] are different",
  65. ),
  66. ],
  67. )
  68. def test_frame_equal_index_dtype_mismatch(df1, df2, msg, check_index_type):
  69. kwargs = {"check_index_type": check_index_type}
  70. if check_index_type:
  71. with pytest.raises(AssertionError, match=msg):
  72. tm.assert_frame_equal(df1, df2, **kwargs)
  73. else:
  74. tm.assert_frame_equal(df1, df2, **kwargs)
  75. def test_empty_dtypes(check_dtype):
  76. columns = ["col1", "col2"]
  77. df1 = DataFrame(columns=columns)
  78. df2 = DataFrame(columns=columns)
  79. kwargs = {"check_dtype": check_dtype}
  80. df1["col1"] = df1["col1"].astype("int64")
  81. if check_dtype:
  82. msg = r"Attributes of DataFrame\..* are different"
  83. with pytest.raises(AssertionError, match=msg):
  84. tm.assert_frame_equal(df1, df2, **kwargs)
  85. else:
  86. tm.assert_frame_equal(df1, df2, **kwargs)
  87. @pytest.mark.parametrize("check_like", [True, False])
  88. def test_frame_equal_index_mismatch(check_like, obj_fixture):
  89. msg = f"""{obj_fixture}\\.index are different
  90. {obj_fixture}\\.index values are different \\(33\\.33333 %\\)
  91. \\[left\\]: Index\\(\\['a', 'b', 'c'\\], dtype='object'\\)
  92. \\[right\\]: Index\\(\\['a', 'b', 'd'\\], dtype='object'\\)
  93. At positional index 2, first diff: c != d"""
  94. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"])
  95. df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "d"])
  96. with pytest.raises(AssertionError, match=msg):
  97. tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture)
  98. @pytest.mark.parametrize("check_like", [True, False])
  99. def test_frame_equal_columns_mismatch(check_like, obj_fixture):
  100. msg = f"""{obj_fixture}\\.columns are different
  101. {obj_fixture}\\.columns values are different \\(50\\.0 %\\)
  102. \\[left\\]: Index\\(\\['A', 'B'\\], dtype='object'\\)
  103. \\[right\\]: Index\\(\\['A', 'b'\\], dtype='object'\\)"""
  104. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"])
  105. df2 = DataFrame({"A": [1, 2, 3], "b": [4, 5, 6]}, index=["a", "b", "c"])
  106. with pytest.raises(AssertionError, match=msg):
  107. tm.assert_frame_equal(df1, df2, check_like=check_like, obj=obj_fixture)
  108. def test_frame_equal_block_mismatch(by_blocks_fixture, obj_fixture):
  109. obj = obj_fixture
  110. msg = f"""{obj}\\.iloc\\[:, 1\\] \\(column name="B"\\) are different
  111. {obj}\\.iloc\\[:, 1\\] \\(column name="B"\\) values are different \\(33\\.33333 %\\)
  112. \\[index\\]: \\[0, 1, 2\\]
  113. \\[left\\]: \\[4, 5, 6\\]
  114. \\[right\\]: \\[4, 5, 7\\]"""
  115. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  116. df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 7]})
  117. with pytest.raises(AssertionError, match=msg):
  118. tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture)
  119. @pytest.mark.parametrize(
  120. "df1,df2,msg",
  121. [
  122. (
  123. DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}),
  124. DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "e̊"]}),
  125. """{obj}\\.iloc\\[:, 1\\] \\(column name="E"\\) are different
  126. {obj}\\.iloc\\[:, 1\\] \\(column name="E"\\) values are different \\(33\\.33333 %\\)
  127. \\[index\\]: \\[0, 1, 2\\]
  128. \\[left\\]: \\[é, è, ë\\]
  129. \\[right\\]: \\[é, è, e̊\\]""",
  130. ),
  131. (
  132. DataFrame({"A": ["á", "à", "ä"], "E": ["é", "è", "ë"]}),
  133. DataFrame({"A": ["a", "a", "a"], "E": ["e", "e", "e"]}),
  134. """{obj}\\.iloc\\[:, 0\\] \\(column name="A"\\) are different
  135. {obj}\\.iloc\\[:, 0\\] \\(column name="A"\\) values are different \\(100\\.0 %\\)
  136. \\[index\\]: \\[0, 1, 2\\]
  137. \\[left\\]: \\[á, à, ä\\]
  138. \\[right\\]: \\[a, a, a\\]""",
  139. ),
  140. ],
  141. )
  142. def test_frame_equal_unicode(df1, df2, msg, by_blocks_fixture, obj_fixture):
  143. # see gh-20503
  144. #
  145. # Test ensures that `tm.assert_frame_equals` raises the right exception
  146. # when comparing DataFrames containing differing unicode objects.
  147. msg = msg.format(obj=obj_fixture)
  148. with pytest.raises(AssertionError, match=msg):
  149. tm.assert_frame_equal(df1, df2, by_blocks=by_blocks_fixture, obj=obj_fixture)
  150. def test_assert_frame_equal_extension_dtype_mismatch():
  151. # https://github.com/pandas-dev/pandas/issues/32747
  152. left = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
  153. right = left.astype(int)
  154. msg = (
  155. "Attributes of DataFrame\\.iloc\\[:, 0\\] "
  156. '\\(column name="a"\\) are different\n\n'
  157. 'Attribute "dtype" are different\n'
  158. "\\[left\\]: Int64\n"
  159. "\\[right\\]: int[32|64]"
  160. )
  161. tm.assert_frame_equal(left, right, check_dtype=False)
  162. with pytest.raises(AssertionError, match=msg):
  163. tm.assert_frame_equal(left, right, check_dtype=True)
  164. def test_assert_frame_equal_interval_dtype_mismatch():
  165. # https://github.com/pandas-dev/pandas/issues/32747
  166. left = DataFrame({"a": [pd.Interval(0, 1)]}, dtype="interval")
  167. right = left.astype(object)
  168. msg = (
  169. "Attributes of DataFrame\\.iloc\\[:, 0\\] "
  170. '\\(column name="a"\\) are different\n\n'
  171. 'Attribute "dtype" are different\n'
  172. "\\[left\\]: interval\\[int64, right\\]\n"
  173. "\\[right\\]: object"
  174. )
  175. tm.assert_frame_equal(left, right, check_dtype=False)
  176. with pytest.raises(AssertionError, match=msg):
  177. tm.assert_frame_equal(left, right, check_dtype=True)
  178. @pytest.mark.parametrize("right_dtype", ["Int32", "int64"])
  179. def test_assert_frame_equal_ignore_extension_dtype_mismatch(right_dtype):
  180. # https://github.com/pandas-dev/pandas/issues/35715
  181. left = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
  182. right = DataFrame({"a": [1, 2, 3]}, dtype=right_dtype)
  183. tm.assert_frame_equal(left, right, check_dtype=False)
  184. @pytest.mark.parametrize(
  185. "dtype",
  186. [
  187. ("timedelta64[ns]"),
  188. ("datetime64[ns, UTC]"),
  189. ("Period[D]"),
  190. ],
  191. )
  192. def test_assert_frame_equal_datetime_like_dtype_mismatch(dtype):
  193. df1 = DataFrame({"a": []}, dtype=dtype)
  194. df2 = DataFrame({"a": []})
  195. tm.assert_frame_equal(df1, df2, check_dtype=False)
  196. def test_allows_duplicate_labels():
  197. left = DataFrame()
  198. right = DataFrame().set_flags(allows_duplicate_labels=False)
  199. tm.assert_frame_equal(left, left)
  200. tm.assert_frame_equal(right, right)
  201. tm.assert_frame_equal(left, right, check_flags=False)
  202. tm.assert_frame_equal(right, left, check_flags=False)
  203. with pytest.raises(AssertionError, match="<Flags"):
  204. tm.assert_frame_equal(left, right)
  205. with pytest.raises(AssertionError, match="<Flags"):
  206. tm.assert_frame_equal(left, right)
  207. def test_assert_frame_equal_columns_mixed_dtype():
  208. # GH#39168
  209. df = DataFrame([[0, 1, 2]], columns=["foo", "bar", 42], index=[1, "test", 2])
  210. tm.assert_frame_equal(df, df, check_like=True)
  211. def test_frame_equal_extension_dtype(frame_or_series, any_numeric_ea_dtype):
  212. # GH#39410
  213. obj = frame_or_series([1, 2], dtype=any_numeric_ea_dtype)
  214. tm.assert_equal(obj, obj, check_exact=True)
  215. @pytest.mark.parametrize("indexer", [(0, 1), (1, 0)])
  216. def test_frame_equal_mixed_dtypes(frame_or_series, any_numeric_ea_dtype, indexer):
  217. dtypes = (any_numeric_ea_dtype, "int64")
  218. obj1 = frame_or_series([1, 2], dtype=dtypes[indexer[0]])
  219. obj2 = frame_or_series([1, 2], dtype=dtypes[indexer[1]])
  220. msg = r'(Series|DataFrame.iloc\[:, 0\] \(column name="0"\) classes) are different'
  221. with pytest.raises(AssertionError, match=msg):
  222. tm.assert_equal(obj1, obj2, check_exact=True, check_dtype=False)
  223. def test_assert_frame_equal_check_like_different_indexes():
  224. # GH#39739
  225. df1 = DataFrame(index=pd.Index([], dtype="object"))
  226. df2 = DataFrame(index=pd.RangeIndex(start=0, stop=0, step=1))
  227. with pytest.raises(AssertionError, match="DataFrame.index are different"):
  228. tm.assert_frame_equal(df1, df2, check_like=True)
  229. def test_assert_frame_equal_checking_allow_dups_flag():
  230. # GH#45554
  231. left = DataFrame([[1, 2], [3, 4]])
  232. left.flags.allows_duplicate_labels = False
  233. right = DataFrame([[1, 2], [3, 4]])
  234. right.flags.allows_duplicate_labels = True
  235. tm.assert_frame_equal(left, right, check_flags=False)
  236. with pytest.raises(AssertionError, match="allows_duplicate_labels"):
  237. tm.assert_frame_equal(left, right, check_flags=True)
  238. def test_assert_frame_equal_check_like_categorical_midx():
  239. # GH#48975
  240. left = DataFrame(
  241. [[1], [2], [3]],
  242. index=pd.MultiIndex.from_arrays(
  243. [
  244. pd.Categorical(["a", "b", "c"]),
  245. pd.Categorical(["a", "b", "c"]),
  246. ]
  247. ),
  248. )
  249. right = DataFrame(
  250. [[3], [2], [1]],
  251. index=pd.MultiIndex.from_arrays(
  252. [
  253. pd.Categorical(["c", "b", "a"]),
  254. pd.Categorical(["c", "b", "a"]),
  255. ]
  256. ),
  257. )
  258. tm.assert_frame_equal(left, right, check_like=True)
  259. def test_assert_frame_equal_ea_column_definition_in_exception_mask():
  260. # GH#50323
  261. df1 = DataFrame({"a": pd.Series([pd.NA, 1], dtype="Int64")})
  262. df2 = DataFrame({"a": pd.Series([1, 1], dtype="Int64")})
  263. msg = r'DataFrame.iloc\[:, 0\] \(column name="a"\) NA mask values are different'
  264. with pytest.raises(AssertionError, match=msg):
  265. tm.assert_frame_equal(df1, df2)
  266. def test_assert_frame_equal_ea_column_definition_in_exception():
  267. # GH#50323
  268. df1 = DataFrame({"a": pd.Series([pd.NA, 1], dtype="Int64")})
  269. df2 = DataFrame({"a": pd.Series([pd.NA, 2], dtype="Int64")})
  270. msg = r'DataFrame.iloc\[:, 0\] \(column name="a"\) values are different'
  271. with pytest.raises(AssertionError, match=msg):
  272. tm.assert_frame_equal(df1, df2)
  273. with pytest.raises(AssertionError, match=msg):
  274. tm.assert_frame_equal(df1, df2, check_exact=True)
  275. def test_assert_frame_equal_ts_column():
  276. # GH#50323
  277. df1 = DataFrame({"a": [pd.Timestamp("2019-12-31"), pd.Timestamp("2020-12-31")]})
  278. df2 = DataFrame({"a": [pd.Timestamp("2020-12-31"), pd.Timestamp("2020-12-31")]})
  279. msg = r'DataFrame.iloc\[:, 0\] \(column name="a"\) values are different'
  280. with pytest.raises(AssertionError, match=msg):
  281. tm.assert_frame_equal(df1, df2)