# test_equals.py
import numpy as np

from pandas import (
    DataFrame,
    date_range,
)
import pandas._testing as tm
  7. class TestEquals:
  8. def test_dataframe_not_equal(self):
  9. # see GH#28839
  10. df1 = DataFrame({"a": [1, 2], "b": ["s", "d"]})
  11. df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
  12. assert df1.equals(df2) is False
  13. def test_equals_different_blocks(self, using_array_manager):
  14. # GH#9330
  15. df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
  16. df1 = df0.reset_index()[["A", "B", "C"]]
  17. if not using_array_manager:
  18. # this assert verifies that the above operations have
  19. # induced a block rearrangement
  20. assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
  21. # do the real tests
  22. tm.assert_frame_equal(df0, df1)
  23. assert df0.equals(df1)
  24. assert df1.equals(df0)
  25. def test_equals(self):
  26. # Add object dtype column with nans
  27. index = np.random.random(10)
  28. df1 = DataFrame(np.random.random(10), index=index, columns=["floats"])
  29. df1["text"] = "the sky is so blue. we could use more chocolate.".split()
  30. df1["start"] = date_range("2000-1-1", periods=10, freq="T")
  31. df1["end"] = date_range("2000-1-1", periods=10, freq="D")
  32. df1["diff"] = df1["end"] - df1["start"]
  33. # Explicitly cast to object, to avoid implicit cast when setting np.nan
  34. df1["bool"] = (np.arange(10) % 3 == 0).astype(object)
  35. df1.loc[::2] = np.nan
  36. df2 = df1.copy()
  37. assert df1["text"].equals(df2["text"])
  38. assert df1["start"].equals(df2["start"])
  39. assert df1["end"].equals(df2["end"])
  40. assert df1["diff"].equals(df2["diff"])
  41. assert df1["bool"].equals(df2["bool"])
  42. assert df1.equals(df2)
  43. assert not df1.equals(object)
  44. # different dtype
  45. different = df1.copy()
  46. different["floats"] = different["floats"].astype("float32")
  47. assert not df1.equals(different)
  48. # different index
  49. different_index = -index
  50. different = df2.set_index(different_index)
  51. assert not df1.equals(different)
  52. # different columns
  53. different = df2.copy()
  54. different.columns = df2.columns[::-1]
  55. assert not df1.equals(different)
  56. # DatetimeIndex
  57. index = date_range("2000-1-1", periods=10, freq="T")
  58. df1 = df1.set_index(index)
  59. df2 = df1.copy()
  60. assert df1.equals(df2)
  61. # MultiIndex
  62. df3 = df1.set_index(["text"], append=True)
  63. df2 = df1.set_index(["text"], append=True)
  64. assert df3.equals(df2)
  65. df2 = df1.set_index(["floats"], append=True)
  66. assert not df3.equals(df2)
  67. # NaN in index
  68. df3 = df1.set_index(["floats"], append=True)
  69. df2 = df1.set_index(["floats"], append=True)
  70. assert df3.equals(df2)