1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- import numpy as np
- from pandas import (
- DataFrame,
- date_range,
- )
- import pandas._testing as tm
- class TestEquals:
- def test_dataframe_not_equal(self):
- # see GH#28839
- df1 = DataFrame({"a": [1, 2], "b": ["s", "d"]})
- df2 = DataFrame({"a": ["s", "d"], "b": [1, 2]})
- assert df1.equals(df2) is False
- def test_equals_different_blocks(self, using_array_manager):
- # GH#9330
- df0 = DataFrame({"A": ["x", "y"], "B": [1, 2], "C": ["w", "z"]})
- df1 = df0.reset_index()[["A", "B", "C"]]
- if not using_array_manager:
- # this assert verifies that the above operations have
- # induced a block rearrangement
- assert df0._mgr.blocks[0].dtype != df1._mgr.blocks[0].dtype
- # do the real tests
- tm.assert_frame_equal(df0, df1)
- assert df0.equals(df1)
- assert df1.equals(df0)
- def test_equals(self):
- # Add object dtype column with nans
- index = np.random.random(10)
- df1 = DataFrame(np.random.random(10), index=index, columns=["floats"])
- df1["text"] = "the sky is so blue. we could use more chocolate.".split()
- df1["start"] = date_range("2000-1-1", periods=10, freq="T")
- df1["end"] = date_range("2000-1-1", periods=10, freq="D")
- df1["diff"] = df1["end"] - df1["start"]
- # Explicitly cast to object, to avoid implicit cast when setting np.nan
- df1["bool"] = (np.arange(10) % 3 == 0).astype(object)
- df1.loc[::2] = np.nan
- df2 = df1.copy()
- assert df1["text"].equals(df2["text"])
- assert df1["start"].equals(df2["start"])
- assert df1["end"].equals(df2["end"])
- assert df1["diff"].equals(df2["diff"])
- assert df1["bool"].equals(df2["bool"])
- assert df1.equals(df2)
- assert not df1.equals(object)
- # different dtype
- different = df1.copy()
- different["floats"] = different["floats"].astype("float32")
- assert not df1.equals(different)
- # different index
- different_index = -index
- different = df2.set_index(different_index)
- assert not df1.equals(different)
- # different columns
- different = df2.copy()
- different.columns = df2.columns[::-1]
- assert not df1.equals(different)
- # DatetimeIndex
- index = date_range("2000-1-1", periods=10, freq="T")
- df1 = df1.set_index(index)
- df2 = df1.copy()
- assert df1.equals(df2)
- # MultiIndex
- df3 = df1.set_index(["text"], append=True)
- df2 = df1.set_index(["text"], append=True)
- assert df3.equals(df2)
- df2 = df1.set_index(["floats"], append=True)
- assert not df3.equals(df2)
- # NaN in index
- df3 = df1.set_index(["floats"], append=True)
- df2 = df1.set_index(["floats"], append=True)
- assert df3.equals(df2)
|