123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- """
- Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
- """
- import numpy as np
- from pandas import (
- NA,
- DataFrame,
- Float64Dtype,
- Series,
- StringDtype,
- Timedelta,
- isna,
- )
- import pandas._testing as tm
- class TestDataFrameMask:
- def test_mask(self):
- df = DataFrame(np.random.randn(5, 3))
- cond = df > 0
- rs = df.where(cond, np.nan)
- tm.assert_frame_equal(rs, df.mask(df <= 0))
- tm.assert_frame_equal(rs, df.mask(~cond))
- other = DataFrame(np.random.randn(5, 3))
- rs = df.where(cond, other)
- tm.assert_frame_equal(rs, df.mask(df <= 0, other))
- tm.assert_frame_equal(rs, df.mask(~cond, other))
- def test_mask2(self):
- # see GH#21891
- df = DataFrame([1, 2])
- res = df.mask([[True], [False]])
- exp = DataFrame([np.nan, 2])
- tm.assert_frame_equal(res, exp)
- def test_mask_inplace(self):
- # GH#8801
- df = DataFrame(np.random.randn(5, 3))
- cond = df > 0
- rdf = df.copy()
- return_value = rdf.where(cond, inplace=True)
- assert return_value is None
- tm.assert_frame_equal(rdf, df.where(cond))
- tm.assert_frame_equal(rdf, df.mask(~cond))
- rdf = df.copy()
- return_value = rdf.where(cond, -df, inplace=True)
- assert return_value is None
- tm.assert_frame_equal(rdf, df.where(cond, -df))
- tm.assert_frame_equal(rdf, df.mask(~cond, -df))
- def test_mask_edge_case_1xN_frame(self):
- # GH#4071
- df = DataFrame([[1, 2]])
- res = df.mask(DataFrame([[True, False]]))
- expec = DataFrame([[np.nan, 2]])
- tm.assert_frame_equal(res, expec)
- def test_mask_callable(self):
- # GH#12533
- df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
- result = df.mask(lambda x: x > 4, lambda x: x + 1)
- exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
- tm.assert_frame_equal(result, exp)
- tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
- # return ndarray and scalar
- result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
- exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
- tm.assert_frame_equal(result, exp)
- tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
- # chain
- result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
- exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
- tm.assert_frame_equal(result, exp)
- tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))
- def test_mask_dtype_bool_conversion(self):
- # GH#3733
- df = DataFrame(data=np.random.randn(100, 50))
- df = df.where(df > 0) # create nans
- bools = df > 0
- mask = isna(df)
- expected = bools.astype(object).mask(mask)
- result = bools.mask(mask)
- tm.assert_frame_equal(result, expected)
- def test_mask_stringdtype(frame_or_series):
- # GH 40824
- obj = DataFrame(
- {"A": ["foo", "bar", "baz", NA]},
- index=["id1", "id2", "id3", "id4"],
- dtype=StringDtype(),
- )
- filtered_obj = DataFrame(
- {"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
- )
- expected = DataFrame(
- {"A": [NA, "this", "that", NA]},
- index=["id1", "id2", "id3", "id4"],
- dtype=StringDtype(),
- )
- if frame_or_series is Series:
- obj = obj["A"]
- filtered_obj = filtered_obj["A"]
- expected = expected["A"]
- filter_ser = Series([False, True, True, False])
- result = obj.mask(filter_ser, filtered_obj)
- tm.assert_equal(result, expected)
- def test_mask_where_dtype_timedelta():
- # https://github.com/pandas-dev/pandas/issues/39548
- df = DataFrame([Timedelta(i, unit="d") for i in range(5)])
- expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
- tm.assert_frame_equal(df.mask(df.notna()), expected)
- expected = DataFrame(
- [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
- )
- tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)
- def test_mask_return_dtype():
- # GH#50488
- ser = Series([0.0, 1.0, 2.0, 3.0], dtype=Float64Dtype())
- cond = ~ser.isna()
- other = Series([True, False, True, False])
- excepted = Series([1.0, 0.0, 1.0, 0.0], dtype=ser.dtype)
- result = ser.mask(cond, other)
- tm.assert_series_equal(result, excepted)
- def test_mask_inplace_no_other():
- # GH#51685
- df = DataFrame({"a": [1, 2], "b": ["x", "y"]})
- cond = DataFrame({"a": [True, False], "b": [False, True]})
- df.mask(cond, inplace=True)
- expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]})
- tm.assert_frame_equal(df, expected)
|