test_mask.py 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. """
  2. Tests for DataFrame.mask; tests DataFrame.where as a side-effect.
  3. """
  4. import numpy as np
  5. from pandas import (
  6. NA,
  7. DataFrame,
  8. Float64Dtype,
  9. Series,
  10. StringDtype,
  11. Timedelta,
  12. isna,
  13. )
  14. import pandas._testing as tm
  15. class TestDataFrameMask:
  16. def test_mask(self):
  17. df = DataFrame(np.random.randn(5, 3))
  18. cond = df > 0
  19. rs = df.where(cond, np.nan)
  20. tm.assert_frame_equal(rs, df.mask(df <= 0))
  21. tm.assert_frame_equal(rs, df.mask(~cond))
  22. other = DataFrame(np.random.randn(5, 3))
  23. rs = df.where(cond, other)
  24. tm.assert_frame_equal(rs, df.mask(df <= 0, other))
  25. tm.assert_frame_equal(rs, df.mask(~cond, other))
  26. def test_mask2(self):
  27. # see GH#21891
  28. df = DataFrame([1, 2])
  29. res = df.mask([[True], [False]])
  30. exp = DataFrame([np.nan, 2])
  31. tm.assert_frame_equal(res, exp)
  32. def test_mask_inplace(self):
  33. # GH#8801
  34. df = DataFrame(np.random.randn(5, 3))
  35. cond = df > 0
  36. rdf = df.copy()
  37. return_value = rdf.where(cond, inplace=True)
  38. assert return_value is None
  39. tm.assert_frame_equal(rdf, df.where(cond))
  40. tm.assert_frame_equal(rdf, df.mask(~cond))
  41. rdf = df.copy()
  42. return_value = rdf.where(cond, -df, inplace=True)
  43. assert return_value is None
  44. tm.assert_frame_equal(rdf, df.where(cond, -df))
  45. tm.assert_frame_equal(rdf, df.mask(~cond, -df))
  46. def test_mask_edge_case_1xN_frame(self):
  47. # GH#4071
  48. df = DataFrame([[1, 2]])
  49. res = df.mask(DataFrame([[True, False]]))
  50. expec = DataFrame([[np.nan, 2]])
  51. tm.assert_frame_equal(res, expec)
  52. def test_mask_callable(self):
  53. # GH#12533
  54. df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
  55. result = df.mask(lambda x: x > 4, lambda x: x + 1)
  56. exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
  57. tm.assert_frame_equal(result, exp)
  58. tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
  59. # return ndarray and scalar
  60. result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
  61. exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
  62. tm.assert_frame_equal(result, exp)
  63. tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
  64. # chain
  65. result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
  66. exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
  67. tm.assert_frame_equal(result, exp)
  68. tm.assert_frame_equal(result, (df + 2).mask((df + 2) > 8, (df + 2) + 10))
  69. def test_mask_dtype_bool_conversion(self):
  70. # GH#3733
  71. df = DataFrame(data=np.random.randn(100, 50))
  72. df = df.where(df > 0) # create nans
  73. bools = df > 0
  74. mask = isna(df)
  75. expected = bools.astype(object).mask(mask)
  76. result = bools.mask(mask)
  77. tm.assert_frame_equal(result, expected)
  78. def test_mask_stringdtype(frame_or_series):
  79. # GH 40824
  80. obj = DataFrame(
  81. {"A": ["foo", "bar", "baz", NA]},
  82. index=["id1", "id2", "id3", "id4"],
  83. dtype=StringDtype(),
  84. )
  85. filtered_obj = DataFrame(
  86. {"A": ["this", "that"]}, index=["id2", "id3"], dtype=StringDtype()
  87. )
  88. expected = DataFrame(
  89. {"A": [NA, "this", "that", NA]},
  90. index=["id1", "id2", "id3", "id4"],
  91. dtype=StringDtype(),
  92. )
  93. if frame_or_series is Series:
  94. obj = obj["A"]
  95. filtered_obj = filtered_obj["A"]
  96. expected = expected["A"]
  97. filter_ser = Series([False, True, True, False])
  98. result = obj.mask(filter_ser, filtered_obj)
  99. tm.assert_equal(result, expected)
  100. def test_mask_where_dtype_timedelta():
  101. # https://github.com/pandas-dev/pandas/issues/39548
  102. df = DataFrame([Timedelta(i, unit="d") for i in range(5)])
  103. expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]"))
  104. tm.assert_frame_equal(df.mask(df.notna()), expected)
  105. expected = DataFrame(
  106. [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")]
  107. )
  108. tm.assert_frame_equal(df.where(df > Timedelta(2, unit="d")), expected)
  109. def test_mask_return_dtype():
  110. # GH#50488
  111. ser = Series([0.0, 1.0, 2.0, 3.0], dtype=Float64Dtype())
  112. cond = ~ser.isna()
  113. other = Series([True, False, True, False])
  114. excepted = Series([1.0, 0.0, 1.0, 0.0], dtype=ser.dtype)
  115. result = ser.mask(cond, other)
  116. tm.assert_series_equal(result, excepted)
  117. def test_mask_inplace_no_other():
  118. # GH#51685
  119. df = DataFrame({"a": [1, 2], "b": ["x", "y"]})
  120. cond = DataFrame({"a": [True, False], "b": [False, True]})
  121. df.mask(cond, inplace=True)
  122. expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]})
  123. tm.assert_frame_equal(df, expected)