test_interp_fillna.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. NA,
  5. DataFrame,
  6. Interval,
  7. NaT,
  8. Series,
  9. Timestamp,
  10. interval_range,
  11. )
  12. import pandas._testing as tm
  13. from pandas.tests.copy_view.util import get_array
  14. @pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
  15. def test_interpolate_no_op(using_copy_on_write, method):
  16. df = DataFrame({"a": [1, 2]})
  17. df_orig = df.copy()
  18. result = df.interpolate(method=method)
  19. if using_copy_on_write:
  20. assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  21. else:
  22. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  23. result.iloc[0, 0] = 100
  24. if using_copy_on_write:
  25. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  26. tm.assert_frame_equal(df, df_orig)
  27. @pytest.mark.parametrize("func", ["ffill", "bfill"])
  28. def test_interp_fill_functions(using_copy_on_write, func):
  29. # Check that these takes the same code paths as interpolate
  30. df = DataFrame({"a": [1, 2]})
  31. df_orig = df.copy()
  32. result = getattr(df, func)()
  33. if using_copy_on_write:
  34. assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  35. else:
  36. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  37. result.iloc[0, 0] = 100
  38. if using_copy_on_write:
  39. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  40. tm.assert_frame_equal(df, df_orig)
  41. @pytest.mark.parametrize("func", ["ffill", "bfill"])
  42. @pytest.mark.parametrize(
  43. "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
  44. )
  45. def test_interpolate_triggers_copy(using_copy_on_write, vals, func):
  46. df = DataFrame({"a": vals})
  47. result = getattr(df, func)()
  48. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  49. if using_copy_on_write:
  50. # Check that we don't have references when triggering a copy
  51. assert result._mgr._has_no_reference(0)
  52. @pytest.mark.parametrize(
  53. "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
  54. )
  55. def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
  56. df = DataFrame({"a": vals})
  57. arr = get_array(df, "a")
  58. df.interpolate(method="linear", inplace=True)
  59. assert np.shares_memory(arr, get_array(df, "a"))
  60. if using_copy_on_write:
  61. # Check that we don't have references when triggering a copy
  62. assert df._mgr._has_no_reference(0)
  63. @pytest.mark.parametrize(
  64. "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
  65. )
  66. def test_interpolate_inplace_with_refs(using_copy_on_write, vals):
  67. df = DataFrame({"a": [1, np.nan, 2]})
  68. df_orig = df.copy()
  69. arr = get_array(df, "a")
  70. view = df[:]
  71. df.interpolate(method="linear", inplace=True)
  72. if using_copy_on_write:
  73. # Check that copy was triggered in interpolate and that we don't
  74. # have any references left
  75. assert not np.shares_memory(arr, get_array(df, "a"))
  76. tm.assert_frame_equal(df_orig, view)
  77. assert df._mgr._has_no_reference(0)
  78. assert view._mgr._has_no_reference(0)
  79. else:
  80. assert np.shares_memory(arr, get_array(df, "a"))
  81. def test_interpolate_cleaned_fill_method(using_copy_on_write):
  82. # Check that "method is set to None" case works correctly
  83. df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
  84. df_orig = df.copy()
  85. result = df.interpolate(method="asfreq")
  86. if using_copy_on_write:
  87. assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  88. else:
  89. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  90. result.iloc[0, 0] = Timestamp("2021-12-31")
  91. if using_copy_on_write:
  92. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  93. tm.assert_frame_equal(df, df_orig)
  94. def test_interpolate_object_convert_no_op(using_copy_on_write):
  95. df = DataFrame({"a": ["a", "b", "c"], "b": 1})
  96. arr_a = get_array(df, "a")
  97. df.interpolate(method="pad", inplace=True)
  98. # Now CoW makes a copy, it should not!
  99. if using_copy_on_write:
  100. assert df._mgr._has_no_reference(0)
  101. assert np.shares_memory(arr_a, get_array(df, "a"))
  102. def test_interpolate_object_convert_copies(using_copy_on_write):
  103. df = DataFrame({"a": Series([1, 2], dtype=object), "b": 1})
  104. arr_a = get_array(df, "a")
  105. df.interpolate(method="pad", inplace=True)
  106. if using_copy_on_write:
  107. assert df._mgr._has_no_reference(0)
  108. assert not np.shares_memory(arr_a, get_array(df, "a"))
  109. def test_interpolate_downcast(using_copy_on_write):
  110. df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
  111. arr_a = get_array(df, "a")
  112. df.interpolate(method="pad", inplace=True, downcast="infer")
  113. if using_copy_on_write:
  114. assert df._mgr._has_no_reference(0)
  115. assert np.shares_memory(arr_a, get_array(df, "a"))
  116. def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write):
  117. df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
  118. df_orig = df.copy()
  119. arr_a = get_array(df, "a")
  120. view = df[:]
  121. df.interpolate(method="pad", inplace=True, downcast="infer")
  122. if using_copy_on_write:
  123. assert df._mgr._has_no_reference(0)
  124. assert not np.shares_memory(arr_a, get_array(df, "a"))
  125. tm.assert_frame_equal(df_orig, view)
  126. else:
  127. tm.assert_frame_equal(df, view)
  128. def test_fillna(using_copy_on_write):
  129. df = DataFrame({"a": [1.5, np.nan], "b": 1})
  130. df_orig = df.copy()
  131. df2 = df.fillna(5.5)
  132. if using_copy_on_write:
  133. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  134. else:
  135. assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  136. df2.iloc[0, 1] = 100
  137. tm.assert_frame_equal(df_orig, df)
  138. def test_fillna_dict(using_copy_on_write):
  139. df = DataFrame({"a": [1.5, np.nan], "b": 1})
  140. df_orig = df.copy()
  141. df2 = df.fillna({"a": 100.5})
  142. if using_copy_on_write:
  143. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  144. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  145. else:
  146. assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  147. df2.iloc[0, 1] = 100
  148. tm.assert_frame_equal(df_orig, df)
  149. @pytest.mark.parametrize("downcast", [None, False])
  150. def test_fillna_inplace(using_copy_on_write, downcast):
  151. df = DataFrame({"a": [1.5, np.nan], "b": 1})
  152. arr_a = get_array(df, "a")
  153. arr_b = get_array(df, "b")
  154. df.fillna(5.5, inplace=True, downcast=downcast)
  155. assert np.shares_memory(get_array(df, "a"), arr_a)
  156. assert np.shares_memory(get_array(df, "b"), arr_b)
  157. if using_copy_on_write:
  158. assert df._mgr._has_no_reference(0)
  159. assert df._mgr._has_no_reference(1)
  160. def test_fillna_inplace_reference(using_copy_on_write):
  161. df = DataFrame({"a": [1.5, np.nan], "b": 1})
  162. df_orig = df.copy()
  163. arr_a = get_array(df, "a")
  164. arr_b = get_array(df, "b")
  165. view = df[:]
  166. df.fillna(5.5, inplace=True)
  167. if using_copy_on_write:
  168. assert not np.shares_memory(get_array(df, "a"), arr_a)
  169. assert np.shares_memory(get_array(df, "b"), arr_b)
  170. assert view._mgr._has_no_reference(0)
  171. assert df._mgr._has_no_reference(0)
  172. tm.assert_frame_equal(view, df_orig)
  173. else:
  174. assert np.shares_memory(get_array(df, "a"), arr_a)
  175. assert np.shares_memory(get_array(df, "b"), arr_b)
  176. expected = DataFrame({"a": [1.5, 5.5], "b": 1})
  177. tm.assert_frame_equal(df, expected)
  178. def test_fillna_interval_inplace_reference(using_copy_on_write):
  179. ser = Series(interval_range(start=0, end=5), name="a")
  180. ser.iloc[1] = np.nan
  181. ser_orig = ser.copy()
  182. view = ser[:]
  183. ser.fillna(value=Interval(left=0, right=5), inplace=True)
  184. if using_copy_on_write:
  185. assert not np.shares_memory(
  186. get_array(ser, "a").left.values, get_array(view, "a").left.values
  187. )
  188. tm.assert_series_equal(view, ser_orig)
  189. else:
  190. assert np.shares_memory(
  191. get_array(ser, "a").left.values, get_array(view, "a").left.values
  192. )
  193. def test_fillna_series_empty_arg(using_copy_on_write):
  194. ser = Series([1, np.nan, 2])
  195. ser_orig = ser.copy()
  196. result = ser.fillna({})
  197. if using_copy_on_write:
  198. assert np.shares_memory(get_array(ser), get_array(result))
  199. else:
  200. assert not np.shares_memory(get_array(ser), get_array(result))
  201. ser.iloc[0] = 100.5
  202. tm.assert_series_equal(ser_orig, result)
  203. def test_fillna_series_empty_arg_inplace(using_copy_on_write):
  204. ser = Series([1, np.nan, 2])
  205. arr = get_array(ser)
  206. ser.fillna({}, inplace=True)
  207. assert np.shares_memory(get_array(ser), arr)
  208. if using_copy_on_write:
  209. assert ser._mgr._has_no_reference(0)
  210. def test_fillna_ea_noop_shares_memory(
  211. using_copy_on_write, any_numeric_ea_and_arrow_dtype
  212. ):
  213. df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
  214. df_orig = df.copy()
  215. df2 = df.fillna(100)
  216. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  217. if using_copy_on_write:
  218. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  219. assert not df2._mgr._has_no_reference(1)
  220. else:
  221. assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  222. tm.assert_frame_equal(df_orig, df)
  223. df2.iloc[0, 1] = 100
  224. if using_copy_on_write:
  225. assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  226. assert df2._mgr._has_no_reference(1)
  227. assert df._mgr._has_no_reference(1)
  228. tm.assert_frame_equal(df_orig, df)
  229. def test_fillna_inplace_ea_noop_shares_memory(
  230. using_copy_on_write, any_numeric_ea_and_arrow_dtype
  231. ):
  232. df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
  233. df_orig = df.copy()
  234. view = df[:]
  235. df.fillna(100, inplace=True)
  236. assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
  237. if using_copy_on_write:
  238. assert np.shares_memory(get_array(df, "b"), get_array(view, "b"))
  239. assert not df._mgr._has_no_reference(1)
  240. assert not view._mgr._has_no_reference(1)
  241. else:
  242. assert not np.shares_memory(get_array(df, "b"), get_array(view, "b"))
  243. df.iloc[0, 1] = 100
  244. tm.assert_frame_equal(df_orig, view)