123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319 |
- import numpy as np
- import pytest
- from pandas import (
- NA,
- DataFrame,
- Interval,
- NaT,
- Series,
- Timestamp,
- interval_range,
- )
- import pandas._testing as tm
- from pandas.tests.copy_view.util import get_array
@pytest.mark.parametrize("method", ["pad", "nearest", "linear"])
def test_interpolate_no_op(using_copy_on_write, method):
    # Interpolating a frame that has no missing values: the result is expected
    # to share column "a" with the input only when CoW is enabled.
    df = DataFrame({"a": [1, 2]})
    snapshot = df.copy()

    out = df.interpolate(method=method)

    shared = np.shares_memory(get_array(out, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Writing into the result must never leak back into the input.
    out.iloc[0, 0] = 100

    if using_copy_on_write:
        # The write has to detach the result from the shared buffer.
        assert not np.shares_memory(get_array(out, "a"), get_array(df, "a"))
    tm.assert_frame_equal(df, snapshot)
@pytest.mark.parametrize("func", ["ffill", "bfill"])
def test_interp_fill_functions(using_copy_on_write, func):
    # Check that ffill/bfill take the same code paths as interpolate:
    # with nothing to fill, the result shares memory only under CoW.
    df = DataFrame({"a": [1, 2]})
    snapshot = df.copy()

    fill = getattr(df, func)
    out = fill()

    shared = np.shares_memory(get_array(out, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Mutating the result must leave the input untouched.
    out.iloc[0, 0] = 100

    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "a"), get_array(df, "a"))
    tm.assert_frame_equal(df, snapshot)
@pytest.mark.parametrize("func", ["ffill", "bfill"])
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_triggers_copy(using_copy_on_write, vals, func):
    # The input contains a missing value, so filling has to produce new data
    # regardless of the CoW setting.
    df = DataFrame({"a": vals})
    filled = getattr(df, func)()

    original_values = get_array(df, "a")
    filled_values = get_array(filled, "a")
    assert not np.shares_memory(filled_values, original_values)

    if using_copy_on_write:
        # A freshly copied block must not be tracked as referenced.
        assert filled._mgr._has_no_reference(0)
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals):
    # In-place interpolation on a frame nobody else references is expected to
    # keep working on the original buffer.
    df = DataFrame({"a": vals})
    buffer_before = get_array(df, "a")

    df.interpolate(method="linear", inplace=True)

    buffer_after = get_array(df, "a")
    assert np.shares_memory(buffer_before, buffer_after)

    if using_copy_on_write:
        # The block must not be tracked as referenced afterwards.
        assert df._mgr._has_no_reference(0)
@pytest.mark.parametrize(
    "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]]
)
def test_interpolate_inplace_with_refs(using_copy_on_write, vals):
    # In-place interpolation while a view of the frame is alive.
    # NOTE(review): `vals` is parametrized but never used; the frame is always
    # built from the float literal below -- confirm whether that is intended.
    df = DataFrame({"a": [1, np.nan, 2]})
    snapshot = df.copy()
    buffer_before = get_array(df, "a")
    view = df[:]

    df.interpolate(method="linear", inplace=True)

    if not using_copy_on_write:
        # Without CoW the operation works directly on the original buffer.
        assert np.shares_memory(buffer_before, get_array(df, "a"))
    else:
        # With CoW the live view forces a copy; the view keeps the old data
        # and neither object holds a reference afterwards.
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
        tm.assert_frame_equal(snapshot, view)
        assert df._mgr._has_no_reference(0)
        assert view._mgr._has_no_reference(0)
def test_interpolate_cleaned_fill_method(using_copy_on_write):
    # Check that the "method is set to None" case works correctly:
    # the result shares column "a" with the input only under CoW.
    df = DataFrame({"a": ["a", np.nan, "c"], "b": 1})
    snapshot = df.copy()

    out = df.interpolate(method="asfreq")

    shared = np.shares_memory(get_array(out, "a"), get_array(df, "a"))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Overwrite a value in the result; the input must stay as it was.
    out.iloc[0, 0] = Timestamp("2021-12-31")

    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "a"), get_array(df, "a"))
    tm.assert_frame_equal(df, snapshot)
def test_interpolate_object_convert_no_op(using_copy_on_write):
    # In-place "pad" on a string object column with nothing to fill.
    df = DataFrame({"a": ["a", "b", "c"], "b": 1})
    buffer_before = get_array(df, "a")

    df.interpolate(method="pad", inplace=True)

    if using_copy_on_write:
        # Under CoW the column is expected to keep its original buffer and
        # carry no references.
        assert df._mgr._has_no_reference(0)
        buffer_after = get_array(df, "a")
        assert np.shares_memory(buffer_before, buffer_after)
def test_interpolate_object_convert_copies(using_copy_on_write):
    # In-place "pad" on an object column that holds integers.
    obj_col = Series([1, 2], dtype=object)
    df = DataFrame({"a": obj_col, "b": 1})
    buffer_before = get_array(df, "a")

    df.interpolate(method="pad", inplace=True)

    if using_copy_on_write:
        # Under CoW the column is expected to end up in a new buffer that
        # carries no references.
        assert df._mgr._has_no_reference(0)
        buffer_after = get_array(df, "a")
        assert not np.shares_memory(buffer_before, buffer_after)
def test_interpolate_downcast(using_copy_on_write):
    # In-place "pad" with downcast="infer" on a frame without other
    # references: column "a" is expected to keep its original buffer.
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    buffer_before = get_array(df, "a")

    df.interpolate(method="pad", inplace=True, downcast="infer")

    if using_copy_on_write:
        assert df._mgr._has_no_reference(0)
    buffer_after = get_array(df, "a")
    assert np.shares_memory(buffer_before, buffer_after)
def test_interpolate_downcast_reference_triggers_copy(using_copy_on_write):
    # In-place "pad" with downcast="infer" while a view of the frame is alive.
    df = DataFrame({"a": [1, np.nan, 2.5], "b": 1})
    snapshot = df.copy()
    buffer_before = get_array(df, "a")
    view = df[:]

    df.interpolate(method="pad", inplace=True, downcast="infer")

    if not using_copy_on_write:
        # Without CoW the view observes the in-place modification.
        tm.assert_frame_equal(df, view)
    else:
        # With CoW the frame gets its own buffer and the view keeps the
        # original values.
        assert df._mgr._has_no_reference(0)
        assert not np.shares_memory(buffer_before, get_array(df, "a"))
        tm.assert_frame_equal(snapshot, view)
def test_fillna(using_copy_on_write):
    # fillna with a scalar: column "b" has nothing to fill and is expected to
    # be shared with the input only under CoW.
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    snapshot = df.copy()

    filled = df.fillna(5.5)

    b_shared = np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        assert b_shared
    else:
        assert not b_shared

    # Writing into the result must not change the input.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(snapshot, df)
def test_fillna_dict(using_copy_on_write):
    # fillna with a per-column mapping that only targets column "a".
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    snapshot = df.copy()

    filled = df.fillna({"a": 100.5})

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    else:
        # The untouched column is shared, the filled one is not.
        assert np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
        assert not np.shares_memory(get_array(df, "a"), get_array(filled, "a"))

    # Writing into the result must not change the input.
    filled.iloc[0, 1] = 100
    tm.assert_frame_equal(snapshot, df)
@pytest.mark.parametrize("downcast", [None, False])
def test_fillna_inplace(using_copy_on_write, downcast):
    # In-place fillna on a frame without other references is expected to keep
    # both columns in their original buffers.
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    buffers_before = {"a": get_array(df, "a"), "b": get_array(df, "b")}

    df.fillna(5.5, inplace=True, downcast=downcast)

    assert np.shares_memory(get_array(df, "a"), buffers_before["a"])
    assert np.shares_memory(get_array(df, "b"), buffers_before["b"])

    if using_copy_on_write:
        # Neither block may be tracked as referenced.
        assert df._mgr._has_no_reference(0)
        assert df._mgr._has_no_reference(1)
def test_fillna_inplace_reference(using_copy_on_write):
    # In-place fillna while a view of the frame is alive.
    df = DataFrame({"a": [1.5, np.nan], "b": 1})
    snapshot = df.copy()
    a_before = get_array(df, "a")
    b_before = get_array(df, "b")
    view = df[:]

    df.fillna(5.5, inplace=True)

    if not using_copy_on_write:
        # Without CoW both columns stay in their original buffers.
        assert np.shares_memory(get_array(df, "a"), a_before)
        assert np.shares_memory(get_array(df, "b"), b_before)
    else:
        # With CoW only the column that was filled is copied; the view keeps
        # the original values.
        assert not np.shares_memory(get_array(df, "a"), a_before)
        assert np.shares_memory(get_array(df, "b"), b_before)
        assert view._mgr._has_no_reference(0)
        assert df._mgr._has_no_reference(0)
        tm.assert_frame_equal(view, snapshot)

    # Either way the frame itself ends up filled.
    filled_expected = DataFrame({"a": [1.5, 5.5], "b": 1})
    tm.assert_frame_equal(df, filled_expected)
def test_fillna_interval_inplace_reference(using_copy_on_write):
    # In-place fillna on an interval Series while a view of it is alive.
    ser = Series(interval_range(start=0, end=5), name="a")
    ser.iloc[1] = np.nan

    snapshot = ser.copy()
    view = ser[:]
    fill_value = Interval(left=0, right=5)
    ser.fillna(value=fill_value, inplace=True)

    # Compare the buffers backing the left bounds of both objects.
    left_of_ser = get_array(ser, "a").left.values
    left_of_view = get_array(view, "a").left.values
    if using_copy_on_write:
        assert not np.shares_memory(left_of_ser, left_of_view)
        tm.assert_series_equal(view, snapshot)
    else:
        assert np.shares_memory(left_of_ser, left_of_view)
def test_fillna_series_empty_arg(using_copy_on_write):
    # fillna with an empty mapping fills nothing: the result is expected to
    # share memory with the input only under CoW.
    ser = Series([1, np.nan, 2])
    snapshot = ser.copy()

    out = ser.fillna({})

    shared = np.shares_memory(get_array(ser), get_array(out))
    if using_copy_on_write:
        assert shared
    else:
        assert not shared

    # Mutating the input afterwards must not show up in the result.
    ser.iloc[0] = 100.5
    tm.assert_series_equal(snapshot, out)
def test_fillna_series_empty_arg_inplace(using_copy_on_write):
    # In-place fillna with an empty mapping is expected to keep the Series in
    # its original buffer.
    ser = Series([1, np.nan, 2])
    buffer_before = get_array(ser)

    ser.fillna({}, inplace=True)

    buffer_after = get_array(ser)
    assert np.shares_memory(buffer_after, buffer_before)

    if using_copy_on_write:
        assert ser._mgr._has_no_reference(0)
def test_fillna_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    # fillna on a frame of the parametrized dtype: column "a" has a missing
    # value and gets filled, column "b" has nothing to fill.
    df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    snapshot = df.copy()
    filled = df.fillna(100)

    # The filled column never shares memory with the input.
    assert not np.shares_memory(get_array(df, "a"), get_array(filled, "a"))

    b_shared = np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
    if using_copy_on_write:
        # The untouched column is shared and tracked as referenced.
        assert b_shared
        assert not filled._mgr._has_no_reference(1)
    else:
        assert not b_shared

    tm.assert_frame_equal(snapshot, df)

    # Writing into the shared column of the result must detach it.
    filled.iloc[0, 1] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(df, "b"), get_array(filled, "b"))
        assert filled._mgr._has_no_reference(1)
        assert df._mgr._has_no_reference(1)
    tm.assert_frame_equal(snapshot, df)
def test_fillna_inplace_ea_noop_shares_memory(
    using_copy_on_write, any_numeric_ea_and_arrow_dtype
):
    # In-place fillna on a frame of the parametrized dtype while a view of the
    # frame is alive.
    df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype)
    snapshot = df.copy()
    view = df[:]

    df.fillna(100, inplace=True)

    # The filled column is no longer shared with the view.
    assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))

    b_shared = np.shares_memory(get_array(df, "b"), get_array(view, "b"))
    if using_copy_on_write:
        # The untouched column stays shared and both sides track the reference.
        assert b_shared
        assert not df._mgr._has_no_reference(1)
        assert not view._mgr._has_no_reference(1)
    else:
        assert not b_shared

    # A later write into the frame must not reach the view.
    df.iloc[0, 1] = 100
    tm.assert_frame_equal(snapshot, view)
|