123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- import numpy as np
- import pytest
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import DataFrame
- import pandas._testing as tm
- from pandas.tests.copy_view.util import get_array
- @td.skip_array_manager_invalid_test
- def test_consolidate(using_copy_on_write):
- # create unconsolidated DataFrame
- df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
- df["c"] = [4, 5, 6]
- # take a viewing subset
- subset = df[:]
- # each block of subset references a block of df
- assert all(blk.refs.has_reference() for blk in subset._mgr.blocks)
- # consolidate the two int64 blocks
- subset._consolidate_inplace()
- # the float64 block still references the parent one because it still a view
- assert subset._mgr.blocks[0].refs.has_reference()
- # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values)
- # but avoids caching df["b"]
- assert np.shares_memory(get_array(df, "b"), get_array(subset, "b"))
- # the new consolidated int64 block does not reference another
- assert not subset._mgr.blocks[1].refs.has_reference()
- # the parent dataframe now also only is linked for the float column
- assert not df._mgr.blocks[0].refs.has_reference()
- assert df._mgr.blocks[1].refs.has_reference()
- assert not df._mgr.blocks[2].refs.has_reference()
- # and modifying subset still doesn't modify parent
- if using_copy_on_write:
- subset.iloc[0, 1] = 0.0
- assert not df._mgr.blocks[1].refs.has_reference()
- assert df.loc[0, "b"] == 0.1
- @pytest.mark.single_cpu
- @td.skip_array_manager_invalid_test
- def test_switch_options():
- # ensure we can switch the value of the option within one session
- # (assuming data is constructed after switching)
- # using the option_context to ensure we set back to global option value
- # after running the test
- with pd.option_context("mode.copy_on_write", False):
- df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
- subset = df[:]
- subset.iloc[0, 0] = 0
- # df updated with CoW disabled
- assert df.iloc[0, 0] == 0
- pd.options.mode.copy_on_write = True
- df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
- subset = df[:]
- subset.iloc[0, 0] = 0
- # df not updated with CoW enabled
- assert df.iloc[0, 0] == 1
- pd.options.mode.copy_on_write = False
- df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
- subset = df[:]
- subset.iloc[0, 0] = 0
- # df updated with CoW disabled
- assert df.iloc[0, 0] == 0
- @td.skip_array_manager_invalid_test
- @pytest.mark.parametrize("dtype", [np.intp, np.int8])
- @pytest.mark.parametrize(
- "locs, arr",
- [
- ([0], np.array([-1, -2, -3])),
- ([1], np.array([-1, -2, -3])),
- ([5], np.array([-1, -2, -3])),
- ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
- ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
- ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
- ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
- ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
- ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
- ],
- )
- def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
- # Nothing currently calls iset with
- # more than 1 loc with inplace=True (only happens with inplace=False)
- # but ensure that it works
- df = DataFrame(
- {
- "a": [1, 2, 3],
- "b": [4, 5, 6],
- "c": [7, 8, 9],
- "d": [10, 11, 12],
- "e": [13, 14, 15],
- "f": ["a", "b", "c"],
- },
- )
- arr = arr.astype(dtype)
- df_orig = df.copy()
- df2 = df.copy(deep=None) # Trigger a CoW (if enabled, otherwise makes copy)
- df2._mgr.iset(locs, arr, inplace=True)
- tm.assert_frame_equal(df, df_orig)
- if using_copy_on_write:
- for i, col in enumerate(df.columns):
- if i not in locs:
- assert np.shares_memory(get_array(df, col), get_array(df2, col))
- else:
- for col in df.columns:
- assert not np.shares_memory(get_array(df, col), get_array(df2, col))
|