123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312 |
- import numpy as np
- import pytest
- from pandas import (
- DataFrame,
- Series,
- concat,
- merge,
- )
- import pandas._testing as tm
- from pandas.tests.copy_view.util import get_array
def test_concat_frames(using_copy_on_write):
    """Writing to a column-wise ``concat`` result must not alter either input.

    When ``using_copy_on_write`` is truthy the test expects every result
    column to share memory with the frame it came from until that column of
    the result is written to; otherwise it expects no sharing at all.  In
    both cases the two input frames must compare equal to the snapshots
    taken before concatenation.

    Parameters
    ----------
    using_copy_on_write
        Truthy when Copy-on-Write behaviour is expected (presumably supplied
        by a pytest fixture defined outside this file).
    """
    df = DataFrame({"b": ["a"] * 3})
    df2 = DataFrame({"a": ["a"] * 3})
    df_orig = df.copy()
    # Snapshot the second input too: the result column derived from it is
    # written to further down, so it has to be verified as well.
    df2_orig = df2.copy()
    result = concat([df, df2], axis=1)

    if using_copy_on_write:
        assert np.shares_memory(get_array(result, "b"), get_array(df, "b"))
        assert np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
    else:
        assert not np.shares_memory(get_array(result, "b"), get_array(df, "b"))
        assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a"))

    # Write to the first result column ("b"): only that column is expected
    # to stop sharing memory, column "a" should still be shared.
    result.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not np.shares_memory(get_array(result, "b"), get_array(df, "b"))
        assert np.shares_memory(get_array(result, "a"), get_array(df2, "a"))

    # Write to the second result column ("a"): now it is detached as well.
    result.iloc[0, 1] = "d"
    if using_copy_on_write:
        assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a"))

    tm.assert_frame_equal(df, df_orig)
    tm.assert_frame_equal(df2, df2_orig)
def test_concat_frames_updating_input(using_copy_on_write):
    """Writes to the inputs of a column-wise ``concat`` must not reach the result.

    A snapshot of the result is taken right after concatenation; each input
    frame is then modified in turn and the result is compared against that
    snapshot at the end.

    ``using_copy_on_write`` is truthy when Copy-on-Write behaviour is
    expected (presumably supplied by a pytest fixture defined elsewhere).
    """
    lhs = DataFrame({"b": ["a"] * 3})
    rhs = DataFrame({"a": ["a"] * 3})
    combined = concat([lhs, rhs], axis=1)

    b_shared = np.shares_memory(get_array(combined, "b"), get_array(lhs, "b"))
    a_shared = np.shares_memory(get_array(combined, "a"), get_array(rhs, "a"))
    if using_copy_on_write:
        # Each result column is expected to be backed by its source frame.
        assert b_shared
        assert a_shared
    else:
        assert not b_shared
        assert not a_shared

    snapshot = combined.copy()

    # Modify the first input: column "b" must be detached, "a" still shared.
    lhs.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not np.shares_memory(get_array(combined, "b"), get_array(lhs, "b"))
        assert np.shares_memory(get_array(combined, "a"), get_array(rhs, "a"))

    # Modify the second input: column "a" must be detached as well.
    rhs.iloc[0, 0] = "d"
    if using_copy_on_write:
        assert not np.shares_memory(get_array(combined, "a"), get_array(rhs, "a"))

    tm.assert_frame_equal(combined, snapshot)
def test_concat_series(using_copy_on_write):
    """Writing to a frame built by ``concat`` of two Series leaves them intact.

    With ``using_copy_on_write`` truthy, each result column is expected to
    share memory with its source Series until that column is written to;
    otherwise no sharing is expected.  Both Series must equal the copies
    taken before concatenation once the result has been modified.
    """
    first = Series([1, 2], name="a")
    second = Series([3, 4], name="b")
    first_before = first.copy()
    second_before = second.copy()
    frame = concat([first, second], axis=1)

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "a"), first.values)
        assert not np.shares_memory(get_array(frame, "b"), second.values)
    else:
        assert np.shares_memory(get_array(frame, "a"), first.values)
        assert np.shares_memory(get_array(frame, "b"), second.values)

    # Write to column "a" of the result; "b" should still be shared.
    frame.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "a"), first.values)
        assert np.shares_memory(get_array(frame, "b"), second.values)

    # Write to column "b" of the result; it is detached too.
    frame.iloc[0, 1] = 1000
    if using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "b"), second.values)

    tm.assert_series_equal(first, first_before)
    tm.assert_series_equal(second, second_before)
def test_concat_frames_chained(using_copy_on_write):
    """Nested column-wise ``concat`` calls keep the result isolated from inputs.

    Three frames are combined through two nested ``concat`` calls.  With
    ``using_copy_on_write`` truthy the final result is expected to share
    memory with all three original frames; otherwise with none of them.
    Modifying the first input afterwards must leave the result equal to the
    snapshot taken right after concatenation.
    """
    first = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
    second = DataFrame({"c": [4, 5, 6]})
    third = DataFrame({"d": [4, 5, 6]})
    # The inner concat result is only a temporary here; it is not bound to
    # a name of its own.
    chained = concat([concat([first, second], axis=1), third], axis=1)
    snapshot = chained.copy()

    for column, origin in (("a", first), ("c", second), ("d", third)):
        shared = np.shares_memory(
            get_array(chained, column), get_array(origin, column)
        )
        if using_copy_on_write:
            assert shared
        else:
            assert not shared

    # Modify an input that went through both concat calls.
    first.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(chained, "a"), get_array(first, "a"))

    tm.assert_frame_equal(chained, snapshot)
def test_concat_series_chained(using_copy_on_write):
    """Nested column-wise ``concat`` of Series keeps the result isolated.

    Three Series are combined through two nested ``concat`` calls.  With
    ``using_copy_on_write`` truthy the final frame is expected to share
    memory with each Series; otherwise with none.  Modifying the first
    Series afterwards must not change the frame.
    """
    s_a = Series([1, 2, 3], name="a")
    s_c = Series([4, 5, 6], name="c")
    s_d = Series([4, 5, 6], name="d")
    # The inner concat result is only a temporary here; it is not bound to
    # a name of its own.
    frame = concat([concat([s_a, s_c], axis=1), s_d], axis=1)
    snapshot = frame.copy()

    for label, source in (("a", s_a), ("c", s_c), ("d", s_d)):
        shared = np.shares_memory(get_array(frame, label), get_array(source, label))
        if using_copy_on_write:
            assert shared
        else:
            assert not shared

    # Modify the Series that went through both concat calls.
    s_a.iloc[0] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "a"), get_array(s_a, "a"))

    tm.assert_frame_equal(frame, snapshot)
def test_concat_series_updating_input(using_copy_on_write):
    """Modifying Series after a column-wise ``concat`` must not alter the result.

    The expected frame is built independently from literal values and the
    result is compared against it after each of the two inputs has been
    modified.  Memory-sharing expectations are only checked in detail when
    ``using_copy_on_write`` is truthy.
    """
    left = Series([1, 2], name="a")
    right = Series([3, 4], name="b")
    untouched = DataFrame({"a": [1, 2], "b": [3, 4]})
    frame = concat([left, right], axis=1)

    a_shared = np.shares_memory(get_array(frame, "a"), get_array(left, "a"))
    b_shared = np.shares_memory(get_array(frame, "b"), get_array(right, "b"))
    if using_copy_on_write:
        assert a_shared
        assert b_shared
    else:
        assert not a_shared
        assert not b_shared

    # First input modified: "a" must be detached while "b" is still shared.
    left.iloc[0] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "a"), get_array(left, "a"))
        assert np.shares_memory(get_array(frame, "b"), get_array(right, "b"))
    tm.assert_frame_equal(frame, untouched)

    # Second input modified: "b" must be detached as well.
    right.iloc[0] = 1000
    if using_copy_on_write:
        assert not np.shares_memory(get_array(frame, "b"), get_array(right, "b"))
    tm.assert_frame_equal(frame, untouched)
def test_concat_mixed_series_frame(using_copy_on_write):
    """Column-wise ``concat`` of a frame and a Series is isolated from both.

    With ``using_copy_on_write`` truthy the result is expected to share
    memory with every input column; otherwise with none.  After the Series
    and then the frame are modified, the result must still equal the
    snapshot taken right after concatenation.
    """
    frame = DataFrame({"a": [1, 2, 3], "c": 1})
    extra = Series([4, 5, 6], name="d")
    combined = concat([frame, extra], axis=1)
    snapshot = combined.copy()

    for label, source in (("a", frame), ("c", frame), ("d", extra)):
        shared = np.shares_memory(
            get_array(combined, label), get_array(source, label)
        )
        if using_copy_on_write:
            assert shared
        else:
            assert not shared

    # Modify the Series input first ...
    extra.iloc[0] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(combined, "d"), get_array(extra, "d"))

    # ... then the frame input.
    frame.iloc[0, 0] = 100
    if using_copy_on_write:
        assert not np.shares_memory(get_array(combined, "a"), get_array(frame, "a"))

    tm.assert_frame_equal(combined, snapshot)
@pytest.mark.parametrize("copy", [True, None, False])
def test_concat_copy_keyword(using_copy_on_write, copy):
    """Check how the ``copy`` keyword of ``concat`` affects memory sharing.

    The result is expected to share memory with both inputs when
    ``using_copy_on_write`` is truthy or when ``copy`` is ``False``; in all
    other parametrized cases no sharing is expected.
    """
    lhs = DataFrame({"a": [1, 2]})
    rhs = DataFrame({"b": [1.5, 2.5]})
    combined = concat([lhs, rhs], axis=1, copy=copy)

    a_shared = np.shares_memory(get_array(lhs, "a"), get_array(combined, "a"))
    b_shared = np.shares_memory(get_array(rhs, "b"), get_array(combined, "b"))
    if using_copy_on_write or copy is False:
        assert a_shared
        assert b_shared
    else:
        assert not a_shared
        assert not b_shared
@pytest.mark.parametrize(
    "func",
    [
        lambda lhs, rhs, **kwargs: lhs.merge(rhs, **kwargs),
        lambda lhs, rhs, **kwargs: merge(lhs, rhs, **kwargs),
    ],
)
def test_merge_on_key(using_copy_on_write, func):
    """Merging on a key column must never let writes reach the inputs.

    ``func`` is either the ``DataFrame.merge`` method or the top-level
    ``merge`` function, wrapped so both are called the same way.  With
    ``using_copy_on_write`` truthy, the value columns are expected to share
    memory with their source frames and the key column with the left frame
    only; otherwise the value columns are expected not to be shared.
    """
    lhs = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
    rhs = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
    lhs_before = lhs.copy()
    rhs_before = rhs.copy()

    out = func(lhs, rhs, on="key")

    if not using_copy_on_write:
        assert not np.shares_memory(get_array(out, "a"), get_array(lhs, "a"))
        assert not np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))
    else:
        assert np.shares_memory(get_array(out, "a"), get_array(lhs, "a"))
        assert np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))
        assert np.shares_memory(get_array(out, "key"), get_array(lhs, "key"))
        assert not np.shares_memory(get_array(out, "key"), get_array(rhs, "key"))

    # Write at position 1 (presumably column "a"): "a" is expected to be
    # detached while "b" is still shared.
    out.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "a"), get_array(lhs, "a"))
        assert np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))

    # Write at position 2 (presumably column "b"): "b" is detached too.
    out.iloc[0, 2] = 0
    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))

    tm.assert_frame_equal(lhs, lhs_before)
    tm.assert_frame_equal(rhs, rhs_before)
def test_merge_on_index(using_copy_on_write):
    """Merging on the index must never let writes reach the inputs.

    With ``using_copy_on_write`` truthy each result column is expected to
    share memory with its source frame until that column is written to;
    otherwise no sharing is expected.  Both inputs must equal the copies
    taken before the merge once the result has been modified.
    """
    lhs = DataFrame({"a": [1, 2, 3]})
    rhs = DataFrame({"b": [4, 5, 6]})
    lhs_before = lhs.copy()
    rhs_before = rhs.copy()

    out = merge(lhs, rhs, left_index=True, right_index=True)

    a_shared = np.shares_memory(get_array(out, "a"), get_array(lhs, "a"))
    b_shared = np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))
    if using_copy_on_write:
        assert a_shared
        assert b_shared
    else:
        assert not a_shared
        assert not b_shared

    # Write to column "a" of the result; "b" should still be shared.
    out.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "a"), get_array(lhs, "a"))
        assert np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))

    # Write to column "b" of the result; it is detached too.
    out.iloc[0, 1] = 0
    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "b"), get_array(rhs, "b"))

    tm.assert_frame_equal(lhs, lhs_before)
    tm.assert_frame_equal(rhs, rhs_before)
@pytest.mark.parametrize(
    "func, how",
    [
        (lambda lhs, rhs, **kwargs: merge(rhs, lhs, on="key", **kwargs), "right"),
        (lambda lhs, rhs, **kwargs: merge(lhs, rhs, on="key", **kwargs), "left"),
    ],
)
def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
    """Merge where one input has fewer keys than the other.

    The second frame lacks key ``"c"``.  With ``using_copy_on_write``
    truthy the test expects column ``"a"`` of the longer frame to be shared
    with the result, column ``"b"`` of the shorter frame not to be shared
    (and the shorter frame to hold no references), and the key column to be
    shared with the longer frame only for ``how == "left"``.  Both inputs
    must be unchanged after the result is written to.
    """
    longer = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
    shorter = DataFrame({"key": ["a", "b"], "b": [4, 5]})
    longer_before = longer.copy()
    shorter_before = shorter.copy()

    out = func(longer, shorter, how=how)

    if using_copy_on_write:
        assert np.shares_memory(get_array(out, "a"), get_array(longer, "a"))
        assert not np.shares_memory(get_array(out, "b"), get_array(shorter, "b"))
        assert shorter._mgr._has_no_reference(1)
        assert shorter._mgr._has_no_reference(0)
        key_shared = np.shares_memory(
            get_array(out, "key"), get_array(longer, "key")
        )
        assert key_shared is (how == "left")
        assert not np.shares_memory(
            get_array(out, "key"), get_array(shorter, "key")
        )
    else:
        assert not np.shares_memory(get_array(out, "a"), get_array(longer, "a"))
        assert not np.shares_memory(get_array(out, "b"), get_array(shorter, "b"))

    # The written position depends on the argument order used by ``func``
    # (presumably it is where column "a" lands in each result).
    target = 1 if how == "left" else 2
    out.iloc[0, target] = 0

    if using_copy_on_write:
        assert not np.shares_memory(get_array(out, "a"), get_array(longer, "a"))

    tm.assert_frame_equal(longer, longer_before)
    tm.assert_frame_equal(shorter, shorter_before)
@pytest.mark.parametrize("copy", [True, None, False])
def test_merge_copy_keyword(using_copy_on_write, copy):
    """Check how the ``copy`` keyword of ``DataFrame.merge`` affects sharing.

    For an index-on-index merge the result is expected to share memory with
    both inputs when ``using_copy_on_write`` is truthy or when ``copy`` is
    ``False``; in all other parametrized cases no sharing is expected.
    """
    lhs = DataFrame({"a": [1, 2]})
    rhs = DataFrame({"b": [3, 4.5]})
    out = lhs.merge(rhs, copy=copy, left_index=True, right_index=True)

    a_shared = np.shares_memory(get_array(lhs, "a"), get_array(out, "a"))
    b_shared = np.shares_memory(get_array(rhs, "b"), get_array(out, "b"))
    if using_copy_on_write or copy is False:
        assert a_shared
        assert b_shared
    else:
        assert not a_shared
        assert not b_shared
|