123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428 |
- import numpy as np
- import pytest
- import pandas as pd
- import pandas._testing as tm
- from pandas.tests.extension.base.base import BaseExtensionTests
class BaseSetitemTests(BaseExtensionTests):
    """
    Reusable tests for item assignment on an extension array.

    The tests assign into the raw array, into a ``pd.Series`` wrapping it and
    into ``pd.DataFrame`` columns holding it, using ``[]``, ``.loc`` and
    ``.iloc``.

    The ``data``, ``box_in_series``, ``as_array``, ``na_value`` and
    ``invalid_scalar`` fixtures are not defined in this class and must be
    supplied by the test environment.  Positions up to 10 of ``data`` are
    accessed, so ``data`` needs at least 11 elements.
    """

    @pytest.fixture(
        params=[
            lambda x: x.index,
            lambda x: list(x.index),
            lambda x: slice(None),
            lambda x: slice(0, len(x)),
            lambda x: range(len(x)),
            lambda x: list(range(len(x))),
            lambda x: np.ones(len(x), dtype=bool),
        ],
        ids=[
            "index",
            "list[index]",
            "null_slice",
            "full_slice",
            "range",
            "list(range)",
            "mask",
        ],
    )
    def full_indexer(self, request):
        """
        Fixture for an indexer to pass to obj.loc to get/set the full length of the
        object.

        Each parameter is a callable taking the object and returning the indexer.

        In some cases, assumes that obj.index is the default RangeIndex.
        """
        return request.param

    def test_setitem_scalar_series(self, data, box_in_series):
        """Assign one scalar element onto another via ``[]``."""
        if box_in_series:
            data = pd.Series(data)
        data[0] = data[1]
        assert data[0] == data[1]

    def test_setitem_sequence(self, data, box_in_series):
        """Swap the first two elements by assigning a list to a list key."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()

        data[[0, 1]] = [data[1], data[0]]
        assert data[0] == original[1]
        assert data[1] == original[0]

    def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
        """A value shorter than the list or slice key raises ``ValueError``."""
        ser = pd.Series(data)
        original = ser.copy()
        value = [data[0]]
        if as_array:
            value = data._from_sequence(value)

        xpr = "cannot set using a {} indexer with a different length"
        with pytest.raises(ValueError, match=xpr.format("list-like")):
            ser[[0, 1]] = value
        # Ensure no modifications made before the exception
        self.assert_series_equal(ser, original)

        with pytest.raises(ValueError, match=xpr.format("slice")):
            ser[slice(3)] = value
        self.assert_series_equal(ser, original)

    def test_setitem_empty_indexer(self, data, box_in_series):
        """Assigning an empty list to an empty integer key changes nothing."""
        if box_in_series:
            data = pd.Series(data)
        original = data.copy()
        data[np.array([], dtype=int)] = []
        self.assert_equal(data, original)

    def test_setitem_sequence_broadcasts(self, data, box_in_series):
        """A scalar assigned to a list key is written to every position."""
        if box_in_series:
            data = pd.Series(data)
        data[[0, 1]] = data[2]
        assert data[0] == data[2]
        assert data[1] == data[2]

    @pytest.mark.parametrize("setter", ["loc", "iloc"])
    def test_setitem_scalar(self, data, setter):
        """Assign a scalar through ``Series.loc`` / ``Series.iloc``."""
        arr = pd.Series(data)
        # Look up the indexer by name without rebinding the parameter.
        indexer = getattr(arr, setter)
        indexer[0] = data[1]
        assert arr[0] == data[1]

    def test_setitem_loc_scalar_mixed(self, data):
        """``DataFrame.loc`` scalar assignment next to a NumPy integer column."""
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.loc[0, "B"] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_loc_scalar_single(self, data):
        """``DataFrame.loc`` scalar assignment in a single-column frame."""
        df = pd.DataFrame({"B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_loc_scalar_multiple_homogoneous(self, data):
        """``DataFrame.loc`` scalar assignment with two columns built from ``data``."""
        df = pd.DataFrame({"A": data, "B": data})
        df.loc[10, "B"] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_mixed(self, data):
        """``DataFrame.iloc`` scalar assignment next to a NumPy integer column."""
        df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
        df.iloc[0, 1] = data[1]
        assert df.loc[0, "B"] == data[1]

    def test_setitem_iloc_scalar_single(self, data):
        """``DataFrame.iloc`` scalar assignment in a single-column frame."""
        df = pd.DataFrame({"B": data})
        df.iloc[10, 0] = data[1]
        assert df.loc[10, "B"] == data[1]

    def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
        """``DataFrame.iloc`` scalar assignment with two columns built from ``data``."""
        df = pd.DataFrame({"A": data, "B": data})
        df.iloc[10, 1] = data[1]
        assert df.loc[10, "B"] == data[1]

    @pytest.mark.parametrize(
        "mask",
        [
            np.array([True, True, True, False, False]),
            pd.array([True, True, True, False, False], dtype="boolean"),
            pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
        ],
        ids=["numpy-array", "boolean-array", "boolean-array-na"],
    )
    def test_setitem_mask(self, data, mask, box_in_series):
        """Assign a scalar through a boolean mask; only ``True`` slots change."""
        arr = data[:5].copy()
        expected = arr.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)
        arr[mask] = data[0]
        # (result, expected) order, matching the other tests in this class.
        self.assert_equal(arr, expected)

    def test_setitem_mask_raises(self, data, box_in_series):
        """A boolean mask of the wrong length raises ``IndexError``."""
        # wrong length
        mask = np.array([True, False])

        if box_in_series:
            data = pd.Series(data)

        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

        mask = pd.array(mask, dtype="boolean")
        with pytest.raises(IndexError, match="wrong length"):
            data[mask] = data[0]

    def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
        """Assign through a nullable boolean mask that contains ``pd.NA``."""
        mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
        mask[:3] = True
        mask[3:5] = pd.NA

        if box_in_series:
            data = pd.Series(data)

        data[mask] = data[0]

        assert (data[:3] == data[0]).all()

    @pytest.mark.parametrize(
        "idx",
        [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
        ids=["list", "integer-array", "numpy-array"],
    )
    def test_setitem_integer_array(self, data, idx, box_in_series):
        """Assign a scalar through a list / Int64 array / ndarray of positions."""
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])

        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[idx] = arr[0]
        self.assert_equal(arr, expected)

    @pytest.mark.parametrize(
        "idx, box_in_series",
        [
            ([0, 1, 2, pd.NA], False),
            pytest.param(
                [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
            ),
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
            # NOTE(review): this case is identical to the previous one although
            # its id below is "integer-array-True"; presumably box_in_series was
            # meant to be True here -- confirm the Series path raises the
            # expected error before changing it.
            (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
        ],
        ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
    )
    def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
        """An integer indexer containing ``pd.NA`` raises ``ValueError``."""
        arr = data.copy()

        # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
        # for list of labels with Series
        if box_in_series:
            arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])

        msg = "Cannot index with an integer indexer containing NA values"
        with pytest.raises(ValueError, match=msg):
            arr[idx] = arr[0]

    @pytest.mark.parametrize("as_callable", [True, False])
    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_aligned(self, data, as_callable, setter):
        """
        Assign a two-element slice of ``data`` through a boolean mask.

        The mask is passed either directly or wrapped in a callable, and the
        assignment goes through ``Series.loc`` or ``Series.__setitem__``
        depending on ``setter``.
        """
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if as_callable:

            def mask2(obj):
                # The argument is ignored; the precomputed mask is returned.
                return mask

        else:
            mask2 = mask

        if setter:
            # loc
            target = getattr(ser, setter)
        else:
            # Series.__setitem__
            target = ser

        # Assign only through the parametrized target so that the assertions
        # below check the setter under test rather than a second, direct
        # ``ser[mask2] = ...`` that would hide a failing ``loc`` path.
        target[mask2] = data[5:7]

        assert ser[0] == data[5]
        assert ser[1] == data[6]

    @pytest.mark.parametrize("setter", ["loc", None])
    def test_setitem_mask_broadcast(self, data, setter):
        """A scalar assigned through a boolean mask fills every ``True`` slot."""
        ser = pd.Series(data)
        mask = np.zeros(len(data), dtype=bool)
        mask[:2] = True

        if setter:  # loc
            target = getattr(ser, setter)
        else:  # __setitem__
            target = ser

        target[mask] = data[10]
        assert ser[0] == data[10]
        assert ser[1] == data[10]

    def test_setitem_expand_columns(self, data):
        """Add a new column to a frame holding ``data``, then overwrite it."""
        df = pd.DataFrame({"A": data})
        result = df.copy()
        result["B"] = 1
        expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
        self.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = 1
        self.assert_frame_equal(result, expected)

        # overwrite with new type
        result["B"] = data
        expected = pd.DataFrame({"A": data, "B": data})
        self.assert_frame_equal(result, expected)

    def test_setitem_expand_with_extension(self, data):
        """Add ``data`` as a new column to a frame of plain integers."""
        df = pd.DataFrame({"A": [1] * len(data)})
        result = df.copy()
        result["B"] = data
        expected = pd.DataFrame({"A": [1] * len(data), "B": data})
        self.assert_frame_equal(result, expected)

        result = df.copy()
        result.loc[:, "B"] = data
        self.assert_frame_equal(result, expected)

    def test_setitem_frame_invalid_length(self, data):
        """Assigning a too-short array as a new column raises ``ValueError``."""
        df = pd.DataFrame({"A": [1] * len(data)})
        xpr = (
            rf"Length of values \({len(data[:5])}\) "
            rf"does not match length of index \({len(df)}\)"
        )
        with pytest.raises(ValueError, match=xpr):
            df["B"] = data[:5]

    def test_setitem_tuple_index(self, data):
        """Assign by a tuple label on a Series whose index holds tuples."""
        ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
        expected = pd.Series(data.take([1, 1]), index=ser.index)
        ser[(0, 0)] = data[1]
        self.assert_series_equal(ser, expected)

    def test_setitem_slice(self, data, box_in_series):
        """Assign a scalar to a leading slice."""
        arr = data[:5].copy()
        expected = data.take([0, 0, 0, 3, 4])
        if box_in_series:
            arr = pd.Series(arr)
            expected = pd.Series(expected)

        arr[:3] = data[0]
        self.assert_equal(arr, expected)

    def test_setitem_loc_iloc_slice(self, data):
        """Assign a scalar to a positional ``iloc`` slice and a label ``loc`` slice."""
        arr = data[:5].copy()
        s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
        expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)

        result = s.copy()
        result.iloc[:3] = data[0]
        self.assert_equal(result, expected)

        result = s.copy()
        result.loc[:"c"] = data[0]
        self.assert_equal(result, expected)

    def test_setitem_slice_mismatch_length_raises(self, data):
        """Assigning two values into a one-element slice raises ``ValueError``."""
        arr = data[:5]
        with pytest.raises(ValueError):
            arr[:1] = arr[:2]

    def test_setitem_slice_array(self, data):
        """Assign an equally long extension array to a slice."""
        arr = data[:5].copy()
        arr[:5] = data[-5:]
        self.assert_extension_array_equal(arr, data[-5:])

    def test_setitem_scalar_key_sequence_raise(self, data):
        """Assigning a two-element array to a scalar key raises ``ValueError``."""
        arr = data[:5].copy()
        with pytest.raises(ValueError):
            arr[0] = arr[[0, 1]]

    def test_setitem_preserves_views(self, data):
        """Objects from ``data.view()`` and ``data[:]`` observe a later assignment."""
        # GH#28150 setitem shouldn't swap the underlying data
        view1 = data.view()
        view2 = data[:]

        data[0] = data[1]
        assert view1[0] == data[1]
        assert view2[0] == data[1]

    def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
        """Create a new column via ``.loc[key, col]`` with a full-length key."""
        # https://github.com/pandas-dev/pandas/issues/32395
        df = expected = pd.DataFrame({"data": pd.Series(data)})
        result = pd.DataFrame(index=df.index)

        key = full_indexer(df)
        result.loc[key, "data"] = df["data"]

        self.assert_frame_equal(result, expected)

    def test_setitem_with_expansion_row(self, data, na_value):
        """Append rows via ``.loc`` with a new label, using a value and then NA."""
        df = pd.DataFrame({"data": data[:1]})

        df.loc[1, "data"] = data[1]
        expected = pd.DataFrame({"data": data[:2]})
        self.assert_frame_equal(df, expected)

        # https://github.com/pandas-dev/pandas/issues/47284
        df.loc[2, "data"] = na_value
        expected = pd.DataFrame(
            {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
        )
        self.assert_frame_equal(df, expected)

    def test_setitem_series(self, data, full_indexer):
        """Assign a Series of ``data`` into an object-dtype Series via ``.loc``."""
        # https://github.com/pandas-dev/pandas/issues/32395
        ser = pd.Series(data, name="data")
        result = pd.Series(index=ser.index, dtype=object, name="data")

        # because result has object dtype, the attempt to do setting inplace
        #  is successful, and object dtype is retained
        key = full_indexer(ser)
        result.loc[key] = ser

        expected = pd.Series(
            data.astype(object), index=ser.index, name="data", dtype=object
        )
        self.assert_series_equal(result, expected)

    def test_setitem_frame_2d_values(self, data):
        """Assign a frame, or its ``.values``, back into itself via ``iloc``."""
        # GH#44514
        df = pd.DataFrame({"A": data})

        # Avoiding using_array_manager fixture
        #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
        using_copy_on_write = pd.options.mode.copy_on_write

        blk_data = df._mgr.arrays[0]

        orig = df.copy()

        df.iloc[:] = df
        self.assert_frame_equal(df, orig)

        df.iloc[:-1] = df.iloc[:-1]
        self.assert_frame_equal(df, orig)

        df.iloc[:] = df.values
        self.assert_frame_equal(df, orig)
        if not using_array_manager and not using_copy_on_write:
            # GH#33457 Check that this setting occurred in-place
            # FIXME(ArrayManager): this should work there too
            assert df._mgr.arrays[0] is blk_data

        df.iloc[:-1] = df.values[:-1]
        self.assert_frame_equal(df, orig)

    def test_delitem_series(self, data):
        """``del ser[1]`` matches selecting every position except 1."""
        # GH#40763
        ser = pd.Series(data, name="data")

        taker = np.arange(len(ser))
        taker = np.delete(taker, 1)

        expected = ser[taker]
        del ser[1]
        self.assert_series_equal(ser, expected)

    def test_setitem_invalid(self, data, invalid_scalar):
        """Assigning ``invalid_scalar`` raises ``ValueError`` or ``TypeError``."""
        msg = ""  # messages vary by subclass, so we do not test it
        with pytest.raises((ValueError, TypeError), match=msg):
            data[0] = invalid_scalar

        with pytest.raises((ValueError, TypeError), match=msg):
            data[:] = invalid_scalar

    def test_setitem_2d_values(self, data):
        """Swap the first two rows of a two-column frame using 2D ``.values``."""
        # GH50085
        original = data.copy()
        df = pd.DataFrame({"a": data, "b": data})
        df.loc[[0, 1], :] = df.loc[[1, 0], :].values
        assert (df.loc[0, :] == original[1]).all()
        assert (df.loc[1, :] == original[0]).all()
|