# setitem.py
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.tests.extension.base.base import BaseExtensionTests
  6. class BaseSetitemTests(BaseExtensionTests):
  7. @pytest.fixture(
  8. params=[
  9. lambda x: x.index,
  10. lambda x: list(x.index),
  11. lambda x: slice(None),
  12. lambda x: slice(0, len(x)),
  13. lambda x: range(len(x)),
  14. lambda x: list(range(len(x))),
  15. lambda x: np.ones(len(x), dtype=bool),
  16. ],
  17. ids=[
  18. "index",
  19. "list[index]",
  20. "null_slice",
  21. "full_slice",
  22. "range",
  23. "list(range)",
  24. "mask",
  25. ],
  26. )
  27. def full_indexer(self, request):
  28. """
  29. Fixture for an indexer to pass to obj.loc to get/set the full length of the
  30. object.
  31. In some cases, assumes that obj.index is the default RangeIndex.
  32. """
  33. return request.param
  34. def test_setitem_scalar_series(self, data, box_in_series):
  35. if box_in_series:
  36. data = pd.Series(data)
  37. data[0] = data[1]
  38. assert data[0] == data[1]
  39. def test_setitem_sequence(self, data, box_in_series):
  40. if box_in_series:
  41. data = pd.Series(data)
  42. original = data.copy()
  43. data[[0, 1]] = [data[1], data[0]]
  44. assert data[0] == original[1]
  45. assert data[1] == original[0]
  46. def test_setitem_sequence_mismatched_length_raises(self, data, as_array):
  47. ser = pd.Series(data)
  48. original = ser.copy()
  49. value = [data[0]]
  50. if as_array:
  51. value = data._from_sequence(value)
  52. xpr = "cannot set using a {} indexer with a different length"
  53. with pytest.raises(ValueError, match=xpr.format("list-like")):
  54. ser[[0, 1]] = value
  55. # Ensure no modifications made before the exception
  56. self.assert_series_equal(ser, original)
  57. with pytest.raises(ValueError, match=xpr.format("slice")):
  58. ser[slice(3)] = value
  59. self.assert_series_equal(ser, original)
  60. def test_setitem_empty_indexer(self, data, box_in_series):
  61. if box_in_series:
  62. data = pd.Series(data)
  63. original = data.copy()
  64. data[np.array([], dtype=int)] = []
  65. self.assert_equal(data, original)
  66. def test_setitem_sequence_broadcasts(self, data, box_in_series):
  67. if box_in_series:
  68. data = pd.Series(data)
  69. data[[0, 1]] = data[2]
  70. assert data[0] == data[2]
  71. assert data[1] == data[2]
  72. @pytest.mark.parametrize("setter", ["loc", "iloc"])
  73. def test_setitem_scalar(self, data, setter):
  74. arr = pd.Series(data)
  75. setter = getattr(arr, setter)
  76. setter[0] = data[1]
  77. assert arr[0] == data[1]
  78. def test_setitem_loc_scalar_mixed(self, data):
  79. df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
  80. df.loc[0, "B"] = data[1]
  81. assert df.loc[0, "B"] == data[1]
  82. def test_setitem_loc_scalar_single(self, data):
  83. df = pd.DataFrame({"B": data})
  84. df.loc[10, "B"] = data[1]
  85. assert df.loc[10, "B"] == data[1]
  86. def test_setitem_loc_scalar_multiple_homogoneous(self, data):
  87. df = pd.DataFrame({"A": data, "B": data})
  88. df.loc[10, "B"] = data[1]
  89. assert df.loc[10, "B"] == data[1]
  90. def test_setitem_iloc_scalar_mixed(self, data):
  91. df = pd.DataFrame({"A": np.arange(len(data)), "B": data})
  92. df.iloc[0, 1] = data[1]
  93. assert df.loc[0, "B"] == data[1]
  94. def test_setitem_iloc_scalar_single(self, data):
  95. df = pd.DataFrame({"B": data})
  96. df.iloc[10, 0] = data[1]
  97. assert df.loc[10, "B"] == data[1]
  98. def test_setitem_iloc_scalar_multiple_homogoneous(self, data):
  99. df = pd.DataFrame({"A": data, "B": data})
  100. df.iloc[10, 1] = data[1]
  101. assert df.loc[10, "B"] == data[1]
  102. @pytest.mark.parametrize(
  103. "mask",
  104. [
  105. np.array([True, True, True, False, False]),
  106. pd.array([True, True, True, False, False], dtype="boolean"),
  107. pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"),
  108. ],
  109. ids=["numpy-array", "boolean-array", "boolean-array-na"],
  110. )
  111. def test_setitem_mask(self, data, mask, box_in_series):
  112. arr = data[:5].copy()
  113. expected = arr.take([0, 0, 0, 3, 4])
  114. if box_in_series:
  115. arr = pd.Series(arr)
  116. expected = pd.Series(expected)
  117. arr[mask] = data[0]
  118. self.assert_equal(expected, arr)
  119. def test_setitem_mask_raises(self, data, box_in_series):
  120. # wrong length
  121. mask = np.array([True, False])
  122. if box_in_series:
  123. data = pd.Series(data)
  124. with pytest.raises(IndexError, match="wrong length"):
  125. data[mask] = data[0]
  126. mask = pd.array(mask, dtype="boolean")
  127. with pytest.raises(IndexError, match="wrong length"):
  128. data[mask] = data[0]
  129. def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
  130. mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
  131. mask[:3] = True
  132. mask[3:5] = pd.NA
  133. if box_in_series:
  134. data = pd.Series(data)
  135. data[mask] = data[0]
  136. assert (data[:3] == data[0]).all()
  137. @pytest.mark.parametrize(
  138. "idx",
  139. [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
  140. ids=["list", "integer-array", "numpy-array"],
  141. )
  142. def test_setitem_integer_array(self, data, idx, box_in_series):
  143. arr = data[:5].copy()
  144. expected = data.take([0, 0, 0, 3, 4])
  145. if box_in_series:
  146. arr = pd.Series(arr)
  147. expected = pd.Series(expected)
  148. arr[idx] = arr[0]
  149. self.assert_equal(arr, expected)
  150. @pytest.mark.parametrize(
  151. "idx, box_in_series",
  152. [
  153. ([0, 1, 2, pd.NA], False),
  154. pytest.param(
  155. [0, 1, 2, pd.NA], True, marks=pytest.mark.xfail(reason="GH-31948")
  156. ),
  157. (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
  158. (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
  159. ],
  160. ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
  161. )
  162. def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
  163. arr = data.copy()
  164. # TODO(xfail) this raises KeyError about labels not found (it tries label-based)
  165. # for list of labels with Series
  166. if box_in_series:
  167. arr = pd.Series(data, index=[tm.rands(4) for _ in range(len(data))])
  168. msg = "Cannot index with an integer indexer containing NA values"
  169. with pytest.raises(ValueError, match=msg):
  170. arr[idx] = arr[0]
  171. @pytest.mark.parametrize("as_callable", [True, False])
  172. @pytest.mark.parametrize("setter", ["loc", None])
  173. def test_setitem_mask_aligned(self, data, as_callable, setter):
  174. ser = pd.Series(data)
  175. mask = np.zeros(len(data), dtype=bool)
  176. mask[:2] = True
  177. if as_callable:
  178. mask2 = lambda x: mask
  179. else:
  180. mask2 = mask
  181. if setter:
  182. # loc
  183. target = getattr(ser, setter)
  184. else:
  185. # Series.__setitem__
  186. target = ser
  187. target[mask2] = data[5:7]
  188. ser[mask2] = data[5:7]
  189. assert ser[0] == data[5]
  190. assert ser[1] == data[6]
  191. @pytest.mark.parametrize("setter", ["loc", None])
  192. def test_setitem_mask_broadcast(self, data, setter):
  193. ser = pd.Series(data)
  194. mask = np.zeros(len(data), dtype=bool)
  195. mask[:2] = True
  196. if setter: # loc
  197. target = getattr(ser, setter)
  198. else: # __setitem__
  199. target = ser
  200. target[mask] = data[10]
  201. assert ser[0] == data[10]
  202. assert ser[1] == data[10]
  203. def test_setitem_expand_columns(self, data):
  204. df = pd.DataFrame({"A": data})
  205. result = df.copy()
  206. result["B"] = 1
  207. expected = pd.DataFrame({"A": data, "B": [1] * len(data)})
  208. self.assert_frame_equal(result, expected)
  209. result = df.copy()
  210. result.loc[:, "B"] = 1
  211. self.assert_frame_equal(result, expected)
  212. # overwrite with new type
  213. result["B"] = data
  214. expected = pd.DataFrame({"A": data, "B": data})
  215. self.assert_frame_equal(result, expected)
  216. def test_setitem_expand_with_extension(self, data):
  217. df = pd.DataFrame({"A": [1] * len(data)})
  218. result = df.copy()
  219. result["B"] = data
  220. expected = pd.DataFrame({"A": [1] * len(data), "B": data})
  221. self.assert_frame_equal(result, expected)
  222. result = df.copy()
  223. result.loc[:, "B"] = data
  224. self.assert_frame_equal(result, expected)
  225. def test_setitem_frame_invalid_length(self, data):
  226. df = pd.DataFrame({"A": [1] * len(data)})
  227. xpr = (
  228. rf"Length of values \({len(data[:5])}\) "
  229. rf"does not match length of index \({len(df)}\)"
  230. )
  231. with pytest.raises(ValueError, match=xpr):
  232. df["B"] = data[:5]
  233. def test_setitem_tuple_index(self, data):
  234. ser = pd.Series(data[:2], index=[(0, 0), (0, 1)])
  235. expected = pd.Series(data.take([1, 1]), index=ser.index)
  236. ser[(0, 0)] = data[1]
  237. self.assert_series_equal(ser, expected)
  238. def test_setitem_slice(self, data, box_in_series):
  239. arr = data[:5].copy()
  240. expected = data.take([0, 0, 0, 3, 4])
  241. if box_in_series:
  242. arr = pd.Series(arr)
  243. expected = pd.Series(expected)
  244. arr[:3] = data[0]
  245. self.assert_equal(arr, expected)
  246. def test_setitem_loc_iloc_slice(self, data):
  247. arr = data[:5].copy()
  248. s = pd.Series(arr, index=["a", "b", "c", "d", "e"])
  249. expected = pd.Series(data.take([0, 0, 0, 3, 4]), index=s.index)
  250. result = s.copy()
  251. result.iloc[:3] = data[0]
  252. self.assert_equal(result, expected)
  253. result = s.copy()
  254. result.loc[:"c"] = data[0]
  255. self.assert_equal(result, expected)
  256. def test_setitem_slice_mismatch_length_raises(self, data):
  257. arr = data[:5]
  258. with pytest.raises(ValueError):
  259. arr[:1] = arr[:2]
  260. def test_setitem_slice_array(self, data):
  261. arr = data[:5].copy()
  262. arr[:5] = data[-5:]
  263. self.assert_extension_array_equal(arr, data[-5:])
  264. def test_setitem_scalar_key_sequence_raise(self, data):
  265. arr = data[:5].copy()
  266. with pytest.raises(ValueError):
  267. arr[0] = arr[[0, 1]]
  268. def test_setitem_preserves_views(self, data):
  269. # GH#28150 setitem shouldn't swap the underlying data
  270. view1 = data.view()
  271. view2 = data[:]
  272. data[0] = data[1]
  273. assert view1[0] == data[1]
  274. assert view2[0] == data[1]
  275. def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
  276. # https://github.com/pandas-dev/pandas/issues/32395
  277. df = expected = pd.DataFrame({"data": pd.Series(data)})
  278. result = pd.DataFrame(index=df.index)
  279. key = full_indexer(df)
  280. result.loc[key, "data"] = df["data"]
  281. self.assert_frame_equal(result, expected)
  282. def test_setitem_with_expansion_row(self, data, na_value):
  283. df = pd.DataFrame({"data": data[:1]})
  284. df.loc[1, "data"] = data[1]
  285. expected = pd.DataFrame({"data": data[:2]})
  286. self.assert_frame_equal(df, expected)
  287. # https://github.com/pandas-dev/pandas/issues/47284
  288. df.loc[2, "data"] = na_value
  289. expected = pd.DataFrame(
  290. {"data": pd.Series([data[0], data[1], na_value], dtype=data.dtype)}
  291. )
  292. self.assert_frame_equal(df, expected)
  293. def test_setitem_series(self, data, full_indexer):
  294. # https://github.com/pandas-dev/pandas/issues/32395
  295. ser = pd.Series(data, name="data")
  296. result = pd.Series(index=ser.index, dtype=object, name="data")
  297. # because result has object dtype, the attempt to do setting inplace
  298. # is successful, and object dtype is retained
  299. key = full_indexer(ser)
  300. result.loc[key] = ser
  301. expected = pd.Series(
  302. data.astype(object), index=ser.index, name="data", dtype=object
  303. )
  304. self.assert_series_equal(result, expected)
  305. def test_setitem_frame_2d_values(self, data):
  306. # GH#44514
  307. df = pd.DataFrame({"A": data})
  308. # Avoiding using_array_manager fixture
  309. # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
  310. using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
  311. using_copy_on_write = pd.options.mode.copy_on_write
  312. blk_data = df._mgr.arrays[0]
  313. orig = df.copy()
  314. df.iloc[:] = df
  315. self.assert_frame_equal(df, orig)
  316. df.iloc[:-1] = df.iloc[:-1]
  317. self.assert_frame_equal(df, orig)
  318. df.iloc[:] = df.values
  319. self.assert_frame_equal(df, orig)
  320. if not using_array_manager and not using_copy_on_write:
  321. # GH#33457 Check that this setting occurred in-place
  322. # FIXME(ArrayManager): this should work there too
  323. assert df._mgr.arrays[0] is blk_data
  324. df.iloc[:-1] = df.values[:-1]
  325. self.assert_frame_equal(df, orig)
  326. def test_delitem_series(self, data):
  327. # GH#40763
  328. ser = pd.Series(data, name="data")
  329. taker = np.arange(len(ser))
  330. taker = np.delete(taker, 1)
  331. expected = ser[taker]
  332. del ser[1]
  333. self.assert_series_equal(ser, expected)
  334. def test_setitem_invalid(self, data, invalid_scalar):
  335. msg = "" # messages vary by subclass, so we do not test it
  336. with pytest.raises((ValueError, TypeError), match=msg):
  337. data[0] = invalid_scalar
  338. with pytest.raises((ValueError, TypeError), match=msg):
  339. data[:] = invalid_scalar
  340. def test_setitem_2d_values(self, data):
  341. # GH50085
  342. original = data.copy()
  343. df = pd.DataFrame({"a": data, "b": data})
  344. df.loc[[0, 1], :] = df.loc[[1, 0], :].values
  345. assert (df.loc[0, :] == original[1]).all()
  346. assert (df.loc[1, :] == original[0]).all()