test_indexing.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. """ test get/set & misc """
  2. from datetime import timedelta
  3. import re
  4. import numpy as np
  5. import pytest
  6. from pandas.errors import IndexingError
  7. from pandas import (
  8. NA,
  9. DataFrame,
  10. Index,
  11. IndexSlice,
  12. MultiIndex,
  13. Series,
  14. Timedelta,
  15. Timestamp,
  16. concat,
  17. date_range,
  18. period_range,
  19. timedelta_range,
  20. )
  21. import pandas._testing as tm
  22. def test_basic_indexing():
  23. s = Series(np.random.randn(5), index=["a", "b", "a", "a", "b"])
  24. msg = "index 5 is out of bounds for axis 0 with size 5"
  25. with pytest.raises(IndexError, match=msg):
  26. s[5]
  27. with pytest.raises(IndexError, match=msg):
  28. s[5] = 0
  29. with pytest.raises(KeyError, match=r"^'c'$"):
  30. s["c"]
  31. s = s.sort_index()
  32. with pytest.raises(IndexError, match=msg):
  33. s[5]
  34. msg = r"index 5 is out of bounds for axis (0|1) with size 5|^5$"
  35. with pytest.raises(IndexError, match=msg):
  36. s[5] = 0
  37. def test_getitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
  38. # GH51053
  39. dtype = any_numeric_dtype
  40. idx = Index([1, 0, 1], dtype=dtype)
  41. ser = Series(range(3), index=idx)
  42. result = ser[1]
  43. expected = Series([0, 2], index=Index([1, 1], dtype=dtype))
  44. tm.assert_series_equal(result, expected, check_exact=True)
  45. def test_setitem_numeric_should_not_fallback_to_positional(any_numeric_dtype):
  46. # GH51053
  47. dtype = any_numeric_dtype
  48. idx = Index([1, 0, 1], dtype=dtype)
  49. ser = Series(range(3), index=idx)
  50. ser[1] = 10
  51. expected = Series([10, 1, 10], index=idx)
  52. tm.assert_series_equal(ser, expected, check_exact=True)
  53. def test_basic_getitem_with_labels(datetime_series):
  54. indices = datetime_series.index[[5, 10, 15]]
  55. result = datetime_series[indices]
  56. expected = datetime_series.reindex(indices)
  57. tm.assert_series_equal(result, expected)
  58. result = datetime_series[indices[0] : indices[2]]
  59. expected = datetime_series.loc[indices[0] : indices[2]]
  60. tm.assert_series_equal(result, expected)
  61. def test_basic_getitem_dt64tz_values():
  62. # GH12089
  63. # with tz for values
  64. ser = Series(
  65. date_range("2011-01-01", periods=3, tz="US/Eastern"), index=["a", "b", "c"]
  66. )
  67. expected = Timestamp("2011-01-01", tz="US/Eastern")
  68. result = ser.loc["a"]
  69. assert result == expected
  70. result = ser.iloc[0]
  71. assert result == expected
  72. result = ser["a"]
  73. assert result == expected
  74. def test_getitem_setitem_ellipsis():
  75. s = Series(np.random.randn(10))
  76. result = s[...]
  77. tm.assert_series_equal(result, s)
  78. s[...] = 5
  79. assert (result == 5).all()
  80. @pytest.mark.parametrize(
  81. "result_1, duplicate_item, expected_1",
  82. [
  83. [
  84. Series({1: 12, 2: [1, 2, 2, 3]}),
  85. Series({1: 313}),
  86. Series({1: 12}, dtype=object),
  87. ],
  88. [
  89. Series({1: [1, 2, 3], 2: [1, 2, 2, 3]}),
  90. Series({1: [1, 2, 3]}),
  91. Series({1: [1, 2, 3]}),
  92. ],
  93. ],
  94. )
  95. def test_getitem_with_duplicates_indices(result_1, duplicate_item, expected_1):
  96. # GH 17610
  97. result = result_1._append(duplicate_item)
  98. expected = expected_1._append(duplicate_item)
  99. tm.assert_series_equal(result[1], expected)
  100. assert result[2] == result_1[2]
  101. def test_getitem_setitem_integers():
  102. # caused bug without test
  103. s = Series([1, 2, 3], ["a", "b", "c"])
  104. assert s.iloc[0] == s["a"]
  105. s.iloc[0] = 5
  106. tm.assert_almost_equal(s["a"], 5)
  107. def test_series_box_timestamp():
  108. rng = date_range("20090415", "20090519", freq="B")
  109. ser = Series(rng)
  110. assert isinstance(ser[0], Timestamp)
  111. assert isinstance(ser.at[1], Timestamp)
  112. assert isinstance(ser.iat[2], Timestamp)
  113. assert isinstance(ser.loc[3], Timestamp)
  114. assert isinstance(ser.iloc[4], Timestamp)
  115. ser = Series(rng, index=rng)
  116. assert isinstance(ser[0], Timestamp)
  117. assert isinstance(ser.at[rng[1]], Timestamp)
  118. assert isinstance(ser.iat[2], Timestamp)
  119. assert isinstance(ser.loc[rng[3]], Timestamp)
  120. assert isinstance(ser.iloc[4], Timestamp)
  121. def test_series_box_timedelta():
  122. rng = timedelta_range("1 day 1 s", periods=5, freq="h")
  123. ser = Series(rng)
  124. assert isinstance(ser[0], Timedelta)
  125. assert isinstance(ser.at[1], Timedelta)
  126. assert isinstance(ser.iat[2], Timedelta)
  127. assert isinstance(ser.loc[3], Timedelta)
  128. assert isinstance(ser.iloc[4], Timedelta)
  129. def test_getitem_ambiguous_keyerror(indexer_sl):
  130. ser = Series(range(10), index=list(range(0, 20, 2)))
  131. with pytest.raises(KeyError, match=r"^1$"):
  132. indexer_sl(ser)[1]
  133. def test_getitem_dups_with_missing(indexer_sl):
  134. # breaks reindex, so need to use .loc internally
  135. # GH 4246
  136. ser = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"])
  137. with pytest.raises(KeyError, match=re.escape("['bam'] not in index")):
  138. indexer_sl(ser)[["foo", "bar", "bah", "bam"]]
  139. def test_setitem_ambiguous_keyerror(indexer_sl):
  140. s = Series(range(10), index=list(range(0, 20, 2)))
  141. # equivalent of an append
  142. s2 = s.copy()
  143. indexer_sl(s2)[1] = 5
  144. expected = concat([s, Series([5], index=[1])])
  145. tm.assert_series_equal(s2, expected)
  146. def test_setitem(datetime_series):
  147. datetime_series[datetime_series.index[5]] = np.NaN
  148. datetime_series[[1, 2, 17]] = np.NaN
  149. datetime_series[6] = np.NaN
  150. assert np.isnan(datetime_series[6])
  151. assert np.isnan(datetime_series[2])
  152. datetime_series[np.isnan(datetime_series)] = 5
  153. assert not np.isnan(datetime_series[2])
  154. def test_setslice(datetime_series):
  155. sl = datetime_series[5:20]
  156. assert len(sl) == len(sl.index)
  157. assert sl.index.is_unique is True
  158. def test_basic_getitem_setitem_corner(datetime_series):
  159. # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2]
  160. msg = "key of type tuple not found and not a MultiIndex"
  161. with pytest.raises(KeyError, match=msg):
  162. datetime_series[:, 2]
  163. with pytest.raises(KeyError, match=msg):
  164. datetime_series[:, 2] = 2
  165. # weird lists. [slice(0, 5)] raises but not two slices
  166. msg = "Indexing with a single-item list"
  167. with pytest.raises(ValueError, match=msg):
  168. # GH#31299
  169. datetime_series[[slice(None, 5)]]
  170. # but we're OK with a single-element tuple
  171. result = datetime_series[(slice(None, 5),)]
  172. expected = datetime_series[:5]
  173. tm.assert_series_equal(result, expected)
  174. # OK
  175. msg = r"unhashable type(: 'slice')?"
  176. with pytest.raises(TypeError, match=msg):
  177. datetime_series[[5, slice(None, None)]]
  178. with pytest.raises(TypeError, match=msg):
  179. datetime_series[[5, slice(None, None)]] = 2
  180. def test_slice(string_series, object_series, using_copy_on_write):
  181. original = string_series.copy()
  182. numSlice = string_series[10:20]
  183. numSliceEnd = string_series[-10:]
  184. objSlice = object_series[10:20]
  185. assert string_series.index[9] not in numSlice.index
  186. assert object_series.index[9] not in objSlice.index
  187. assert len(numSlice) == len(numSlice.index)
  188. assert string_series[numSlice.index[0]] == numSlice[numSlice.index[0]]
  189. assert numSlice.index[1] == string_series.index[11]
  190. assert tm.equalContents(numSliceEnd, np.array(string_series)[-10:])
  191. # Test return view.
  192. sl = string_series[10:20]
  193. sl[:] = 0
  194. if using_copy_on_write:
  195. # Doesn't modify parent (CoW)
  196. tm.assert_series_equal(string_series, original)
  197. else:
  198. assert (string_series[10:20] == 0).all()
  199. def test_timedelta_assignment():
  200. # GH 8209
  201. s = Series([], dtype=object)
  202. s.loc["B"] = timedelta(1)
  203. tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
  204. s = s.reindex(s.index.insert(0, "A"))
  205. tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
  206. s.loc["A"] = timedelta(1)
  207. expected = Series(Timedelta("1 days"), index=["A", "B"])
  208. tm.assert_series_equal(s, expected)
  209. def test_underlying_data_conversion(using_copy_on_write):
  210. # GH 4080
  211. df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]})
  212. return_value = df.set_index(["a", "b", "c"], inplace=True)
  213. assert return_value is None
  214. s = Series([1], index=[(2, 2, 2)])
  215. df["val"] = 0
  216. df_original = df.copy()
  217. df
  218. df["val"].update(s)
  219. if using_copy_on_write:
  220. expected = df_original
  221. else:
  222. expected = DataFrame(
  223. {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]}
  224. )
  225. return_value = expected.set_index(["a", "b", "c"], inplace=True)
  226. assert return_value is None
  227. tm.assert_frame_equal(df, expected)
  228. def test_preserve_refs(datetime_series):
  229. seq = datetime_series[[5, 10, 15]]
  230. seq[1] = np.NaN
  231. assert not np.isnan(datetime_series[10])
  232. def test_multilevel_preserve_name(lexsorted_two_level_string_multiindex, indexer_sl):
  233. index = lexsorted_two_level_string_multiindex
  234. ser = Series(np.random.randn(len(index)), index=index, name="sth")
  235. result = indexer_sl(ser)["foo"]
  236. assert result.name == ser.name
  237. # miscellaneous methods
  238. @pytest.mark.parametrize(
  239. "index",
  240. [
  241. date_range("2014-01-01", periods=20, freq="MS"),
  242. period_range("2014-01", periods=20, freq="M"),
  243. timedelta_range("0", periods=20, freq="H"),
  244. ],
  245. )
  246. def test_slice_with_negative_step(index):
  247. keystr1 = str(index[9])
  248. keystr2 = str(index[13])
  249. ser = Series(np.arange(20), index)
  250. SLC = IndexSlice
  251. for key in [keystr1, index[9]]:
  252. tm.assert_indexing_slices_equivalent(ser, SLC[key::-1], SLC[9::-1])
  253. tm.assert_indexing_slices_equivalent(ser, SLC[:key:-1], SLC[:8:-1])
  254. for key2 in [keystr2, index[13]]:
  255. tm.assert_indexing_slices_equivalent(ser, SLC[key2:key:-1], SLC[13:8:-1])
  256. tm.assert_indexing_slices_equivalent(ser, SLC[key:key2:-1], SLC[0:0:-1])
  257. def test_tuple_index():
  258. # GH 35534 - Selecting values when a Series has an Index of tuples
  259. s = Series([1, 2], index=[("a",), ("b",)])
  260. assert s[("a",)] == 1
  261. assert s[("b",)] == 2
  262. s[("b",)] = 3
  263. assert s[("b",)] == 3
  264. def test_frozenset_index():
  265. # GH35747 - Selecting values when a Series has an Index of frozenset
  266. idx0, idx1 = frozenset("a"), frozenset("b")
  267. s = Series([1, 2], index=[idx0, idx1])
  268. assert s[idx0] == 1
  269. assert s[idx1] == 2
  270. s[idx1] = 3
  271. assert s[idx1] == 3
  272. def test_loc_setitem_all_false_indexer():
  273. # GH#45778
  274. ser = Series([1, 2], index=["a", "b"])
  275. expected = ser.copy()
  276. rhs = Series([6, 7], index=["a", "b"])
  277. ser.loc[ser > 100] = rhs
  278. tm.assert_series_equal(ser, expected)
  279. def test_loc_boolean_indexer_non_matching_index():
  280. # GH#46551
  281. ser = Series([1])
  282. result = ser.loc[Series([NA, False], dtype="boolean")]
  283. expected = Series([], dtype="int64")
  284. tm.assert_series_equal(result, expected)
  285. def test_loc_boolean_indexer_miss_matching_index():
  286. # GH#46551
  287. ser = Series([1])
  288. indexer = Series([NA, False], dtype="boolean", index=[1, 2])
  289. with pytest.raises(IndexingError, match="Unalignable"):
  290. ser.loc[indexer]
  291. def test_loc_setitem_nested_data_enlargement():
  292. # GH#48614
  293. df = DataFrame({"a": [1]})
  294. ser = Series({"label": df})
  295. ser.loc["new_label"] = df
  296. expected = Series({"label": df, "new_label": df})
  297. tm.assert_series_equal(ser, expected)
  298. def test_loc_ea_numeric_index_oob_slice_end():
  299. # GH#50161
  300. ser = Series(1, index=Index([0, 1, 2], dtype="Int64"))
  301. result = ser.loc[2:3]
  302. expected = Series(1, index=Index([2], dtype="Int64"))
  303. tm.assert_series_equal(result, expected)
  304. def test_getitem_bool_int_key():
  305. # GH#48653
  306. ser = Series({True: 1, False: 0})
  307. with pytest.raises(KeyError, match="0"):
  308. ser.loc[0]
  309. @pytest.mark.parametrize("val", [{}, {"b": "x"}])
  310. @pytest.mark.parametrize("indexer", [[], [False, False], slice(0, -1), np.array([])])
  311. def test_setitem_empty_indexer(indexer, val):
  312. # GH#45981
  313. df = DataFrame({"a": [1, 2], **val})
  314. expected = df.copy()
  315. df.loc[indexer] = 1.5
  316. tm.assert_frame_equal(df, expected)
  317. class TestDeprecatedIndexers:
  318. @pytest.mark.parametrize("key", [{1}, {1: 1}])
  319. def test_getitem_dict_and_set_deprecated(self, key):
  320. # GH#42825 enforced in 2.0
  321. ser = Series([1, 2])
  322. with pytest.raises(TypeError, match="as an indexer is not supported"):
  323. ser.loc[key]
  324. @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
  325. def test_getitem_dict_and_set_deprecated_multiindex(self, key):
  326. # GH#42825 enforced in 2.0
  327. ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)]))
  328. with pytest.raises(TypeError, match="as an indexer is not supported"):
  329. ser.loc[key]
  330. @pytest.mark.parametrize("key", [{1}, {1: 1}])
  331. def test_setitem_dict_and_set_disallowed(self, key):
  332. # GH#42825 enforced in 2.0
  333. ser = Series([1, 2])
  334. with pytest.raises(TypeError, match="as an indexer is not supported"):
  335. ser.loc[key] = 1
  336. @pytest.mark.parametrize("key", [{1}, {1: 1}, ({1}, 2), ({1: 1}, 2)])
  337. def test_setitem_dict_and_set_disallowed_multiindex(self, key):
  338. # GH#42825 enforced in 2.0
  339. ser = Series([1, 2], index=MultiIndex.from_tuples([(1, 2), (3, 4)]))
  340. with pytest.raises(TypeError, match="as an indexer is not supported"):
  341. ser.loc[key] = 1