test_slice.py 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas.errors import UnsortedIndexError
  8. import pandas as pd
  9. from pandas import (
  10. DataFrame,
  11. Index,
  12. MultiIndex,
  13. Series,
  14. Timestamp,
  15. )
  16. import pandas._testing as tm
  17. from pandas.tests.indexing.common import _mklbl
  18. class TestMultiIndexSlicers:
  def test_per_axis_per_level_getitem(self):
      """Check ``.loc`` reads that combine per-axis and per-level indexers.

      Tuple keys holding slices, lists, scalars and boolean masks are applied
      to a MultiIndex DataFrame and Series and compared against explicitly
      built expectations.  The errors raised for a wrong-length boolean mask,
      a column label that does not exist and a slice on a non-lexsorted index
      are checked as well.
      """
      # GH6134
      # four-level product index used by the slice/list examples
      ix = MultiIndex.from_product(
          [_mklbl("A", 5), _mklbl("B", 7), _mklbl("C", 4), _mklbl("D", 2)]
      )
      df = DataFrame(np.arange(len(ix.to_numpy())), index=ix)
      a_labels = ("A1", "A2", "A3")

      # label slice on level 0, everything on level 1, explicit list on level 2
      result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
      keep = [
          key
          for key in df.index.values
          if key[0] in a_labels and key[2] in ("C1", "C3")
      ]
      tm.assert_frame_equal(result, df.loc[keep])

      # same, but level 2 is selected by a label slice (C2 is included)
      keep = [
          key
          for key in df.index.values
          if key[0] in a_labels and key[2] in ("C1", "C2", "C3")
      ]
      result = df.loc[(slice("A1", "A3"), slice(None), slice("C1", "C3")), :]
      tm.assert_frame_equal(result, df.loc[keep])

      # small frame with a MultiIndex on both axes
      row_index = MultiIndex.from_tuples(
          [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
      )
      col_index = MultiIndex.from_tuples(
          [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
          names=["lvl0", "lvl1"],
      )
      df = DataFrame(
          np.arange(16, dtype="int64").reshape(4, 4),
          index=row_index,
          columns=col_index,
      )
      df = df.sort_index(axis=0)
      df = df.sort_index(axis=1)

      # identity: selecting everything returns the frame itself
      everything = (slice(None), slice(None))
      tm.assert_frame_equal(df.loc[everything, :], df)
      tm.assert_frame_equal(df.loc[everything, everything], df)
      tm.assert_frame_equal(df.loc[:, everything], df)

      # index: list and scalar on the second row level pick the same rows
      rows_0_and_3 = df.iloc[[0, 3]]
      tm.assert_frame_equal(df.loc[(slice(None), [1]), :], rows_0_and_3)
      tm.assert_frame_equal(df.loc[(slice(None), 1), :], rows_0_and_3)

      # columns
      result = df.loc[:, (slice(None), ["foo"])]
      tm.assert_frame_equal(result, df.iloc[:, [1, 3]])

      # both axes at once
      result = df.loc[(slice(None), 1), (slice(None), ["foo"])]
      tm.assert_frame_equal(result, df.iloc[[0, 3], [1, 3]])

      # plain first-level labels on both axes
      result = df.loc["A", "a"]
      expected = DataFrame(
          {"bar": [1, 5, 9], "foo": [0, 4, 8]},
          index=Index([1, 2, 3], name="two"),
          columns=Index(["bar", "foo"], name="lvl1"),
      )
      tm.assert_frame_equal(result, expected)

      result = df.loc[(slice(None), [1, 2]), :]
      tm.assert_frame_equal(result, df.iloc[[0, 1, 3]])

      # multi-level series
      s = Series(np.arange(len(ix.to_numpy())), index=ix)
      result = s.loc["A1":"A3", :, ["C1", "C3"]]
      keep = [
          key
          for key in s.index.values
          if key[0] in a_labels and key[2] in ("C1", "C3")
      ]
      tm.assert_series_equal(result, s.loc[keep])

      # boolean indexers
      mask = df.loc[:, ("a", "bar")] > 5
      result = df.loc[(slice(None), mask), :]
      tm.assert_frame_equal(result, df.iloc[[2, 3]])

      msg = (
          "cannot index with a boolean indexer "
          "that is not the same length as the index"
      )
      with pytest.raises(ValueError, match=msg):
          df.loc[(slice(None), np.array([True, False])), :]

      with pytest.raises(KeyError, match=r"\[1\] not in index"):
          # slice(None) addresses the rows and [1] the columns; 1 is not a
          # column label, so this raises.
          # This used to treat [1] as positional GH#16396
          df.loc[slice(None), [1]]

      # not lexsorted: sorting by level 1 only drops the lexsort depth to 0
      assert df.index._lexsort_depth == 2
      df = df.sort_index(level=1, axis=0)
      assert df.index._lexsort_depth == 0

      msg = (
          "MultiIndex slicing requires the index to be "
          r"lexsorted: slicing on levels \[1\], lexsort depth 0"
      )
      with pytest.raises(UnsortedIndexError, match=msg):
          df.loc[(slice(None), slice("bar")), :]

      # GH 16734: not sorted, but no real slicing
      mask = df.loc[:, ("a", "bar")] > 5
      result = df.loc[(slice(None), mask), :]
      tm.assert_frame_equal(result, df.iloc[[1, 3], :])
  142. def test_multiindex_slicers_non_unique(self):
  143. # GH 7106
  144. # non-unique mi index support
  145. df = (
  146. DataFrame(
  147. {
  148. "A": ["foo", "foo", "foo", "foo"],
  149. "B": ["a", "a", "a", "a"],
  150. "C": [1, 2, 1, 3],
  151. "D": [1, 2, 3, 4],
  152. }
  153. )
  154. .set_index(["A", "B", "C"])
  155. .sort_index()
  156. )
  157. assert not df.index.is_unique
  158. expected = (
  159. DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
  160. .set_index(["A", "B", "C"])
  161. .sort_index()
  162. )
  163. result = df.loc[(slice(None), slice(None), 1), :]
  164. tm.assert_frame_equal(result, expected)
  165. # this is equivalent of an xs expression
  166. result = df.xs(1, level=2, drop_level=False)
  167. tm.assert_frame_equal(result, expected)
  168. df = (
  169. DataFrame(
  170. {
  171. "A": ["foo", "foo", "foo", "foo"],
  172. "B": ["a", "a", "a", "a"],
  173. "C": [1, 2, 1, 2],
  174. "D": [1, 2, 3, 4],
  175. }
  176. )
  177. .set_index(["A", "B", "C"])
  178. .sort_index()
  179. )
  180. assert not df.index.is_unique
  181. expected = (
  182. DataFrame({"A": ["foo", "foo"], "B": ["a", "a"], "C": [1, 1], "D": [1, 3]})
  183. .set_index(["A", "B", "C"])
  184. .sort_index()
  185. )
  186. result = df.loc[(slice(None), slice(None), 1), :]
  187. assert not result.index.is_unique
  188. tm.assert_frame_equal(result, expected)
  189. # GH12896
  190. # numpy-implementation dependent bug
  191. ints = [
  192. 1,
  193. 2,
  194. 3,
  195. 4,
  196. 5,
  197. 6,
  198. 7,
  199. 8,
  200. 9,
  201. 10,
  202. 11,
  203. 12,
  204. 12,
  205. 13,
  206. 14,
  207. 14,
  208. 16,
  209. 17,
  210. 18,
  211. 19,
  212. 200000,
  213. 200000,
  214. ]
  215. n = len(ints)
  216. idx = MultiIndex.from_arrays([["a"] * n, ints])
  217. result = Series([1] * n, index=idx)
  218. result = result.sort_index()
  219. result = result.loc[(slice(None), slice(100000))]
  220. expected = Series([1] * (n - 2), index=idx[:-2]).sort_index()
  221. tm.assert_series_equal(result, expected)
  222. def test_multiindex_slicers_datetimelike(self):
  223. # GH 7429
  224. # buggy/inconsistent behavior when slicing with datetime-like
  225. dates = [datetime(2012, 1, 1, 12, 12, 12) + timedelta(days=i) for i in range(6)]
  226. freq = [1, 2]
  227. index = MultiIndex.from_product([dates, freq], names=["date", "frequency"])
  228. df = DataFrame(
  229. np.arange(6 * 2 * 4, dtype="int64").reshape(-1, 4),
  230. index=index,
  231. columns=list("ABCD"),
  232. )
  233. # multi-axis slicing
  234. idx = pd.IndexSlice
  235. expected = df.iloc[[0, 2, 4], [0, 1]]
  236. result = df.loc[
  237. (
  238. slice(
  239. Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
  240. ),
  241. slice(1, 1),
  242. ),
  243. slice("A", "B"),
  244. ]
  245. tm.assert_frame_equal(result, expected)
  246. result = df.loc[
  247. (
  248. idx[
  249. Timestamp("2012-01-01 12:12:12") : Timestamp("2012-01-03 12:12:12")
  250. ],
  251. idx[1:1],
  252. ),
  253. slice("A", "B"),
  254. ]
  255. tm.assert_frame_equal(result, expected)
  256. result = df.loc[
  257. (
  258. slice(
  259. Timestamp("2012-01-01 12:12:12"), Timestamp("2012-01-03 12:12:12")
  260. ),
  261. 1,
  262. ),
  263. slice("A", "B"),
  264. ]
  265. tm.assert_frame_equal(result, expected)
  266. # with strings
  267. result = df.loc[
  268. (slice("2012-01-01 12:12:12", "2012-01-03 12:12:12"), slice(1, 1)),
  269. slice("A", "B"),
  270. ]
  271. tm.assert_frame_equal(result, expected)
  272. result = df.loc[
  273. (idx["2012-01-01 12:12:12":"2012-01-03 12:12:12"], 1), idx["A", "B"]
  274. ]
  275. tm.assert_frame_equal(result, expected)
  276. def test_multiindex_slicers_edges(self):
  277. # GH 8132
  278. # various edge cases
  279. df = DataFrame(
  280. {
  281. "A": ["A0"] * 5 + ["A1"] * 5 + ["A2"] * 5,
  282. "B": ["B0", "B0", "B1", "B1", "B2"] * 3,
  283. "DATE": [
  284. "2013-06-11",
  285. "2013-07-02",
  286. "2013-07-09",
  287. "2013-07-30",
  288. "2013-08-06",
  289. "2013-06-11",
  290. "2013-07-02",
  291. "2013-07-09",
  292. "2013-07-30",
  293. "2013-08-06",
  294. "2013-09-03",
  295. "2013-10-01",
  296. "2013-07-09",
  297. "2013-08-06",
  298. "2013-09-03",
  299. ],
  300. "VALUES": [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2],
  301. }
  302. )
  303. df["DATE"] = pd.to_datetime(df["DATE"])
  304. df1 = df.set_index(["A", "B", "DATE"])
  305. df1 = df1.sort_index()
  306. # A1 - Get all values under "A0" and "A1"
  307. result = df1.loc[(slice("A1")), :]
  308. expected = df1.iloc[0:10]
  309. tm.assert_frame_equal(result, expected)
  310. # A2 - Get all values from the start to "A2"
  311. result = df1.loc[(slice("A2")), :]
  312. expected = df1
  313. tm.assert_frame_equal(result, expected)
  314. # A3 - Get all values under "B1" or "B2"
  315. result = df1.loc[(slice(None), slice("B1", "B2")), :]
  316. expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]]
  317. tm.assert_frame_equal(result, expected)
  318. # A4 - Get all values between 2013-07-02 and 2013-07-09
  319. result = df1.loc[(slice(None), slice(None), slice("20130702", "20130709")), :]
  320. expected = df1.iloc[[1, 2, 6, 7, 12]]
  321. tm.assert_frame_equal(result, expected)
  322. # B1 - Get all values in B0 that are also under A0, A1 and A2
  323. result = df1.loc[(slice("A2"), slice("B0")), :]
  324. expected = df1.iloc[[0, 1, 5, 6, 10, 11]]
  325. tm.assert_frame_equal(result, expected)
  326. # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for
  327. # the As)
  328. result = df1.loc[(slice(None), slice("B2")), :]
  329. expected = df1
  330. tm.assert_frame_equal(result, expected)
  331. # B3 - Get all values from B1 to B2 and up to 2013-08-06
  332. result = df1.loc[(slice(None), slice("B1", "B2"), slice("2013-08-06")), :]
  333. expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]]
  334. tm.assert_frame_equal(result, expected)
  335. # B4 - Same as A4 but the start of the date slice is not a key.
  336. # shows indexing on a partial selection slice
  337. result = df1.loc[(slice(None), slice(None), slice("20130701", "20130709")), :]
  338. expected = df1.iloc[[1, 2, 6, 7, 12]]
  339. tm.assert_frame_equal(result, expected)
  def test_per_axis_per_level_doc_examples(self):
      """Run the per-axis/per-level slicer examples from the indexing docs.

      Tuple-of-slices keys and their ``pd.IndexSlice`` spellings are compared
      against explicitly built expectations.  Slicing on the unsorted column
      MultiIndex must raise ``UnsortedIndexError``.  After the columns are
      sorted, the slicing and setitem examples are checked for their results
      rather than only executed.
      """
      # test index maker
      idx = pd.IndexSlice

      # from indexing.rst / advanced
      index = MultiIndex.from_product(
          [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
      )
      columns = MultiIndex.from_tuples(
          [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
          names=["lvl0", "lvl1"],
      )
      df = DataFrame(
          np.arange(len(index) * len(columns), dtype="int64").reshape(
              (len(index), len(columns))
          ),
          index=index,
          columns=columns,
      )

      result = df.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]
      expected = df.loc[
          [
              key
              for key in df.index.values
              if key[0] in ("A1", "A2", "A3") and key[2] in ("C1", "C3")
          ]
      ]
      tm.assert_frame_equal(result, expected)
      result = df.loc[idx["A1":"A3", :, ["C1", "C3"]], :]
      tm.assert_frame_equal(result, expected)

      result = df.loc[(slice(None), slice(None), ["C1", "C3"]), :]
      expected = df.loc[
          [key for key in df.index.values if key[2] in ("C1", "C3")]
      ]
      tm.assert_frame_equal(result, expected)
      result = df.loc[idx[:, :, ["C1", "C3"]], :]
      tm.assert_frame_equal(result, expected)

      # not sorted: the columns are still in construction order here
      msg = (
          "MultiIndex slicing requires the index to be lexsorted: "
          r"slicing on levels \[1\], lexsort depth 1"
      )
      with pytest.raises(UnsortedIndexError, match=msg):
          df.loc["A1", ("a", slice("foo"))]

      # GH 16734: not sorted, but no real slicing
      tm.assert_frame_equal(
          df.loc["A1", (slice(None), "foo")], df.loc["A1"].iloc[:, [0, 2]]
      )

      df = df.sort_index(axis=1)

      # slicing
      # After sorting, the "foo" columns sit at positions 1 and 3.
      result = df.loc["A1", (slice(None), "foo")]
      tm.assert_frame_equal(result, df.loc["A1"].iloc[:, [1, 3]])

      c1_c3_rows = (slice(None), slice(None), ["C1", "C3"])
      result = df.loc[c1_c3_rows, (slice(None), "foo")]
      tm.assert_frame_equal(result, df.loc[c1_c3_rows, :].iloc[:, [1, 3]])

      # setitem
      # Keep a copy so the rows that must stay untouched can be compared.
      before = df.copy()
      df.loc(axis=0)[:, :, ["C1", "C3"]] = -10
      assert (df.loc(axis=0)[:, :, ["C1", "C3"]] == -10).all().all()
      tm.assert_frame_equal(
          df.loc(axis=0)[:, :, ["C0", "C2"]],
          before.loc(axis=0)[:, :, ["C0", "C2"]],
      )
  def test_loc_axis_arguments(self):
      """Check ``df.loc(axis=...)`` with integer and string axis values.

      Row and column selections made through the callable form are compared
      with the equivalent plain ``.loc`` keys, and invalid axis values must
      raise ``ValueError``.
      """
      row_index = MultiIndex.from_product(
          [_mklbl("A", 4), _mklbl("B", 2), _mklbl("C", 4), _mklbl("D", 2)]
      )
      col_index = MultiIndex.from_tuples(
          [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
          names=["lvl0", "lvl1"],
      )
      values = np.arange(len(row_index) * len(col_index), dtype="int64").reshape(
          (len(row_index), len(col_index))
      )
      df = DataFrame(values, index=row_index, columns=col_index)
      df = df.sort_index()
      df = df.sort_index(axis=1)

      # axis 0
      result = df.loc(axis=0)["A1":"A3", :, ["C1", "C3"]]
      keep = [
          key
          for key in df.index.values
          if key[0] in ("A1", "A2", "A3") and key[2] in ("C1", "C3")
      ]
      tm.assert_frame_equal(result, df.loc[keep])

      result = df.loc(axis="index")[:, :, ["C1", "C3"]]
      keep = [key for key in df.index.values if key[2] in ("C1", "C3")]
      tm.assert_frame_equal(result, df.loc[keep])

      # axis 1
      foo_columns = df.loc[:, (slice(None), "foo")]
      tm.assert_frame_equal(df.loc(axis=1)[:, "foo"], foo_columns)
      tm.assert_frame_equal(df.loc(axis="columns")[:, "foo"], foo_columns)

      # invalid axis
      for bad_axis in [-1, 2, "foo"]:
          msg = f"No axis named {bad_axis} for object type DataFrame"
          with pytest.raises(ValueError, match=msg):
              df.loc(axis=bad_axis)[:, :, ["C1", "C3"]]
  467. def test_loc_axis_single_level_multi_col_indexing_multiindex_col_df(self):
  468. # GH29519
  469. df = DataFrame(
  470. np.arange(27).reshape(3, 9),
  471. columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
  472. )
  473. result = df.loc(axis=1)["a1":"a2"]
  474. expected = df.iloc[:, :-3]
  475. tm.assert_frame_equal(result, expected)
  476. def test_loc_axis_single_level_single_col_indexing_multiindex_col_df(self):
  477. # GH29519
  478. df = DataFrame(
  479. np.arange(27).reshape(3, 9),
  480. columns=MultiIndex.from_product([["a1", "a2", "a3"], ["b1", "b2", "b3"]]),
  481. )
  482. result = df.loc(axis=1)["a1"]
  483. expected = df.iloc[:, :3]
  484. expected.columns = ["b1", "b2", "b3"]
  485. tm.assert_frame_equal(result, expected)
  486. def test_loc_ax_single_level_indexer_simple_df(self):
  487. # GH29519
  488. # test single level indexing on single index column data frame
  489. df = DataFrame(np.arange(9).reshape(3, 3), columns=["a", "b", "c"])
  490. result = df.loc(axis=1)["a"]
  491. expected = Series(np.array([0, 3, 6]), name="a")
  492. tm.assert_series_equal(result, expected)
  def test_per_axis_per_level_setitem(self):
      """Check ``.loc`` writes that combine per-axis and per-level indexers.

      Each case assigns through a tuple key, ``pd.IndexSlice`` or
      ``loc(axis=0)`` on a fresh copy of the same frame and compares the
      outcome with the same write done positionally via ``iloc``.  Scalar,
      array and alignable right-hand sides are covered, along with the errors
      for values of the wrong shape.
      """
      # test index maker
      idx = pd.IndexSlice

      # test multi-index slicing with per axis and per index controls
      row_index = MultiIndex.from_tuples(
          [("A", 1), ("A", 2), ("A", 3), ("B", 1)], names=["one", "two"]
      )
      col_index = MultiIndex.from_tuples(
          [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
          names=["lvl0", "lvl1"],
      )
      df_orig = DataFrame(
          np.arange(16, dtype="int64").reshape(4, 4),
          index=row_index,
          columns=col_index,
      )
      df_orig = df_orig.sort_index(axis=0)
      df_orig = df_orig.sort_index(axis=1)

      # keys that recur throughout the test
      everything = (slice(None), slice(None))
      two_is_1 = (slice(None), 1)
      foo_cols = (slice(None), ["foo"])

      # identity: every spelling of "all cells" overwrites the whole frame
      all_100 = df_orig.copy()
      all_100.iloc[:, :] = 100

      df = df_orig.copy()
      df.loc[everything, :] = 100
      tm.assert_frame_equal(df, all_100)

      df = df_orig.copy()
      df.loc(axis=0)[:, :] = 100
      tm.assert_frame_equal(df, all_100)

      df = df_orig.copy()
      df.loc[everything, everything] = 100
      tm.assert_frame_equal(df, all_100)

      df = df_orig.copy()
      df.loc[:, everything] = 100
      tm.assert_frame_equal(df, all_100)

      # index: rows whose second level equals 1
      rows_100 = df_orig.copy()
      rows_100.iloc[[0, 3]] = 100

      df = df_orig.copy()
      df.loc[(slice(None), [1]), :] = 100
      tm.assert_frame_equal(df, rows_100)

      df = df_orig.copy()
      df.loc[two_is_1, :] = 100
      tm.assert_frame_equal(df, rows_100)

      df = df_orig.copy()
      df.loc(axis=0)[:, 1] = 100
      tm.assert_frame_equal(df, rows_100)

      # columns
      df = df_orig.copy()
      df.loc[:, foo_cols] = 100
      expected = df_orig.copy()
      expected.iloc[:, [1, 3]] = 100
      tm.assert_frame_equal(df, expected)

      # both
      block_100 = df_orig.copy()
      block_100.iloc[[0, 3], [1, 3]] = 100

      df = df_orig.copy()
      df.loc[two_is_1, foo_cols] = 100
      tm.assert_frame_equal(df, block_100)

      df = df_orig.copy()
      df.loc[idx[:, 1], idx[:, ["foo"]]] = 100
      tm.assert_frame_equal(df, block_100)

      df = df_orig.copy()
      df.loc["A", "a"] = 100
      expected = df_orig.copy()
      expected.iloc[0:3, 0:2] = 100
      tm.assert_frame_equal(df, expected)

      # setting with a list-like
      df = df_orig.copy()
      df.loc[two_is_1, foo_cols] = np.array(
          [[100, 100], [100, 100]], dtype="int64"
      )
      tm.assert_frame_equal(df, block_100)

      # not enough values
      df = df_orig.copy()

      msg = "setting an array element with a sequence."
      with pytest.raises(ValueError, match=msg):
          df.loc[two_is_1, foo_cols] = np.array([[100], [100, 100]], dtype="int64")

      msg = "Must have equal len keys and value when setting with an iterable"
      with pytest.raises(ValueError, match=msg):
          df.loc[two_is_1, foo_cols] = np.array(
              [100, 100, 100, 100], dtype="int64"
          )

      # with an alignable rhs
      df = df_orig.copy()
      df.loc[two_is_1, foo_cols] = df.loc[two_is_1, foo_cols] * 5
      expected = df_orig.copy()
      expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5
      tm.assert_frame_equal(df, expected)

      df = df_orig.copy()
      df.loc[two_is_1, foo_cols] *= df.loc[two_is_1, foo_cols]
      expected = df_orig.copy()
      expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
      tm.assert_frame_equal(df, expected)

      # rhs gets an extra ("c", "bah") column that the target does not have
      rhs = df_orig.loc[two_is_1, foo_cols].copy()
      rhs.loc[:, ("c", "bah")] = 10
      df = df_orig.copy()
      df.loc[two_is_1, foo_cols] *= rhs
      expected = df_orig.copy()
      expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]]
      tm.assert_frame_equal(df, expected)
  609. def test_multiindex_label_slicing_with_negative_step(self):
  610. ser = Series(
  611. np.arange(20), MultiIndex.from_product([list("abcde"), np.arange(4)])
  612. )
  613. SLC = pd.IndexSlice
  614. tm.assert_indexing_slices_equivalent(ser, SLC[::-1], SLC[::-1])
  615. tm.assert_indexing_slices_equivalent(ser, SLC["d"::-1], SLC[15::-1])
  616. tm.assert_indexing_slices_equivalent(ser, SLC[("d",)::-1], SLC[15::-1])
  617. tm.assert_indexing_slices_equivalent(ser, SLC[:"d":-1], SLC[:11:-1])
  618. tm.assert_indexing_slices_equivalent(ser, SLC[:("d",):-1], SLC[:11:-1])
  619. tm.assert_indexing_slices_equivalent(ser, SLC["d":"b":-1], SLC[15:3:-1])
  620. tm.assert_indexing_slices_equivalent(ser, SLC[("d",):"b":-1], SLC[15:3:-1])
  621. tm.assert_indexing_slices_equivalent(ser, SLC["d":("b",):-1], SLC[15:3:-1])
  622. tm.assert_indexing_slices_equivalent(ser, SLC[("d",):("b",):-1], SLC[15:3:-1])
  623. tm.assert_indexing_slices_equivalent(ser, SLC["b":"d":-1], SLC[:0])
  624. tm.assert_indexing_slices_equivalent(ser, SLC[("c", 2)::-1], SLC[10::-1])
  625. tm.assert_indexing_slices_equivalent(ser, SLC[:("c", 2):-1], SLC[:9:-1])
  626. tm.assert_indexing_slices_equivalent(
  627. ser, SLC[("e", 0):("c", 2):-1], SLC[16:9:-1]
  628. )
  629. def test_multiindex_slice_first_level(self):
  630. # GH 12697
  631. freq = ["a", "b", "c", "d"]
  632. idx = MultiIndex.from_product([freq, range(500)])
  633. df = DataFrame(list(range(2000)), index=idx, columns=["Test"])
  634. df_slice = df.loc[pd.IndexSlice[:, 30:70], :]
  635. result = df_slice.loc["a"]
  636. expected = DataFrame(list(range(30, 71)), columns=["Test"], index=range(30, 71))
  637. tm.assert_frame_equal(result, expected)
  638. result = df_slice.loc["d"]
  639. expected = DataFrame(
  640. list(range(1530, 1571)), columns=["Test"], index=range(30, 71)
  641. )
  642. tm.assert_frame_equal(result, expected)
  643. def test_int_series_slicing(self, multiindex_year_month_day_dataframe_random_data):
  644. ymd = multiindex_year_month_day_dataframe_random_data
  645. s = ymd["A"]
  646. result = s[5:]
  647. expected = s.reindex(s.index[5:])
  648. tm.assert_series_equal(result, expected)
  649. exp = ymd["A"].copy()
  650. s[5:] = 0
  651. exp.iloc[5:] = 0
  652. tm.assert_numpy_array_equal(s.values, exp.values)
  653. result = ymd[5:]
  654. expected = ymd.reindex(s.index[5:])
  655. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize(
      "dtype, loc, iloc",
      [
          # integer labels, step -1
          ("int", slice(None, None, -1), slice(None, None, -1)),
          ("int", slice(3, None, -1), slice(3, None, -1)),
          ("int", slice(None, 1, -1), slice(None, 0, -1)),
          ("int", slice(3, 1, -1), slice(3, 0, -1)),
          # integer labels, step -2
          ("int", slice(None, None, -2), slice(None, None, -2)),
          ("int", slice(3, None, -2), slice(3, None, -2)),
          ("int", slice(None, 1, -2), slice(None, 0, -2)),
          ("int", slice(3, 1, -2), slice(3, 0, -2)),
          # string labels, step -1
          ("str", slice(None, None, -1), slice(None, None, -1)),
          ("str", slice("d", None, -1), slice(3, None, -1)),
          ("str", slice(None, "b", -1), slice(None, 0, -1)),
          ("str", slice("d", "b", -1), slice(3, 0, -1)),
          # string labels, step -2
          ("str", slice(None, None, -2), slice(None, None, -2)),
          ("str", slice("d", None, -2), slice(3, None, -2)),
          ("str", slice(None, "b", -2), slice(None, 0, -2)),
          ("str", slice("d", "b", -2), slice(3, 0, -2)),
      ],
  )
  def test_loc_slice_negative_stepsize(self, dtype, loc, iloc):
      """Compare negative-step label slices with the matching ``iloc`` slice.

      The label slice ``loc`` is applied alone, on level 0 and on level 1 of
      a two-level MultiIndex whose levels carry identical labels; all three
      must equal the positional selection ``iloc``.
      """
      # GH#38071
      label_choices = {"int": range(5), "str": list("abcde")}
      labels = label_choices[dtype]

      mi = MultiIndex.from_arrays([labels] * 2)
      df = DataFrame(1.0, index=mi, columns=["A"])
      SLC = pd.IndexSlice

      expected = df.iloc[iloc, :]

      # bare slice, slice on level 0, slice on level 1
      for row_key in (SLC[loc], SLC[loc, :], SLC[:, loc]):
          tm.assert_frame_equal(df.loc[row_key, :], expected)