test_loc.py 110 KB


  1. """ test label based indexing with loc """
  2. from collections import namedtuple
  3. from datetime import (
  4. date,
  5. datetime,
  6. time,
  7. timedelta,
  8. )
  9. import re
  10. from dateutil.tz import gettz
  11. import numpy as np
  12. import pytest
  13. from pandas.errors import IndexingError
  14. import pandas.util._test_decorators as td
  15. import pandas as pd
  16. from pandas import (
  17. Categorical,
  18. CategoricalDtype,
  19. CategoricalIndex,
  20. DataFrame,
  21. DatetimeIndex,
  22. Index,
  23. IndexSlice,
  24. MultiIndex,
  25. Period,
  26. PeriodIndex,
  27. Series,
  28. SparseDtype,
  29. Timedelta,
  30. Timestamp,
  31. date_range,
  32. timedelta_range,
  33. to_datetime,
  34. to_timedelta,
  35. )
  36. import pandas._testing as tm
  37. from pandas.api.types import (
  38. is_bool_dtype,
  39. is_scalar,
  40. )
  41. from pandas.core.indexing import _one_ellipsis_message
  42. from pandas.tests.indexing.common import check_indexing_smoketest_or_raises
  43. @pytest.mark.parametrize(
  44. "series, new_series, expected_ser",
  45. [
  46. [[np.nan, np.nan, "b"], ["a", np.nan, np.nan], [False, True, True]],
  47. [[np.nan, "b"], ["a", np.nan], [False, True]],
  48. ],
  49. )
  50. def test_not_change_nan_loc(series, new_series, expected_ser):
  51. # GH 28403
  52. df = DataFrame({"A": series})
  53. df.loc[:, "A"] = new_series
  54. expected = DataFrame({"A": expected_ser})
  55. tm.assert_frame_equal(df.isna(), expected)
  56. tm.assert_frame_equal(df.notna(), ~expected)
  57. class TestLoc:
  58. @pytest.mark.parametrize("kind", ["series", "frame"])
  59. def test_loc_getitem_int(self, kind, request):
  60. # int label
  61. obj = request.getfixturevalue(f"{kind}_labels")
  62. check_indexing_smoketest_or_raises(obj, "loc", 2, fails=KeyError)
  63. @pytest.mark.parametrize("kind", ["series", "frame"])
  64. def test_loc_getitem_label(self, kind, request):
  65. # label
  66. obj = request.getfixturevalue(f"{kind}_empty")
  67. check_indexing_smoketest_or_raises(obj, "loc", "c", fails=KeyError)
  68. @pytest.mark.parametrize(
  69. "key, typs, axes",
  70. [
  71. ["f", ["ints", "uints", "labels", "mixed", "ts"], None],
  72. ["f", ["floats"], None],
  73. [20, ["ints", "uints", "mixed"], None],
  74. [20, ["labels"], None],
  75. [20, ["ts"], 0],
  76. [20, ["floats"], 0],
  77. ],
  78. )
  79. @pytest.mark.parametrize("kind", ["series", "frame"])
  80. def test_loc_getitem_label_out_of_range(self, key, typs, axes, kind, request):
  81. for typ in typs:
  82. obj = request.getfixturevalue(f"{kind}_{typ}")
  83. # out of range label
  84. check_indexing_smoketest_or_raises(
  85. obj, "loc", key, axes=axes, fails=KeyError
  86. )
  87. @pytest.mark.parametrize(
  88. "key, typs",
  89. [
  90. [[0, 1, 2], ["ints", "uints", "floats"]],
  91. [[1, 3.0, "A"], ["ints", "uints", "floats"]],
  92. ],
  93. )
  94. @pytest.mark.parametrize("kind", ["series", "frame"])
  95. def test_loc_getitem_label_list(self, key, typs, kind, request):
  96. for typ in typs:
  97. obj = request.getfixturevalue(f"{kind}_{typ}")
  98. # list of labels
  99. check_indexing_smoketest_or_raises(obj, "loc", key, fails=KeyError)
  100. @pytest.mark.parametrize(
  101. "key, typs, axes",
  102. [
  103. [[0, 1, 2], ["empty"], None],
  104. [[0, 2, 10], ["ints", "uints", "floats"], 0],
  105. [[3, 6, 7], ["ints", "uints", "floats"], 1],
  106. # GH 17758 - MultiIndex and missing keys
  107. [[(1, 3), (1, 4), (2, 5)], ["multi"], 0],
  108. ],
  109. )
  110. @pytest.mark.parametrize("kind", ["series", "frame"])
  111. def test_loc_getitem_label_list_with_missing(self, key, typs, axes, kind, request):
  112. for typ in typs:
  113. obj = request.getfixturevalue(f"{kind}_{typ}")
  114. check_indexing_smoketest_or_raises(
  115. obj, "loc", key, axes=axes, fails=KeyError
  116. )
  117. @pytest.mark.parametrize("typs", ["ints", "uints"])
  118. @pytest.mark.parametrize("kind", ["series", "frame"])
  119. def test_loc_getitem_label_list_fails(self, typs, kind, request):
  120. # fails
  121. obj = request.getfixturevalue(f"{kind}_{typs}")
  122. check_indexing_smoketest_or_raises(
  123. obj, "loc", [20, 30, 40], axes=1, fails=KeyError
  124. )
  125. def test_loc_getitem_label_array_like(self):
  126. # TODO: test something?
  127. # array like
  128. pass
  129. @pytest.mark.parametrize("kind", ["series", "frame"])
  130. def test_loc_getitem_bool(self, kind, request):
  131. obj = request.getfixturevalue(f"{kind}_empty")
  132. # boolean indexers
  133. b = [True, False, True, False]
  134. check_indexing_smoketest_or_raises(obj, "loc", b, fails=IndexError)
  135. @pytest.mark.parametrize(
  136. "slc, typs, axes, fails",
  137. [
  138. [
  139. slice(1, 3),
  140. ["labels", "mixed", "empty", "ts", "floats"],
  141. None,
  142. TypeError,
  143. ],
  144. [slice("20130102", "20130104"), ["ts"], 1, TypeError],
  145. [slice(2, 8), ["mixed"], 0, TypeError],
  146. [slice(2, 8), ["mixed"], 1, KeyError],
  147. [slice(2, 4, 2), ["mixed"], 0, TypeError],
  148. ],
  149. )
  150. @pytest.mark.parametrize("kind", ["series", "frame"])
  151. def test_loc_getitem_label_slice(self, slc, typs, axes, fails, kind, request):
  152. # label slices (with ints)
  153. # real label slices
  154. # GH 14316
  155. for typ in typs:
  156. obj = request.getfixturevalue(f"{kind}_{typ}")
  157. check_indexing_smoketest_or_raises(
  158. obj,
  159. "loc",
  160. slc,
  161. axes=axes,
  162. fails=fails,
  163. )
  164. def test_setitem_from_duplicate_axis(self):
  165. # GH#34034
  166. df = DataFrame(
  167. [[20, "a"], [200, "a"], [200, "a"]],
  168. columns=["col1", "col2"],
  169. index=[10, 1, 1],
  170. )
  171. df.loc[1, "col1"] = np.arange(2)
  172. expected = DataFrame(
  173. [[20, "a"], [0, "a"], [1, "a"]], columns=["col1", "col2"], index=[10, 1, 1]
  174. )
  175. tm.assert_frame_equal(df, expected)
  176. def test_column_types_consistent(self):
  177. # GH 26779
  178. df = DataFrame(
  179. data={
  180. "channel": [1, 2, 3],
  181. "A": ["String 1", np.NaN, "String 2"],
  182. "B": [
  183. Timestamp("2019-06-11 11:00:00"),
  184. pd.NaT,
  185. Timestamp("2019-06-11 12:00:00"),
  186. ],
  187. }
  188. )
  189. df2 = DataFrame(
  190. data={"A": ["String 3"], "B": [Timestamp("2019-06-11 12:00:00")]}
  191. )
  192. # Change Columns A and B to df2.values wherever Column A is NaN
  193. df.loc[df["A"].isna(), ["A", "B"]] = df2.values
  194. expected = DataFrame(
  195. data={
  196. "channel": [1, 2, 3],
  197. "A": ["String 1", "String 3", "String 2"],
  198. "B": [
  199. Timestamp("2019-06-11 11:00:00"),
  200. Timestamp("2019-06-11 12:00:00"),
  201. Timestamp("2019-06-11 12:00:00"),
  202. ],
  203. }
  204. )
  205. tm.assert_frame_equal(df, expected)
  206. @pytest.mark.parametrize(
  207. "obj, key, exp",
  208. [
  209. (
  210. DataFrame([[1]], columns=Index([False])),
  211. IndexSlice[:, False],
  212. Series([1], name=False),
  213. ),
  214. (Series([1], index=Index([False])), False, [1]),
  215. (DataFrame([[1]], index=Index([False])), False, Series([1], name=False)),
  216. ],
  217. )
  218. def test_loc_getitem_single_boolean_arg(self, obj, key, exp):
  219. # GH 44322
  220. res = obj.loc[key]
  221. if isinstance(exp, (DataFrame, Series)):
  222. tm.assert_equal(res, exp)
  223. else:
  224. assert res == exp
  225. class TestLocBaseIndependent:
  226. # Tests for loc that do not depend on subclassing Base
  227. def test_loc_npstr(self):
  228. # GH#45580
  229. df = DataFrame(index=date_range("2021", "2022"))
  230. result = df.loc[np.array(["2021/6/1"])[0] :]
  231. expected = df.iloc[151:]
  232. tm.assert_frame_equal(result, expected)
  233. @pytest.mark.parametrize(
  234. "msg, key",
  235. [
  236. (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")),
  237. (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")),
  238. (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")),
  239. (
  240. r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'",
  241. (Period(2018), Period(2016), "bar"),
  242. ),
  243. (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")),
  244. (
  245. r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)",
  246. (Period(2017), "foo", Period(2015)),
  247. ),
  248. (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")),
  249. ],
  250. )
  251. def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key):
  252. # GH#20684
  253. """
  254. parse_datetime_string_with_reso return parameter if type not matched.
  255. PeriodIndex.get_loc takes returned value from parse_datetime_string_with_reso
  256. as a tuple.
  257. If first argument is Period and a tuple has 3 items,
  258. process go on not raise exception
  259. """
  260. df = DataFrame(
  261. {
  262. "A": [Period(2019), "x1", "x2"],
  263. "B": [Period(2018), Period(2016), "y1"],
  264. "C": [Period(2017), "z1", Period(2015)],
  265. "V1": [1, 2, 3],
  266. "V2": [10, 20, 30],
  267. }
  268. ).set_index(["A", "B", "C"])
  269. with pytest.raises(KeyError, match=msg):
  270. df.loc[key]
  271. def test_loc_getitem_missing_unicode_key(self):
  272. df = DataFrame({"a": [1]})
  273. with pytest.raises(KeyError, match="\u05d0"):
  274. df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError
  275. def test_loc_getitem_dups(self):
  276. # GH 5678
  277. # repeated getitems on a dup index returning a ndarray
  278. df = DataFrame(
  279. np.random.random_sample((20, 5)), index=["ABCDE"[x % 5] for x in range(20)]
  280. )
  281. expected = df.loc["A", 0]
  282. result = df.loc[:, 0].loc["A"]
  283. tm.assert_series_equal(result, expected)
  284. def test_loc_getitem_dups2(self):
  285. # GH4726
  286. # dup indexing with iloc/loc
  287. df = DataFrame(
  288. [[1, 2, "foo", "bar", Timestamp("20130101")]],
  289. columns=["a", "a", "a", "a", "a"],
  290. index=[1],
  291. )
  292. expected = Series(
  293. [1, 2, "foo", "bar", Timestamp("20130101")],
  294. index=["a", "a", "a", "a", "a"],
  295. name=1,
  296. )
  297. result = df.iloc[0]
  298. tm.assert_series_equal(result, expected)
  299. result = df.loc[1]
  300. tm.assert_series_equal(result, expected)
  301. def test_loc_setitem_dups(self):
  302. # GH 6541
  303. df_orig = DataFrame(
  304. {
  305. "me": list("rttti"),
  306. "foo": list("aaade"),
  307. "bar": np.arange(5, dtype="float64") * 1.34 + 2,
  308. "bar2": np.arange(5, dtype="float64") * -0.34 + 2,
  309. }
  310. ).set_index("me")
  311. indexer = (
  312. "r",
  313. ["bar", "bar2"],
  314. )
  315. df = df_orig.copy()
  316. df.loc[indexer] *= 2.0
  317. tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
  318. indexer = (
  319. "r",
  320. "bar",
  321. )
  322. df = df_orig.copy()
  323. df.loc[indexer] *= 2.0
  324. assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
  325. indexer = (
  326. "t",
  327. ["bar", "bar2"],
  328. )
  329. df = df_orig.copy()
  330. df.loc[indexer] *= 2.0
  331. tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
  332. def test_loc_setitem_slice(self):
  333. # GH10503
  334. # assigning the same type should not change the type
  335. df1 = DataFrame({"a": [0, 1, 1], "b": Series([100, 200, 300], dtype="uint32")})
  336. ix = df1["a"] == 1
  337. newb1 = df1.loc[ix, "b"] + 1
  338. df1.loc[ix, "b"] = newb1
  339. expected = DataFrame(
  340. {"a": [0, 1, 1], "b": Series([100, 201, 301], dtype="uint32")}
  341. )
  342. tm.assert_frame_equal(df1, expected)
  343. # assigning a new type should get the inferred type
  344. df2 = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
  345. ix = df1["a"] == 1
  346. newb2 = df2.loc[ix, "b"]
  347. df1.loc[ix, "b"] = newb2
  348. expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64")
  349. tm.assert_frame_equal(df2, expected)
  350. def test_loc_setitem_dtype(self):
  351. # GH31340
  352. df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]})
  353. cols = ["a", "b", "c"]
  354. df.loc[:, cols] = df.loc[:, cols].astype("float32")
  355. # pre-2.0 this setting would swap in new arrays, in 2.0 it is correctly
  356. # in-place, consistent with non-split-path
  357. expected = DataFrame(
  358. {
  359. "id": ["A"],
  360. "a": np.array([1.2], dtype="float64"),
  361. "b": np.array([0.0], dtype="float64"),
  362. "c": np.array([-2.5], dtype="float64"),
  363. }
  364. ) # id is inferred as object
  365. tm.assert_frame_equal(df, expected)
  366. def test_getitem_label_list_with_missing(self):
  367. s = Series(range(3), index=["a", "b", "c"])
  368. # consistency
  369. with pytest.raises(KeyError, match="not in index"):
  370. s[["a", "d"]]
  371. s = Series(range(3))
  372. with pytest.raises(KeyError, match="not in index"):
  373. s[[0, 3]]
  374. @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]])
  375. def test_loc_getitem_bool_diff_len(self, index):
  376. # GH26658
  377. s = Series([1, 2, 3])
  378. msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}"
  379. with pytest.raises(IndexError, match=msg):
  380. s.loc[index]
  381. def test_loc_getitem_int_slice(self):
  382. # TODO: test something here?
  383. pass
  384. def test_loc_to_fail(self):
  385. # GH3449
  386. df = DataFrame(
  387. np.random.random((3, 3)), index=["a", "b", "c"], columns=["e", "f", "g"]
  388. )
  389. msg = (
  390. rf"\"None of \[Index\(\[1, 2\], dtype='{np.int_().dtype}'\)\] are "
  391. r"in the \[index\]\""
  392. )
  393. with pytest.raises(KeyError, match=msg):
  394. df.loc[[1, 2], [1, 2]]
  395. def test_loc_to_fail2(self):
  396. # GH 7496
  397. # loc should not fallback
  398. s = Series(dtype=object)
  399. s.loc[1] = 1
  400. s.loc["a"] = 2
  401. with pytest.raises(KeyError, match=r"^-1$"):
  402. s.loc[-1]
  403. msg = (
  404. rf"\"None of \[Index\(\[-1, -2\], dtype='{np.int_().dtype}'\)\] are "
  405. r"in the \[index\]\""
  406. )
  407. with pytest.raises(KeyError, match=msg):
  408. s.loc[[-1, -2]]
  409. msg = r"\"None of \[Index\(\['4'\], dtype='object'\)\] are in the \[index\]\""
  410. with pytest.raises(KeyError, match=msg):
  411. s.loc[["4"]]
  412. s.loc[-1] = 3
  413. with pytest.raises(KeyError, match="not in index"):
  414. s.loc[[-1, -2]]
  415. s["a"] = 2
  416. msg = (
  417. rf"\"None of \[Index\(\[-2\], dtype='{np.int_().dtype}'\)\] are "
  418. r"in the \[index\]\""
  419. )
  420. with pytest.raises(KeyError, match=msg):
  421. s.loc[[-2]]
  422. del s["a"]
  423. with pytest.raises(KeyError, match=msg):
  424. s.loc[[-2]] = 0
  425. def test_loc_to_fail3(self):
  426. # inconsistency between .loc[values] and .loc[values,:]
  427. # GH 7999
  428. df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"])
  429. msg = (
  430. rf"\"None of \[Index\(\[3\], dtype='{np.int_().dtype}'\)\] are "
  431. r"in the \[index\]\""
  432. )
  433. with pytest.raises(KeyError, match=msg):
  434. df.loc[[3], :]
  435. with pytest.raises(KeyError, match=msg):
  436. df.loc[[3]]
  437. def test_loc_getitem_list_with_fail(self):
  438. # 15747
  439. # should KeyError if *any* missing labels
  440. s = Series([1, 2, 3])
  441. s.loc[[2]]
  442. msg = f"\"None of [Index([3], dtype='{np.int_().dtype}')] are in the [index]"
  443. with pytest.raises(KeyError, match=re.escape(msg)):
  444. s.loc[[3]]
  445. # a non-match and a match
  446. with pytest.raises(KeyError, match="not in index"):
  447. s.loc[[2, 3]]
  448. def test_loc_index(self):
  449. # gh-17131
  450. # a boolean index should index like a boolean numpy array
  451. df = DataFrame(
  452. np.random.random(size=(5, 10)),
  453. index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"],
  454. )
  455. mask = df.index.map(lambda x: "alpha" in x)
  456. expected = df.loc[np.array(mask)]
  457. result = df.loc[mask]
  458. tm.assert_frame_equal(result, expected)
  459. result = df.loc[mask.values]
  460. tm.assert_frame_equal(result, expected)
  461. result = df.loc[pd.array(mask, dtype="boolean")]
  462. tm.assert_frame_equal(result, expected)
  463. def test_loc_general(self):
  464. df = DataFrame(
  465. np.random.rand(4, 4),
  466. columns=["A", "B", "C", "D"],
  467. index=["A", "B", "C", "D"],
  468. )
  469. # want this to work
  470. result = df.loc[:, "A":"B"].iloc[0:2, :]
  471. assert (result.columns == ["A", "B"]).all()
  472. assert (result.index == ["A", "B"]).all()
  473. # mixed type
  474. result = DataFrame({"a": [Timestamp("20130101")], "b": [1]}).iloc[0]
  475. expected = Series([Timestamp("20130101"), 1], index=["a", "b"], name=0)
  476. tm.assert_series_equal(result, expected)
  477. assert result.dtype == object
  478. @pytest.fixture
  479. def frame_for_consistency(self):
  480. return DataFrame(
  481. {
  482. "date": date_range("2000-01-01", "2000-01-5"),
  483. "val": Series(range(5), dtype=np.int64),
  484. }
  485. )
  486. @pytest.mark.parametrize(
  487. "val",
  488. [0, np.array(0, dtype=np.int64), np.array([0, 0, 0, 0, 0], dtype=np.int64)],
  489. )
  490. def test_loc_setitem_consistency(self, frame_for_consistency, val):
  491. # GH 6149
  492. # coerce similarly for setitem and loc when rows have a null-slice
  493. expected = DataFrame(
  494. {
  495. "date": Series(0, index=range(5), dtype=np.int64),
  496. "val": Series(range(5), dtype=np.int64),
  497. }
  498. )
  499. df = frame_for_consistency.copy()
  500. df.loc[:, "date"] = val
  501. tm.assert_frame_equal(df, expected)
  502. def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency):
  503. # GH 6149
  504. # coerce similarly for setitem and loc when rows have a null-slice
  505. expected = DataFrame(
  506. {
  507. "date": Series("foo", index=range(5)),
  508. "val": Series(range(5), dtype=np.int64),
  509. }
  510. )
  511. df = frame_for_consistency.copy()
  512. df.loc[:, "date"] = "foo"
  513. tm.assert_frame_equal(df, expected)
  514. def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency):
  515. # GH 6149
  516. # coerce similarly for setitem and loc when rows have a null-slice
  517. expected = DataFrame(
  518. {
  519. "date": Series(1.0, index=range(5)),
  520. "val": Series(range(5), dtype=np.int64),
  521. }
  522. )
  523. df = frame_for_consistency.copy()
  524. df.loc[:, "date"] = 1.0
  525. tm.assert_frame_equal(df, expected)
  526. def test_loc_setitem_consistency_single_row(self):
  527. # GH 15494
  528. # setting on frame with single row
  529. df = DataFrame({"date": Series([Timestamp("20180101")])})
  530. df.loc[:, "date"] = "string"
  531. expected = DataFrame({"date": Series(["string"])})
  532. tm.assert_frame_equal(df, expected)
  533. def test_loc_setitem_consistency_empty(self):
  534. # empty (essentially noops)
  535. # before the enforcement of #45333 in 2.0, the loc.setitem here would
  536. # change the dtype of df.x to int64
  537. expected = DataFrame(columns=["x", "y"])
  538. df = DataFrame(columns=["x", "y"])
  539. with tm.assert_produces_warning(None):
  540. df.loc[:, "x"] = 1
  541. tm.assert_frame_equal(df, expected)
  542. # setting with setitem swaps in a new array, so changes the dtype
  543. df = DataFrame(columns=["x", "y"])
  544. df["x"] = 1
  545. expected["x"] = expected["x"].astype(np.int64)
  546. tm.assert_frame_equal(df, expected)
  547. def test_loc_setitem_consistency_slice_column_len(self):
  548. # .loc[:,column] setting with slice == len of the column
  549. # GH10408
  550. levels = [
  551. ["Region_1"] * 4,
  552. ["Site_1", "Site_1", "Site_2", "Site_2"],
  553. [3987227376, 3980680971, 3977723249, 3977723089],
  554. ]
  555. mi = MultiIndex.from_arrays(levels, names=["Region", "Site", "RespondentID"])
  556. clevels = [
  557. ["Respondent", "Respondent", "Respondent", "OtherCat", "OtherCat"],
  558. ["Something", "StartDate", "EndDate", "Yes/No", "SomethingElse"],
  559. ]
  560. cols = MultiIndex.from_arrays(clevels, names=["Level_0", "Level_1"])
  561. values = [
  562. ["A", "5/25/2015 10:59", "5/25/2015 11:22", "Yes", np.nan],
  563. ["A", "5/21/2015 9:40", "5/21/2015 9:52", "Yes", "Yes"],
  564. ["A", "5/20/2015 8:27", "5/20/2015 8:41", "Yes", np.nan],
  565. ["A", "5/20/2015 8:33", "5/20/2015 9:09", "Yes", "No"],
  566. ]
  567. df = DataFrame(values, index=mi, columns=cols)
  568. df.loc[:, ("Respondent", "StartDate")] = to_datetime(
  569. df.loc[:, ("Respondent", "StartDate")]
  570. )
  571. df.loc[:, ("Respondent", "EndDate")] = to_datetime(
  572. df.loc[:, ("Respondent", "EndDate")]
  573. )
  574. df = df.infer_objects(copy=False)
  575. # Adding a new key
  576. df.loc[:, ("Respondent", "Duration")] = (
  577. df.loc[:, ("Respondent", "EndDate")]
  578. - df.loc[:, ("Respondent", "StartDate")]
  579. )
  580. # timedelta64[m] -> float, so this cannot be done inplace, so
  581. # no warning
  582. df.loc[:, ("Respondent", "Duration")] = df.loc[
  583. :, ("Respondent", "Duration")
  584. ] / Timedelta(60_000_000_000)
  585. expected = Series(
  586. [23.0, 12.0, 14.0, 36.0], index=df.index, name=("Respondent", "Duration")
  587. )
  588. tm.assert_series_equal(df[("Respondent", "Duration")], expected)
  589. @pytest.mark.parametrize("unit", ["Y", "M", "D", "h", "m", "s", "ms", "us"])
  590. def test_loc_assign_non_ns_datetime(self, unit):
  591. # GH 27395, non-ns dtype assignment via .loc should work
  592. # and return the same result when using simple assignment
  593. df = DataFrame(
  594. {
  595. "timestamp": [
  596. np.datetime64("2017-02-11 12:41:29"),
  597. np.datetime64("1991-11-07 04:22:37"),
  598. ]
  599. }
  600. )
  601. df.loc[:, unit] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]")
  602. df["expected"] = df.loc[:, "timestamp"].values.astype(f"datetime64[{unit}]")
  603. expected = Series(df.loc[:, "expected"], name=unit)
  604. tm.assert_series_equal(df.loc[:, unit], expected)
  605. def test_loc_modify_datetime(self):
  606. # see gh-28837
  607. df = DataFrame.from_dict(
  608. {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]}
  609. )
  610. df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True)
  611. df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"]
  612. df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"]
  613. expected = DataFrame(
  614. [
  615. [1485264372711, "2017-01-24 13:26:12.711", "2017-01-24 13:26:12.711"],
  616. [1485265925110, "2017-01-24 13:52:05.110", "2017-01-24 13:52:05.110"],
  617. [1540215845888, "2018-10-22 13:44:05.888", "2018-10-22 13:44:05.888"],
  618. [1540282121025, "2018-10-23 08:08:41.025", "2018-10-23 08:08:41.025"],
  619. ],
  620. columns=["date", "date_dt", "date_dt_cp"],
  621. )
  622. columns = ["date_dt", "date_dt_cp"]
  623. expected[columns] = expected[columns].apply(to_datetime)
  624. tm.assert_frame_equal(df, expected)
  625. def test_loc_setitem_frame_with_reindex(self):
  626. # GH#6254 setting issue
  627. df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
  628. df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
  629. # setting integer values into a float dataframe with loc is inplace,
  630. # so we retain float dtype
  631. ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
  632. expected = DataFrame({"A": ser})
  633. tm.assert_frame_equal(df, expected)
  634. def test_loc_setitem_frame_with_reindex_mixed(self):
  635. # GH#40480
  636. df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float)
  637. df["B"] = "string"
  638. df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
  639. ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
  640. # pre-2.0 this setting swapped in a new array, now it is inplace
  641. # consistent with non-split-path
  642. expected = DataFrame({"A": ser.astype(float)})
  643. expected["B"] = "string"
  644. tm.assert_frame_equal(df, expected)
  645. def test_loc_setitem_frame_with_inverted_slice(self):
  646. # GH#40480
  647. df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float)
  648. df["B"] = "string"
  649. df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64")
  650. # pre-2.0 this setting swapped in a new array, now it is inplace
  651. # consistent with non-split-path
  652. expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3])
  653. tm.assert_frame_equal(df, expected)
  654. def test_loc_setitem_empty_frame(self):
  655. # GH#6252 setting with an empty frame
  656. keys1 = ["@" + str(i) for i in range(5)]
  657. val1 = np.arange(5, dtype="int64")
  658. keys2 = ["@" + str(i) for i in range(4)]
  659. val2 = np.arange(4, dtype="int64")
  660. index = list(set(keys1).union(keys2))
  661. df = DataFrame(index=index)
  662. df["A"] = np.nan
  663. df.loc[keys1, "A"] = val1
  664. df["B"] = np.nan
  665. df.loc[keys2, "B"] = val2
  666. # Because df["A"] was initialized as float64, setting values into it
  667. # is inplace, so that dtype is retained
  668. sera = Series(val1, index=keys1, dtype=np.float64)
  669. serb = Series(val2, index=keys2)
  670. expected = DataFrame({"A": sera, "B": serb}).reindex(index=index)
  671. tm.assert_frame_equal(df, expected)
  672. def test_loc_setitem_frame(self):
  673. df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD"))
  674. result = df.iloc[0, 0]
  675. df.loc["a", "A"] = 1
  676. result = df.loc["a", "A"]
  677. assert result == 1
  678. result = df.iloc[0, 0]
  679. assert result == 1
  680. df.loc[:, "B":"D"] = 0
  681. expected = df.loc[:, "B":"D"]
  682. result = df.iloc[:, 1:]
  683. tm.assert_frame_equal(result, expected)
  684. def test_loc_setitem_frame_nan_int_coercion_invalid(self):
  685. # GH 8669
  686. # invalid coercion of nan -> int
  687. df = DataFrame({"A": [1, 2, 3], "B": np.nan})
  688. df.loc[df.B > df.A, "B"] = df.A
  689. expected = DataFrame({"A": [1, 2, 3], "B": np.nan})
  690. tm.assert_frame_equal(df, expected)
  691. def test_loc_setitem_frame_mixed_labels(self):
  692. # GH 6546
  693. # setting with mixed labels
  694. df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})
  695. result = df.loc[0, [1, 2]]
  696. expected = Series(
  697. [1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0
  698. )
  699. tm.assert_series_equal(result, expected)
  700. expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]})
  701. df.loc[0, [1, 2]] = [5, 6]
  702. tm.assert_frame_equal(df, expected)
  703. def test_loc_setitem_frame_multiples(self):
  704. # multiple setting
  705. df = DataFrame(
  706. {"A": ["foo", "bar", "baz"], "B": Series(range(3), dtype=np.int64)}
  707. )
  708. rhs = df.loc[1:2]
  709. rhs.index = df.index[0:2]
  710. df.loc[0:1] = rhs
  711. expected = DataFrame(
  712. {"A": ["bar", "baz", "baz"], "B": Series([1, 2, 2], dtype=np.int64)}
  713. )
  714. tm.assert_frame_equal(df, expected)
  715. # multiple setting with frame on rhs (with M8)
  716. df = DataFrame(
  717. {
  718. "date": date_range("2000-01-01", "2000-01-5"),
  719. "val": Series(range(5), dtype=np.int64),
  720. }
  721. )
  722. expected = DataFrame(
  723. {
  724. "date": [
  725. Timestamp("20000101"),
  726. Timestamp("20000102"),
  727. Timestamp("20000101"),
  728. Timestamp("20000102"),
  729. Timestamp("20000103"),
  730. ],
  731. "val": Series([0, 1, 0, 1, 2], dtype=np.int64),
  732. }
  733. )
  734. rhs = df.loc[0:2]
  735. rhs.index = df.index[2:5]
  736. df.loc[2:4] = rhs
  737. tm.assert_frame_equal(df, expected)
  738. @pytest.mark.parametrize(
  739. "indexer", [["A"], slice(None, "A", None), np.array(["A"])]
  740. )
  741. @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
  742. def test_loc_setitem_with_scalar_index(self, indexer, value):
  743. # GH #19474
  744. # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
  745. # elementwisely, not using "setter('A', ['Z'])".
  746. df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
  747. df.loc[0, indexer] = value
  748. result = df.loc[0, "A"]
  749. assert is_scalar(result) and result == "Z"
  750. @pytest.mark.parametrize(
  751. "index,box,expected",
  752. [
  753. (
  754. ([0, 2], ["A", "B", "C", "D"]),
  755. 7,
  756. DataFrame(
  757. [[7, 7, 7, 7], [3, 4, np.nan, np.nan], [7, 7, 7, 7]],
  758. columns=["A", "B", "C", "D"],
  759. ),
  760. ),
  761. (
  762. (1, ["C", "D"]),
  763. [7, 8],
  764. DataFrame(
  765. [[1, 2, np.nan, np.nan], [3, 4, 7, 8], [5, 6, np.nan, np.nan]],
  766. columns=["A", "B", "C", "D"],
  767. ),
  768. ),
  769. (
  770. (1, ["A", "B", "C"]),
  771. np.array([7, 8, 9], dtype=np.int64),
  772. DataFrame(
  773. [[1, 2, np.nan], [7, 8, 9], [5, 6, np.nan]], columns=["A", "B", "C"]
  774. ),
  775. ),
  776. (
  777. (slice(1, 3, None), ["B", "C", "D"]),
  778. [[7, 8, 9], [10, 11, 12]],
  779. DataFrame(
  780. [[1, 2, np.nan, np.nan], [3, 7, 8, 9], [5, 10, 11, 12]],
  781. columns=["A", "B", "C", "D"],
  782. ),
  783. ),
  784. (
  785. (slice(1, 3, None), ["C", "A", "D"]),
  786. np.array([[7, 8, 9], [10, 11, 12]], dtype=np.int64),
  787. DataFrame(
  788. [[1, 2, np.nan, np.nan], [8, 4, 7, 9], [11, 6, 10, 12]],
  789. columns=["A", "B", "C", "D"],
  790. ),
  791. ),
  792. (
  793. (slice(None, None, None), ["A", "C"]),
  794. DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
  795. DataFrame(
  796. [[7, 2, 8], [9, 4, 10], [11, 6, 12]], columns=["A", "B", "C"]
  797. ),
  798. ),
  799. ],
  800. )
  801. def test_loc_setitem_missing_columns(self, index, box, expected):
  802. # GH 29334
  803. df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
  804. df.loc[index] = box
  805. tm.assert_frame_equal(df, expected)
  806. def test_loc_coercion(self):
  807. # GH#12411
  808. df = DataFrame({"date": [Timestamp("20130101").tz_localize("UTC"), pd.NaT]})
  809. expected = df.dtypes
  810. result = df.iloc[[0]]
  811. tm.assert_series_equal(result.dtypes, expected)
  812. result = df.iloc[[1]]
  813. tm.assert_series_equal(result.dtypes, expected)
  814. def test_loc_coercion2(self):
  815. # GH#12045
  816. df = DataFrame({"date": [datetime(2012, 1, 1), datetime(1012, 1, 2)]})
  817. expected = df.dtypes
  818. result = df.iloc[[0]]
  819. tm.assert_series_equal(result.dtypes, expected)
  820. result = df.iloc[[1]]
  821. tm.assert_series_equal(result.dtypes, expected)
  822. def test_loc_coercion3(self):
  823. # GH#11594
  824. df = DataFrame({"text": ["some words"] + [None] * 9})
  825. expected = df.dtypes
  826. result = df.iloc[0:2]
  827. tm.assert_series_equal(result.dtypes, expected)
  828. result = df.iloc[3:]
  829. tm.assert_series_equal(result.dtypes, expected)
  830. def test_setitem_new_key_tz(self, indexer_sl):
  831. # GH#12862 should not raise on assigning the second value
  832. vals = [
  833. to_datetime(42).tz_localize("UTC"),
  834. to_datetime(666).tz_localize("UTC"),
  835. ]
  836. expected = Series(vals, index=["foo", "bar"])
  837. ser = Series(dtype=object)
  838. indexer_sl(ser)["foo"] = vals[0]
  839. indexer_sl(ser)["bar"] = vals[1]
  840. tm.assert_series_equal(ser, expected)
  def test_loc_non_unique(self):
      """Label slices on a duplicated index raise when unsorted, work when sorted."""
      # GH3659
      # non-unique indexer with loc slice
      # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
      columns = {"A": [1, 2, 3, 4, 5, 6], "B": [3, 4, 5, 6, 7, 8]}
      labels = [0, 1, 0, 1, 2, 3]

      # these are going to raise because the index is non-monotonic
      unsorted = DataFrame(columns, index=labels)
      failing_slices = [
          (slice(1, None), "'Cannot get left slice bound for non-unique label: 1'"),
          (slice(0, None), "'Cannot get left slice bound for non-unique label: 0'"),
          (slice(1, 2), "'Cannot get left slice bound for non-unique label: 1'"),
      ]
      for key, msg in failing_slices:
          with pytest.raises(KeyError, match=msg):
              unsorted.loc[key]

      # monotonic are ok
      ordered = DataFrame(columns, index=labels).sort_index(axis=0)

      expected = DataFrame({"A": [2, 4, 5, 6], "B": [4, 6, 7, 8]}, index=[1, 1, 2, 3])
      tm.assert_frame_equal(ordered.loc[1:], expected)

      tm.assert_frame_equal(ordered.loc[0:], ordered)

      expected = DataFrame({"A": [2, 4, 5], "B": [4, 6, 7]}, index=[1, 1, 2])
      tm.assert_frame_equal(ordered.loc[1:2], expected)
  @pytest.mark.arm_slow
  @pytest.mark.parametrize("length, l2", [[900, 100], [900000, 100000]])
  def test_loc_non_unique_memory_error(self, length, l2):
      """Selecting many rows through a non-unique index gives the expected frame."""
      # GH 4280
      # non_unique index with a large selection triggers a memory error
      columns = list("ABCDEFG")
      width = len(columns)

      random_part = DataFrame(
          np.random.randn(length, width),
          index=np.arange(length),
          columns=columns,
      )
      ones_part = DataFrame(np.ones((l2, width)), index=[0] * l2, columns=columns)
      df = pd.concat([random_part, ones_part])

      assert df.index.is_unique is False

      mask = np.arange(l2)
      result = df.loc[mask]

      # label 0 matches the first random row plus every all-ones row
      repeated_ones = DataFrame(
          np.ones((len(mask), width)),
          index=[0] * len(mask),
          columns=columns,
      )
      expected = pd.concat([df.take([0]), repeated_ones, df.take(mask[1:])])
      tm.assert_frame_equal(result, expected)
  901. def test_loc_name(self):
  902. # GH 3880
  903. df = DataFrame([[1, 1], [1, 1]])
  904. df.index.name = "index_name"
  905. result = df.iloc[[0, 1]].index.name
  906. assert result == "index_name"
  907. result = df.loc[[0, 1]].index.name
  908. assert result == "index_name"
  909. def test_loc_empty_list_indexer_is_ok(self):
  910. df = tm.makeCustomDataframe(5, 2)
  911. # vertical empty
  912. tm.assert_frame_equal(
  913. df.loc[:, []], df.iloc[:, :0], check_index_type=True, check_column_type=True
  914. )
  915. # horizontal empty
  916. tm.assert_frame_equal(
  917. df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True
  918. )
  919. # horizontal empty
  920. tm.assert_frame_equal(
  921. df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
  922. )
  923. def test_identity_slice_returns_new_object(self, using_copy_on_write):
  924. # GH13873
  925. original_df = DataFrame({"a": [1, 2, 3]})
  926. sliced_df = original_df.loc[:]
  927. assert sliced_df is not original_df
  928. assert original_df[:] is not original_df
  929. assert original_df.loc[:, :] is not original_df
  930. # should be a shallow copy
  931. assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values)
  932. # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig
  933. # depending on CoW
  934. original_df.loc[:, "a"] = [4, 4, 4]
  935. if using_copy_on_write:
  936. assert (sliced_df["a"] == [1, 2, 3]).all()
  937. else:
  938. assert (sliced_df["a"] == 4).all()
  939. # These should not return copies
  940. df = DataFrame(np.random.randn(10, 4))
  941. if using_copy_on_write:
  942. assert df[0] is not df.loc[:, 0]
  943. else:
  944. assert df[0] is df.loc[:, 0]
  945. # Same tests for Series
  946. original_series = Series([1, 2, 3, 4, 5, 6])
  947. sliced_series = original_series.loc[:]
  948. assert sliced_series is not original_series
  949. assert original_series[:] is not original_series
  950. original_series[:3] = [7, 8, 9]
  951. if using_copy_on_write:
  952. assert all(sliced_series[:3] == [1, 2, 3])
  953. else:
  954. assert all(sliced_series[:3] == [7, 8, 9])
  def test_loc_copy_vs_view(self, request, using_copy_on_write):
      """In-place addition on a column taken via ``loc`` leaves the frame unchanged."""
      # GH 15631
      if not using_copy_on_write:
          request.node.add_marker(
              pytest.mark.xfail(reason="accidental fix reverted - GH37497")
          )

      base = DataFrame(zip(range(3), range(3)), columns=["a", "b"])

      # column selected with a full slice
      first = base.copy()
      column = first.loc[:, "a"]
      column += 2
      tm.assert_frame_equal(base, first)

      # column selected with the explicit index
      second = base.copy()
      column = second.loc[base.index, "a"]
      column += 2
      tm.assert_frame_equal(base, second)
  969. def test_loc_uint64(self):
  970. # GH20722
  971. # Test whether loc accept uint64 max value as index.
  972. umax = np.iinfo("uint64").max
  973. ser = Series([1, 2], index=[umax - 1, umax])
  974. result = ser.loc[umax - 1]
  975. expected = ser.iloc[0]
  976. assert result == expected
  977. result = ser.loc[[umax - 1]]
  978. expected = ser.iloc[[0]]
  979. tm.assert_series_equal(result, expected)
  980. result = ser.loc[[umax - 1, umax]]
  981. tm.assert_series_equal(result, ser)
  def test_loc_uint64_disallow_negative(self):
      """A negative label on a ``uint64``-max index raises ``KeyError``."""
      # GH#41775
      umax = np.iinfo("uint64").max
      ser = Series([1, 2], index=[umax - 1, umax])

      for negative_key in (-1, [-1]):
          with pytest.raises(KeyError, match="-1"):
              # don't wrap around
              ser.loc[negative_key]
  992. def test_loc_setitem_empty_append_expands_rows(self):
  993. # GH6173, various appends to an empty dataframe
  994. data = [1, 2, 3]
  995. expected = DataFrame({"x": data, "y": [None] * len(data)})
  996. # appends to fit length of data
  997. df = DataFrame(columns=["x", "y"])
  998. df.loc[:, "x"] = data
  999. tm.assert_frame_equal(df, expected)
  1000. def test_loc_setitem_empty_append_expands_rows_mixed_dtype(self):
  1001. # GH#37932 same as test_loc_setitem_empty_append_expands_rows
  1002. # but with mixed dtype so we go through take_split_path
  1003. data = [1, 2, 3]
  1004. expected = DataFrame({"x": data, "y": [None] * len(data)})
  1005. df = DataFrame(columns=["x", "y"])
  1006. df["x"] = df["x"].astype(np.int64)
  1007. df.loc[:, "x"] = data
  1008. tm.assert_frame_equal(df, expected)
  1009. def test_loc_setitem_empty_append_single_value(self):
  1010. # only appends one value
  1011. expected = DataFrame({"x": [1.0], "y": [np.nan]})
  1012. df = DataFrame(columns=["x", "y"], dtype=float)
  1013. df.loc[0, "x"] = expected.loc[0, "x"]
  1014. tm.assert_frame_equal(df, expected)
  def test_loc_setitem_empty_append_raises(self):
      """Missing labels raise ``KeyError``; a slice with two values raises ``ValueError``."""
      # GH6173, various appends to an empty dataframe
      data = [1, 2]
      df = DataFrame(columns=["x", "y"])
      df.index = df.index.astype(np.int64)

      missing_labels_msg = (
          rf"None of \[Index\(\[0, 1\], dtype='{np.int_().dtype}'\)\] "
          r"are in the \[index\]"
      )
      with pytest.raises(KeyError, match=missing_labels_msg):
          df.loc[[0, 1], "x"] = data

      length_mismatch_msg = "|".join(
          [
              "cannot copy sequence with size 2 to array axis with dimension 0",
              r"could not broadcast input array from shape \(2,\) into shape \(0,\)",
              "Must have equal len keys and value when setting with an iterable",
          ]
      )
      with pytest.raises(ValueError, match=length_mismatch_msg):
          df.loc[0:2, "x"] = data
  1035. def test_indexing_zerodim_np_array(self):
  1036. # GH24924
  1037. df = DataFrame([[1, 2], [3, 4]])
  1038. result = df.loc[np.array(0)]
  1039. s = Series([1, 2], name=0)
  1040. tm.assert_series_equal(result, s)
  1041. def test_series_indexing_zerodim_np_array(self):
  1042. # GH24924
  1043. s = Series([1, 2])
  1044. result = s.loc[np.array(0)]
  1045. assert result == 1
  1046. def test_loc_reverse_assignment(self):
  1047. # GH26939
  1048. data = [1, 2, 3, 4, 5, 6] + [None] * 4
  1049. expected = Series(data, index=range(2010, 2020))
  1050. result = Series(index=range(2010, 2020), dtype=np.float64)
  1051. result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1]
  1052. tm.assert_series_equal(result, expected)
  1053. def test_loc_setitem_str_to_small_float_conversion_type(self):
  1054. # GH#20388
  1055. np.random.seed(13)
  1056. col_data = [str(np.random.random() * 1e-12) for _ in range(5)]
  1057. result = DataFrame(col_data, columns=["A"])
  1058. expected = DataFrame(col_data, columns=["A"], dtype=object)
  1059. tm.assert_frame_equal(result, expected)
  1060. # assigning with loc/iloc attempts to set the values inplace, which
  1061. # in this case is successful
  1062. result.loc[result.index, "A"] = [float(x) for x in col_data]
  1063. expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
  1064. tm.assert_frame_equal(result, expected)
  1065. # assigning the entire column using __setitem__ swaps in the new array
  1066. # GH#???
  1067. result["A"] = [float(x) for x in col_data]
  1068. expected = DataFrame(col_data, columns=["A"], dtype=float)
  1069. tm.assert_frame_equal(result, expected)
  1070. def test_loc_getitem_time_object(self, frame_or_series):
  1071. rng = date_range("1/1/2000", "1/5/2000", freq="5min")
  1072. mask = (rng.hour == 9) & (rng.minute == 30)
  1073. obj = DataFrame(np.random.randn(len(rng), 3), index=rng)
  1074. obj = tm.get_obj(obj, frame_or_series)
  1075. result = obj.loc[time(9, 30)]
  1076. exp = obj.loc[mask]
  1077. tm.assert_equal(result, exp)
  1078. chunk = obj.loc["1/4/2000":]
  1079. result = chunk.loc[time(9, 30)]
  1080. expected = result[-1:]
  1081. # Without resetting the freqs, these are 5 min and 1440 min, respectively
  1082. result.index = result.index._with_freq(None)
  1083. expected.index = expected.index._with_freq(None)
  1084. tm.assert_equal(result, expected)
  @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
  @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
  @td.skip_if_no_scipy
  def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
      """``loc`` with a ``range`` key on a frame built from a scipy sparse matrix."""
      import scipy.sparse

      matrix_cls = getattr(scipy.sparse, spmatrix_t)

      # The bug is triggered by a sparse matrix with purely sparse columns. So the
      # recipe below generates a rectangular matrix of dimension (5, 7) where all the
      # diagonal cells are ones, meaning the last two columns are purely sparse.
      rows, cols = 5, 7
      spmatrix = matrix_cls(np.eye(rows, cols, dtype=dtype), dtype=dtype)
      df = DataFrame.sparse.from_spmatrix(spmatrix)
      itr_idx = range(2, rows)

      # regression test for GH#34526
      selected_values = df.loc[itr_idx].values
      tm.assert_numpy_array_equal(selected_values, spmatrix.toarray()[itr_idx])

      # regression test for GH#34540
      selected_dtypes = df.loc[itr_idx].dtypes.values
      expected_dtypes = np.full(cols, SparseDtype(dtype, fill_value=0))
      tm.assert_numpy_array_equal(selected_dtypes, expected_dtypes)
  1106. def test_loc_getitem_listlike_all_retains_sparse(self):
  1107. df = DataFrame({"A": pd.array([0, 0], dtype=SparseDtype("int64"))})
  1108. result = df.loc[[0, 1]]
  1109. tm.assert_frame_equal(result, df)
  1110. @td.skip_if_no_scipy
  1111. def test_loc_getitem_sparse_frame(self):
  1112. # GH34687
  1113. from scipy.sparse import eye
  1114. df = DataFrame.sparse.from_spmatrix(eye(5))
  1115. result = df.loc[range(2)]
  1116. expected = DataFrame(
  1117. [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0, 0.0]],
  1118. dtype=SparseDtype("float64", 0.0),
  1119. )
  1120. tm.assert_frame_equal(result, expected)
  1121. result = df.loc[range(2)].loc[range(1)]
  1122. expected = DataFrame(
  1123. [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0)
  1124. )
  1125. tm.assert_frame_equal(result, expected)
  1126. def test_loc_getitem_sparse_series(self):
  1127. # GH34687
  1128. s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0))
  1129. result = s.loc[range(2)]
  1130. expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0))
  1131. tm.assert_series_equal(result, expected)
  1132. result = s.loc[range(3)].loc[range(2)]
  1133. expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0))
  1134. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("indexer", ["loc", "iloc"])
  def test_getitem_single_row_sparse_df(self, indexer):
      """Taking row 0 of a sparse frame yields a sparse Series named 0."""
      # GH#46406
      first_row = [1.0, 0.0, 1.5]
      df = DataFrame([first_row, [0.0, 2.0, 0.0]], dtype=SparseDtype(float))

      accessor = getattr(df, indexer)
      expected = Series(first_row, dtype=SparseDtype(float), name=0)
      tm.assert_series_equal(accessor[0], expected)
  @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
  def test_loc_getitem_iterable(self, float_frame, key_type):
      """Column keys wrapped in various iterable types select like a plain list."""
      labels = ["A", "B", "C"]
      # the key is used exactly once because it may be a one-shot iterator
      wrapped_key = key_type(["A", "B", "C"])

      result = float_frame.loc[:, wrapped_key]
      tm.assert_frame_equal(result, float_frame.loc[:, labels])
  1148. def test_loc_getitem_timedelta_0seconds(self):
  1149. # GH#10583
  1150. df = DataFrame(np.random.normal(size=(10, 4)))
  1151. df.index = timedelta_range(start="0s", periods=10, freq="s")
  1152. expected = df.loc[Timedelta("0s") :, :]
  1153. result = df.loc["0s":, :]
  1154. tm.assert_frame_equal(result, expected)
  @pytest.mark.parametrize(
      "val,expected", [(2**63 - 1, Series([1])), (2**63, Series([2]))]
  )
  def test_loc_getitem_uint64_scalar(self, val, expected):
      """Scalar labels around ``2**63`` each select their own row."""
      # see GH#19399
      labels = [2**63 - 1, 2**63]
      df = DataFrame([1, 2], index=labels)

      # the selected row is named after the label used to fetch it
      expected.name = val
      tm.assert_series_equal(df.loc[val], expected)
  1164. def test_loc_setitem_int_label_with_float_index(self, float_numpy_dtype):
  1165. # note labels are floats
  1166. dtype = float_numpy_dtype
  1167. ser = Series(["a", "b", "c"], index=Index([0, 0.5, 1], dtype=dtype))
  1168. expected = ser.copy()
  1169. ser.loc[1] = "zoo"
  1170. expected.iloc[2] = "zoo"
  1171. tm.assert_series_equal(ser, expected)
  @pytest.mark.parametrize(
      "indexer, expected",
      [
          # The test name is a misnomer in the 0 case as df.index[indexer]
          # is a scalar.
          (0, [20, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
          (slice(4, 8), [0, 1, 2, 3, 20, 20, 20, 20, 8, 9]),
          ([3, 5], [0, 1, 2, 20, 4, 20, 6, 7, 8, 9]),
      ],
  )
  def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected):
      """Setting rows picked from a ``TimedeltaIndex`` changes exactly those rows."""
      # GH#16637
      tdi = to_timedelta(range(10), unit="s")
      df = DataFrame({"x": range(10)}, dtype="int64", index=tdi)

      target_labels = df.index[indexer]
      df.loc[target_labels, "x"] = 20

      expected_frame = DataFrame(expected, index=tdi, columns=["x"], dtype="int64")
      tm.assert_frame_equal(expected_frame, df)
  1194. def test_loc_setitem_categorical_values_partial_column_slice(self):
  1195. # Assigning a Category to parts of a int/... column uses the values of
  1196. # the Categorical
  1197. df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
  1198. exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
  1199. df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
  1200. df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
  1201. tm.assert_frame_equal(df, exp)
  1202. def test_loc_setitem_single_row_categorical(self):
  1203. # GH#25495
  1204. df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
  1205. categories = Categorical(df["Alpha"], categories=["a", "b", "c"])
  1206. # pre-2.0 this swapped in a new array, in 2.0 it operates inplace,
  1207. # consistent with non-split-path
  1208. df.loc[:, "Alpha"] = categories
  1209. result = df["Alpha"]
  1210. expected = Series(categories, index=df.index, name="Alpha").astype(object)
  1211. tm.assert_series_equal(result, expected)
  1212. # double-check that the non-loc setting retains categoricalness
  1213. df["Alpha"] = categories
  1214. tm.assert_series_equal(df["Alpha"], Series(categories, name="Alpha"))
  1215. def test_loc_setitem_datetime_coercion(self):
  1216. # GH#1048
  1217. df = DataFrame({"c": [Timestamp("2010-10-01")] * 3})
  1218. df.loc[0:1, "c"] = np.datetime64("2008-08-08")
  1219. assert Timestamp("2008-08-08") == df.loc[0, "c"]
  1220. assert Timestamp("2008-08-08") == df.loc[1, "c"]
  1221. df.loc[2, "c"] = date(2005, 5, 5)
  1222. assert Timestamp("2005-05-05").date() == df.loc[2, "c"]
  @pytest.mark.parametrize("idxer", ["var", ["var"]])
  def test_loc_setitem_datetimeindex_tz(self, idxer, tz_naive_fixture):
      """Setting a frame into a column via ``loc``, with the index built using ``tz``."""
      # GH#11365
      idx = date_range(start="2015-07-12", periods=3, freq="H", tz=tz_naive_fixture)
      expected = DataFrame(1.2, index=idx, columns=["var"])

      # if result started off with object dtype, then the .loc.__setitem__
      # below would retain object dtype
      result = DataFrame(index=idx, columns=["var"], dtype=np.float64)
      result.loc[:, idxer] = expected

      tm.assert_frame_equal(result, expected)
  1234. def test_loc_setitem_time_key(self, using_array_manager):
  1235. index = date_range("2012-01-01", "2012-01-05", freq="30min")
  1236. df = DataFrame(np.random.randn(len(index), 5), index=index)
  1237. akey = time(12, 0, 0)
  1238. bkey = slice(time(13, 0, 0), time(14, 0, 0))
  1239. ainds = [24, 72, 120, 168]
  1240. binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
  1241. result = df.copy()
  1242. result.loc[akey] = 0
  1243. result = result.loc[akey]
  1244. expected = df.loc[akey].copy()
  1245. expected.loc[:] = 0
  1246. if using_array_manager:
  1247. # TODO(ArrayManager) we are still overwriting columns
  1248. expected = expected.astype(float)
  1249. tm.assert_frame_equal(result, expected)
  1250. result = df.copy()
  1251. result.loc[akey] = 0
  1252. result.loc[akey] = df.iloc[ainds]
  1253. tm.assert_frame_equal(result, df)
  1254. result = df.copy()
  1255. result.loc[bkey] = 0
  1256. result = result.loc[bkey]
  1257. expected = df.loc[bkey].copy()
  1258. expected.loc[:] = 0
  1259. if using_array_manager:
  1260. # TODO(ArrayManager) we are still overwriting columns
  1261. expected = expected.astype(float)
  1262. tm.assert_frame_equal(result, expected)
  1263. result = df.copy()
  1264. result.loc[bkey] = 0
  1265. result.loc[bkey] = df.iloc[binds]
  1266. tm.assert_frame_equal(result, df)
  @pytest.mark.parametrize("key", ["A", ["A"], ("A", slice(None))])
  def test_loc_setitem_unsorted_multiindex_columns(self, key):
      """Setting all ``"A"`` columns works for unsorted and sorted MultiIndex columns."""
      # GH#38601
      mi = MultiIndex.from_tuples([("A", 4), ("B", "3"), ("A", "2")])
      df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=mi)
      expected = DataFrame([[0, 2, 0], [0, 5, 0]], columns=mi)

      # columns in their original, unsorted order
      unsorted = df.copy()
      unsorted.loc[:, key] = np.zeros((2, 2), dtype="int64")
      tm.assert_frame_equal(unsorted, expected)

      # same assignment after sorting the columns
      df = df.sort_index(axis=1)
      df.loc[:, key] = np.zeros((2, 2), dtype="int64")
      tm.assert_frame_equal(df, expected.sort_index(axis=1))
  1280. def test_loc_setitem_uint_drop(self, any_int_numpy_dtype):
  1281. # see GH#18311
  1282. # assigning series.loc[0] = 4 changed series.dtype to int
  1283. series = Series([1, 2, 3], dtype=any_int_numpy_dtype)
  1284. series.loc[0] = 4
  1285. expected = Series([4, 2, 3], dtype=any_int_numpy_dtype)
  1286. tm.assert_series_equal(series, expected)
  1287. def test_loc_setitem_td64_non_nano(self):
  1288. # GH#14155
  1289. ser = Series(10 * [np.timedelta64(10, "m")])
  1290. ser.loc[[1, 2, 3]] = np.timedelta64(20, "m")
  1291. expected = Series(10 * [np.timedelta64(10, "m")])
  1292. expected.loc[[1, 2, 3]] = Timedelta(np.timedelta64(20, "m"))
  1293. tm.assert_series_equal(ser, expected)
  def test_loc_setitem_2d_to_1d_raises(self):
      """Setting a 2x2 array into a length-2 Series raises ``ValueError``."""
      data = np.random.randn(2, 2)
      ser = Series(range(2))

      # range key
      range_key_msg = "|".join(
          [
              r"shape mismatch: value array of shape \(2,2\)",
              r"cannot reshape array of size 4 into shape \(2,\)",
          ]
      )
      with pytest.raises(ValueError, match=range_key_msg):
          ser.loc[range(2)] = data

      # full-slice key
      full_slice_msg = (
          r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)"
      )
      with pytest.raises(ValueError, match=full_slice_msg):
          ser.loc[:] = data
  1308. def test_loc_getitem_interval_index(self):
  1309. # GH#19977
  1310. index = pd.interval_range(start=0, periods=3)
  1311. df = DataFrame(
  1312. [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"]
  1313. )
  1314. expected = 1
  1315. result = df.loc[0.5, "A"]
  1316. tm.assert_almost_equal(result, expected)
  1317. def test_loc_getitem_interval_index2(self):
  1318. # GH#19977
  1319. index = pd.interval_range(start=0, periods=3, closed="both")
  1320. df = DataFrame(
  1321. [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=index, columns=["A", "B", "C"]
  1322. )
  1323. index_exp = pd.interval_range(start=0, periods=2, freq=1, closed="both")
  1324. expected = Series([1, 4], index=index_exp, name="A")
  1325. result = df.loc[1, "A"]
  1326. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("tpl", [(1,), (1, 2)])
  def test_loc_getitem_index_single_double_tuples(self, tpl):
      """A tuple label in a non-tupleized Index can be selected via a list key."""
      # GH#20991
      all_labels = Index([(1,), (1, 2)], name="A", tupleize_cols=False)
      df = DataFrame(index=all_labels)

      result = df.loc[[tpl]]

      only_requested = Index([tpl], name="A", tupleize_cols=False)
      tm.assert_frame_equal(result, DataFrame(index=only_requested))
  1340. def test_loc_getitem_index_namedtuple(self):
  1341. IndexType = namedtuple("IndexType", ["a", "b"])
  1342. idx1 = IndexType("foo", "bar")
  1343. idx2 = IndexType("baz", "bof")
  1344. index = Index([idx1, idx2], name="composite_index", tupleize_cols=False)
  1345. df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
  1346. result = df.loc[IndexType("foo", "bar")]["A"]
  1347. assert result == 1
  1348. def test_loc_setitem_single_column_mixed(self):
  1349. df = DataFrame(
  1350. np.random.randn(5, 3),
  1351. index=["a", "b", "c", "d", "e"],
  1352. columns=["foo", "bar", "baz"],
  1353. )
  1354. df["str"] = "qux"
  1355. df.loc[df.index[::2], "str"] = np.nan
  1356. expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object)
  1357. tm.assert_almost_equal(df["str"].values, expected)
  1358. def test_loc_setitem_cast2(self):
  1359. # GH#7704
  1360. # dtype conversion on setting
  1361. df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC"))
  1362. df["event"] = np.nan
  1363. df.loc[10, "event"] = "foo"
  1364. result = df.dtypes
  1365. expected = Series(
  1366. [np.dtype("float64")] * 3 + [np.dtype("object")],
  1367. index=["A", "B", "C", "event"],
  1368. )
  1369. tm.assert_series_equal(result, expected)
  1370. def test_loc_setitem_cast3(self):
  1371. # Test that data type is preserved . GH#5782
  1372. df = DataFrame({"one": np.arange(6, dtype=np.int8)})
  1373. df.loc[1, "one"] = 6
  1374. assert df.dtypes.one == np.dtype(np.int8)
  1375. df.one = np.int8(7)
  1376. assert df.dtypes.one == np.dtype(np.int8)
  1377. def test_loc_setitem_range_key(self, frame_or_series):
  1378. # GH#45479 don't treat range key as positional
  1379. obj = frame_or_series(range(5), index=[3, 4, 1, 0, 2])
  1380. values = [9, 10, 11]
  1381. if obj.ndim == 2:
  1382. values = [[9], [10], [11]]
  1383. obj.loc[range(3)] = values
  1384. expected = frame_or_series([0, 1, 10, 9, 11], index=obj.index)
  1385. tm.assert_equal(obj, expected)
  class TestLocWithEllipsis:
      """``Ellipsis`` handling in ``loc`` and ``iloc`` keys."""

      @pytest.fixture(params=[tm.loc, tm.iloc])
      def indexer(self, request):
          """Provide ``tm.loc`` and ``tm.iloc`` in turn."""
          # Test iloc while we're here
          return request.param

      @pytest.fixture
      def obj(self, series_with_simple_index, frame_or_series):
          """Return the fixture Series, or its frame form unless Series is requested."""
          if frame_or_series is Series:
              return series_with_simple_index
          return series_with_simple_index.to_frame()

      def test_loc_iloc_getitem_ellipsis(self, obj, indexer):
          """A bare ``...`` key returns something equal to the object itself."""
          tm.assert_equal(indexer(obj)[...], obj)

      def test_loc_iloc_getitem_leading_ellipses(
          self, series_with_simple_index, indexer
      ):
          """A leading ``...`` before a list key matches the key without it."""
          ser = series_with_simple_index
          positional = indexer is tm.iloc
          by_label = indexer is tm.loc
          key = 0 if (positional or len(ser) == 0) else ser.index[0]

          if by_label and is_bool_dtype(ser.index):
              # passing [False] will get interpreted as a boolean mask
              # TODO: should it? unambiguous when lengths dont match?
              return

          if by_label and isinstance(ser.index, MultiIndex):
              msg = "MultiIndex does not support indexing with Ellipsis"
              with pytest.raises(NotImplementedError, match=msg):
                  indexer(ser)[..., [key]]
          elif len(ser) != 0:
              tm.assert_series_equal(indexer(ser)[..., [key]], indexer(ser)[[key]])

          # frame case: the ellipsis stands in for the row axis
          column_key = 0 if positional else ser.name
          frame = ser.to_frame()
          tm.assert_frame_equal(
              indexer(frame)[..., [column_key]], indexer(frame)[:, [column_key]]
          )

      def test_loc_iloc_getitem_ellipses_only_one_ellipsis(self, obj, indexer):
          """Keys holding more than one ``...`` raise ``IndexingError``."""
          # GH37750
          key = 0 if (indexer is tm.iloc or len(obj) == 0) else obj.index[0]

          repeated_ellipsis_keys = [(..., ...), (..., [key], ...), (..., ..., key)]
          for bad_key in repeated_ellipsis_keys:
              with pytest.raises(IndexingError, match=_one_ellipsis_message):
                  indexer(obj)[bad_key]

          # one_ellipsis_message takes precedence over "Too many indexers"
          # only when the first key is Ellipsis
          with pytest.raises(IndexingError, match="Too many indexers"):
              indexer(obj)[key, ..., ...]
  class TestLocWithMultiIndex:
      """``loc`` checks on MultiIndex data, plus some datetime and categorical cases."""

      @pytest.mark.parametrize(
          "keys, expected",
          [
              (["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
              (["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
              ((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
              ((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
              ((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
              ((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
              ((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]),
          ],
      )
      @pytest.mark.parametrize("dim", ["index", "columns"])
      def test_loc_getitem_multilevel_index_order(self, dim, keys, expected):
          """Compare the selected axis with the MultiIndex expected for ``keys``."""
          # GH#22797
          # Try to respect order of keys given for MultiIndex.loc
          level_values = [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]
          df = DataFrame(np.arange(25).reshape(5, 5), **{dim: level_values})
          exp_index = MultiIndex.from_arrays(expected)

          if dim == "index":
              tm.assert_index_equal(df.loc[keys, :].index, exp_index)
          elif dim == "columns":
              tm.assert_index_equal(df.loc[:, keys].columns, exp_index)

      def test_loc_preserve_names(
          self, multiindex_year_month_day_dataframe_random_data
      ):
          """Selecting on leading levels keeps the names of the remaining levels."""
          ymd = multiindex_year_month_day_dataframe_random_data
          level_names = ymd.index.names

          # one level consumed
          frame_part = ymd.loc[2000]
          series_part = ymd["A"].loc[2000]
          assert frame_part.index.names == level_names[1:]
          assert series_part.index.names == level_names[1:]

          # two levels consumed
          frame_part = ymd.loc[2000, 2]
          series_part = ymd["A"].loc[2000, 2]
          assert frame_part.index.name == level_names[2]
          assert series_part.index.name == level_names[2]

      def test_loc_getitem_multiindex_nonunique_len_zero(self):
          """Empty keys on a non-unique MultiIndex give an empty Series."""
          # GH#13691
          ser = Series(0, index=MultiIndex.from_product([[0], [1, 1]]))
          expected = ser[:0]

          from_empty_list = ser.loc[[]]
          tm.assert_series_equal(from_empty_list, expected)

          from_empty_series = ser.loc[ser.iloc[0:0]]
          tm.assert_series_equal(from_empty_series, expected)

      def test_loc_getitem_access_none_value_in_multiindex(self):
          """A ``None`` value stored under a MultiIndex label is returned by ``loc``."""
          # GH#34318: test that you can access a None value using .loc
          # through a Multiindex
          ser = Series([None], MultiIndex.from_arrays([["Level1"], ["Level2"]]))
          assert ser.loc[("Level1", "Level2")] is None

          midx = MultiIndex.from_product([["Level1"], ["Level2_a", "Level2_b"]])
          lookup = ("Level1", "Level2_a")

          ser = Series([None] * len(midx), dtype=object, index=midx)
          assert ser.loc[lookup] is None

          ser = Series([1] * len(midx), dtype=object, index=midx)
          assert ser.loc[lookup] == 1

      def test_loc_setitem_multiindex_slice(self):
          """Setting over a tuple-to-tuple slice fills every row in between."""
          # GH 34870
          firsts = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
          seconds = ["one", "two", "one", "two", "one", "two", "one", "two"]
          index = MultiIndex.from_tuples(
              zip(firsts, seconds), names=["first", "second"]
          )

          result = Series([1, 1, 1, 1, 1, 1, 1, 1], index=index)
          result.loc[("baz", "one"):("foo", "two")] = 100

          expected = Series([1, 1, 100, 100, 100, 100, 1, 1], index=index)
          tm.assert_series_equal(result, expected)

      def test_loc_getitem_slice_datetime_objs_with_datetimeindex(self):
          """A ``datetime`` slice wider than the index returns the whole Series."""
          size = 100000
          times = date_range("2000-01-01", freq="10min", periods=size)
          ser = Series(range(size), times)

          lower, upper = datetime(1900, 1, 1), datetime(2100, 1, 1)
          tm.assert_series_equal(ser.loc[lower:upper], ser)

      def test_loc_getitem_datetime_string_with_datetimeindex(self):
          """Date strings in a list key select the matching rows of a DatetimeIndex."""
          # GH 16710
          df = DataFrame(
              {"a": range(10), "b": range(10)},
              index=date_range("2010-01-01", "2010-01-10"),
          )
          wanted_days = ["2010-01-01", "2010-01-05"]

          result = df.loc[wanted_days, ["a", "b"]]

          expected = DataFrame(
              {"a": [0, 4], "b": [0, 4]},
              index=DatetimeIndex(wanted_days),
          )
          tm.assert_frame_equal(result, expected)

      def test_loc_getitem_sorted_index_level_with_duplicates(self):
          """A full tuple key on a sorted, duplicated MultiIndex returns all matches."""
          # GH#4516 sorting a MultiIndex with duplicates and multiple dtypes
          foo_bar, bah_bam = ("foo", "bar"), ("bah", "bam")
          mi = MultiIndex.from_tuples(
              [foo_bar, foo_bar, bah_bam, bah_bam, foo_bar, bah_bam],
              names=["A", "B"],
          )
          rows = [[float(n), n] for n in range(1, 7)]
          df = DataFrame(rows, index=mi, columns=["C", "D"])
          df = df.sort_index(level=0)

          expected = DataFrame(
              [[1.0, 1], [2.0, 2], [5.0, 5]],
              columns=["C", "D"],
              index=mi.take([0, 1, 4]),
          )
          tm.assert_frame_equal(df.loc[("foo", "bar")], expected)

      def test_additional_element_to_categorical_series_loc(self):
          """Enlarging a categorical Series with a non-category value gives object dtype."""
          # GH#47677
          ser = Series(["a", "b", "c"], dtype="category")
          ser.loc[3] = 0

          tm.assert_series_equal(ser, Series(["a", "b", "c", 0], dtype="object"))

      def test_additional_categorical_element_loc(self):
          """Enlarging a categorical Series with an existing category keeps category dtype."""
          # GH#47677
          ser = Series(["a", "b", "c"], dtype="category")
          ser.loc[3] = "a"

          tm.assert_series_equal(ser, Series(["a", "b", "c", "a"], dtype="category"))

      def test_loc_set_nan_in_categorical_series(self, any_numeric_ea_dtype):
          """NaN can be appended to and set into a Series with EA-typed categories."""
          # GH#47677
          cat_dtype = CategoricalDtype(Index([1, 2, 3], dtype=any_numeric_ea_dtype))
          srs = Series([1, 2, 3], dtype=cat_dtype)

          # enlarge
          srs.loc[3] = np.nan
          tm.assert_series_equal(srs, Series([1, 2, 3, np.nan], dtype=cat_dtype))

          # set into
          srs.loc[1] = np.nan
          tm.assert_series_equal(srs, Series([1, np.nan, 3, np.nan], dtype=cat_dtype))

      @pytest.mark.parametrize("na", (np.nan, pd.NA, None, pd.NaT))
      def test_loc_consistency_series_enlarge_set_into(self, na):
          """Appending an NA value and overwriting with it give the same Series."""
          # GH#47677
          letters = ["a", "b", "c"]

          srs_enlarge = Series(letters, dtype="category")
          srs_enlarge.loc[3] = na

          srs_setinto = Series(letters + ["a"], dtype="category")
          srs_setinto.loc[3] = na

          tm.assert_series_equal(srs_enlarge, srs_setinto)
          tm.assert_series_equal(
              srs_enlarge, Series(letters + [na], dtype="category")
          )

      def test_loc_getitem_preserves_index_level_category_dtype(self):
          """The categorical first level is unchanged after a list ``loc`` selection."""
          # GH#15166
          index = MultiIndex(
              levels=[CategoricalIndex(["a", "b"]), range(10)],
              codes=[[0] * 5 + [1] * 5, range(10)],
              names=["Index1", "Index2"],
          )
          df = DataFrame(data=np.arange(2, 22, 2), index=index)

          expected = CategoricalIndex(
              ["a", "b"],
              categories=["a", "b"],
              ordered=False,
              name="Index1",
              dtype="category",
          )

          # before selecting
          tm.assert_index_equal(df.index.levels[0], expected)

          # after selecting the "a" rows
          tm.assert_index_equal(df.loc[["a"]].index.levels[0], expected)

      @pytest.mark.parametrize("lt_value", [30, 10])
      def test_loc_multiindex_levels_contain_values_not_in_index_anymore(
          self, lt_value
      ):
          """A level value filtered out of the rows raises ``KeyError`` when requested."""
          # GH#41170
          df = DataFrame(
              {"a": [12, 23, 34, 45]}, index=[list("aabb"), [0, 1, 2, 3]]
          )
          with pytest.raises(KeyError, match=r"\['b'\] not in index"):
              df.loc[df["a"] < lt_value, :].loc[["b"], :]

      def test_loc_multiindex_null_slice_na_level(self):
          """A null slice over an all-NaN first level still selects on the second level."""
          # GH#42055
          all_nan_level = np.array([np.nan, np.nan])
          mi = MultiIndex.from_arrays([all_nan_level, ["bar", "baz"]])
          ser = Series([0, 1], index=mi)

          result = ser.loc[:, "bar"]

          # TODO: should we have name="bar"?
          expected = Series([0], index=[np.nan])
          tm.assert_series_equal(result, expected)

      def test_loc_drops_level(self):
          """Selecting on the first level returns an index without that level."""
          # Based on test_series_varied_multiindex_alignment, where
          # this used to fail to drop the first level
          mi = MultiIndex.from_product(
              [list("ab"), list("xy"), [1, 2]], names=["ab", "xy", "num"]
          )
          ser = Series(range(8), index=mi)

          loc_result = ser.loc["a", :, :]

          remaining_levels = ser.index.droplevel(0)[:4]
          tm.assert_index_equal(loc_result.index, remaining_levels)
  1641. class TestLocSetitemWithExpansion:
  1642. @pytest.mark.slow
  1643. def test_loc_setitem_with_expansion_large_dataframe(self):
  1644. # GH#10692
  1645. result = DataFrame({"x": range(10**6)}, dtype="int64")
  1646. result.loc[len(result)] = len(result) + 1
  1647. expected = DataFrame({"x": range(10**6 + 1)}, dtype="int64")
  1648. tm.assert_frame_equal(result, expected)
  1649. def test_loc_setitem_empty_series(self):
  1650. # GH#5226
  1651. # partially set with an empty object series
  1652. ser = Series(dtype=object)
  1653. ser.loc[1] = 1
  1654. tm.assert_series_equal(ser, Series([1], index=[1]))
  1655. ser.loc[3] = 3
  1656. tm.assert_series_equal(ser, Series([1, 3], index=[1, 3]))
  1657. def test_loc_setitem_empty_series_float(self):
  1658. # GH#5226
  1659. # partially set with an empty object series
  1660. ser = Series(dtype=object)
  1661. ser.loc[1] = 1.0
  1662. tm.assert_series_equal(ser, Series([1.0], index=[1]))
  1663. ser.loc[3] = 3.0
  1664. tm.assert_series_equal(ser, Series([1.0, 3.0], index=[1, 3]))
  1665. def test_loc_setitem_empty_series_str_idx(self):
  1666. # GH#5226
  1667. # partially set with an empty object series
  1668. ser = Series(dtype=object)
  1669. ser.loc["foo"] = 1
  1670. tm.assert_series_equal(ser, Series([1], index=["foo"]))
  1671. ser.loc["bar"] = 3
  1672. tm.assert_series_equal(ser, Series([1, 3], index=["foo", "bar"]))
  1673. ser.loc[3] = 4
  1674. tm.assert_series_equal(ser, Series([1, 3, 4], index=["foo", "bar", 3]))
  1675. def test_loc_setitem_incremental_with_dst(self):
  1676. # GH#20724
  1677. base = datetime(2015, 11, 1, tzinfo=gettz("US/Pacific"))
  1678. idxs = [base + timedelta(seconds=i * 900) for i in range(16)]
  1679. result = Series([0], index=[idxs[0]])
  1680. for ts in idxs:
  1681. result.loc[ts] = 1
  1682. expected = Series(1, index=idxs)
  1683. tm.assert_series_equal(result, expected)
  1684. @pytest.mark.parametrize(
  1685. "conv",
  1686. [
  1687. lambda x: x,
  1688. lambda x: x.to_datetime64(),
  1689. lambda x: x.to_pydatetime(),
  1690. lambda x: np.datetime64(x),
  1691. ],
  1692. ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"],
  1693. )
  1694. def test_loc_setitem_datetime_keys_cast(self, conv):
  1695. # GH#9516
  1696. dt1 = Timestamp("20130101 09:00:00")
  1697. dt2 = Timestamp("20130101 10:00:00")
  1698. df = DataFrame()
  1699. df.loc[conv(dt1), "one"] = 100
  1700. df.loc[conv(dt2), "one"] = 200
  1701. expected = DataFrame({"one": [100.0, 200.0]}, index=[dt1, dt2])
  1702. tm.assert_frame_equal(df, expected)
  1703. def test_loc_setitem_categorical_column_retains_dtype(self, ordered):
  1704. # GH16360
  1705. result = DataFrame({"A": [1]})
  1706. result.loc[:, "B"] = Categorical(["b"], ordered=ordered)
  1707. expected = DataFrame({"A": [1], "B": Categorical(["b"], ordered=ordered)})
  1708. tm.assert_frame_equal(result, expected)
  1709. def test_loc_setitem_with_expansion_and_existing_dst(self):
  1710. # GH#18308
  1711. start = Timestamp("2017-10-29 00:00:00+0200", tz="Europe/Madrid")
  1712. end = Timestamp("2017-10-29 03:00:00+0100", tz="Europe/Madrid")
  1713. ts = Timestamp("2016-10-10 03:00:00", tz="Europe/Madrid")
  1714. idx = date_range(start, end, inclusive="left", freq="H")
  1715. assert ts not in idx # i.e. result.loc setitem is with-expansion
  1716. result = DataFrame(index=idx, columns=["value"])
  1717. result.loc[ts, "value"] = 12
  1718. expected = DataFrame(
  1719. [np.nan] * len(idx) + [12],
  1720. index=idx.append(DatetimeIndex([ts])),
  1721. columns=["value"],
  1722. dtype=object,
  1723. )
  1724. tm.assert_frame_equal(result, expected)
  1725. def test_setitem_with_expansion(self):
  1726. # indexing - setting an element
  1727. df = DataFrame(
  1728. data=to_datetime(["2015-03-30 20:12:32", "2015-03-12 00:11:11"]),
  1729. columns=["time"],
  1730. )
  1731. df["new_col"] = ["new", "old"]
  1732. df.time = df.set_index("time").index.tz_localize("UTC")
  1733. v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific")
  1734. # pre-2.0 trying to set a single element on a part of a different
  1735. # timezone converted to object; in 2.0 it retains dtype
  1736. df2 = df.copy()
  1737. df2.loc[df2.new_col == "new", "time"] = v
  1738. expected = Series([v[0].tz_convert("UTC"), df.loc[1, "time"]], name="time")
  1739. tm.assert_series_equal(df2.time, expected)
  1740. v = df.loc[df.new_col == "new", "time"] + Timedelta("1s")
  1741. df.loc[df.new_col == "new", "time"] = v
  1742. tm.assert_series_equal(df.loc[df.new_col == "new", "time"], v)
  1743. def test_loc_setitem_with_expansion_inf_upcast_empty(self):
  1744. # Test with np.inf in columns
  1745. df = DataFrame()
  1746. df.loc[0, 0] = 1
  1747. df.loc[1, 1] = 2
  1748. df.loc[0, np.inf] = 3
  1749. result = df.columns
  1750. expected = Index([0, 1, np.inf], dtype=np.float64)
  1751. tm.assert_index_equal(result, expected)
  1752. @pytest.mark.filterwarnings("ignore:indexing past lexsort depth")
  1753. def test_loc_setitem_with_expansion_nonunique_index(self, index):
  1754. # GH#40096
  1755. if not len(index):
  1756. return
  1757. index = index.repeat(2) # ensure non-unique
  1758. N = len(index)
  1759. arr = np.arange(N).astype(np.int64)
  1760. orig = DataFrame(arr, index=index, columns=[0])
  1761. # key that will requiring object-dtype casting in the index
  1762. key = "kapow"
  1763. assert key not in index # otherwise test is invalid
  1764. # TODO: using a tuple key breaks here in many cases
  1765. exp_index = index.insert(len(index), key)
  1766. if isinstance(index, MultiIndex):
  1767. assert exp_index[-1][0] == key
  1768. else:
  1769. assert exp_index[-1] == key
  1770. exp_data = np.arange(N + 1).astype(np.float64)
  1771. expected = DataFrame(exp_data, index=exp_index, columns=[0])
  1772. # Add new row, but no new columns
  1773. df = orig.copy()
  1774. df.loc[key, 0] = N
  1775. tm.assert_frame_equal(df, expected)
  1776. # add new row on a Series
  1777. ser = orig.copy()[0]
  1778. ser.loc[key] = N
  1779. # the series machinery lets us preserve int dtype instead of float
  1780. expected = expected[0].astype(np.int64)
  1781. tm.assert_series_equal(ser, expected)
  1782. # add new row and new column
  1783. df = orig.copy()
  1784. df.loc[key, 1] = N
  1785. expected = DataFrame(
  1786. {0: list(arr) + [np.nan], 1: [np.nan] * N + [float(N)]},
  1787. index=exp_index,
  1788. )
  1789. tm.assert_frame_equal(df, expected)
  1790. @pytest.mark.parametrize(
  1791. "dtype", ["Int32", "Int64", "UInt32", "UInt64", "Float32", "Float64"]
  1792. )
  1793. def test_loc_setitem_with_expansion_preserves_nullable_int(self, dtype):
  1794. # GH#42099
  1795. ser = Series([0, 1, 2, 3], dtype=dtype)
  1796. df = DataFrame({"data": ser})
  1797. result = DataFrame(index=df.index)
  1798. result.loc[df.index, "data"] = ser
  1799. tm.assert_frame_equal(result, df)
  1800. result = DataFrame(index=df.index)
  1801. result.loc[df.index, "data"] = ser._values
  1802. tm.assert_frame_equal(result, df)
  1803. class TestLocCallable:
  1804. def test_frame_loc_getitem_callable(self):
  1805. # GH#11485
  1806. df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
  1807. # iloc cannot use boolean Series (see GH3635)
  1808. # return bool indexer
  1809. res = df.loc[lambda x: x.A > 2]
  1810. tm.assert_frame_equal(res, df.loc[df.A > 2])
  1811. res = df.loc[lambda x: x.B == "b", :]
  1812. tm.assert_frame_equal(res, df.loc[df.B == "b", :])
  1813. res = df.loc[lambda x: x.A > 2, lambda x: x.columns == "B"]
  1814. tm.assert_frame_equal(res, df.loc[df.A > 2, [False, True, False]])
  1815. res = df.loc[lambda x: x.A > 2, lambda x: "B"]
  1816. tm.assert_series_equal(res, df.loc[df.A > 2, "B"])
  1817. res = df.loc[lambda x: x.A > 2, lambda x: ["A", "B"]]
  1818. tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])
  1819. res = df.loc[lambda x: x.A == 2, lambda x: ["A", "B"]]
  1820. tm.assert_frame_equal(res, df.loc[df.A == 2, ["A", "B"]])
  1821. # scalar
  1822. res = df.loc[lambda x: 1, lambda x: "A"]
  1823. assert res == df.loc[1, "A"]
  1824. def test_frame_loc_getitem_callable_mixture(self):
  1825. # GH#11485
  1826. df = DataFrame({"A": [1, 2, 3, 4], "B": list("aabb"), "C": [1, 2, 3, 4]})
  1827. res = df.loc[lambda x: x.A > 2, ["A", "B"]]
  1828. tm.assert_frame_equal(res, df.loc[df.A > 2, ["A", "B"]])
  1829. res = df.loc[[2, 3], lambda x: ["A", "B"]]
  1830. tm.assert_frame_equal(res, df.loc[[2, 3], ["A", "B"]])
  1831. res = df.loc[3, lambda x: ["A", "B"]]
  1832. tm.assert_series_equal(res, df.loc[3, ["A", "B"]])
  1833. def test_frame_loc_getitem_callable_labels(self):
  1834. # GH#11485
  1835. df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
  1836. # return label
  1837. res = df.loc[lambda x: ["A", "C"]]
  1838. tm.assert_frame_equal(res, df.loc[["A", "C"]])
  1839. res = df.loc[lambda x: ["A", "C"], :]
  1840. tm.assert_frame_equal(res, df.loc[["A", "C"], :])
  1841. res = df.loc[lambda x: ["A", "C"], lambda x: "X"]
  1842. tm.assert_series_equal(res, df.loc[["A", "C"], "X"])
  1843. res = df.loc[lambda x: ["A", "C"], lambda x: ["X"]]
  1844. tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])
  1845. # mixture
  1846. res = df.loc[["A", "C"], lambda x: "X"]
  1847. tm.assert_series_equal(res, df.loc[["A", "C"], "X"])
  1848. res = df.loc[["A", "C"], lambda x: ["X"]]
  1849. tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])
  1850. res = df.loc[lambda x: ["A", "C"], "X"]
  1851. tm.assert_series_equal(res, df.loc[["A", "C"], "X"])
  1852. res = df.loc[lambda x: ["A", "C"], ["X"]]
  1853. tm.assert_frame_equal(res, df.loc[["A", "C"], ["X"]])
  1854. def test_frame_loc_setitem_callable(self):
  1855. # GH#11485
  1856. df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
  1857. # return label
  1858. res = df.copy()
  1859. res.loc[lambda x: ["A", "C"]] = -20
  1860. exp = df.copy()
  1861. exp.loc[["A", "C"]] = -20
  1862. tm.assert_frame_equal(res, exp)
  1863. res = df.copy()
  1864. res.loc[lambda x: ["A", "C"], :] = 20
  1865. exp = df.copy()
  1866. exp.loc[["A", "C"], :] = 20
  1867. tm.assert_frame_equal(res, exp)
  1868. res = df.copy()
  1869. res.loc[lambda x: ["A", "C"], lambda x: "X"] = -1
  1870. exp = df.copy()
  1871. exp.loc[["A", "C"], "X"] = -1
  1872. tm.assert_frame_equal(res, exp)
  1873. res = df.copy()
  1874. res.loc[lambda x: ["A", "C"], lambda x: ["X"]] = [5, 10]
  1875. exp = df.copy()
  1876. exp.loc[["A", "C"], ["X"]] = [5, 10]
  1877. tm.assert_frame_equal(res, exp)
  1878. # mixture
  1879. res = df.copy()
  1880. res.loc[["A", "C"], lambda x: "X"] = np.array([-1, -2])
  1881. exp = df.copy()
  1882. exp.loc[["A", "C"], "X"] = np.array([-1, -2])
  1883. tm.assert_frame_equal(res, exp)
  1884. res = df.copy()
  1885. res.loc[["A", "C"], lambda x: ["X"]] = 10
  1886. exp = df.copy()
  1887. exp.loc[["A", "C"], ["X"]] = 10
  1888. tm.assert_frame_equal(res, exp)
  1889. res = df.copy()
  1890. res.loc[lambda x: ["A", "C"], "X"] = -2
  1891. exp = df.copy()
  1892. exp.loc[["A", "C"], "X"] = -2
  1893. tm.assert_frame_equal(res, exp)
  1894. res = df.copy()
  1895. res.loc[lambda x: ["A", "C"], ["X"]] = -4
  1896. exp = df.copy()
  1897. exp.loc[["A", "C"], ["X"]] = -4
  1898. tm.assert_frame_equal(res, exp)
  1899. class TestPartialStringSlicing:
  1900. def test_loc_getitem_partial_string_slicing_datetimeindex(self):
  1901. # GH#35509
  1902. df = DataFrame(
  1903. {"col1": ["a", "b", "c"], "col2": [1, 2, 3]},
  1904. index=to_datetime(["2020-08-01", "2020-07-02", "2020-08-05"]),
  1905. )
  1906. expected = DataFrame(
  1907. {"col1": ["a", "c"], "col2": [1, 3]},
  1908. index=to_datetime(["2020-08-01", "2020-08-05"]),
  1909. )
  1910. result = df.loc["2020-08"]
  1911. tm.assert_frame_equal(result, expected)
  1912. def test_loc_getitem_partial_string_slicing_with_periodindex(self):
  1913. pi = pd.period_range(start="2017-01-01", end="2018-01-01", freq="M")
  1914. ser = pi.to_series()
  1915. result = ser.loc[:"2017-12"]
  1916. expected = ser.iloc[:-1]
  1917. tm.assert_series_equal(result, expected)
  1918. def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self):
  1919. ix = timedelta_range(start="1 day", end="2 days", freq="1H")
  1920. ser = ix.to_series()
  1921. result = ser.loc[:"1 days"]
  1922. expected = ser.iloc[:-1]
  1923. tm.assert_series_equal(result, expected)
  1924. def test_loc_getitem_str_timedeltaindex(self):
  1925. # GH#16896
  1926. df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days"))
  1927. expected = df.iloc[0]
  1928. sliced = df.loc["0 days"]
  1929. tm.assert_series_equal(sliced, expected)
  1930. @pytest.mark.parametrize("indexer_end", [None, "2020-01-02 23:59:59.999999999"])
  1931. def test_loc_getitem_partial_slice_non_monotonicity(
  1932. self, tz_aware_fixture, indexer_end, frame_or_series
  1933. ):
  1934. # GH#33146
  1935. obj = frame_or_series(
  1936. [1] * 5,
  1937. index=DatetimeIndex(
  1938. [
  1939. Timestamp("2019-12-30"),
  1940. Timestamp("2020-01-01"),
  1941. Timestamp("2019-12-25"),
  1942. Timestamp("2020-01-02 23:59:59.999999999"),
  1943. Timestamp("2019-12-19"),
  1944. ],
  1945. tz=tz_aware_fixture,
  1946. ),
  1947. )
  1948. expected = frame_or_series(
  1949. [1] * 2,
  1950. index=DatetimeIndex(
  1951. [
  1952. Timestamp("2020-01-01"),
  1953. Timestamp("2020-01-02 23:59:59.999999999"),
  1954. ],
  1955. tz=tz_aware_fixture,
  1956. ),
  1957. )
  1958. indexer = slice("2020-01-01", indexer_end)
  1959. result = obj[indexer]
  1960. tm.assert_equal(result, expected)
  1961. result = obj.loc[indexer]
  1962. tm.assert_equal(result, expected)
  1963. class TestLabelSlicing:
  1964. def test_loc_getitem_slicing_datetimes_frame(self):
  1965. # GH#7523
  1966. # unique
  1967. df_unique = DataFrame(
  1968. np.arange(4.0, dtype="float64"),
  1969. index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 3, 4]],
  1970. )
  1971. # duplicates
  1972. df_dups = DataFrame(
  1973. np.arange(5.0, dtype="float64"),
  1974. index=[datetime(2001, 1, i, 10, 00) for i in [1, 2, 2, 3, 4]],
  1975. )
  1976. for df in [df_unique, df_dups]:
  1977. result = df.loc[datetime(2001, 1, 1, 10) :]
  1978. tm.assert_frame_equal(result, df)
  1979. result = df.loc[: datetime(2001, 1, 4, 10)]
  1980. tm.assert_frame_equal(result, df)
  1981. result = df.loc[datetime(2001, 1, 1, 10) : datetime(2001, 1, 4, 10)]
  1982. tm.assert_frame_equal(result, df)
  1983. result = df.loc[datetime(2001, 1, 1, 11) :]
  1984. expected = df.iloc[1:]
  1985. tm.assert_frame_equal(result, expected)
  1986. result = df.loc["20010101 11":]
  1987. tm.assert_frame_equal(result, expected)
  1988. def test_loc_getitem_label_slice_across_dst(self):
  1989. # GH#21846
  1990. idx = date_range(
  1991. "2017-10-29 01:30:00", tz="Europe/Berlin", periods=5, freq="30 min"
  1992. )
  1993. series2 = Series([0, 1, 2, 3, 4], index=idx)
  1994. t_1 = Timestamp("2017-10-29 02:30:00+02:00", tz="Europe/Berlin")
  1995. t_2 = Timestamp("2017-10-29 02:00:00+01:00", tz="Europe/Berlin")
  1996. result = series2.loc[t_1:t_2]
  1997. expected = Series([2, 3], index=idx[2:4])
  1998. tm.assert_series_equal(result, expected)
  1999. result = series2[t_1]
  2000. expected = 2
  2001. assert result == expected
  2002. @pytest.mark.parametrize(
  2003. "index",
  2004. [
  2005. pd.period_range(start="2017-01-01", end="2018-01-01", freq="M"),
  2006. timedelta_range(start="1 day", end="2 days", freq="1H"),
  2007. ],
  2008. )
  2009. def test_loc_getitem_label_slice_period_timedelta(self, index):
  2010. ser = index.to_series()
  2011. result = ser.loc[: index[-2]]
  2012. expected = ser.iloc[:-1]
  2013. tm.assert_series_equal(result, expected)
  2014. def test_loc_getitem_slice_floats_inexact(self):
  2015. index = [52195.504153, 52196.303147, 52198.369883]
  2016. df = DataFrame(np.random.rand(3, 2), index=index)
  2017. s1 = df.loc[52195.1:52196.5]
  2018. assert len(s1) == 2
  2019. s1 = df.loc[52195.1:52196.6]
  2020. assert len(s1) == 2
  2021. s1 = df.loc[52195.1:52198.9]
  2022. assert len(s1) == 3
  2023. def test_loc_getitem_float_slice_floatindex(self, float_numpy_dtype):
  2024. dtype = float_numpy_dtype
  2025. ser = Series(np.random.rand(10), index=np.arange(10, 20, dtype=dtype))
  2026. assert len(ser.loc[12.0:]) == 8
  2027. assert len(ser.loc[12.5:]) == 7
  2028. idx = np.arange(10, 20, dtype=dtype)
  2029. idx[2] = 12.2
  2030. ser.index = idx
  2031. assert len(ser.loc[12.0:]) == 8
  2032. assert len(ser.loc[12.5:]) == 7
  2033. @pytest.mark.parametrize(
  2034. "start,stop, expected_slice",
  2035. [
  2036. [np.timedelta64(0, "ns"), None, slice(0, 11)],
  2037. [np.timedelta64(1, "D"), np.timedelta64(6, "D"), slice(1, 7)],
  2038. [None, np.timedelta64(4, "D"), slice(0, 5)],
  2039. ],
  2040. )
  2041. def test_loc_getitem_slice_label_td64obj(self, start, stop, expected_slice):
  2042. # GH#20393
  2043. ser = Series(range(11), timedelta_range("0 days", "10 days"))
  2044. result = ser.loc[slice(start, stop)]
  2045. expected = ser.iloc[expected_slice]
  2046. tm.assert_series_equal(result, expected)
  2047. @pytest.mark.parametrize("start", ["2018", "2020"])
  2048. def test_loc_getitem_slice_unordered_dt_index(self, frame_or_series, start):
  2049. obj = frame_or_series(
  2050. [1, 2, 3],
  2051. index=[Timestamp("2016"), Timestamp("2019"), Timestamp("2017")],
  2052. )
  2053. with pytest.raises(
  2054. KeyError, match="Value based partial slicing on non-monotonic"
  2055. ):
  2056. obj.loc[start:"2022"]
  2057. @pytest.mark.parametrize("value", [1, 1.5])
  2058. def test_loc_getitem_slice_labels_int_in_object_index(self, frame_or_series, value):
  2059. # GH: 26491
  2060. obj = frame_or_series(range(4), index=[value, "first", 2, "third"])
  2061. result = obj.loc[value:"third"]
  2062. expected = frame_or_series(range(4), index=[value, "first", 2, "third"])
  2063. tm.assert_equal(result, expected)
  2064. def test_loc_getitem_slice_columns_mixed_dtype(self):
  2065. # GH: 20975
  2066. df = DataFrame({"test": 1, 1: 2, 2: 3}, index=[0])
  2067. expected = DataFrame(
  2068. data=[[2, 3]], index=[0], columns=Index([1, 2], dtype=object)
  2069. )
  2070. tm.assert_frame_equal(df.loc[:, 1:], expected)
  2071. class TestLocBooleanLabelsAndSlices:
  2072. @pytest.mark.parametrize("bool_value", [True, False])
  2073. def test_loc_bool_incompatible_index_raises(
  2074. self, index, frame_or_series, bool_value
  2075. ):
  2076. # GH20432
  2077. message = f"{bool_value}: boolean label can not be used without a boolean index"
  2078. if index.inferred_type != "boolean":
  2079. obj = frame_or_series(index=index, dtype="object")
  2080. with pytest.raises(KeyError, match=message):
  2081. obj.loc[bool_value]
  2082. @pytest.mark.parametrize("bool_value", [True, False])
  2083. def test_loc_bool_should_not_raise(self, frame_or_series, bool_value):
  2084. obj = frame_or_series(
  2085. index=Index([True, False], dtype="boolean"), dtype="object"
  2086. )
  2087. obj.loc[bool_value]
  2088. def test_loc_bool_slice_raises(self, index, frame_or_series):
  2089. # GH20432
  2090. message = (
  2091. r"slice\(True, False, None\): boolean values can not be used in a slice"
  2092. )
  2093. obj = frame_or_series(index=index, dtype="object")
  2094. with pytest.raises(TypeError, match=message):
  2095. obj.loc[True:False]
  2096. class TestLocBooleanMask:
  2097. def test_loc_setitem_bool_mask_timedeltaindex(self):
  2098. # GH#14946
  2099. df = DataFrame({"x": range(10)})
  2100. df.index = to_timedelta(range(10), unit="s")
  2101. conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3]
  2102. expected_data = [
  2103. [0, 1, 2, 3, 10, 10, 10, 10, 10, 10],
  2104. [0, 1, 2, 10, 4, 5, 6, 7, 8, 9],
  2105. [10, 10, 10, 3, 4, 5, 6, 7, 8, 9],
  2106. ]
  2107. for cond, data in zip(conditions, expected_data):
  2108. result = df.copy()
  2109. result.loc[cond, "x"] = 10
  2110. expected = DataFrame(
  2111. data,
  2112. index=to_timedelta(range(10), unit="s"),
  2113. columns=["x"],
  2114. dtype="int64",
  2115. )
  2116. tm.assert_frame_equal(expected, result)
  2117. @pytest.mark.parametrize("tz", [None, "UTC"])
  2118. def test_loc_setitem_mask_with_datetimeindex_tz(self, tz):
  2119. # GH#16889
  2120. # support .loc with alignment and tz-aware DatetimeIndex
  2121. mask = np.array([True, False, True, False])
  2122. idx = date_range("20010101", periods=4, tz=tz)
  2123. df = DataFrame({"a": np.arange(4)}, index=idx).astype("float64")
  2124. result = df.copy()
  2125. result.loc[mask, :] = df.loc[mask, :]
  2126. tm.assert_frame_equal(result, df)
  2127. result = df.copy()
  2128. result.loc[mask] = df.loc[mask]
  2129. tm.assert_frame_equal(result, df)
  2130. def test_loc_setitem_mask_and_label_with_datetimeindex(self):
  2131. # GH#9478
  2132. # a datetimeindex alignment issue with partial setting
  2133. df = DataFrame(
  2134. np.arange(6.0).reshape(3, 2),
  2135. columns=list("AB"),
  2136. index=date_range("1/1/2000", periods=3, freq="1H"),
  2137. )
  2138. expected = df.copy()
  2139. expected["C"] = [expected.index[0]] + [pd.NaT, pd.NaT]
  2140. mask = df.A < 1
  2141. df.loc[mask, "C"] = df.loc[mask].index
  2142. tm.assert_frame_equal(df, expected)
  2143. def test_loc_setitem_mask_td64_series_value(self):
  2144. # GH#23462 key list of bools, value is a Series
  2145. td1 = Timedelta(0)
  2146. td2 = Timedelta(28767471428571405)
  2147. df = DataFrame({"col": Series([td1, td2])})
  2148. df_copy = df.copy()
  2149. ser = Series([td1])
  2150. expected = df["col"].iloc[1]._value
  2151. df.loc[[True, False]] = ser
  2152. result = df["col"].iloc[1]._value
  2153. assert expected == result
  2154. tm.assert_frame_equal(df, df_copy)
  2155. @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
  2156. def test_loc_setitem_boolean_and_column(self, float_frame):
  2157. expected = float_frame.copy()
  2158. mask = float_frame["A"] > 0
  2159. float_frame.loc[mask, "B"] = 0
  2160. values = expected.values.copy()
  2161. values[mask.values, 1] = 0
  2162. expected = DataFrame(values, index=expected.index, columns=expected.columns)
  2163. tm.assert_frame_equal(float_frame, expected)
  2164. def test_loc_setitem_ndframe_values_alignment(self, using_copy_on_write):
  2165. # GH#45501
  2166. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  2167. df.loc[[False, False, True], ["a"]] = DataFrame(
  2168. {"a": [10, 20, 30]}, index=[2, 1, 0]
  2169. )
  2170. expected = DataFrame({"a": [1, 2, 10], "b": [4, 5, 6]})
  2171. tm.assert_frame_equal(df, expected)
  2172. # same thing with Series RHS
  2173. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  2174. df.loc[[False, False, True], ["a"]] = Series([10, 11, 12], index=[2, 1, 0])
  2175. tm.assert_frame_equal(df, expected)
  2176. # same thing but setting "a" instead of ["a"]
  2177. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  2178. df.loc[[False, False, True], "a"] = Series([10, 11, 12], index=[2, 1, 0])
  2179. tm.assert_frame_equal(df, expected)
  2180. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  2181. df_orig = df.copy()
  2182. ser = df["a"]
  2183. ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0])
  2184. if using_copy_on_write:
  2185. tm.assert_frame_equal(df, df_orig)
  2186. else:
  2187. tm.assert_frame_equal(df, expected)
  2188. def test_loc_indexer_empty_broadcast(self):
  2189. # GH#51450
  2190. df = DataFrame({"a": [], "b": []}, dtype=object)
  2191. expected = df.copy()
  2192. df.loc[np.array([], dtype=np.bool_), ["a"]] = df["a"]
  2193. tm.assert_frame_equal(df, expected)
  2194. def test_loc_indexer_all_false_broadcast(self):
  2195. # GH#51450
  2196. df = DataFrame({"a": ["x"], "b": ["y"]}, dtype=object)
  2197. expected = df.copy()
  2198. df.loc[np.array([False], dtype=np.bool_), ["a"]] = df["b"]
  2199. tm.assert_frame_equal(df, expected)
  2200. class TestLocListlike:
  2201. @pytest.mark.parametrize("box", [lambda x: x, np.asarray, list])
  2202. def test_loc_getitem_list_of_labels_categoricalindex_with_na(self, box):
  2203. # passing a list can include valid categories _or_ NA values
  2204. ci = CategoricalIndex(["A", "B", np.nan])
  2205. ser = Series(range(3), index=ci)
  2206. result = ser.loc[box(ci)]
  2207. tm.assert_series_equal(result, ser)
  2208. result = ser[box(ci)]
  2209. tm.assert_series_equal(result, ser)
  2210. result = ser.to_frame().loc[box(ci)]
  2211. tm.assert_frame_equal(result, ser.to_frame())
  2212. ser2 = ser[:-1]
  2213. ci2 = ci[1:]
  2214. # but if there are no NAs present, this should raise KeyError
  2215. msg = "not in index"
  2216. with pytest.raises(KeyError, match=msg):
  2217. ser2.loc[box(ci2)]
  2218. with pytest.raises(KeyError, match=msg):
  2219. ser2[box(ci2)]
  2220. with pytest.raises(KeyError, match=msg):
  2221. ser2.to_frame().loc[box(ci2)]
  2222. def test_loc_getitem_series_label_list_missing_values(self):
  2223. # gh-11428
  2224. key = np.array(
  2225. ["2001-01-04", "2001-01-02", "2001-01-04", "2001-01-14"], dtype="datetime64"
  2226. )
  2227. ser = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4))
  2228. with pytest.raises(KeyError, match="not in index"):
  2229. ser.loc[key]
  2230. def test_loc_getitem_series_label_list_missing_integer_values(self):
  2231. # GH: 25927
  2232. ser = Series(
  2233. index=np.array([9730701000001104, 10049011000001109]),
  2234. data=np.array([999000011000001104, 999000011000001104]),
  2235. )
  2236. with pytest.raises(KeyError, match="not in index"):
  2237. ser.loc[np.array([9730701000001104, 10047311000001102])]
  2238. @pytest.mark.parametrize("to_period", [True, False])
  2239. def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period):
  2240. # GH#11497
  2241. idx = date_range("2011-01-01", "2011-01-02", freq="D", name="idx")
  2242. if to_period:
  2243. idx = idx.to_period("D")
  2244. ser = Series([0.1, 0.2], index=idx, name="s")
  2245. keys = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
  2246. if to_period:
  2247. keys = [x.to_period("D") for x in keys]
  2248. result = ser.loc[keys]
  2249. exp = Series([0.1, 0.2], index=idx, name="s")
  2250. if not to_period:
  2251. exp.index = exp.index._with_freq(None)
  2252. tm.assert_series_equal(result, exp, check_index_type=True)
  2253. keys = [
  2254. Timestamp("2011-01-02"),
  2255. Timestamp("2011-01-02"),
  2256. Timestamp("2011-01-01"),
  2257. ]
  2258. if to_period:
  2259. keys = [x.to_period("D") for x in keys]
  2260. exp = Series(
  2261. [0.2, 0.2, 0.1], index=Index(keys, name="idx", dtype=idx.dtype), name="s"
  2262. )
  2263. result = ser.loc[keys]
  2264. tm.assert_series_equal(result, exp, check_index_type=True)
  2265. keys = [
  2266. Timestamp("2011-01-03"),
  2267. Timestamp("2011-01-02"),
  2268. Timestamp("2011-01-03"),
  2269. ]
  2270. if to_period:
  2271. keys = [x.to_period("D") for x in keys]
  2272. with pytest.raises(KeyError, match="not in index"):
  2273. ser.loc[keys]
  2274. def test_loc_named_index(self):
  2275. # GH 42790
  2276. df = DataFrame(
  2277. [[1, 2], [4, 5], [7, 8]],
  2278. index=["cobra", "viper", "sidewinder"],
  2279. columns=["max_speed", "shield"],
  2280. )
  2281. expected = df.iloc[:2]
  2282. expected.index.name = "foo"
  2283. result = df.loc[Index(["cobra", "viper"], name="foo")]
  2284. tm.assert_frame_equal(result, expected)
  2285. @pytest.mark.parametrize(
  2286. "columns, column_key, expected_columns",
  2287. [
  2288. ([2011, 2012, 2013], [2011, 2012], [0, 1]),
  2289. ([2011, 2012, "All"], [2011, 2012], [0, 1]),
  2290. ([2011, 2012, "All"], [2011, "All"], [0, 2]),
  2291. ],
  2292. )
  2293. def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_columns):
  2294. # gh-14836
  2295. df = DataFrame(np.random.rand(3, 3), columns=columns, index=list("ABC"))
  2296. expected = df.iloc[:, expected_columns]
  2297. result = df.loc[["A", "B", "C"], column_key]
  2298. tm.assert_frame_equal(result, expected, check_column_type=True)
  2299. def test_loc_setitem_float_intindex():
  2300. # GH 8720
  2301. rand_data = np.random.randn(8, 4)
  2302. result = DataFrame(rand_data)
  2303. result.loc[:, 0.5] = np.nan
  2304. expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1)))
  2305. expected = DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5])
  2306. tm.assert_frame_equal(result, expected)
  2307. result = DataFrame(rand_data)
  2308. result.loc[:, 0.5] = np.nan
  2309. tm.assert_frame_equal(result, expected)
  2310. def test_loc_axis_1_slice():
  2311. # GH 10586
  2312. cols = [(yr, m) for yr in [2014, 2015] for m in [7, 8, 9, 10]]
  2313. df = DataFrame(
  2314. np.ones((10, 8)),
  2315. index=tuple("ABCDEFGHIJ"),
  2316. columns=MultiIndex.from_tuples(cols),
  2317. )
  2318. result = df.loc(axis=1)[(2014, 9):(2015, 8)]
  2319. expected = DataFrame(
  2320. np.ones((10, 4)),
  2321. index=tuple("ABCDEFGHIJ"),
  2322. columns=MultiIndex.from_tuples([(2014, 9), (2014, 10), (2015, 7), (2015, 8)]),
  2323. )
  2324. tm.assert_frame_equal(result, expected)
  2325. def test_loc_set_dataframe_multiindex():
  2326. # GH 14592
  2327. expected = DataFrame(
  2328. "a", index=range(2), columns=MultiIndex.from_product([range(2), range(2)])
  2329. )
  2330. result = expected.copy()
  2331. result.loc[0, [(0, 1)]] = result.loc[0, [(0, 1)]]
  2332. tm.assert_frame_equal(result, expected)
  2333. def test_loc_mixed_int_float():
  2334. # GH#19456
  2335. ser = Series(range(2), Index([1, 2.0], dtype=object))
  2336. result = ser.loc[1]
  2337. assert result == 0
  2338. def test_loc_with_positional_slice_raises():
  2339. # GH#31840
  2340. ser = Series(range(4), index=["A", "B", "C", "D"])
  2341. with pytest.raises(TypeError, match="Slicing a positional slice with .loc"):
  2342. ser.loc[:3] = 2
  2343. def test_loc_slice_disallows_positional():
  2344. # GH#16121, GH#24612, GH#31810
  2345. dti = date_range("2016-01-01", periods=3)
  2346. df = DataFrame(np.random.random((3, 2)), index=dti)
  2347. ser = df[0]
  2348. msg = (
  2349. "cannot do slice indexing on DatetimeIndex with these "
  2350. r"indexers \[1\] of type int"
  2351. )
  2352. for obj in [df, ser]:
  2353. with pytest.raises(TypeError, match=msg):
  2354. obj.loc[1:3]
  2355. with pytest.raises(TypeError, match="Slicing a positional slice with .loc"):
  2356. # GH#31840 enforce incorrect behavior
  2357. obj.loc[1:3] = 1
  2358. with pytest.raises(TypeError, match=msg):
  2359. df.loc[1:3, 1]
  2360. with pytest.raises(TypeError, match="Slicing a positional slice with .loc"):
  2361. # GH#31840 enforce incorrect behavior
  2362. df.loc[1:3, 1] = 2
  2363. def test_loc_datetimelike_mismatched_dtypes():
  2364. # GH#32650 dont mix and match datetime/timedelta/period dtypes
  2365. df = DataFrame(
  2366. np.random.randn(5, 3),
  2367. columns=["a", "b", "c"],
  2368. index=date_range("2012", freq="H", periods=5),
  2369. )
  2370. # create dataframe with non-unique DatetimeIndex
  2371. df = df.iloc[[0, 2, 2, 3]].copy()
  2372. dti = df.index
  2373. tdi = pd.TimedeltaIndex(dti.asi8) # matching i8 values
  2374. msg = r"None of \[TimedeltaIndex.* are in the \[index\]"
  2375. with pytest.raises(KeyError, match=msg):
  2376. df.loc[tdi]
  2377. with pytest.raises(KeyError, match=msg):
  2378. df["a"].loc[tdi]
  2379. def test_loc_with_period_index_indexer():
  2380. # GH#4125
  2381. idx = pd.period_range("2002-01", "2003-12", freq="M")
  2382. df = DataFrame(np.random.randn(24, 10), index=idx)
  2383. tm.assert_frame_equal(df, df.loc[idx])
  2384. tm.assert_frame_equal(df, df.loc[list(idx)])
  2385. tm.assert_frame_equal(df, df.loc[list(idx)])
  2386. tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
  2387. tm.assert_frame_equal(df, df.loc[list(idx)])
  2388. def test_loc_setitem_multiindex_timestamp():
  2389. # GH#13831
  2390. vals = np.random.randn(8, 6)
  2391. idx = date_range("1/1/2000", periods=8)
  2392. cols = ["A", "B", "C", "D", "E", "F"]
  2393. exp = DataFrame(vals, index=idx, columns=cols)
  2394. exp.loc[exp.index[1], ("A", "B")] = np.nan
  2395. vals[1][0:2] = np.nan
  2396. res = DataFrame(vals, index=idx, columns=cols)
  2397. tm.assert_frame_equal(res, exp)
  2398. def test_loc_getitem_multiindex_tuple_level():
  2399. # GH#27591
  2400. lev1 = ["a", "b", "c"]
  2401. lev2 = [(0, 1), (1, 0)]
  2402. lev3 = [0, 1]
  2403. cols = MultiIndex.from_product([lev1, lev2, lev3], names=["x", "y", "z"])
  2404. df = DataFrame(6, index=range(5), columns=cols)
  2405. # the lev2[0] here should be treated as a single label, not as a sequence
  2406. # of labels
  2407. result = df.loc[:, (lev1[0], lev2[0], lev3[0])]
  2408. # TODO: i think this actually should drop levels
  2409. expected = df.iloc[:, :1]
  2410. tm.assert_frame_equal(result, expected)
  2411. alt = df.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=1)
  2412. tm.assert_frame_equal(alt, expected)
  2413. # same thing on a Series
  2414. ser = df.iloc[0]
  2415. expected2 = ser.iloc[:1]
  2416. alt2 = ser.xs((lev1[0], lev2[0], lev3[0]), level=[0, 1, 2], axis=0)
  2417. tm.assert_series_equal(alt2, expected2)
  2418. result2 = ser.loc[lev1[0], lev2[0], lev3[0]]
  2419. assert result2 == 6
  2420. def test_loc_getitem_nullable_index_with_duplicates():
  2421. # GH#34497
  2422. df = DataFrame(
  2423. data=np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, np.nan, np.nan]]).T,
  2424. columns=["a", "b", "c"],
  2425. dtype="Int64",
  2426. )
  2427. df2 = df.set_index("c")
  2428. assert df2.index.dtype == "Int64"
  2429. res = df2.loc[1]
  2430. expected = Series([1, 5], index=df2.columns, dtype="Int64", name=1)
  2431. tm.assert_series_equal(res, expected)
  2432. # pd.NA and duplicates in an object-dtype Index
  2433. df2.index = df2.index.astype(object)
  2434. res = df2.loc[1]
  2435. tm.assert_series_equal(res, expected)
  2436. @pytest.mark.parametrize("value", [300, np.uint16(300), np.int16(300)])
  2437. def test_loc_setitem_uint8_upcast(value):
  2438. # GH#26049
  2439. df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8")
  2440. df.loc[2, "col1"] = value # value that can't be held in uint8
  2441. expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="uint16")
  2442. tm.assert_frame_equal(df, expected)
  2443. @pytest.mark.parametrize(
  2444. "fill_val,exp_dtype",
  2445. [
  2446. (Timestamp("2022-01-06"), "datetime64[ns]"),
  2447. (Timestamp("2022-01-07", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
  2448. ],
  2449. )
  2450. def test_loc_setitem_using_datetimelike_str_as_index(fill_val, exp_dtype):
  2451. data = ["2022-01-02", "2022-01-03", "2022-01-04", fill_val.date()]
  2452. index = DatetimeIndex(data, tz=fill_val.tz, dtype=exp_dtype)
  2453. df = DataFrame([10, 11, 12, 14], columns=["a"], index=index)
  2454. # adding new row using an unexisting datetime-like str index
  2455. df.loc["2022-01-08", "a"] = 13
  2456. data.append("2022-01-08")
  2457. expected_index = DatetimeIndex(data, dtype=exp_dtype)
  2458. tm.assert_index_equal(df.index, expected_index, exact=True)
  2459. def test_loc_set_int_dtype():
  2460. # GH#23326
  2461. df = DataFrame([list("abc")])
  2462. df.loc[:, "col1"] = 5
  2463. expected = DataFrame({0: ["a"], 1: ["b"], 2: ["c"], "col1": [5]})
  2464. tm.assert_frame_equal(df, expected)
  2465. def test_loc_periodindex_3_levels():
  2466. # GH#24091
  2467. p_index = PeriodIndex(
  2468. ["20181101 1100", "20181101 1200", "20181102 1300", "20181102 1400"],
  2469. name="datetime",
  2470. freq="B",
  2471. )
  2472. mi_series = DataFrame(
  2473. [["A", "B", 1.0], ["A", "C", 2.0], ["Z", "Q", 3.0], ["W", "F", 4.0]],
  2474. index=p_index,
  2475. columns=["ONE", "TWO", "VALUES"],
  2476. )
  2477. mi_series = mi_series.set_index(["ONE", "TWO"], append=True)["VALUES"]
  2478. assert mi_series.loc[(p_index[0], "A", "B")] == 1.0
  2479. class TestLocSeries:
  2480. @pytest.mark.parametrize("val,expected", [(2**63 - 1, 3), (2**63, 4)])
  2481. def test_loc_uint64(self, val, expected):
  2482. # see GH#19399
  2483. ser = Series({2**63 - 1: 3, 2**63: 4})
  2484. assert ser.loc[val] == expected
  2485. def test_loc_getitem(self, string_series, datetime_series):
  2486. inds = string_series.index[[3, 4, 7]]
  2487. tm.assert_series_equal(string_series.loc[inds], string_series.reindex(inds))
  2488. tm.assert_series_equal(string_series.iloc[5::2], string_series[5::2])
  2489. # slice with indices
  2490. d1, d2 = datetime_series.index[[5, 15]]
  2491. result = datetime_series.loc[d1:d2]
  2492. expected = datetime_series.truncate(d1, d2)
  2493. tm.assert_series_equal(result, expected)
  2494. # boolean
  2495. mask = string_series > string_series.median()
  2496. tm.assert_series_equal(string_series.loc[mask], string_series[mask])
  2497. # ask for index value
  2498. assert datetime_series.loc[d1] == datetime_series[d1]
  2499. assert datetime_series.loc[d2] == datetime_series[d2]
  2500. def test_loc_getitem_not_monotonic(self, datetime_series):
  2501. d1, d2 = datetime_series.index[[5, 15]]
  2502. ts2 = datetime_series[::2][[1, 2, 0]]
  2503. msg = r"Timestamp\('2000-01-10 00:00:00'\)"
  2504. with pytest.raises(KeyError, match=msg):
  2505. ts2.loc[d1:d2]
  2506. with pytest.raises(KeyError, match=msg):
  2507. ts2.loc[d1:d2] = 0
  2508. def test_loc_getitem_setitem_integer_slice_keyerrors(self):
  2509. ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))
  2510. # this is OK
  2511. cp = ser.copy()
  2512. cp.iloc[4:10] = 0
  2513. assert (cp.iloc[4:10] == 0).all()
  2514. # so is this
  2515. cp = ser.copy()
  2516. cp.iloc[3:11] = 0
  2517. assert (cp.iloc[3:11] == 0).values.all()
  2518. result = ser.iloc[2:6]
  2519. result2 = ser.loc[3:11]
  2520. expected = ser.reindex([4, 6, 8, 10])
  2521. tm.assert_series_equal(result, expected)
  2522. tm.assert_series_equal(result2, expected)
  2523. # non-monotonic, raise KeyError
  2524. s2 = ser.iloc[list(range(5)) + list(range(9, 4, -1))]
  2525. with pytest.raises(KeyError, match=r"^3$"):
  2526. s2.loc[3:11]
  2527. with pytest.raises(KeyError, match=r"^3$"):
  2528. s2.loc[3:11] = 0
  2529. def test_loc_getitem_iterator(self, string_series):
  2530. idx = iter(string_series.index[:10])
  2531. result = string_series.loc[idx]
  2532. tm.assert_series_equal(result, string_series[:10])
  2533. def test_loc_setitem_boolean(self, string_series):
  2534. mask = string_series > string_series.median()
  2535. result = string_series.copy()
  2536. result.loc[mask] = 0
  2537. expected = string_series
  2538. expected[mask] = 0
  2539. tm.assert_series_equal(result, expected)
  2540. def test_loc_setitem_corner(self, string_series):
  2541. inds = list(string_series.index[[5, 8, 12]])
  2542. string_series.loc[inds] = 5
  2543. msg = r"\['foo'\] not in index"
  2544. with pytest.raises(KeyError, match=msg):
  2545. string_series.loc[inds + ["foo"]] = 5
  2546. def test_basic_setitem_with_labels(self, datetime_series):
  2547. indices = datetime_series.index[[5, 10, 15]]
  2548. cp = datetime_series.copy()
  2549. exp = datetime_series.copy()
  2550. cp[indices] = 0
  2551. exp.loc[indices] = 0
  2552. tm.assert_series_equal(cp, exp)
  2553. cp = datetime_series.copy()
  2554. exp = datetime_series.copy()
  2555. cp[indices[0] : indices[2]] = 0
  2556. exp.loc[indices[0] : indices[2]] = 0
  2557. tm.assert_series_equal(cp, exp)
  2558. def test_loc_setitem_listlike_of_ints(self):
  2559. # integer indexes, be careful
  2560. ser = Series(np.random.randn(10), index=list(range(0, 20, 2)))
  2561. inds = [0, 4, 6]
  2562. arr_inds = np.array([0, 4, 6])
  2563. cp = ser.copy()
  2564. exp = ser.copy()
  2565. ser[inds] = 0
  2566. ser.loc[inds] = 0
  2567. tm.assert_series_equal(cp, exp)
  2568. cp = ser.copy()
  2569. exp = ser.copy()
  2570. ser[arr_inds] = 0
  2571. ser.loc[arr_inds] = 0
  2572. tm.assert_series_equal(cp, exp)
  2573. inds_notfound = [0, 4, 5, 6]
  2574. arr_inds_notfound = np.array([0, 4, 5, 6])
  2575. msg = r"\[5\] not in index"
  2576. with pytest.raises(KeyError, match=msg):
  2577. ser[inds_notfound] = 0
  2578. with pytest.raises(Exception, match=msg):
  2579. ser[arr_inds_notfound] = 0
  2580. def test_loc_setitem_dt64tz_values(self):
  2581. # GH#12089
  2582. ser = Series(
  2583. date_range("2011-01-01", periods=3, tz="US/Eastern"),
  2584. index=["a", "b", "c"],
  2585. )
  2586. s2 = ser.copy()
  2587. expected = Timestamp("2011-01-03", tz="US/Eastern")
  2588. s2.loc["a"] = expected
  2589. result = s2.loc["a"]
  2590. assert result == expected
  2591. s2 = ser.copy()
  2592. s2.iloc[0] = expected
  2593. result = s2.iloc[0]
  2594. assert result == expected
  2595. s2 = ser.copy()
  2596. s2["a"] = expected
  2597. result = s2["a"]
  2598. assert result == expected
  2599. @pytest.mark.parametrize("array_fn", [np.array, pd.array, list, tuple])
  2600. @pytest.mark.parametrize("size", [0, 4, 5, 6])
  2601. def test_loc_iloc_setitem_with_listlike(self, size, array_fn):
  2602. # GH37748
  2603. # testing insertion, in a Series of size N (here 5), of a listlike object
  2604. # of size 0, N-1, N, N+1
  2605. arr = array_fn([0] * size)
  2606. expected = Series([arr, 0, 0, 0, 0], index=list("abcde"), dtype=object)
  2607. ser = Series(0, index=list("abcde"), dtype=object)
  2608. ser.loc["a"] = arr
  2609. tm.assert_series_equal(ser, expected)
  2610. ser = Series(0, index=list("abcde"), dtype=object)
  2611. ser.iloc[0] = arr
  2612. tm.assert_series_equal(ser, expected)
  2613. @pytest.mark.parametrize("indexer", [IndexSlice["A", :], ("A", slice(None))])
  2614. def test_loc_series_getitem_too_many_dimensions(self, indexer):
  2615. # GH#35349
  2616. ser = Series(
  2617. index=MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")]),
  2618. data=[21, 22, 23],
  2619. )
  2620. msg = "Too many indexers"
  2621. with pytest.raises(IndexingError, match=msg):
  2622. ser.loc[indexer, :]
  2623. with pytest.raises(IndexingError, match=msg):
  2624. ser.loc[indexer, :] = 1
  2625. def test_loc_setitem(self, string_series):
  2626. inds = string_series.index[[3, 4, 7]]
  2627. result = string_series.copy()
  2628. result.loc[inds] = 5
  2629. expected = string_series.copy()
  2630. expected[[3, 4, 7]] = 5
  2631. tm.assert_series_equal(result, expected)
  2632. result.iloc[5:10] = 10
  2633. expected[5:10] = 10
  2634. tm.assert_series_equal(result, expected)
  2635. # set slice with indices
  2636. d1, d2 = string_series.index[[5, 15]]
  2637. result.loc[d1:d2] = 6
  2638. expected[5:16] = 6 # because it's inclusive
  2639. tm.assert_series_equal(result, expected)
  2640. # set index value
  2641. string_series.loc[d1] = 4
  2642. string_series.loc[d2] = 6
  2643. assert string_series[d1] == 4
  2644. assert string_series[d2] == 6
  2645. @pytest.mark.parametrize("dtype", ["object", "string"])
  2646. def test_loc_assign_dict_to_row(self, dtype):
  2647. # GH41044
  2648. df = DataFrame({"A": ["abc", "def"], "B": ["ghi", "jkl"]}, dtype=dtype)
  2649. df.loc[0, :] = {"A": "newA", "B": "newB"}
  2650. expected = DataFrame({"A": ["newA", "def"], "B": ["newB", "jkl"]}, dtype=dtype)
  2651. tm.assert_frame_equal(df, expected)
  2652. @td.skip_array_manager_invalid_test
  2653. def test_loc_setitem_dict_timedelta_multiple_set(self):
  2654. # GH 16309
  2655. result = DataFrame(columns=["time", "value"])
  2656. result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"}
  2657. result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"}
  2658. expected = DataFrame(
  2659. [[Timedelta(6, unit="s"), "foo"]], columns=["time", "value"], index=[1]
  2660. )
  2661. tm.assert_frame_equal(result, expected)
  2662. def test_loc_set_multiple_items_in_multiple_new_columns(self):
  2663. # GH 25594
  2664. df = DataFrame(index=[1, 2], columns=["a"])
  2665. df.loc[1, ["b", "c"]] = [6, 7]
  2666. expected = DataFrame(
  2667. {
  2668. "a": Series([np.nan, np.nan], dtype="object"),
  2669. "b": [6, np.nan],
  2670. "c": [7, np.nan],
  2671. },
  2672. index=[1, 2],
  2673. )
  2674. tm.assert_frame_equal(df, expected)
  2675. def test_getitem_loc_str_periodindex(self):
  2676. # GH#33964
  2677. index = pd.period_range(start="2000", periods=20, freq="B")
  2678. series = Series(range(20), index=index)
  2679. assert series.loc["2000-01-14"] == 9