indexing.py 89 KB


  1. from __future__ import annotations
  2. from contextlib import suppress
  3. import sys
  4. from typing import (
  5. TYPE_CHECKING,
  6. Hashable,
  7. Sequence,
  8. TypeVar,
  9. cast,
  10. final,
  11. )
  12. import warnings
  13. import numpy as np
  14. from pandas._config import using_copy_on_write
  15. from pandas._libs.indexing import NDFrameIndexerBase
  16. from pandas._libs.lib import item_from_zerodim
  17. from pandas._typing import (
  18. Axis,
  19. AxisInt,
  20. )
  21. from pandas.compat import PYPY
  22. from pandas.errors import (
  23. AbstractMethodError,
  24. ChainedAssignmentError,
  25. IndexingError,
  26. InvalidIndexError,
  27. LossySetitemError,
  28. _chained_assignment_msg,
  29. )
  30. from pandas.util._decorators import doc
  31. from pandas.core.dtypes.cast import (
  32. can_hold_element,
  33. maybe_promote,
  34. )
  35. from pandas.core.dtypes.common import (
  36. is_array_like,
  37. is_bool_dtype,
  38. is_extension_array_dtype,
  39. is_hashable,
  40. is_integer,
  41. is_iterator,
  42. is_list_like,
  43. is_numeric_dtype,
  44. is_object_dtype,
  45. is_scalar,
  46. is_sequence,
  47. )
  48. from pandas.core.dtypes.concat import concat_compat
  49. from pandas.core.dtypes.generic import (
  50. ABCDataFrame,
  51. ABCSeries,
  52. )
  53. from pandas.core.dtypes.missing import (
  54. infer_fill_value,
  55. is_valid_na_for_dtype,
  56. isna,
  57. na_value_for_dtype,
  58. )
  59. from pandas.core import algorithms as algos
  60. import pandas.core.common as com
  61. from pandas.core.construction import (
  62. array as pd_array,
  63. extract_array,
  64. )
  65. from pandas.core.indexers import (
  66. check_array_indexer,
  67. is_list_like_indexer,
  68. is_scalar_indexer,
  69. length_of_indexer,
  70. )
  71. from pandas.core.indexes.api import (
  72. Index,
  73. MultiIndex,
  74. )
  75. if TYPE_CHECKING:
  76. from pandas import (
  77. DataFrame,
  78. Series,
  79. )
  80. _LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer")
  81. # "null slice"
  82. _NS = slice(None, None)
  83. _one_ellipsis_message = "indexer may only contain one '...' entry"
  84. # the public IndexSlicerMaker
  85. class _IndexSlice:
  86. """
  87. Create an object to more easily perform multi-index slicing.
  88. See Also
  89. --------
  90. MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.
  91. Notes
  92. -----
  93. See :ref:`Defined Levels <advanced.shown_levels>`
  94. for further info on slicing a MultiIndex.
  95. Examples
  96. --------
  97. >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
  98. >>> columns = ['foo', 'bar']
  99. >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
  100. ... index=midx, columns=columns)
  101. Using the default slice command:
  102. >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
  103. foo bar
  104. A0 B0 0 1
  105. B1 2 3
  106. A1 B0 8 9
  107. B1 10 11
  108. Using the IndexSlice class for a more intuitive command:
  109. >>> idx = pd.IndexSlice
  110. >>> dfmi.loc[idx[:, 'B0':'B1'], :]
  111. foo bar
  112. A0 B0 0 1
  113. B1 2 3
  114. A1 B0 8 9
  115. B1 10 11
  116. """
  117. def __getitem__(self, arg):
  118. return arg
  119. IndexSlice = _IndexSlice()
  120. class IndexingMixin:
  121. """
  122. Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
  123. """
  124. @property
  125. def iloc(self) -> _iLocIndexer:
  126. """
  127. Purely integer-location based indexing for selection by position.
  128. ``.iloc[]`` is primarily integer position based (from ``0`` to
  129. ``length-1`` of the axis), but may also be used with a boolean
  130. array.
  131. Allowed inputs are:
  132. - An integer, e.g. ``5``.
  133. - A list or array of integers, e.g. ``[4, 3, 0]``.
  134. - A slice object with ints, e.g. ``1:7``.
  135. - A boolean array.
  136. - A ``callable`` function with one argument (the calling Series or
  137. DataFrame) and that returns valid output for indexing (one of the above).
  138. This is useful in method chains, when you don't have a reference to the
  139. calling object, but would like to base your selection on some value.
  140. - A tuple of row and column indexes. The tuple elements consist of one of the
  141. above inputs, e.g. ``(0, 1)``.
  142. ``.iloc`` will raise ``IndexError`` if a requested indexer is
  143. out-of-bounds, except *slice* indexers which allow out-of-bounds
  144. indexing (this conforms with python/numpy *slice* semantics).
  145. See more at :ref:`Selection by Position <indexing.integer>`.
  146. See Also
  147. --------
  148. DataFrame.iat : Fast integer location scalar accessor.
  149. DataFrame.loc : Purely label-location based indexer for selection by label.
  150. Series.iloc : Purely integer-location based indexing for
  151. selection by position.
  152. Examples
  153. --------
  154. >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
  155. ... {'a': 100, 'b': 200, 'c': 300, 'd': 400},
  156. ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
  157. >>> df = pd.DataFrame(mydict)
  158. >>> df
  159. a b c d
  160. 0 1 2 3 4
  161. 1 100 200 300 400
  162. 2 1000 2000 3000 4000
  163. **Indexing just the rows**
  164. With a scalar integer.
  165. >>> type(df.iloc[0])
  166. <class 'pandas.core.series.Series'>
  167. >>> df.iloc[0]
  168. a 1
  169. b 2
  170. c 3
  171. d 4
  172. Name: 0, dtype: int64
  173. With a list of integers.
  174. >>> df.iloc[[0]]
  175. a b c d
  176. 0 1 2 3 4
  177. >>> type(df.iloc[[0]])
  178. <class 'pandas.core.frame.DataFrame'>
  179. >>> df.iloc[[0, 1]]
  180. a b c d
  181. 0 1 2 3 4
  182. 1 100 200 300 400
  183. With a `slice` object.
  184. >>> df.iloc[:3]
  185. a b c d
  186. 0 1 2 3 4
  187. 1 100 200 300 400
  188. 2 1000 2000 3000 4000
  189. With a boolean mask the same length as the index.
  190. >>> df.iloc[[True, False, True]]
  191. a b c d
  192. 0 1 2 3 4
  193. 2 1000 2000 3000 4000
  194. With a callable, useful in method chains. The `x` passed
  195. to the ``lambda`` is the DataFrame being sliced. This selects
  196. the rows whose index label even.
  197. >>> df.iloc[lambda x: x.index % 2 == 0]
  198. a b c d
  199. 0 1 2 3 4
  200. 2 1000 2000 3000 4000
  201. **Indexing both axes**
  202. You can mix the indexer types for the index and columns. Use ``:`` to
  203. select the entire axis.
  204. With scalar integers.
  205. >>> df.iloc[0, 1]
  206. 2
  207. With lists of integers.
  208. >>> df.iloc[[0, 2], [1, 3]]
  209. b d
  210. 0 2 4
  211. 2 2000 4000
  212. With `slice` objects.
  213. >>> df.iloc[1:3, 0:3]
  214. a b c
  215. 1 100 200 300
  216. 2 1000 2000 3000
  217. With a boolean array whose length matches the columns.
  218. >>> df.iloc[:, [True, False, True, False]]
  219. a c
  220. 0 1 3
  221. 1 100 300
  222. 2 1000 3000
  223. With a callable function that expects the Series or DataFrame.
  224. >>> df.iloc[:, lambda df: [0, 2]]
  225. a c
  226. 0 1 3
  227. 1 100 300
  228. 2 1000 3000
  229. """
  230. return _iLocIndexer("iloc", self)
  231. @property
  232. def loc(self) -> _LocIndexer:
  233. """
  234. Access a group of rows and columns by label(s) or a boolean array.
  235. ``.loc[]`` is primarily label based, but may also be used with a
  236. boolean array.
  237. Allowed inputs are:
  238. - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
  239. interpreted as a *label* of the index, and **never** as an
  240. integer position along the index).
  241. - A list or array of labels, e.g. ``['a', 'b', 'c']``.
  242. - A slice object with labels, e.g. ``'a':'f'``.
  243. .. warning:: Note that contrary to usual python slices, **both** the
  244. start and the stop are included
  245. - A boolean array of the same length as the axis being sliced,
  246. e.g. ``[True, False, True]``.
  247. - An alignable boolean Series. The index of the key will be aligned before
  248. masking.
  249. - An alignable Index. The Index of the returned selection will be the input.
  250. - A ``callable`` function with one argument (the calling Series or
  251. DataFrame) and that returns valid output for indexing (one of the above)
  252. See more at :ref:`Selection by Label <indexing.label>`.
  253. Raises
  254. ------
  255. KeyError
  256. If any items are not found.
  257. IndexingError
  258. If an indexed key is passed and its index is unalignable to the frame index.
  259. See Also
  260. --------
  261. DataFrame.at : Access a single value for a row/column label pair.
  262. DataFrame.iloc : Access group of rows and columns by integer position(s).
  263. DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
  264. Series/DataFrame.
  265. Series.loc : Access group of values using labels.
  266. Examples
  267. --------
  268. **Getting values**
  269. >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
  270. ... index=['cobra', 'viper', 'sidewinder'],
  271. ... columns=['max_speed', 'shield'])
  272. >>> df
  273. max_speed shield
  274. cobra 1 2
  275. viper 4 5
  276. sidewinder 7 8
  277. Single label. Note this returns the row as a Series.
  278. >>> df.loc['viper']
  279. max_speed 4
  280. shield 5
  281. Name: viper, dtype: int64
  282. List of labels. Note using ``[[]]`` returns a DataFrame.
  283. >>> df.loc[['viper', 'sidewinder']]
  284. max_speed shield
  285. viper 4 5
  286. sidewinder 7 8
  287. Single label for row and column
  288. >>> df.loc['cobra', 'shield']
  289. 2
  290. Slice with labels for row and single label for column. As mentioned
  291. above, note that both the start and stop of the slice are included.
  292. >>> df.loc['cobra':'viper', 'max_speed']
  293. cobra 1
  294. viper 4
  295. Name: max_speed, dtype: int64
  296. Boolean list with the same length as the row axis
  297. >>> df.loc[[False, False, True]]
  298. max_speed shield
  299. sidewinder 7 8
  300. Alignable boolean Series:
  301. >>> df.loc[pd.Series([False, True, False],
  302. ... index=['viper', 'sidewinder', 'cobra'])]
  303. max_speed shield
  304. sidewinder 7 8
  305. Index (same behavior as ``df.reindex``)
  306. >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
  307. max_speed shield
  308. foo
  309. cobra 1 2
  310. viper 4 5
  311. Conditional that returns a boolean Series
  312. >>> df.loc[df['shield'] > 6]
  313. max_speed shield
  314. sidewinder 7 8
  315. Conditional that returns a boolean Series with column labels specified
  316. >>> df.loc[df['shield'] > 6, ['max_speed']]
  317. max_speed
  318. sidewinder 7
  319. Callable that returns a boolean Series
  320. >>> df.loc[lambda df: df['shield'] == 8]
  321. max_speed shield
  322. sidewinder 7 8
  323. **Setting values**
  324. Set value for all items matching the list of labels
  325. >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
  326. >>> df
  327. max_speed shield
  328. cobra 1 2
  329. viper 4 50
  330. sidewinder 7 50
  331. Set value for an entire row
  332. >>> df.loc['cobra'] = 10
  333. >>> df
  334. max_speed shield
  335. cobra 10 10
  336. viper 4 50
  337. sidewinder 7 50
  338. Set value for an entire column
  339. >>> df.loc[:, 'max_speed'] = 30
  340. >>> df
  341. max_speed shield
  342. cobra 30 10
  343. viper 30 50
  344. sidewinder 30 50
  345. Set value for rows matching callable condition
  346. >>> df.loc[df['shield'] > 35] = 0
  347. >>> df
  348. max_speed shield
  349. cobra 30 10
  350. viper 0 0
  351. sidewinder 0 0
  352. **Getting values on a DataFrame with an index that has integer labels**
  353. Another example using integers for the index
  354. >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
  355. ... index=[7, 8, 9], columns=['max_speed', 'shield'])
  356. >>> df
  357. max_speed shield
  358. 7 1 2
  359. 8 4 5
  360. 9 7 8
  361. Slice with integer labels for rows. As mentioned above, note that both
  362. the start and stop of the slice are included.
  363. >>> df.loc[7:9]
  364. max_speed shield
  365. 7 1 2
  366. 8 4 5
  367. 9 7 8
  368. **Getting values with a MultiIndex**
  369. A number of examples using a DataFrame with a MultiIndex
  370. >>> tuples = [
  371. ... ('cobra', 'mark i'), ('cobra', 'mark ii'),
  372. ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
  373. ... ('viper', 'mark ii'), ('viper', 'mark iii')
  374. ... ]
  375. >>> index = pd.MultiIndex.from_tuples(tuples)
  376. >>> values = [[12, 2], [0, 4], [10, 20],
  377. ... [1, 4], [7, 1], [16, 36]]
  378. >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
  379. >>> df
  380. max_speed shield
  381. cobra mark i 12 2
  382. mark ii 0 4
  383. sidewinder mark i 10 20
  384. mark ii 1 4
  385. viper mark ii 7 1
  386. mark iii 16 36
  387. Single label. Note this returns a DataFrame with a single index.
  388. >>> df.loc['cobra']
  389. max_speed shield
  390. mark i 12 2
  391. mark ii 0 4
  392. Single index tuple. Note this returns a Series.
  393. >>> df.loc[('cobra', 'mark ii')]
  394. max_speed 0
  395. shield 4
  396. Name: (cobra, mark ii), dtype: int64
  397. Single label for row and column. Similar to passing in a tuple, this
  398. returns a Series.
  399. >>> df.loc['cobra', 'mark i']
  400. max_speed 12
  401. shield 2
  402. Name: (cobra, mark i), dtype: int64
  403. Single tuple. Note using ``[[]]`` returns a DataFrame.
  404. >>> df.loc[[('cobra', 'mark ii')]]
  405. max_speed shield
  406. cobra mark ii 0 4
  407. Single tuple for the index with a single label for the column
  408. >>> df.loc[('cobra', 'mark i'), 'shield']
  409. 2
  410. Slice from index tuple to single label
  411. >>> df.loc[('cobra', 'mark i'):'viper']
  412. max_speed shield
  413. cobra mark i 12 2
  414. mark ii 0 4
  415. sidewinder mark i 10 20
  416. mark ii 1 4
  417. viper mark ii 7 1
  418. mark iii 16 36
  419. Slice from index tuple to index tuple
  420. >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
  421. max_speed shield
  422. cobra mark i 12 2
  423. mark ii 0 4
  424. sidewinder mark i 10 20
  425. mark ii 1 4
  426. viper mark ii 7 1
  427. Please see the :ref:`user guide<advanced.advanced_hierarchical>`
  428. for more details and explanations of advanced indexing.
  429. """
  430. return _LocIndexer("loc", self)
  431. @property
  432. def at(self) -> _AtIndexer:
  433. """
  434. Access a single value for a row/column label pair.
  435. Similar to ``loc``, in that both provide label-based lookups. Use
  436. ``at`` if you only need to get or set a single value in a DataFrame
  437. or Series.
  438. Raises
  439. ------
  440. KeyError
  441. * If getting a value and 'label' does not exist in a DataFrame or
  442. Series.
  443. ValueError
  444. * If row/column label pair is not a tuple or if any label from
  445. the pair is not a scalar for DataFrame.
  446. * If label is list-like (*excluding* NamedTuple) for Series.
  447. See Also
  448. --------
  449. DataFrame.at : Access a single value for a row/column pair by label.
  450. DataFrame.iat : Access a single value for a row/column pair by integer
  451. position.
  452. DataFrame.loc : Access a group of rows and columns by label(s).
  453. DataFrame.iloc : Access a group of rows and columns by integer
  454. position(s).
  455. Series.at : Access a single value by label.
  456. Series.iat : Access a single value by integer position.
  457. Series.loc : Access a group of rows by label(s).
  458. Series.iloc : Access a group of rows by integer position(s).
  459. Notes
  460. -----
  461. See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
  462. for more details.
  463. Examples
  464. --------
  465. >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
  466. ... index=[4, 5, 6], columns=['A', 'B', 'C'])
  467. >>> df
  468. A B C
  469. 4 0 2 3
  470. 5 0 4 1
  471. 6 10 20 30
  472. Get value at specified row/column pair
  473. >>> df.at[4, 'B']
  474. 2
  475. Set value at specified row/column pair
  476. >>> df.at[4, 'B'] = 10
  477. >>> df.at[4, 'B']
  478. 10
  479. Get value within a Series
  480. >>> df.loc[5].at['B']
  481. 4
  482. """
  483. return _AtIndexer("at", self)
  484. @property
  485. def iat(self) -> _iAtIndexer:
  486. """
  487. Access a single value for a row/column pair by integer position.
  488. Similar to ``iloc``, in that both provide integer-based lookups. Use
  489. ``iat`` if you only need to get or set a single value in a DataFrame
  490. or Series.
  491. Raises
  492. ------
  493. IndexError
  494. When integer position is out of bounds.
  495. See Also
  496. --------
  497. DataFrame.at : Access a single value for a row/column label pair.
  498. DataFrame.loc : Access a group of rows and columns by label(s).
  499. DataFrame.iloc : Access a group of rows and columns by integer position(s).
  500. Examples
  501. --------
  502. >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
  503. ... columns=['A', 'B', 'C'])
  504. >>> df
  505. A B C
  506. 0 0 2 3
  507. 1 0 4 1
  508. 2 10 20 30
  509. Get value at specified row/column pair
  510. >>> df.iat[1, 2]
  511. 1
  512. Set value at specified row/column pair
  513. >>> df.iat[1, 2] = 10
  514. >>> df.iat[1, 2]
  515. 10
  516. Get value within a series
  517. >>> df.loc[0].iat[1]
  518. 2
  519. """
  520. return _iAtIndexer("iat", self)
  521. class _LocationIndexer(NDFrameIndexerBase):
  522. _valid_types: str
  523. axis: AxisInt | None = None
  524. # sub-classes need to set _takeable
  525. _takeable: bool
  526. @final
  527. def __call__(
  528. self: _LocationIndexerT, axis: Axis | None = None
  529. ) -> _LocationIndexerT:
  530. # we need to return a copy of ourselves
  531. new_self = type(self)(self.name, self.obj)
  532. if axis is not None:
  533. axis_int_none = self.obj._get_axis_number(axis)
  534. else:
  535. axis_int_none = axis
  536. new_self.axis = axis_int_none
  537. return new_self
  538. def _get_setitem_indexer(self, key):
  539. """
  540. Convert a potentially-label-based key into a positional indexer.
  541. """
  542. if self.name == "loc":
  543. # always holds here bc iloc overrides _get_setitem_indexer
  544. self._ensure_listlike_indexer(key)
  545. if isinstance(key, tuple):
  546. for x in key:
  547. check_dict_or_set_indexers(x)
  548. if self.axis is not None:
  549. key = _tupleize_axis_indexer(self.ndim, self.axis, key)
  550. ax = self.obj._get_axis(0)
  551. if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
  552. with suppress(KeyError, InvalidIndexError):
  553. # TypeError e.g. passed a bool
  554. return ax.get_loc(key)
  555. if isinstance(key, tuple):
  556. with suppress(IndexingError):
  557. # suppress "Too many indexers"
  558. return self._convert_tuple(key)
  559. if isinstance(key, range):
  560. # GH#45479 test_loc_setitem_range_key
  561. key = list(key)
  562. return self._convert_to_indexer(key, axis=0)
  563. @final
  564. def _maybe_mask_setitem_value(self, indexer, value):
  565. """
  566. If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
  567. same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
  568. similar to Series.__setitem__.
  569. Note this is only for loc, not iloc.
  570. """
  571. if (
  572. isinstance(indexer, tuple)
  573. and len(indexer) == 2
  574. and isinstance(value, (ABCSeries, ABCDataFrame))
  575. ):
  576. pi, icols = indexer
  577. ndim = value.ndim
  578. if com.is_bool_indexer(pi) and len(value) == len(pi):
  579. newkey = pi.nonzero()[0]
  580. if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
  581. # e.g. test_loc_setitem_boolean_mask_allfalse
  582. if len(newkey) == 0:
  583. # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
  584. # TODO(GH#45333): may be fixed when deprecation is enforced
  585. value = value.iloc[:0]
  586. else:
  587. # test_loc_setitem_ndframe_values_alignment
  588. value = self.obj.iloc._align_series(indexer, value)
  589. indexer = (newkey, icols)
  590. elif (
  591. isinstance(icols, np.ndarray)
  592. and icols.dtype.kind == "i"
  593. and len(icols) == 1
  594. ):
  595. if ndim == 1:
  596. # We implicitly broadcast, though numpy does not, see
  597. # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
  598. # test_loc_setitem_ndframe_values_alignment
  599. value = self.obj.iloc._align_series(indexer, value)
  600. indexer = (newkey, icols)
  601. elif ndim == 2 and value.shape[1] == 1:
  602. if len(newkey) == 0:
  603. # FIXME: kludge for
  604. # test_loc_setitem_all_false_boolean_two_blocks
  605. # TODO(GH#45333): may be fixed when deprecation is enforced
  606. value = value.iloc[:0]
  607. else:
  608. # test_loc_setitem_ndframe_values_alignment
  609. value = self.obj.iloc._align_frame(indexer, value)
  610. indexer = (newkey, icols)
  611. elif com.is_bool_indexer(indexer):
  612. indexer = indexer.nonzero()[0]
  613. return indexer, value
  614. @final
  615. def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
  616. """
  617. Ensure that a list-like of column labels are all present by adding them if
  618. they do not already exist.
  619. Parameters
  620. ----------
  621. key : list-like of column labels
  622. Target labels.
  623. axis : key axis if known
  624. """
  625. column_axis = 1
  626. # column only exists in 2-dimensional DataFrame
  627. if self.ndim != 2:
  628. return
  629. orig_key = key
  630. if isinstance(key, tuple) and len(key) > 1:
  631. # key may be a tuple if we are .loc
  632. # if length of key is > 1 set key to column part
  633. key = key[column_axis]
  634. axis = column_axis
  635. if (
  636. axis == column_axis
  637. and not isinstance(self.obj.columns, MultiIndex)
  638. and is_list_like_indexer(key)
  639. and not com.is_bool_indexer(key)
  640. and all(is_hashable(k) for k in key)
  641. ):
  642. # GH#38148
  643. keys = self.obj.columns.union(key, sort=False)
  644. diff = Index(key).difference(self.obj.columns, sort=False)
  645. if len(diff) and com.is_null_slice(orig_key[0]):
  646. # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
  647. # is a new column, add the new columns with dtype=np.void
  648. # so that later when we go through setitem_single_column
  649. # we will use isetitem. Without this, the reindex_axis
  650. # below would create float64 columns in this example, which
  651. # would successfully hold 7, so we would end up with the wrong
  652. # dtype.
  653. indexer = np.arange(len(keys), dtype=np.intp)
  654. indexer[len(self.obj.columns) :] = -1
  655. new_mgr = self.obj._mgr.reindex_indexer(
  656. keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
  657. )
  658. self.obj._mgr = new_mgr
  659. return
  660. self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
  661. @final
  662. def __setitem__(self, key, value) -> None:
  663. if not PYPY and using_copy_on_write():
  664. if sys.getrefcount(self.obj) <= 2:
  665. warnings.warn(
  666. _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
  667. )
  668. check_dict_or_set_indexers(key)
  669. if isinstance(key, tuple):
  670. key = tuple(list(x) if is_iterator(x) else x for x in key)
  671. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  672. else:
  673. key = com.apply_if_callable(key, self.obj)
  674. indexer = self._get_setitem_indexer(key)
  675. self._has_valid_setitem_indexer(key)
  676. iloc = self if self.name == "iloc" else self.obj.iloc
  677. iloc._setitem_with_indexer(indexer, value, self.name)
  678. def _validate_key(self, key, axis: AxisInt):
  679. """
  680. Ensure that key is valid for current indexer.
  681. Parameters
  682. ----------
  683. key : scalar, slice or list-like
  684. Key requested.
  685. axis : int
  686. Dimension on which the indexing is being made.
  687. Raises
  688. ------
  689. TypeError
  690. If the key (or some element of it) has wrong type.
  691. IndexError
  692. If the key (or some element of it) is out of bounds.
  693. KeyError
  694. If the key was not found.
  695. """
  696. raise AbstractMethodError(self)
  697. @final
  698. def _expand_ellipsis(self, tup: tuple) -> tuple:
  699. """
  700. If a tuple key includes an Ellipsis, replace it with an appropriate
  701. number of null slices.
  702. """
  703. if any(x is Ellipsis for x in tup):
  704. if tup.count(Ellipsis) > 1:
  705. raise IndexingError(_one_ellipsis_message)
  706. if len(tup) == self.ndim:
  707. # It is unambiguous what axis this Ellipsis is indexing,
  708. # treat as a single null slice.
  709. i = tup.index(Ellipsis)
  710. # FIXME: this assumes only one Ellipsis
  711. new_key = tup[:i] + (_NS,) + tup[i + 1 :]
  712. return new_key
  713. # TODO: other cases? only one test gets here, and that is covered
  714. # by _validate_key_length
  715. return tup
  716. @final
  717. def _validate_tuple_indexer(self, key: tuple) -> tuple:
  718. """
  719. Check the key for valid keys across my indexer.
  720. """
  721. key = self._validate_key_length(key)
  722. key = self._expand_ellipsis(key)
  723. for i, k in enumerate(key):
  724. try:
  725. self._validate_key(k, i)
  726. except ValueError as err:
  727. raise ValueError(
  728. "Location based indexing can only have "
  729. f"[{self._valid_types}] types"
  730. ) from err
  731. return key
  732. @final
  733. def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
  734. """
  735. Returns
  736. -------
  737. bool
  738. """
  739. if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
  740. return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
  741. return False
  742. @final
  743. def _convert_tuple(self, key: tuple) -> tuple:
  744. # Note: we assume _tupleize_axis_indexer has been called, if necessary.
  745. self._validate_key_length(key)
  746. keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
  747. return tuple(keyidx)
  748. @final
  749. def _validate_key_length(self, key: tuple) -> tuple:
  750. if len(key) > self.ndim:
  751. if key[0] is Ellipsis:
  752. # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
  753. key = key[1:]
  754. if Ellipsis in key:
  755. raise IndexingError(_one_ellipsis_message)
  756. return self._validate_key_length(key)
  757. raise IndexingError("Too many indexers")
  758. return key
  759. @final
  760. def _getitem_tuple_same_dim(self, tup: tuple):
  761. """
  762. Index with indexers that should return an object of the same dimension
  763. as self.obj.
  764. This is only called after a failed call to _getitem_lowerdim.
  765. """
  766. retval = self.obj
  767. for i, key in enumerate(tup):
  768. if com.is_null_slice(key):
  769. continue
  770. retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
  771. # We should never have retval.ndim < self.ndim, as that should
  772. # be handled by the _getitem_lowerdim call above.
  773. assert retval.ndim == self.ndim
  774. if retval is self.obj:
  775. # if all axes were a null slice (`df.loc[:, :]`), ensure we still
  776. # return a new object (https://github.com/pandas-dev/pandas/pull/49469)
  777. retval = retval.copy(deep=False)
  778. return retval
  779. @final
  780. def _getitem_lowerdim(self, tup: tuple):
  781. # we can directly get the axis result since the axis is specified
  782. if self.axis is not None:
  783. axis = self.obj._get_axis_number(self.axis)
  784. return self._getitem_axis(tup, axis=axis)
  785. # we may have a nested tuples indexer here
  786. if self._is_nested_tuple_indexer(tup):
  787. return self._getitem_nested_tuple(tup)
  788. # we maybe be using a tuple to represent multiple dimensions here
  789. ax0 = self.obj._get_axis(0)
  790. # ...but iloc should handle the tuple as simple integer-location
  791. # instead of checking it as multiindex representation (GH 13797)
  792. if (
  793. isinstance(ax0, MultiIndex)
  794. and self.name != "iloc"
  795. and not any(isinstance(x, slice) for x in tup)
  796. ):
  797. # Note: in all extant test cases, replacing the slice condition with
  798. # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
  799. # is equivalent.
  800. # (see the other place where we call _handle_lowerdim_multi_index_axis0)
  801. with suppress(IndexingError):
  802. return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)
  803. tup = self._validate_key_length(tup)
  804. for i, key in enumerate(tup):
  805. if is_label_like(key):
  806. # We don't need to check for tuples here because those are
  807. # caught by the _is_nested_tuple_indexer check above.
  808. section = self._getitem_axis(key, axis=i)
  809. # We should never have a scalar section here, because
  810. # _getitem_lowerdim is only called after a check for
  811. # is_scalar_access, which that would be.
  812. if section.ndim == self.ndim:
  813. # we're in the middle of slicing through a MultiIndex
  814. # revise the key wrt to `section` by inserting an _NS
  815. new_key = tup[:i] + (_NS,) + tup[i + 1 :]
  816. else:
  817. # Note: the section.ndim == self.ndim check above
  818. # rules out having DataFrame here, so we dont need to worry
  819. # about transposing.
  820. new_key = tup[:i] + tup[i + 1 :]
  821. if len(new_key) == 1:
  822. new_key = new_key[0]
  823. # Slices should return views, but calling iloc/loc with a null
  824. # slice returns a new object.
  825. if com.is_null_slice(new_key):
  826. return section
  827. # This is an elided recursive call to iloc/loc
  828. return getattr(section, self.name)[new_key]
  829. raise IndexingError("not applicable")
  830. @final
  831. def _getitem_nested_tuple(self, tup: tuple):
  832. # we have a nested tuple so have at least 1 multi-index level
  833. # we should be able to match up the dimensionality here
  834. for key in tup:
  835. check_dict_or_set_indexers(key)
  836. # we have too many indexers for our dim, but have at least 1
  837. # multi-index dimension, try to see if we have something like
  838. # a tuple passed to a series with a multi-index
  839. if len(tup) > self.ndim:
  840. if self.name != "loc":
  841. # This should never be reached, but let's be explicit about it
  842. raise ValueError("Too many indices") # pragma: no cover
  843. if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
  844. # GH#10521 Series should reduce MultiIndex dimensions instead of
  845. # DataFrame, IndexingError is not raised when slice(None,None,None)
  846. # with one row.
  847. with suppress(IndexingError):
  848. return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
  849. tup
  850. )
  851. elif isinstance(self.obj, ABCSeries) and any(
  852. isinstance(k, tuple) for k in tup
  853. ):
  854. # GH#35349 Raise if tuple in tuple for series
  855. # Do this after the all-hashable-or-null-slice check so that
  856. # we are only getting non-hashable tuples, in particular ones
  857. # that themselves contain a slice entry
  858. # See test_loc_series_getitem_too_many_dimensions
  859. raise IndexingError("Too many indexers")
  860. # this is a series with a multi-index specified a tuple of
  861. # selectors
  862. axis = self.axis or 0
  863. return self._getitem_axis(tup, axis=axis)
  864. # handle the multi-axis by taking sections and reducing
  865. # this is iterative
  866. obj = self.obj
  867. # GH#41369 Loop in reverse order ensures indexing along columns before rows
  868. # which selects only necessary blocks which avoids dtype conversion if possible
  869. axis = len(tup) - 1
  870. for key in tup[::-1]:
  871. if com.is_null_slice(key):
  872. axis -= 1
  873. continue
  874. obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
  875. axis -= 1
  876. # if we have a scalar, we are done
  877. if is_scalar(obj) or not hasattr(obj, "ndim"):
  878. break
  879. return obj
  880. def _convert_to_indexer(self, key, axis: AxisInt):
  881. raise AbstractMethodError(self)
  @final
  def __getitem__(self, key):
      """
      Entry point for ``indexer[key]``.

      A plain ``tuple`` key has its iterator members turned into lists and
      every member passed through ``com.apply_if_callable`` together with
      ``self.obj``. The processed tuple is answered by ``_get_value`` when
      ``_is_scalar_access`` accepts it, and by ``_getitem_tuple`` otherwise.

      Any other key goes through ``com.apply_if_callable`` as well and is
      handed to ``_getitem_axis`` along ``self.axis``, falling back to 0.
      """
      check_dict_or_set_indexers(key)

      if type(key) is not tuple:
          # a non-tuple key only ever addresses a single axis
          axis = self.axis or 0
          resolved = com.apply_if_callable(key, self.obj)
          return self._getitem_axis(resolved, axis=axis)

      # first pass: materialise iterator members as lists
      materialized = tuple(list(part) if is_iterator(part) else part for part in key)
      # second pass: hand every member to apply_if_callable
      key = tuple(com.apply_if_callable(part, self.obj) for part in materialized)

      if not self._is_scalar_access(key):
          return self._getitem_tuple(key)
      return self.obj._get_value(*key, takeable=self._takeable)
  896. def _is_scalar_access(self, key: tuple):
  897. raise NotImplementedError()
  898. def _getitem_tuple(self, tup: tuple):
  899. raise AbstractMethodError(self)
  900. def _getitem_axis(self, key, axis: AxisInt):
  901. raise NotImplementedError()
  902. def _has_valid_setitem_indexer(self, indexer) -> bool:
  903. raise AbstractMethodError(self)
  @final
  def _getbool_axis(self, key, axis: AxisInt):
      """
      Take the entries of ``self.obj`` that a boolean key selects on ``axis``.

      Parameters
      ----------
      key : boolean indexer
          Passed through ``check_bool_indexer`` with the labels of ``axis``.
      axis : int
          Axis to take along. The caller must make sure it is not None.

      Returns
      -------
      The result of ``self.obj._take_with_is_copy`` for the positions at
      which the checked key is non-zero.
      """
      mask = check_bool_indexer(self.obj._get_axis(axis), key)
      positions = mask.nonzero()[0]
      return self.obj._take_with_is_copy(positions, axis=axis)
  @doc(IndexingMixin.loc)
  class _LocIndexer(_LocationIndexer):
      # The ``doc`` decorator above is applied with ``IndexingMixin.loc``;
      # the class body itself carries no docstring.

      # Forwarded as ``takeable=`` when ``__getitem__`` calls ``_get_value``.
      _takeable: bool = False
      # Human-readable description of the key kinds ``.loc`` accepts.
      _valid_types = (
          "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
          "endpoints included! Can be slices of integers if the "
          "index is integers), listlike of labels, boolean"
      )
  919. # -------------------------------------------------------------------
  920. # Key Checks
  @doc(_LocationIndexer._validate_key)
  def _validate_key(self, key, axis: Axis):
      # The ``doc`` decorator above is given the parent's ``_validate_key``,
      # so no separate docstring is written here.
      #
      # Accepted without further checks at this point:
      #   - a collection of labels (their presence is checked later)
      #   - a slice of labels (where start-end in labels)
      #   - a slice of integers (only if in the labels)
      # Rejected here:
      #   - a boolean label when the axis is not boolean
      #   - a slice with a boolean start or stop
      labels = self.obj._get_axis(axis)

      if isinstance(key, bool):
          boolean_axis = (
              is_bool_dtype(labels)
              or labels.dtype.name == "boolean"
              or (
                  isinstance(labels, MultiIndex)
                  and is_bool_dtype(labels.get_level_values(0))
              )
          )
          if not boolean_axis:
              raise KeyError(
                  f"{key}: boolean label can not be used without a boolean index"
              )

      if isinstance(key, slice):
          if isinstance(key.start, bool) or isinstance(key.stop, bool):
              raise TypeError(f"{key}: boolean values can not be used in a slice")
  941. def _has_valid_setitem_indexer(self, indexer) -> bool:
  942. return True
  943. def _is_scalar_access(self, key: tuple) -> bool:
  944. """
  945. Returns
  946. -------
  947. bool
  948. """
  949. # this is a shortcut accessor to both .loc and .iloc
  950. # that provide the equivalent access of .at and .iat
  951. # a) avoid getting things via sections and (to minimize dtype changes)
  952. # b) provide a performant path
  953. if len(key) != self.ndim:
  954. return False
  955. for i, k in enumerate(key):
  956. if not is_scalar(k):
  957. return False
  958. ax = self.obj.axes[i]
  959. if isinstance(ax, MultiIndex):
  960. return False
  961. if isinstance(k, str) and ax._supports_partial_string_indexing:
  962. # partial string indexing, df.loc['2000', 'A']
  963. # should not be considered scalar
  964. return False
  965. if not ax._index_as_unique:
  966. return False
  967. return True
  968. # -------------------------------------------------------------------
  969. # MultiIndex Handling
  970. def _multi_take_opportunity(self, tup: tuple) -> bool:
  971. """
  972. Check whether there is the possibility to use ``_multi_take``.
  973. Currently the limit is that all axes being indexed, must be indexed with
  974. list-likes.
  975. Parameters
  976. ----------
  977. tup : tuple
  978. Tuple of indexers, one per axis.
  979. Returns
  980. -------
  981. bool
  982. Whether the current indexing,
  983. can be passed through `_multi_take`.
  984. """
  985. if not all(is_list_like_indexer(x) for x in tup):
  986. return False
  987. # just too complicated
  988. return not any(com.is_bool_indexer(x) for x in tup)
  989. def _multi_take(self, tup: tuple):
  990. """
  991. Create the indexers for the passed tuple of keys, and
  992. executes the take operation. This allows the take operation to be
  993. executed all at once, rather than once for each dimension.
  994. Improving efficiency.
  995. Parameters
  996. ----------
  997. tup : tuple
  998. Tuple of indexers, one per axis.
  999. Returns
  1000. -------
  1001. values: same type as the object being indexed
  1002. """
  1003. # GH 836
  1004. d = {
  1005. axis: self._get_listlike_indexer(key, axis)
  1006. for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
  1007. }
  1008. return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
  1009. # -------------------------------------------------------------------
  1010. def _getitem_iterable(self, key, axis: AxisInt):
  1011. """
  1012. Index current object with an iterable collection of keys.
  1013. Parameters
  1014. ----------
  1015. key : iterable
  1016. Targeted labels.
  1017. axis : int
  1018. Dimension on which the indexing is being made.
  1019. Raises
  1020. ------
  1021. KeyError
  1022. If no key was found. Will change in the future to raise if not all
  1023. keys were found.
  1024. Returns
  1025. -------
  1026. scalar, DataFrame, or Series: indexed value(s).
  1027. """
  1028. # we assume that not com.is_bool_indexer(key), as that is
  1029. # handled before we get here.
  1030. self._validate_key(key, axis)
  1031. # A collection of keys
  1032. keyarr, indexer = self._get_listlike_indexer(key, axis)
  1033. return self.obj._reindex_with_indexers(
  1034. {axis: [keyarr, indexer]}, copy=True, allow_dups=True
  1035. )
  1036. def _getitem_tuple(self, tup: tuple):
  1037. with suppress(IndexingError):
  1038. tup = self._expand_ellipsis(tup)
  1039. return self._getitem_lowerdim(tup)
  1040. # no multi-index, so validate all of the indexers
  1041. tup = self._validate_tuple_indexer(tup)
  1042. # ugly hack for GH #836
  1043. if self._multi_take_opportunity(tup):
  1044. return self._multi_take(tup)
  1045. return self._getitem_tuple_same_dim(tup)
  1046. def _get_label(self, label, axis: AxisInt):
  1047. # GH#5567 this will fail if the label is not present in the axis.
  1048. return self.obj.xs(label, axis=axis)
  1049. def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
  1050. # we have an axis0 multi-index, handle or raise
  1051. axis = self.axis or 0
  1052. try:
  1053. # fast path for series or for tup devoid of slices
  1054. return self._get_label(tup, axis=axis)
  1055. except KeyError as ek:
  1056. # raise KeyError if number of indexers match
  1057. # else IndexingError will be raised
  1058. if self.ndim < len(tup) <= self.obj.index.nlevels:
  1059. raise ek
  1060. raise IndexingError("No label returned") from ek
  def _getitem_axis(self, key, axis: AxisInt):
      """
      Resolve a label-based ``key`` along a single ``axis``.

      After normalising the key (``item_from_zerodim``, iterator -> list,
      ``Ellipsis`` -> ``slice(None)``) the dispatch is:

      - slice: validated, then ``_get_slice_axis``
      - boolean indexer: ``_getbool_axis``
      - list-like: ``_getitem_iterable``, unless it is a tuple on a
        MultiIndex axis; a nested tuple there is resolved with
        ``labels.get_locs`` and ``self.obj.iloc``
      - anything else: validated, then ``_get_label``

      Raises
      ------
      ValueError
          For a list-like key whose ``ndim`` is greater than 1.
      """
      key = item_from_zerodim(key)
      if is_iterator(key):
          key = list(key)
      if key is Ellipsis:
          key = slice(None)

      labels = self.obj._get_axis(axis)

      tuple_on_multiindex = isinstance(key, tuple) and isinstance(labels, MultiIndex)
      if tuple_on_multiindex:
          # tuple(...) yields a plain tuple even if key is a tuple subclass
          key = tuple(key)

      if isinstance(key, slice):
          self._validate_key(key, axis)
          return self._get_slice_axis(key, axis=axis)

      if com.is_bool_indexer(key):
          return self._getbool_axis(key, axis=axis)

      if is_list_like_indexer(key):
          if not tuple_on_multiindex:
              # an iterable multi-selection
              if hasattr(key, "ndim") and key.ndim > 1:
                  raise ValueError("Cannot index with multidimensional key")
              return self._getitem_iterable(key, axis=axis)

          # nested tuple slicing
          if is_nested_tuple(key, labels):
              positions = labels.get_locs(key)
              full_indexer = [slice(None) for _ in range(self.ndim)]
              full_indexer[axis] = positions
              return self.obj.iloc[tuple(full_indexer)]

      # fall thru to straight lookup
      self._validate_key(key, axis)
      return self._get_label(key, axis=axis)
  def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
      """
      Slice ``self.obj`` by labels along ``axis``.

      The caller is responsible for passing a non-None ``axis``. When
      ``need_slice`` reports that no slicing is required, a shallow copy
      of the object is returned.
      """
      target = self.obj
      if not need_slice(slice_obj):
          return target.copy(deep=False)

      axis_labels = target._get_axis(axis)
      indexer = axis_labels.slice_indexer(
          slice_obj.start, slice_obj.stop, slice_obj.step
      )

      if not isinstance(indexer, slice):
          # DatetimeIndex overrides Index.slice_indexer and may
          # return a DatetimeIndex instead of a slice object.
          return self.obj.take(indexer, axis=axis)
      return self.obj._slice(indexer, axis=axis)
  def _convert_to_indexer(self, key, axis: AxisInt):
      """
      Convert a label-based key into something we can use to do actual
      fancy indexing on a ndarray.

      Depending on the key, the result is:

      - slice -> ``labels._convert_slice_indexer(key, kind="loc")``
      - scalar, or hashable key on a MultiIndex -> ``labels.get_loc(key)``
      - nested tuple -> ``labels.get_locs(key)``
      - boolean list-like -> the key as returned by ``check_bool_indexer``
      - other list-like -> the indexer half of ``_get_listlike_indexer``
      - a key that ``get_loc`` cannot find -> ``{"key": key}`` in the
        cases handled below (per the inline note, a not-found key is
        allowed only for setters)

      Integer keys are treated as labels: in the face of ambiguity,
      label-based indexing is preferred.

      Raises
      ------
      IndexingError
          "Too many indexers": a tuple with more than one entry on a 1-D
          object without a MultiIndex, or a tuple nested inside a tuple
          for a Series (GH#35349).
      """
      labels = self.obj._get_axis(axis)

      if isinstance(key, slice):
          return labels._convert_slice_indexer(key, kind="loc")

      if (
          isinstance(key, tuple)
          and not isinstance(labels, MultiIndex)
          and self.ndim < 2
          and len(key) > 1
      ):
          raise IndexingError("Too many indexers")

      if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
          # Otherwise get_loc will raise InvalidIndexError

          # if we are a label return me
          try:
              return labels.get_loc(key)
          except LookupError:
              if isinstance(key, tuple) and isinstance(labels, MultiIndex):
                  if len(key) == labels.nlevels:
                      # a complete MultiIndex key that is absent: report it
                      # as missing rather than raising
                      return {"key": key}
                  raise
              # any other failed lookup falls through to the checks below
          except InvalidIndexError:
              # GH35015, using datetime as column indices raises exception
              if not isinstance(labels, MultiIndex):
                  raise
              # on a MultiIndex: fall through to the checks below
          except ValueError:
              if not is_integer(key):
                  raise
              return {"key": key}

      if is_nested_tuple(key, labels):
          if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
              # GH#35349 Raise if tuple in tuple for series
              raise IndexingError("Too many indexers")
          return labels.get_locs(key)

      elif is_list_like_indexer(key):
          if is_iterator(key):
              key = list(key)

          if com.is_bool_indexer(key):
              key = check_bool_indexer(labels, key)
              return key
          else:
              # element [1] of the (keyarr, indexer) pair
              return self._get_listlike_indexer(key, axis)[1]
      else:
          try:
              return labels.get_loc(key)
          except LookupError:
              # allow a not found key only if we are a setter
              if not is_list_like_indexer(key):
                  return {"key": key}
              raise
  1168. def _get_listlike_indexer(self, key, axis: AxisInt):
  1169. """
  1170. Transform a list-like of keys into a new index and an indexer.
  1171. Parameters
  1172. ----------
  1173. key : list-like
  1174. Targeted labels.
  1175. axis: int
  1176. Dimension on which the indexing is being made.
  1177. Raises
  1178. ------
  1179. KeyError
  1180. If at least one key was requested but none was found.
  1181. Returns
  1182. -------
  1183. keyarr: Index
  1184. New index (coinciding with 'key' if the axis is unique).
  1185. values : array-like
  1186. Indexer for the return object, -1 denotes keys not found.
  1187. """
  1188. ax = self.obj._get_axis(axis)
  1189. axis_name = self.obj._get_axis_name(axis)
  1190. keyarr, indexer = ax._get_indexer_strict(key, axis_name)
  1191. return keyarr, indexer
  @doc(IndexingMixin.iloc)
  class _iLocIndexer(_LocationIndexer):
      # The ``doc`` decorator above is applied with ``IndexingMixin.iloc``;
      # the class body itself carries no docstring.

      # Description of the accepted key kinds; ``_validate_key`` embeds it
      # in the ValueError raised for unsupported keys.
      _valid_types = (
          "integer, integer slice (START point is INCLUDED, END "
          "point is EXCLUDED), listlike of integers, boolean array"
      )
      # Forwarded as ``takeable=`` when ``__getitem__`` calls ``_get_value``.
      _takeable = True
  1199. # -------------------------------------------------------------------
  1200. # Key Checks
  1201. def _validate_key(self, key, axis: AxisInt):
  1202. if com.is_bool_indexer(key):
  1203. if hasattr(key, "index") and isinstance(key.index, Index):
  1204. if key.index.inferred_type == "integer":
  1205. raise NotImplementedError(
  1206. "iLocation based boolean "
  1207. "indexing on an integer type "
  1208. "is not available"
  1209. )
  1210. raise ValueError(
  1211. "iLocation based boolean indexing cannot use "
  1212. "an indexable as a mask"
  1213. )
  1214. return
  1215. if isinstance(key, slice):
  1216. return
  1217. elif is_integer(key):
  1218. self._validate_integer(key, axis)
  1219. elif isinstance(key, tuple):
  1220. # a tuple should already have been caught by this point
  1221. # so don't treat a tuple as a valid indexer
  1222. raise IndexingError("Too many indexers")
  1223. elif is_list_like_indexer(key):
  1224. if isinstance(key, ABCSeries):
  1225. arr = key._values
  1226. elif is_array_like(key):
  1227. arr = key
  1228. else:
  1229. arr = np.array(key)
  1230. len_axis = len(self.obj._get_axis(axis))
  1231. # check that the key has a numeric dtype
  1232. if not is_numeric_dtype(arr.dtype):
  1233. raise IndexError(f".iloc requires numeric indexers, got {arr}")
  1234. # check that the key does not exceed the maximum size of the index
  1235. if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
  1236. raise IndexError("positional indexers are out-of-bounds")
  1237. else:
  1238. raise ValueError(f"Can only index by location with a [{self._valid_types}]")
  1239. def _has_valid_setitem_indexer(self, indexer) -> bool:
  1240. """
  1241. Validate that a positional indexer cannot enlarge its target
  1242. will raise if needed, does not modify the indexer externally.
  1243. Returns
  1244. -------
  1245. bool
  1246. """
  1247. if isinstance(indexer, dict):
  1248. raise IndexError("iloc cannot enlarge its target object")
  1249. if isinstance(indexer, ABCDataFrame):
  1250. raise TypeError(
  1251. "DataFrame indexer for .iloc is not supported. "
  1252. "Consider using .loc with a DataFrame indexer for automatic alignment.",
  1253. )
  1254. if not isinstance(indexer, tuple):
  1255. indexer = _tuplify(self.ndim, indexer)
  1256. for ax, i in zip(self.obj.axes, indexer):
  1257. if isinstance(i, slice):
  1258. # should check the stop slice?
  1259. pass
  1260. elif is_list_like_indexer(i):
  1261. # should check the elements?
  1262. pass
  1263. elif is_integer(i):
  1264. if i >= len(ax):
  1265. raise IndexError("iloc cannot enlarge its target object")
  1266. elif isinstance(i, dict):
  1267. raise IndexError("iloc cannot enlarge its target object")
  1268. return True
  1269. def _is_scalar_access(self, key: tuple) -> bool:
  1270. """
  1271. Returns
  1272. -------
  1273. bool
  1274. """
  1275. # this is a shortcut accessor to both .loc and .iloc
  1276. # that provide the equivalent access of .at and .iat
  1277. # a) avoid getting things via sections and (to minimize dtype changes)
  1278. # b) provide a performant path
  1279. if len(key) != self.ndim:
  1280. return False
  1281. return all(is_integer(k) for k in key)
  1282. def _validate_integer(self, key: int, axis: AxisInt) -> None:
  1283. """
  1284. Check that 'key' is a valid position in the desired axis.
  1285. Parameters
  1286. ----------
  1287. key : int
  1288. Requested position.
  1289. axis : int
  1290. Desired axis.
  1291. Raises
  1292. ------
  1293. IndexError
  1294. If 'key' is not a valid position in axis 'axis'.
  1295. """
  1296. len_axis = len(self.obj._get_axis(axis))
  1297. if key >= len_axis or key < -len_axis:
  1298. raise IndexError("single positional indexer is out-of-bounds")
  1299. # -------------------------------------------------------------------
  1300. def _getitem_tuple(self, tup: tuple):
  1301. tup = self._validate_tuple_indexer(tup)
  1302. with suppress(IndexingError):
  1303. return self._getitem_lowerdim(tup)
  1304. return self._getitem_tuple_same_dim(tup)
  1305. def _get_list_axis(self, key, axis: AxisInt):
  1306. """
  1307. Return Series values by list or array of integers.
  1308. Parameters
  1309. ----------
  1310. key : list-like positional indexer
  1311. axis : int
  1312. Returns
  1313. -------
  1314. Series object
  1315. Notes
  1316. -----
  1317. `axis` can only be zero.
  1318. """
  1319. try:
  1320. return self.obj._take_with_is_copy(key, axis=axis)
  1321. except IndexError as err:
  1322. # re-raise with different error message
  1323. raise IndexError("positional indexers are out-of-bounds") from err
  1324. def _getitem_axis(self, key, axis: AxisInt):
  1325. if key is Ellipsis:
  1326. key = slice(None)
  1327. elif isinstance(key, ABCDataFrame):
  1328. raise IndexError(
  1329. "DataFrame indexer is not allowed for .iloc\n"
  1330. "Consider using .loc for automatic alignment."
  1331. )
  1332. if isinstance(key, slice):
  1333. return self._get_slice_axis(key, axis=axis)
  1334. if is_iterator(key):
  1335. key = list(key)
  1336. if isinstance(key, list):
  1337. key = np.asarray(key)
  1338. if com.is_bool_indexer(key):
  1339. self._validate_key(key, axis)
  1340. return self._getbool_axis(key, axis=axis)
  1341. # a list of integers
  1342. elif is_list_like_indexer(key):
  1343. return self._get_list_axis(key, axis=axis)
  1344. # a single integer
  1345. else:
  1346. key = item_from_zerodim(key)
  1347. if not is_integer(key):
  1348. raise TypeError("Cannot index by location index with a non-integer key")
  1349. # validate the location
  1350. self._validate_integer(key, axis)
  1351. return self.obj._ixs(key, axis=axis)
  def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
      """
      Slice ``self.obj`` by position along ``axis``.

      The caller is responsible for passing a non-None ``axis``. When
      ``need_slice`` reports that no slicing is required, a shallow copy
      of the object is returned. Otherwise the slice is checked with
      ``_validate_positional_slice`` on the axis labels and applied
      through ``self.obj._slice``.
      """
      target = self.obj
      if not need_slice(slice_obj):
          return target.copy(deep=False)

      target._get_axis(axis)._validate_positional_slice(slice_obj)
      return self.obj._slice(slice_obj, axis=axis)
  1360. def _convert_to_indexer(self, key, axis: AxisInt):
  1361. """
  1362. Much simpler as we only have to deal with our valid types.
  1363. """
  1364. return key
  1365. def _get_setitem_indexer(self, key):
  1366. # GH#32257 Fall through to let numpy do validation
  1367. if is_iterator(key):
  1368. key = list(key)
  1369. if self.axis is not None:
  1370. key = _tupleize_axis_indexer(self.ndim, self.axis, key)
  1371. return key
  1372. # -------------------------------------------------------------------
  def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
      """
      _setitem_with_indexer is for setting values on a Series/DataFrame
      using positional indexers.

      If the relevant keys are not present, the Series/DataFrame may be
      expanded.

      This method is currently broken when dealing with non-unique Indexes,
      since it goes from positional indexers back to labels when calling
      BlockManager methods, see GH#12991, GH#22046, GH#15686.

      Parameters
      ----------
      indexer : object
          Positional indexer. Entries that are dicts mark keys that are
          missing from the axis and trigger expansion.
      value : object
          The value(s) to set.
      name : str, default "iloc"
          Name of the calling indexer; "loc" additionally runs
          ``_maybe_mask_setitem_value``, and the name is forwarded to the
          helpers that do the actual setting.
      """
      info_axis = self.obj._info_axis_number

      # maybe partial set
      take_split_path = not self.obj._mgr.is_single_block

      if not take_split_path and isinstance(value, ABCDataFrame):
          # Avoid cast of values
          take_split_path = not value._mgr.is_single_block

      # if there is only one block/type, still have to take split path
      # unless the block is one-dimensional or it can hold the value
      if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
          # in case of dict, keys are indices
          val = list(value.values()) if isinstance(value, dict) else value
          arr = self.obj._mgr.arrays[0]
          take_split_path = not can_hold_element(
              arr, extract_array(val, extract_numpy=True)
          )

      # if we have any multi-indexes that have non-trivial slices
      # (not null slices) then we must take the split path, xref
      # GH 10360, GH 27841
      if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
          for i, ax in zip(indexer, self.obj.axes):
              if isinstance(ax, MultiIndex) and not (
                  is_integer(i) or com.is_null_slice(i)
              ):
                  take_split_path = True
                  break

      if isinstance(indexer, tuple):
          # rebuilt indexer in which missing keys are replaced by positions
          nindexer = []
          for i, idx in enumerate(indexer):
              if isinstance(idx, dict):
                  # reindex the axis to the new value
                  # and set inplace
                  key, _ = convert_missing_indexer(idx)

                  # if this is the items axes, then take the main missing
                  # path first
                  # this correctly sets the dtype and avoids cache issues
                  # essentially this separates out the block that is needed
                  # to possibly be modified
                  if self.ndim > 1 and i == info_axis:
                      # add the new item, and set the value
                      # must have all defined axes if we have a scalar
                      # or a list-like on the non-info axes if we have a
                      # list-like
                      if not len(self.obj):
                          if not is_list_like_indexer(value):
                              raise ValueError(
                                  "cannot set a frame with no "
                                  "defined index and a scalar"
                              )
                          self.obj[key] = value
                          return

                      # add a new item with the dtype setup
                      if com.is_null_slice(indexer[0]):
                          # We are setting an entire column
                          self.obj[key] = value
                          return
                      elif is_array_like(value):
                          # GH#42099
                          arr = extract_array(value, extract_numpy=True)
                          # an all -1 taker, one entry per row of self.obj
                          taker = -1 * np.ones(len(self.obj), dtype=np.intp)
                          empty_value = algos.take_nd(arr, taker)
                          if not isinstance(value, ABCSeries):
                              # if not Series (in which case we need to align),
                              # we can short-circuit
                              if (
                                  isinstance(arr, np.ndarray)
                                  and arr.ndim == 1
                                  and len(arr) == 1
                              ):
                                  # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                                  arr = arr[0, ...]
                              empty_value[indexer[0]] = arr
                              self.obj[key] = empty_value
                              return

                          self.obj[key] = empty_value

                      else:
                          # FIXME: GH#42099#issuecomment-864326014
                          self.obj[key] = infer_fill_value(value)

                      # the new item exists now: rebuild the indexer against
                      # the updated axes and set again
                      new_indexer = convert_from_missing_indexer_tuple(
                          indexer, self.obj.axes
                      )
                      self._setitem_with_indexer(new_indexer, value, name)

                      return

                  # reindex the axis
                  # make sure to clear the cache because we are
                  # just replacing the block manager here
                  # so the object is the same
                  index = self.obj._get_axis(i)
                  labels = index.insert(len(index), key)

                  # We are expanding the Series/DataFrame values to match
                  # the length of the new index `labels`. GH#40096 ensure
                  # this is valid even if the index has duplicates.
                  taker = np.arange(len(index) + 1, dtype=np.intp)
                  taker[-1] = -1
                  reindexers = {i: (labels, taker)}
                  new_obj = self.obj._reindex_with_indexers(
                      reindexers, allow_dups=True
                  )
                  self.obj._mgr = new_obj._mgr
                  self.obj._maybe_update_cacher(clear=True)
                  self.obj._is_copy = None

                  nindexer.append(labels.get_loc(key))

              else:
                  nindexer.append(idx)

          indexer = tuple(nindexer)
      else:
          indexer, missing = convert_missing_indexer(indexer)

          if missing:
              self._setitem_with_indexer_missing(indexer, value)
              return

      if name == "loc":
          # must come after setting of missing
          indexer, value = self._maybe_mask_setitem_value(indexer, value)

      # align and set the values
      if take_split_path:
          # We have to operate column-wise
          self._setitem_with_indexer_split_path(indexer, value, name)
      else:
          self._setitem_single_block(indexer, value, name)
  1501. def _setitem_with_indexer_split_path(self, indexer, value, name: str):
  1502. """
  1503. Setitem column-wise.
  1504. """
  1505. # Above we only set take_split_path to True for 2D cases
  1506. assert self.ndim == 2
  1507. if not isinstance(indexer, tuple):
  1508. indexer = _tuplify(self.ndim, indexer)
  1509. if len(indexer) > self.ndim:
  1510. raise IndexError("too many indices for array")
  1511. if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
  1512. raise ValueError(r"Cannot set values with ndim > 2")
  1513. if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
  1514. from pandas import Series
  1515. value = self._align_series(indexer, Series(value))
  1516. # Ensure we have something we can iterate over
  1517. info_axis = indexer[1]
  1518. ilocs = self._ensure_iterable_column_indexer(info_axis)
  1519. pi = indexer[0]
  1520. lplane_indexer = length_of_indexer(pi, self.obj.index)
  1521. # lplane_indexer gives the expected length of obj[indexer[0]]
  1522. # we need an iterable, with a ndim of at least 1
  1523. # eg. don't pass through np.array(0)
  1524. if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
  1525. if isinstance(value, ABCDataFrame):
  1526. self._setitem_with_indexer_frame_value(indexer, value, name)
  1527. elif np.ndim(value) == 2:
  1528. # TODO: avoid np.ndim call in case it isn't an ndarray, since
  1529. # that will construct an ndarray, which will be wasteful
  1530. self._setitem_with_indexer_2d_value(indexer, value)
  1531. elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
  1532. # We are setting multiple rows in a single column.
  1533. self._setitem_single_column(ilocs[0], value, pi)
  1534. elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
  1535. # We are trying to set N values into M entries of a single
  1536. # column, which is invalid for N != M
  1537. # Exclude zero-len for e.g. boolean masking that is all-false
  1538. if len(value) == 1 and not is_integer(info_axis):
  1539. # This is a case like df.iloc[:3, [1]] = [0]
  1540. # where we treat as df.iloc[:3, 1] = 0
  1541. return self._setitem_with_indexer((pi, info_axis[0]), value[0])
  1542. raise ValueError(
  1543. "Must have equal len keys and value "
  1544. "when setting with an iterable"
  1545. )
  1546. elif lplane_indexer == 0 and len(value) == len(self.obj.index):
  1547. # We get here in one case via .loc with a all-False mask
  1548. pass
  1549. elif self._is_scalar_access(indexer) and is_object_dtype(
  1550. self.obj.dtypes[ilocs[0]]
  1551. ):
  1552. # We are setting nested data, only possible for object dtype data
  1553. self._setitem_single_column(indexer[1], value, pi)
  1554. elif len(ilocs) == len(value):
  1555. # We are setting multiple columns in a single row.
  1556. for loc, v in zip(ilocs, value):
  1557. self._setitem_single_column(loc, v, pi)
  1558. elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
  1559. # This is a setitem-with-expansion, see
  1560. # test_loc_setitem_empty_append_expands_rows_mixed_dtype
  1561. # e.g. df = DataFrame(columns=["x", "y"])
  1562. # df["x"] = df["x"].astype(np.int64)
  1563. # df.loc[:, "x"] = [1, 2, 3]
  1564. self._setitem_single_column(ilocs[0], value, pi)
  1565. else:
  1566. raise ValueError(
  1567. "Must have equal len keys and value "
  1568. "when setting with an iterable"
  1569. )
  1570. else:
  1571. # scalar value
  1572. for loc in ilocs:
  1573. self._setitem_single_column(loc, value, pi)
  1574. def _setitem_with_indexer_2d_value(self, indexer, value):
  1575. # We get here with np.ndim(value) == 2, excluding DataFrame,
  1576. # which goes through _setitem_with_indexer_frame_value
  1577. pi = indexer[0]
  1578. ilocs = self._ensure_iterable_column_indexer(indexer[1])
  1579. if not is_array_like(value):
  1580. # cast lists to array
  1581. value = np.array(value, dtype=object)
  1582. if len(ilocs) != value.shape[1]:
  1583. raise ValueError(
  1584. "Must have equal len keys and value when setting with an ndarray"
  1585. )
  1586. for i, loc in enumerate(ilocs):
  1587. value_col = value[:, i]
  1588. if is_object_dtype(value_col.dtype):
  1589. # casting to list so that we do type inference in setitem_single_column
  1590. value_col = value_col.tolist()
  1591. self._setitem_single_column(loc, value_col, pi)
  1592. def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
  1593. ilocs = self._ensure_iterable_column_indexer(indexer[1])
  1594. sub_indexer = list(indexer)
  1595. pi = indexer[0]
  1596. multiindex_indexer = isinstance(self.obj.columns, MultiIndex)
  1597. unique_cols = value.columns.is_unique
  1598. # We do not want to align the value in case of iloc GH#37728
  1599. if name == "iloc":
  1600. for i, loc in enumerate(ilocs):
  1601. val = value.iloc[:, i]
  1602. self._setitem_single_column(loc, val, pi)
  1603. elif not unique_cols and value.columns.equals(self.obj.columns):
  1604. # We assume we are already aligned, see
  1605. # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
  1606. for loc in ilocs:
  1607. item = self.obj.columns[loc]
  1608. if item in value:
  1609. sub_indexer[1] = item
  1610. val = self._align_series(
  1611. tuple(sub_indexer),
  1612. value.iloc[:, loc],
  1613. multiindex_indexer,
  1614. )
  1615. else:
  1616. val = np.nan
  1617. self._setitem_single_column(loc, val, pi)
  1618. elif not unique_cols:
  1619. raise ValueError("Setting with non-unique columns is not allowed.")
  1620. else:
  1621. for loc in ilocs:
  1622. item = self.obj.columns[loc]
  1623. if item in value:
  1624. sub_indexer[1] = item
  1625. val = self._align_series(
  1626. tuple(sub_indexer), value[item], multiindex_indexer
  1627. )
  1628. else:
  1629. val = np.nan
  1630. self._setitem_single_column(loc, val, pi)
  1631. def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
  1632. """
  1633. Parameters
  1634. ----------
  1635. loc : int
  1636. Indexer for column position
  1637. plane_indexer : int, slice, listlike[int]
  1638. The indexer we use for setitem along axis=0.
  1639. """
  1640. pi = plane_indexer
  1641. is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))
  1642. is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0
  1643. if is_null_setter:
  1644. # no-op, don't cast dtype later
  1645. return
  1646. elif is_full_setter:
  1647. try:
  1648. self.obj._mgr.column_setitem(
  1649. loc, plane_indexer, value, inplace_only=True
  1650. )
  1651. except (ValueError, TypeError, LossySetitemError):
  1652. # If we're setting an entire column and we can't do it inplace,
  1653. # then we can use value's dtype (or inferred dtype)
  1654. # instead of object
  1655. self.obj.isetitem(loc, value)
  1656. else:
  1657. # set value into the column (first attempting to operate inplace, then
  1658. # falling back to casting if necessary)
  1659. self.obj._mgr.column_setitem(loc, plane_indexer, value)
  1660. self.obj._clear_item_cache()
  def _setitem_single_block(self, indexer, value, name: str) -> None:
      """
      _setitem_with_indexer for the case when we have a single Block.

      Parameters
      ----------
      indexer : object
          Positional indexer; a tuple is converted with ``maybe_convert_ix``
          unless the single-column shortcut below applies.
      value : object
          The value(s) to set. A Series or DataFrame is aligned first
          unless ``name`` is "iloc"; a dict is always wrapped in a Series
          and aligned.
      name : str
          Name of the calling indexer ("loc" or "iloc").
      """
      from pandas import Series

      info_axis = self.obj._info_axis_number
      item_labels = self.obj._get_axis(info_axis)
      if isinstance(indexer, tuple):
          # if we are setting on the info axis ONLY
          # set using those methods to avoid block-splitting
          # logic here
          if (
              self.ndim == len(indexer) == 2
              and is_integer(indexer[1])
              and com.is_null_slice(indexer[0])
          ):
              col = item_labels[indexer[info_axis]]
              # only when the label occurs exactly once on the info axis
              if len(item_labels.get_indexer_for([col])) == 1:
                  # e.g. test_loc_setitem_empty_append_expands_rows
                  loc = item_labels.get_loc(col)
                  self._setitem_single_column(loc, value, indexer[0])
                  return

          indexer = maybe_convert_ix(*indexer)  # e.g. test_setitem_frame_align

      if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
          # TODO(EA): ExtensionBlock.setitem this causes issues with
          # setting for extensionarrays that store dicts. Need to decide
          # if it's worth supporting that.
          value = self._align_series(indexer, Series(value))

      elif isinstance(value, ABCDataFrame) and name != "iloc":
          value = self._align_frame(indexer, value)._values

      # check for chained assignment
      self.obj._check_is_chained_assignment_possible()

      # actually do the set: the manager returned by ``setitem`` replaces
      # the current one, then the cacher is updated
      self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
      self.obj._maybe_update_cacher(clear=True, inplace=True)
  def _setitem_with_indexer_missing(self, indexer, value):
      """
      Insert new row(s) or column(s) into the Series or DataFrame.

      Parameters
      ----------
      indexer : label
          The new label. For a 1D object it is inserted at the end of the
          index; for a 2D object it becomes the name/index label of the
          appended row.
      value : scalar, list-like, dict or Series
          The value(s) to store under the new label.

      Returns
      -------
      None, or the result of ``self._setitem_with_indexer`` when (1D case) the
      inserted label turns out to match an existing entry of a unique index.

      Raises
      ------
      ValueError
          In the 2D case, if the object has no columns, or if a list-like
          ``value`` does not have one entry per column.
      """
      from pandas import Series

      # reindex the axis to the new value
      # and set inplace
      if self.ndim == 1:
          index = self.obj.index
          new_index = index.insert(len(index), indexer)

          # we have a coerced indexer, e.g. a float
          # that matches in an int64 Index, so
          # we will not create a duplicate index, rather
          # index to that element
          # e.g. 0.0 -> 0
          # GH#12246
          if index.is_unique:
              # pass new_index[-1:] instead of [new_index[-1]]
              # so that we retain dtype
              new_indexer = index.get_indexer(new_index[-1:])
              if (new_indexer != -1).any():
                  # We get only here with loc, so can hard code
                  return self._setitem_with_indexer(new_indexer, value, "loc")

          # this preserves dtype of the value and of the object
          # (new_dtype=None leaves dtype selection to the Series constructor)
          if not is_scalar(value):
              new_dtype = None

          elif is_valid_na_for_dtype(value, self.obj.dtype):
              if not is_object_dtype(self.obj.dtype):
                  # Every NA value is suitable for object, no conversion needed
                  value = na_value_for_dtype(self.obj.dtype, compat=False)

              new_dtype = maybe_promote(self.obj.dtype, value)[0]

          elif isna(value):
              new_dtype = None
          elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
              # We should not cast, if we have object dtype because we can
              # set timedeltas into object series
              curr_dtype = self.obj.dtype
              # use the dtype's ``numpy_dtype`` attribute when it has one
              curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
              new_dtype = maybe_promote(curr_dtype, value)[0]
          else:
              new_dtype = None

          new_values = Series([value], dtype=new_dtype)._values

          if len(self.obj._values):
              # GH#22717 handle casting compatibility that np.concatenate
              #  does incorrectly
              new_values = concat_compat([self.obj._values, new_values])

          # Build a new object over the extended index and take over its manager.
          self.obj._mgr = self.obj._constructor(
              new_values, index=new_index, name=self.obj.name
          )._mgr
          self.obj._maybe_update_cacher(clear=True)

      elif self.ndim == 2:
          if not len(self.obj.columns):
              # no columns and scalar
              raise ValueError("cannot set a frame with no defined columns")

          # Recorded before ``value`` is wrapped in a Series below; decides
          # whether dtype inference is run in the empty-frame case.
          has_dtype = hasattr(value, "dtype")
          if isinstance(value, ABCSeries):
              # append a Series
              value = value.reindex(index=self.obj.columns, copy=True)
              value.name = indexer
          elif isinstance(value, dict):
              value = Series(
                  value, index=self.obj.columns, name=indexer, dtype=object
              )
          else:
              # a list-list
              if is_list_like_indexer(value):
                  # must have conforming columns
                  if len(value) != len(self.obj.columns):
                      raise ValueError("cannot set a row with mismatched columns")

              value = Series(value, index=self.obj.columns, name=indexer)

          if not len(self.obj):
              # We will ignore the existing dtypes instead of using
              #  internals.concat logic
              df = value.to_frame().T

              # carry over the name(s) of the existing index
              idx = self.obj.index
              if isinstance(idx, MultiIndex):
                  name = idx.names
              else:
                  name = idx.name

              df.index = Index([indexer], name=name)
              if not has_dtype:
                  # i.e. if we already had a Series or ndarray, keep that
                  #  dtype.  But if we had a list or dict, then do inference
                  df = df.infer_objects(copy=False)
              self.obj._mgr = df._mgr
          else:
              self.obj._mgr = self.obj._append(value)._mgr
          self.obj._maybe_update_cacher(clear=True)
  1784. def _ensure_iterable_column_indexer(self, column_indexer):
  1785. """
  1786. Ensure that our column indexer is something that can be iterated over.
  1787. """
  1788. ilocs: Sequence[int] | np.ndarray
  1789. if is_integer(column_indexer):
  1790. ilocs = [column_indexer]
  1791. elif isinstance(column_indexer, slice):
  1792. ilocs = np.arange(len(self.obj.columns))[column_indexer]
  1793. elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(
  1794. column_indexer.dtype
  1795. ):
  1796. ilocs = np.arange(len(column_indexer))[column_indexer]
  1797. else:
  1798. ilocs = column_indexer
  1799. return ilocs
  def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
      """
      Align ``ser`` to the labels of ``self.obj`` selected by ``indexer``.

      Parameters
      ----------
      indexer : tuple, slice, scalar
          Indexer used to get the locations that will be set to `ser`.
      ser : pd.Series
          Values to assign to the locations specified by `indexer`.
      multiindex_indexer : bool, optional
          Defaults to False. Should be set to True if `indexer` was from
          a `pd.MultiIndex`, to avoid unnecessary broadcasting.

      Returns
      -------
      `np.array` of `ser` broadcast to the appropriate shape for assignment
      to the locations selected by `indexer`

      Notes
      -----
      For an integer ``indexer`` on a 1D object with object dtype, ``ser``
      itself is returned unchanged rather than an array.

      Raises
      ------
      ValueError
          If no branch below is able to align ``ser`` with ``indexer``.
      """
      # Normalize a single non-scalar indexer into a 1-tuple so it shares the
      # tuple code path below.
      if isinstance(indexer, (slice, np.ndarray, list, Index)):
          indexer = (indexer,)

      if isinstance(indexer, tuple):
          # flatten np.ndarray indexers
          def ravel(i):
              return i.ravel() if isinstance(i, np.ndarray) else i

          indexer = tuple(map(ravel, indexer))

          # An "aligner" is any entry that is not a null slice, i.e. one that
          # actually restricts its axis.
          aligners = [not com.is_null_slice(idx) for idx in indexer]
          sum_aligners = sum(aligners)
          single_aligner = sum_aligners == 1
          is_frame = self.ndim == 2
          obj = self.obj

          # are we a single alignable value on a non-primary
          # dim (e.g. panel: 1,2, or frame: 0) ?
          # hence need to align to a single axis dimension
          # rather than find all valid dims

          # frame
          if is_frame:
              # for a frame, only count it when the aligner is on axis 0
              single_aligner = single_aligner and aligners[0]

          # we have a frame, with multiple indexers on both axes; and a
          # series, so need to broadcast (see GH5206)
          if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
              ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

              # single indexer
              if len(indexer) > 1 and not multiindex_indexer:
                  # repeat the aligned values once per entry of indexer[1]
                  len_indexer = len(indexer[1])
                  ser_values = (
                      np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
                  )

              return ser_values

          # Otherwise align against the first axis whose indexer entry allows
          # it; each branch that matches returns immediately.
          for i, idx in enumerate(indexer):
              ax = obj.axes[i]

              # multiple aligners (or null slices)
              if is_sequence(idx) or isinstance(idx, slice):
                  if single_aligner and com.is_null_slice(idx):
                      continue
                  new_ix = ax[idx]
                  if not is_list_like_indexer(new_ix):
                      new_ix = Index([new_ix])
                  else:
                      new_ix = Index(new_ix)
                  # labels already match (or nothing is selected): no reindex
                  if ser.index.equals(new_ix) or not len(new_ix):
                      return ser._values.copy()

                  return ser.reindex(new_ix)._values

              # 2 dims
              elif single_aligner:
                  # reindex along index
                  ax = self.obj.axes[1]
                  if ser.index.equals(ax) or not len(ax):
                      return ser._values.copy()
                  return ser.reindex(ax)._values

          # If the loop finishes without returning, control falls through to
          # the ValueError at the end of the method.

      elif is_integer(indexer) and self.ndim == 1:
          if is_object_dtype(self.obj):
              # object dtype: hand back the Series itself, unaligned
              return ser
          ax = self.obj._get_axis(0)

          if ser.index.equals(ax):
              return ser._values.copy()

          return ser.reindex(ax)._values[indexer]

      elif is_integer(indexer):
          # integer indexer on a non-1D object: align against axis 1
          ax = self.obj._get_axis(1)

          if ser.index.equals(ax):
              return ser._values.copy()

          return ser.reindex(ax)._values

      raise ValueError("Incompatible indexer with Series")
  1880. def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
  1881. is_frame = self.ndim == 2
  1882. if isinstance(indexer, tuple):
  1883. idx, cols = None, None
  1884. sindexers = []
  1885. for i, ix in enumerate(indexer):
  1886. ax = self.obj.axes[i]
  1887. if is_sequence(ix) or isinstance(ix, slice):
  1888. if isinstance(ix, np.ndarray):
  1889. ix = ix.ravel()
  1890. if idx is None:
  1891. idx = ax[ix]
  1892. elif cols is None:
  1893. cols = ax[ix]
  1894. else:
  1895. break
  1896. else:
  1897. sindexers.append(i)
  1898. if idx is not None and cols is not None:
  1899. if df.index.equals(idx) and df.columns.equals(cols):
  1900. val = df.copy()
  1901. else:
  1902. val = df.reindex(idx, columns=cols)
  1903. return val
  1904. elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
  1905. ax = self.obj.index[indexer]
  1906. if df.index.equals(ax):
  1907. val = df.copy()
  1908. else:
  1909. # we have a multi-index and are trying to align
  1910. # with a particular, level GH3738
  1911. if (
  1912. isinstance(ax, MultiIndex)
  1913. and isinstance(df.index, MultiIndex)
  1914. and ax.nlevels != df.index.nlevels
  1915. ):
  1916. raise TypeError(
  1917. "cannot align on a multi-index with out "
  1918. "specifying the join levels"
  1919. )
  1920. val = df.reindex(index=ax)
  1921. return val
  1922. raise ValueError("Incompatible indexer with DataFrame")
  1923. class _ScalarAccessIndexer(NDFrameIndexerBase):
  1924. """
  1925. Access scalars quickly.
  1926. """
  1927. # sub-classes need to set _takeable
  1928. _takeable: bool
  1929. def _convert_key(self, key):
  1930. raise AbstractMethodError(self)
  1931. def __getitem__(self, key):
  1932. if not isinstance(key, tuple):
  1933. # we could have a convertible item here (e.g. Timestamp)
  1934. if not is_list_like_indexer(key):
  1935. key = (key,)
  1936. else:
  1937. raise ValueError("Invalid call for scalar access (getting)!")
  1938. key = self._convert_key(key)
  1939. return self.obj._get_value(*key, takeable=self._takeable)
  1940. def __setitem__(self, key, value) -> None:
  1941. if isinstance(key, tuple):
  1942. key = tuple(com.apply_if_callable(x, self.obj) for x in key)
  1943. else:
  1944. # scalar callable may return tuple
  1945. key = com.apply_if_callable(key, self.obj)
  1946. if not isinstance(key, tuple):
  1947. key = _tuplify(self.ndim, key)
  1948. key = list(self._convert_key(key))
  1949. if len(key) != self.ndim:
  1950. raise ValueError("Not enough indexers for scalar access (setting)!")
  1951. self.obj._set_value(*key, value=value, takeable=self._takeable)
  1952. @doc(IndexingMixin.at)
  1953. class _AtIndexer(_ScalarAccessIndexer):
  1954. _takeable = False
  1955. def _convert_key(self, key):
  1956. """
  1957. Require they keys to be the same type as the index. (so we don't
  1958. fallback)
  1959. """
  1960. # GH 26989
  1961. # For series, unpacking key needs to result in the label.
  1962. # This is already the case for len(key) == 1; e.g. (1,)
  1963. if self.ndim == 1 and len(key) > 1:
  1964. key = (key,)
  1965. return key
  1966. @property
  1967. def _axes_are_unique(self) -> bool:
  1968. # Only relevant for self.ndim == 2
  1969. assert self.ndim == 2
  1970. return self.obj.index.is_unique and self.obj.columns.is_unique
  1971. def __getitem__(self, key):
  1972. if self.ndim == 2 and not self._axes_are_unique:
  1973. # GH#33041 fall back to .loc
  1974. if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
  1975. raise ValueError("Invalid call for scalar access (getting)!")
  1976. return self.obj.loc[key]
  1977. return super().__getitem__(key)
  1978. def __setitem__(self, key, value):
  1979. if self.ndim == 2 and not self._axes_are_unique:
  1980. # GH#33041 fall back to .loc
  1981. if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
  1982. raise ValueError("Invalid call for scalar access (setting)!")
  1983. self.obj.loc[key] = value
  1984. return
  1985. return super().__setitem__(key, value)
  1986. @doc(IndexingMixin.iat)
  1987. class _iAtIndexer(_ScalarAccessIndexer):
  1988. _takeable = True
  1989. def _convert_key(self, key):
  1990. """
  1991. Require integer args. (and convert to label arguments)
  1992. """
  1993. for i in key:
  1994. if not is_integer(i):
  1995. raise ValueError("iAt based indexing can only have integer indexers")
  1996. return key
  1997. def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
  1998. """
  1999. Given an indexer for the first dimension, create an equivalent tuple
  2000. for indexing over all dimensions.
  2001. Parameters
  2002. ----------
  2003. ndim : int
  2004. loc : object
  2005. Returns
  2006. -------
  2007. tuple
  2008. """
  2009. _tup: list[Hashable | slice]
  2010. _tup = [slice(None, None) for _ in range(ndim)]
  2011. _tup[0] = loc
  2012. return tuple(_tup)
  2013. def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:
  2014. """
  2015. If we have an axis, adapt the given key to be axis-independent.
  2016. """
  2017. new_key = [slice(None)] * ndim
  2018. new_key[axis] = key
  2019. return tuple(new_key)
  def check_bool_indexer(index: Index, key) -> np.ndarray:
      """
      Validate a boolean indexer against ``index``, reindexing or converting
      it where needed.

      This function assumes that is_bool_indexer(key) == True.

      Parameters
      ----------
      index : Index
          Index of the object on which the indexing is done.
      key : list-like
          Boolean indexer to check.

      Returns
      -------
      np.array
          Resulting key.

      Raises
      ------
      IndexError
          If the key does not have the same length as index.
      IndexingError
          If the index of the key is unalignable to index.
      """
      mask = key
      needs_alignment = isinstance(key, ABCSeries) and not key.index.equals(index)
      if needs_alignment:
          positions = mask.index.get_indexer_for(index)
          if -1 in positions:
              raise IndexingError(
                  "Unalignable boolean Series provided as "
                  "indexer (index of the boolean Series and of "
                  "the indexed object do not match)."
              )

          mask = mask.take(positions)

          # a realigned non-extension Series can be returned directly;
          # extension dtypes fall through to the common checks below
          if not is_extension_array_dtype(mask.dtype):
              return mask.astype(bool)._values

      if is_object_dtype(key):
          # key might be object-dtype bool, check_array_indexer needs bool array
          mask = np.asarray(mask, dtype=bool)
      elif not is_array_like(mask):
          # GH 33924
          # key may contain nan elements, check_array_indexer needs bool array
          mask = pd_array(mask, dtype=bool)
      return check_array_indexer(index, mask)
  def convert_missing_indexer(indexer):
      """
      Unwrap a "missing" indexer, which is represented as a dict.

      Returns a 2-tuple of the scalar indexer and a boolean indicating whether
      a conversion took place.
      """
      if not isinstance(indexer, dict):
          return indexer, False

      # a missing key (but not a tuple indexer)
      label = indexer["key"]
      if isinstance(label, bool):
          raise KeyError("cannot use a single bool to index into setitem")
      return label, True
  def convert_from_missing_indexer_tuple(indexer, axes):
      """
      Create a filtered indexer that doesn't have any missing indexers.

      Each dict entry is replaced by the location of its "key" on the axis at
      the same position; all other entries are kept as they are.
      """
      return tuple(
          axes[pos].get_loc(entry["key"]) if isinstance(entry, dict) else entry
          for pos, entry in enumerate(indexer)
      )
  def maybe_convert_ix(*args):
      """
      We likely want to take the cross-product.

      When every argument is an ndarray, list, Series or Index the arguments
      are combined with ``np.ix_``; otherwise they are returned unchanged.
      """
      listlike_types = (np.ndarray, list, ABCSeries, Index)
      if all(isinstance(arg, listlike_types) for arg in args):
          return np.ix_(*args)
      return args
  def is_nested_tuple(tup, labels) -> bool:
      """
      Check for a tuple containing a list-like or slice, paired with
      MultiIndex labels.

      Returns
      -------
      bool
      """
      # check for a compatible nested tuple and multiindexes among the axes
      if not isinstance(tup, tuple):
          return False

      has_nested_entry = any(
          is_list_like(entry) or isinstance(entry, slice) for entry in tup
      )
      return has_nested_entry and isinstance(labels, MultiIndex)
  def is_label_like(key) -> bool:
      """
      Check whether ``key`` is neither a slice, a list-like indexer nor
      Ellipsis.

      Returns
      -------
      bool
      """
      # select a label or row
      return not (
          isinstance(key, slice) or is_list_like_indexer(key) or key is Ellipsis
      )
  def need_slice(obj: slice) -> bool:
      """
      Check whether a slice has a start, a stop, or a step other than 1.

      Returns
      -------
      bool
      """
      if obj.start is not None:
          return True
      if obj.stop is not None:
          return True
      step = obj.step
      return step is not None and step != 1
  def check_dict_or_set_indexers(key) -> None:
      """
      Raise if the indexer is, or is a tuple containing, a set or a dict,
      which is no longer allowed.
      """
      # Inspect the key itself and, for a tuple, each of its elements.
      candidates = (key, *key) if isinstance(key, tuple) else (key,)

      if any(isinstance(item, set) for item in candidates):
          raise TypeError(
              "Passing a set as an indexer is not supported. Use a list instead."
          )

      if any(isinstance(item, dict) for item in candidates):
          raise TypeError(
              "Passing a dict as an indexer is not supported. Use a list instead."
          )