1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629 |
- from __future__ import annotations
- from contextlib import suppress
- import sys
- from typing import (
- TYPE_CHECKING,
- Hashable,
- Sequence,
- TypeVar,
- cast,
- final,
- )
- import warnings
- import numpy as np
- from pandas._config import using_copy_on_write
- from pandas._libs.indexing import NDFrameIndexerBase
- from pandas._libs.lib import item_from_zerodim
- from pandas._typing import (
- Axis,
- AxisInt,
- )
- from pandas.compat import PYPY
- from pandas.errors import (
- AbstractMethodError,
- ChainedAssignmentError,
- IndexingError,
- InvalidIndexError,
- LossySetitemError,
- _chained_assignment_msg,
- )
- from pandas.util._decorators import doc
- from pandas.core.dtypes.cast import (
- can_hold_element,
- maybe_promote,
- )
- from pandas.core.dtypes.common import (
- is_array_like,
- is_bool_dtype,
- is_extension_array_dtype,
- is_hashable,
- is_integer,
- is_iterator,
- is_list_like,
- is_numeric_dtype,
- is_object_dtype,
- is_scalar,
- is_sequence,
- )
- from pandas.core.dtypes.concat import concat_compat
- from pandas.core.dtypes.generic import (
- ABCDataFrame,
- ABCSeries,
- )
- from pandas.core.dtypes.missing import (
- infer_fill_value,
- is_valid_na_for_dtype,
- isna,
- na_value_for_dtype,
- )
- from pandas.core import algorithms as algos
- import pandas.core.common as com
- from pandas.core.construction import (
- array as pd_array,
- extract_array,
- )
- from pandas.core.indexers import (
- check_array_indexer,
- is_list_like_indexer,
- is_scalar_indexer,
- length_of_indexer,
- )
- from pandas.core.indexes.api import (
- Index,
- MultiIndex,
- )
- if TYPE_CHECKING:
- from pandas import (
- DataFrame,
- Series,
- )
- _LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer")
- # "null slice"
- _NS = slice(None, None)
- _one_ellipsis_message = "indexer may only contain one '...' entry"
- # the public IndexSlicerMaker
- class _IndexSlice:
- """
- Create an object to more easily perform multi-index slicing.
- See Also
- --------
- MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.
- Notes
- -----
- See :ref:`Defined Levels <advanced.shown_levels>`
- for further info on slicing a MultiIndex.
- Examples
- --------
- >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
- >>> columns = ['foo', 'bar']
- >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
- ... index=midx, columns=columns)
- Using the default slice command:
- >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
- foo bar
- A0 B0 0 1
- B1 2 3
- A1 B0 8 9
- B1 10 11
- Using the IndexSlice class for a more intuitive command:
- >>> idx = pd.IndexSlice
- >>> dfmi.loc[idx[:, 'B0':'B1'], :]
- foo bar
- A0 B0 0 1
- B1 2 3
- A1 B0 8 9
- B1 10 11
- """
- def __getitem__(self, arg):
- return arg
- IndexSlice = _IndexSlice()
- class IndexingMixin:
- """
- Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
- """
- @property
- def iloc(self) -> _iLocIndexer:
- """
- Purely integer-location based indexing for selection by position.
- ``.iloc[]`` is primarily integer position based (from ``0`` to
- ``length-1`` of the axis), but may also be used with a boolean
- array.
- Allowed inputs are:
- - An integer, e.g. ``5``.
- - A list or array of integers, e.g. ``[4, 3, 0]``.
- - A slice object with ints, e.g. ``1:7``.
- - A boolean array.
- - A ``callable`` function with one argument (the calling Series or
- DataFrame) and that returns valid output for indexing (one of the above).
- This is useful in method chains, when you don't have a reference to the
- calling object, but would like to base your selection on some value.
- - A tuple of row and column indexes. The tuple elements consist of one of the
- above inputs, e.g. ``(0, 1)``.
- ``.iloc`` will raise ``IndexError`` if a requested indexer is
- out-of-bounds, except *slice* indexers which allow out-of-bounds
- indexing (this conforms with python/numpy *slice* semantics).
- See more at :ref:`Selection by Position <indexing.integer>`.
- See Also
- --------
- DataFrame.iat : Fast integer location scalar accessor.
- DataFrame.loc : Purely label-location based indexer for selection by label.
- Series.iloc : Purely integer-location based indexing for
- selection by position.
- Examples
- --------
- >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
- ... {'a': 100, 'b': 200, 'c': 300, 'd': 400},
- ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
- >>> df = pd.DataFrame(mydict)
- >>> df
- a b c d
- 0 1 2 3 4
- 1 100 200 300 400
- 2 1000 2000 3000 4000
- **Indexing just the rows**
- With a scalar integer.
- >>> type(df.iloc[0])
- <class 'pandas.core.series.Series'>
- >>> df.iloc[0]
- a 1
- b 2
- c 3
- d 4
- Name: 0, dtype: int64
- With a list of integers.
- >>> df.iloc[[0]]
- a b c d
- 0 1 2 3 4
- >>> type(df.iloc[[0]])
- <class 'pandas.core.frame.DataFrame'>
- >>> df.iloc[[0, 1]]
- a b c d
- 0 1 2 3 4
- 1 100 200 300 400
- With a `slice` object.
- >>> df.iloc[:3]
- a b c d
- 0 1 2 3 4
- 1 100 200 300 400
- 2 1000 2000 3000 4000
- With a boolean mask the same length as the index.
- >>> df.iloc[[True, False, True]]
- a b c d
- 0 1 2 3 4
- 2 1000 2000 3000 4000
- With a callable, useful in method chains. The `x` passed
- to the ``lambda`` is the DataFrame being sliced. This selects
- the rows whose index label even.
- >>> df.iloc[lambda x: x.index % 2 == 0]
- a b c d
- 0 1 2 3 4
- 2 1000 2000 3000 4000
- **Indexing both axes**
- You can mix the indexer types for the index and columns. Use ``:`` to
- select the entire axis.
- With scalar integers.
- >>> df.iloc[0, 1]
- 2
- With lists of integers.
- >>> df.iloc[[0, 2], [1, 3]]
- b d
- 0 2 4
- 2 2000 4000
- With `slice` objects.
- >>> df.iloc[1:3, 0:3]
- a b c
- 1 100 200 300
- 2 1000 2000 3000
- With a boolean array whose length matches the columns.
- >>> df.iloc[:, [True, False, True, False]]
- a c
- 0 1 3
- 1 100 300
- 2 1000 3000
- With a callable function that expects the Series or DataFrame.
- >>> df.iloc[:, lambda df: [0, 2]]
- a c
- 0 1 3
- 1 100 300
- 2 1000 3000
- """
- return _iLocIndexer("iloc", self)
- @property
- def loc(self) -> _LocIndexer:
- """
- Access a group of rows and columns by label(s) or a boolean array.
- ``.loc[]`` is primarily label based, but may also be used with a
- boolean array.
- Allowed inputs are:
- - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
- interpreted as a *label* of the index, and **never** as an
- integer position along the index).
- - A list or array of labels, e.g. ``['a', 'b', 'c']``.
- - A slice object with labels, e.g. ``'a':'f'``.
- .. warning:: Note that contrary to usual python slices, **both** the
- start and the stop are included
- - A boolean array of the same length as the axis being sliced,
- e.g. ``[True, False, True]``.
- - An alignable boolean Series. The index of the key will be aligned before
- masking.
- - An alignable Index. The Index of the returned selection will be the input.
- - A ``callable`` function with one argument (the calling Series or
- DataFrame) and that returns valid output for indexing (one of the above)
- See more at :ref:`Selection by Label <indexing.label>`.
- Raises
- ------
- KeyError
- If any items are not found.
- IndexingError
- If an indexed key is passed and its index is unalignable to the frame index.
- See Also
- --------
- DataFrame.at : Access a single value for a row/column label pair.
- DataFrame.iloc : Access group of rows and columns by integer position(s).
- DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
- Series/DataFrame.
- Series.loc : Access group of values using labels.
- Examples
- --------
- **Getting values**
- >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
- ... index=['cobra', 'viper', 'sidewinder'],
- ... columns=['max_speed', 'shield'])
- >>> df
- max_speed shield
- cobra 1 2
- viper 4 5
- sidewinder 7 8
- Single label. Note this returns the row as a Series.
- >>> df.loc['viper']
- max_speed 4
- shield 5
- Name: viper, dtype: int64
- List of labels. Note using ``[[]]`` returns a DataFrame.
- >>> df.loc[['viper', 'sidewinder']]
- max_speed shield
- viper 4 5
- sidewinder 7 8
- Single label for row and column
- >>> df.loc['cobra', 'shield']
- 2
- Slice with labels for row and single label for column. As mentioned
- above, note that both the start and stop of the slice are included.
- >>> df.loc['cobra':'viper', 'max_speed']
- cobra 1
- viper 4
- Name: max_speed, dtype: int64
- Boolean list with the same length as the row axis
- >>> df.loc[[False, False, True]]
- max_speed shield
- sidewinder 7 8
- Alignable boolean Series:
- >>> df.loc[pd.Series([False, True, False],
- ... index=['viper', 'sidewinder', 'cobra'])]
- max_speed shield
- sidewinder 7 8
- Index (same behavior as ``df.reindex``)
- >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
- max_speed shield
- foo
- cobra 1 2
- viper 4 5
- Conditional that returns a boolean Series
- >>> df.loc[df['shield'] > 6]
- max_speed shield
- sidewinder 7 8
- Conditional that returns a boolean Series with column labels specified
- >>> df.loc[df['shield'] > 6, ['max_speed']]
- max_speed
- sidewinder 7
- Callable that returns a boolean Series
- >>> df.loc[lambda df: df['shield'] == 8]
- max_speed shield
- sidewinder 7 8
- **Setting values**
- Set value for all items matching the list of labels
- >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
- >>> df
- max_speed shield
- cobra 1 2
- viper 4 50
- sidewinder 7 50
- Set value for an entire row
- >>> df.loc['cobra'] = 10
- >>> df
- max_speed shield
- cobra 10 10
- viper 4 50
- sidewinder 7 50
- Set value for an entire column
- >>> df.loc[:, 'max_speed'] = 30
- >>> df
- max_speed shield
- cobra 30 10
- viper 30 50
- sidewinder 30 50
- Set value for rows matching callable condition
- >>> df.loc[df['shield'] > 35] = 0
- >>> df
- max_speed shield
- cobra 30 10
- viper 0 0
- sidewinder 0 0
- **Getting values on a DataFrame with an index that has integer labels**
- Another example using integers for the index
- >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
- ... index=[7, 8, 9], columns=['max_speed', 'shield'])
- >>> df
- max_speed shield
- 7 1 2
- 8 4 5
- 9 7 8
- Slice with integer labels for rows. As mentioned above, note that both
- the start and stop of the slice are included.
- >>> df.loc[7:9]
- max_speed shield
- 7 1 2
- 8 4 5
- 9 7 8
- **Getting values with a MultiIndex**
- A number of examples using a DataFrame with a MultiIndex
- >>> tuples = [
- ... ('cobra', 'mark i'), ('cobra', 'mark ii'),
- ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
- ... ('viper', 'mark ii'), ('viper', 'mark iii')
- ... ]
- >>> index = pd.MultiIndex.from_tuples(tuples)
- >>> values = [[12, 2], [0, 4], [10, 20],
- ... [1, 4], [7, 1], [16, 36]]
- >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
- >>> df
- max_speed shield
- cobra mark i 12 2
- mark ii 0 4
- sidewinder mark i 10 20
- mark ii 1 4
- viper mark ii 7 1
- mark iii 16 36
- Single label. Note this returns a DataFrame with a single index.
- >>> df.loc['cobra']
- max_speed shield
- mark i 12 2
- mark ii 0 4
- Single index tuple. Note this returns a Series.
- >>> df.loc[('cobra', 'mark ii')]
- max_speed 0
- shield 4
- Name: (cobra, mark ii), dtype: int64
- Single label for row and column. Similar to passing in a tuple, this
- returns a Series.
- >>> df.loc['cobra', 'mark i']
- max_speed 12
- shield 2
- Name: (cobra, mark i), dtype: int64
- Single tuple. Note using ``[[]]`` returns a DataFrame.
- >>> df.loc[[('cobra', 'mark ii')]]
- max_speed shield
- cobra mark ii 0 4
- Single tuple for the index with a single label for the column
- >>> df.loc[('cobra', 'mark i'), 'shield']
- 2
- Slice from index tuple to single label
- >>> df.loc[('cobra', 'mark i'):'viper']
- max_speed shield
- cobra mark i 12 2
- mark ii 0 4
- sidewinder mark i 10 20
- mark ii 1 4
- viper mark ii 7 1
- mark iii 16 36
- Slice from index tuple to index tuple
- >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
- max_speed shield
- cobra mark i 12 2
- mark ii 0 4
- sidewinder mark i 10 20
- mark ii 1 4
- viper mark ii 7 1
- Please see the :ref:`user guide<advanced.advanced_hierarchical>`
- for more details and explanations of advanced indexing.
- """
- return _LocIndexer("loc", self)
- @property
- def at(self) -> _AtIndexer:
- """
- Access a single value for a row/column label pair.
- Similar to ``loc``, in that both provide label-based lookups. Use
- ``at`` if you only need to get or set a single value in a DataFrame
- or Series.
- Raises
- ------
- KeyError
- * If getting a value and 'label' does not exist in a DataFrame or
- Series.
- ValueError
- * If row/column label pair is not a tuple or if any label from
- the pair is not a scalar for DataFrame.
- * If label is list-like (*excluding* NamedTuple) for Series.
- See Also
- --------
- DataFrame.at : Access a single value for a row/column pair by label.
- DataFrame.iat : Access a single value for a row/column pair by integer
- position.
- DataFrame.loc : Access a group of rows and columns by label(s).
- DataFrame.iloc : Access a group of rows and columns by integer
- position(s).
- Series.at : Access a single value by label.
- Series.iat : Access a single value by integer position.
- Series.loc : Access a group of rows by label(s).
- Series.iloc : Access a group of rows by integer position(s).
- Notes
- -----
- See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
- for more details.
- Examples
- --------
- >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
- ... index=[4, 5, 6], columns=['A', 'B', 'C'])
- >>> df
- A B C
- 4 0 2 3
- 5 0 4 1
- 6 10 20 30
- Get value at specified row/column pair
- >>> df.at[4, 'B']
- 2
- Set value at specified row/column pair
- >>> df.at[4, 'B'] = 10
- >>> df.at[4, 'B']
- 10
- Get value within a Series
- >>> df.loc[5].at['B']
- 4
- """
- return _AtIndexer("at", self)
- @property
- def iat(self) -> _iAtIndexer:
- """
- Access a single value for a row/column pair by integer position.
- Similar to ``iloc``, in that both provide integer-based lookups. Use
- ``iat`` if you only need to get or set a single value in a DataFrame
- or Series.
- Raises
- ------
- IndexError
- When integer position is out of bounds.
- See Also
- --------
- DataFrame.at : Access a single value for a row/column label pair.
- DataFrame.loc : Access a group of rows and columns by label(s).
- DataFrame.iloc : Access a group of rows and columns by integer position(s).
- Examples
- --------
- >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
- ... columns=['A', 'B', 'C'])
- >>> df
- A B C
- 0 0 2 3
- 1 0 4 1
- 2 10 20 30
- Get value at specified row/column pair
- >>> df.iat[1, 2]
- 1
- Set value at specified row/column pair
- >>> df.iat[1, 2] = 10
- >>> df.iat[1, 2]
- 10
- Get value within a series
- >>> df.loc[0].iat[1]
- 2
- """
- return _iAtIndexer("iat", self)
- class _LocationIndexer(NDFrameIndexerBase):
- _valid_types: str
- axis: AxisInt | None = None
- # sub-classes need to set _takeable
- _takeable: bool
- @final
- def __call__(
- self: _LocationIndexerT, axis: Axis | None = None
- ) -> _LocationIndexerT:
- # we need to return a copy of ourselves
- new_self = type(self)(self.name, self.obj)
- if axis is not None:
- axis_int_none = self.obj._get_axis_number(axis)
- else:
- axis_int_none = axis
- new_self.axis = axis_int_none
- return new_self
- def _get_setitem_indexer(self, key):
- """
- Convert a potentially-label-based key into a positional indexer.
- """
- if self.name == "loc":
- # always holds here bc iloc overrides _get_setitem_indexer
- self._ensure_listlike_indexer(key)
- if isinstance(key, tuple):
- for x in key:
- check_dict_or_set_indexers(x)
- if self.axis is not None:
- key = _tupleize_axis_indexer(self.ndim, self.axis, key)
- ax = self.obj._get_axis(0)
- if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
- with suppress(KeyError, InvalidIndexError):
- # TypeError e.g. passed a bool
- return ax.get_loc(key)
- if isinstance(key, tuple):
- with suppress(IndexingError):
- # suppress "Too many indexers"
- return self._convert_tuple(key)
- if isinstance(key, range):
- # GH#45479 test_loc_setitem_range_key
- key = list(key)
- return self._convert_to_indexer(key, axis=0)
- @final
- def _maybe_mask_setitem_value(self, indexer, value):
- """
- If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
- same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
- similar to Series.__setitem__.
- Note this is only for loc, not iloc.
- """
- if (
- isinstance(indexer, tuple)
- and len(indexer) == 2
- and isinstance(value, (ABCSeries, ABCDataFrame))
- ):
- pi, icols = indexer
- ndim = value.ndim
- if com.is_bool_indexer(pi) and len(value) == len(pi):
- newkey = pi.nonzero()[0]
- if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
- # e.g. test_loc_setitem_boolean_mask_allfalse
- if len(newkey) == 0:
- # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
- # TODO(GH#45333): may be fixed when deprecation is enforced
- value = value.iloc[:0]
- else:
- # test_loc_setitem_ndframe_values_alignment
- value = self.obj.iloc._align_series(indexer, value)
- indexer = (newkey, icols)
- elif (
- isinstance(icols, np.ndarray)
- and icols.dtype.kind == "i"
- and len(icols) == 1
- ):
- if ndim == 1:
- # We implicitly broadcast, though numpy does not, see
- # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
- # test_loc_setitem_ndframe_values_alignment
- value = self.obj.iloc._align_series(indexer, value)
- indexer = (newkey, icols)
- elif ndim == 2 and value.shape[1] == 1:
- if len(newkey) == 0:
- # FIXME: kludge for
- # test_loc_setitem_all_false_boolean_two_blocks
- # TODO(GH#45333): may be fixed when deprecation is enforced
- value = value.iloc[:0]
- else:
- # test_loc_setitem_ndframe_values_alignment
- value = self.obj.iloc._align_frame(indexer, value)
- indexer = (newkey, icols)
- elif com.is_bool_indexer(indexer):
- indexer = indexer.nonzero()[0]
- return indexer, value
- @final
- def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
- """
- Ensure that a list-like of column labels are all present by adding them if
- they do not already exist.
- Parameters
- ----------
- key : list-like of column labels
- Target labels.
- axis : key axis if known
- """
- column_axis = 1
- # column only exists in 2-dimensional DataFrame
- if self.ndim != 2:
- return
- orig_key = key
- if isinstance(key, tuple) and len(key) > 1:
- # key may be a tuple if we are .loc
- # if length of key is > 1 set key to column part
- key = key[column_axis]
- axis = column_axis
- if (
- axis == column_axis
- and not isinstance(self.obj.columns, MultiIndex)
- and is_list_like_indexer(key)
- and not com.is_bool_indexer(key)
- and all(is_hashable(k) for k in key)
- ):
- # GH#38148
- keys = self.obj.columns.union(key, sort=False)
- diff = Index(key).difference(self.obj.columns, sort=False)
- if len(diff) and com.is_null_slice(orig_key[0]):
- # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
- # is a new column, add the new columns with dtype=np.void
- # so that later when we go through setitem_single_column
- # we will use isetitem. Without this, the reindex_axis
- # below would create float64 columns in this example, which
- # would successfully hold 7, so we would end up with the wrong
- # dtype.
- indexer = np.arange(len(keys), dtype=np.intp)
- indexer[len(self.obj.columns) :] = -1
- new_mgr = self.obj._mgr.reindex_indexer(
- keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
- )
- self.obj._mgr = new_mgr
- return
- self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
- @final
- def __setitem__(self, key, value) -> None:
- if not PYPY and using_copy_on_write():
- if sys.getrefcount(self.obj) <= 2:
- warnings.warn(
- _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
- )
- check_dict_or_set_indexers(key)
- if isinstance(key, tuple):
- key = tuple(list(x) if is_iterator(x) else x for x in key)
- key = tuple(com.apply_if_callable(x, self.obj) for x in key)
- else:
- key = com.apply_if_callable(key, self.obj)
- indexer = self._get_setitem_indexer(key)
- self._has_valid_setitem_indexer(key)
- iloc = self if self.name == "iloc" else self.obj.iloc
- iloc._setitem_with_indexer(indexer, value, self.name)
- def _validate_key(self, key, axis: AxisInt):
- """
- Ensure that key is valid for current indexer.
- Parameters
- ----------
- key : scalar, slice or list-like
- Key requested.
- axis : int
- Dimension on which the indexing is being made.
- Raises
- ------
- TypeError
- If the key (or some element of it) has wrong type.
- IndexError
- If the key (or some element of it) is out of bounds.
- KeyError
- If the key was not found.
- """
- raise AbstractMethodError(self)
- @final
- def _expand_ellipsis(self, tup: tuple) -> tuple:
- """
- If a tuple key includes an Ellipsis, replace it with an appropriate
- number of null slices.
- """
- if any(x is Ellipsis for x in tup):
- if tup.count(Ellipsis) > 1:
- raise IndexingError(_one_ellipsis_message)
- if len(tup) == self.ndim:
- # It is unambiguous what axis this Ellipsis is indexing,
- # treat as a single null slice.
- i = tup.index(Ellipsis)
- # FIXME: this assumes only one Ellipsis
- new_key = tup[:i] + (_NS,) + tup[i + 1 :]
- return new_key
- # TODO: other cases? only one test gets here, and that is covered
- # by _validate_key_length
- return tup
- @final
- def _validate_tuple_indexer(self, key: tuple) -> tuple:
- """
- Check the key for valid keys across my indexer.
- """
- key = self._validate_key_length(key)
- key = self._expand_ellipsis(key)
- for i, k in enumerate(key):
- try:
- self._validate_key(k, i)
- except ValueError as err:
- raise ValueError(
- "Location based indexing can only have "
- f"[{self._valid_types}] types"
- ) from err
- return key
- @final
- def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
- """
- Returns
- -------
- bool
- """
- if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
- return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
- return False
- @final
- def _convert_tuple(self, key: tuple) -> tuple:
- # Note: we assume _tupleize_axis_indexer has been called, if necessary.
- self._validate_key_length(key)
- keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
- return tuple(keyidx)
- @final
- def _validate_key_length(self, key: tuple) -> tuple:
- if len(key) > self.ndim:
- if key[0] is Ellipsis:
- # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
- key = key[1:]
- if Ellipsis in key:
- raise IndexingError(_one_ellipsis_message)
- return self._validate_key_length(key)
- raise IndexingError("Too many indexers")
- return key
- @final
- def _getitem_tuple_same_dim(self, tup: tuple):
- """
- Index with indexers that should return an object of the same dimension
- as self.obj.
- This is only called after a failed call to _getitem_lowerdim.
- """
- retval = self.obj
- for i, key in enumerate(tup):
- if com.is_null_slice(key):
- continue
- retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
- # We should never have retval.ndim < self.ndim, as that should
- # be handled by the _getitem_lowerdim call above.
- assert retval.ndim == self.ndim
- if retval is self.obj:
- # if all axes were a null slice (`df.loc[:, :]`), ensure we still
- # return a new object (https://github.com/pandas-dev/pandas/pull/49469)
- retval = retval.copy(deep=False)
- return retval
- @final
- def _getitem_lowerdim(self, tup: tuple):
- # we can directly get the axis result since the axis is specified
- if self.axis is not None:
- axis = self.obj._get_axis_number(self.axis)
- return self._getitem_axis(tup, axis=axis)
- # we may have a nested tuples indexer here
- if self._is_nested_tuple_indexer(tup):
- return self._getitem_nested_tuple(tup)
- # we maybe be using a tuple to represent multiple dimensions here
- ax0 = self.obj._get_axis(0)
- # ...but iloc should handle the tuple as simple integer-location
- # instead of checking it as multiindex representation (GH 13797)
- if (
- isinstance(ax0, MultiIndex)
- and self.name != "iloc"
- and not any(isinstance(x, slice) for x in tup)
- ):
- # Note: in all extant test cases, replacing the slice condition with
- # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
- # is equivalent.
- # (see the other place where we call _handle_lowerdim_multi_index_axis0)
- with suppress(IndexingError):
- return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)
- tup = self._validate_key_length(tup)
- for i, key in enumerate(tup):
- if is_label_like(key):
- # We don't need to check for tuples here because those are
- # caught by the _is_nested_tuple_indexer check above.
- section = self._getitem_axis(key, axis=i)
- # We should never have a scalar section here, because
- # _getitem_lowerdim is only called after a check for
- # is_scalar_access, which that would be.
- if section.ndim == self.ndim:
- # we're in the middle of slicing through a MultiIndex
- # revise the key wrt to `section` by inserting an _NS
- new_key = tup[:i] + (_NS,) + tup[i + 1 :]
- else:
- # Note: the section.ndim == self.ndim check above
- # rules out having DataFrame here, so we dont need to worry
- # about transposing.
- new_key = tup[:i] + tup[i + 1 :]
- if len(new_key) == 1:
- new_key = new_key[0]
- # Slices should return views, but calling iloc/loc with a null
- # slice returns a new object.
- if com.is_null_slice(new_key):
- return section
- # This is an elided recursive call to iloc/loc
- return getattr(section, self.name)[new_key]
- raise IndexingError("not applicable")
- @final
- def _getitem_nested_tuple(self, tup: tuple):
- # we have a nested tuple so have at least 1 multi-index level
- # we should be able to match up the dimensionality here
- for key in tup:
- check_dict_or_set_indexers(key)
- # we have too many indexers for our dim, but have at least 1
- # multi-index dimension, try to see if we have something like
- # a tuple passed to a series with a multi-index
- if len(tup) > self.ndim:
- if self.name != "loc":
- # This should never be reached, but let's be explicit about it
- raise ValueError("Too many indices") # pragma: no cover
- if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
- # GH#10521 Series should reduce MultiIndex dimensions instead of
- # DataFrame, IndexingError is not raised when slice(None,None,None)
- # with one row.
- with suppress(IndexingError):
- return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
- tup
- )
- elif isinstance(self.obj, ABCSeries) and any(
- isinstance(k, tuple) for k in tup
- ):
- # GH#35349 Raise if tuple in tuple for series
- # Do this after the all-hashable-or-null-slice check so that
- # we are only getting non-hashable tuples, in particular ones
- # that themselves contain a slice entry
- # See test_loc_series_getitem_too_many_dimensions
- raise IndexingError("Too many indexers")
- # this is a series with a multi-index specified a tuple of
- # selectors
- axis = self.axis or 0
- return self._getitem_axis(tup, axis=axis)
- # handle the multi-axis by taking sections and reducing
- # this is iterative
- obj = self.obj
- # GH#41369 Loop in reverse order ensures indexing along columns before rows
- # which selects only necessary blocks which avoids dtype conversion if possible
- axis = len(tup) - 1
- for key in tup[::-1]:
- if com.is_null_slice(key):
- axis -= 1
- continue
- obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
- axis -= 1
- # if we have a scalar, we are done
- if is_scalar(obj) or not hasattr(obj, "ndim"):
- break
- return obj
- def _convert_to_indexer(self, key, axis: AxisInt):
- raise AbstractMethodError(self)
- @final
- def __getitem__(self, key):
- check_dict_or_set_indexers(key)
- if type(key) is tuple:
- key = tuple(list(x) if is_iterator(x) else x for x in key)
- key = tuple(com.apply_if_callable(x, self.obj) for x in key)
- if self._is_scalar_access(key):
- return self.obj._get_value(*key, takeable=self._takeable)
- return self._getitem_tuple(key)
- else:
- # we by definition only have the 0th axis
- axis = self.axis or 0
- maybe_callable = com.apply_if_callable(key, self.obj)
- return self._getitem_axis(maybe_callable, axis=axis)
- def _is_scalar_access(self, key: tuple):
- raise NotImplementedError()
- def _getitem_tuple(self, tup: tuple):
- raise AbstractMethodError(self)
- def _getitem_axis(self, key, axis: AxisInt):
- raise NotImplementedError()
- def _has_valid_setitem_indexer(self, indexer) -> bool:
- raise AbstractMethodError(self)
- @final
- def _getbool_axis(self, key, axis: AxisInt):
- # caller is responsible for ensuring non-None axis
- labels = self.obj._get_axis(axis)
- key = check_bool_indexer(labels, key)
- inds = key.nonzero()[0]
- return self.obj._take_with_is_copy(inds, axis=axis)
- @doc(IndexingMixin.loc)
- class _LocIndexer(_LocationIndexer):
- _takeable: bool = False
- _valid_types = (
- "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
- "endpoints included! Can be slices of integers if the "
- "index is integers), listlike of labels, boolean"
- )
- # -------------------------------------------------------------------
- # Key Checks
- @doc(_LocationIndexer._validate_key)
- def _validate_key(self, key, axis: Axis):
- # valid for a collection of labels (we check their presence later)
- # slice of labels (where start-end in labels)
- # slice of integers (only if in the labels)
- # boolean not in slice and with boolean index
- ax = self.obj._get_axis(axis)
- if isinstance(key, bool) and not (
- is_bool_dtype(ax)
- or ax.dtype.name == "boolean"
- or isinstance(ax, MultiIndex)
- and is_bool_dtype(ax.get_level_values(0))
- ):
- raise KeyError(
- f"{key}: boolean label can not be used without a boolean index"
- )
- if isinstance(key, slice) and (
- isinstance(key.start, bool) or isinstance(key.stop, bool)
- ):
- raise TypeError(f"{key}: boolean values can not be used in a slice")
- def _has_valid_setitem_indexer(self, indexer) -> bool:
- return True
- def _is_scalar_access(self, key: tuple) -> bool:
- """
- Returns
- -------
- bool
- """
- # this is a shortcut accessor to both .loc and .iloc
- # that provide the equivalent access of .at and .iat
- # a) avoid getting things via sections and (to minimize dtype changes)
- # b) provide a performant path
- if len(key) != self.ndim:
- return False
- for i, k in enumerate(key):
- if not is_scalar(k):
- return False
- ax = self.obj.axes[i]
- if isinstance(ax, MultiIndex):
- return False
- if isinstance(k, str) and ax._supports_partial_string_indexing:
- # partial string indexing, df.loc['2000', 'A']
- # should not be considered scalar
- return False
- if not ax._index_as_unique:
- return False
- return True
- # -------------------------------------------------------------------
- # MultiIndex Handling
- def _multi_take_opportunity(self, tup: tuple) -> bool:
- """
- Check whether there is the possibility to use ``_multi_take``.
- Currently the limit is that all axes being indexed, must be indexed with
- list-likes.
- Parameters
- ----------
- tup : tuple
- Tuple of indexers, one per axis.
- Returns
- -------
- bool
- Whether the current indexing,
- can be passed through `_multi_take`.
- """
- if not all(is_list_like_indexer(x) for x in tup):
- return False
- # just too complicated
- return not any(com.is_bool_indexer(x) for x in tup)
- def _multi_take(self, tup: tuple):
- """
- Create the indexers for the passed tuple of keys, and
- executes the take operation. This allows the take operation to be
- executed all at once, rather than once for each dimension.
- Improving efficiency.
- Parameters
- ----------
- tup : tuple
- Tuple of indexers, one per axis.
- Returns
- -------
- values: same type as the object being indexed
- """
- # GH 836
- d = {
- axis: self._get_listlike_indexer(key, axis)
- for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
- }
- return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
- # -------------------------------------------------------------------
- def _getitem_iterable(self, key, axis: AxisInt):
- """
- Index current object with an iterable collection of keys.
- Parameters
- ----------
- key : iterable
- Targeted labels.
- axis : int
- Dimension on which the indexing is being made.
- Raises
- ------
- KeyError
- If no key was found. Will change in the future to raise if not all
- keys were found.
- Returns
- -------
- scalar, DataFrame, or Series: indexed value(s).
- """
- # we assume that not com.is_bool_indexer(key), as that is
- # handled before we get here.
- self._validate_key(key, axis)
- # A collection of keys
- keyarr, indexer = self._get_listlike_indexer(key, axis)
- return self.obj._reindex_with_indexers(
- {axis: [keyarr, indexer]}, copy=True, allow_dups=True
- )
- def _getitem_tuple(self, tup: tuple):
- with suppress(IndexingError):
- tup = self._expand_ellipsis(tup)
- return self._getitem_lowerdim(tup)
- # no multi-index, so validate all of the indexers
- tup = self._validate_tuple_indexer(tup)
- # ugly hack for GH #836
- if self._multi_take_opportunity(tup):
- return self._multi_take(tup)
- return self._getitem_tuple_same_dim(tup)
- def _get_label(self, label, axis: AxisInt):
- # GH#5567 this will fail if the label is not present in the axis.
- return self.obj.xs(label, axis=axis)
- def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
- # we have an axis0 multi-index, handle or raise
- axis = self.axis or 0
- try:
- # fast path for series or for tup devoid of slices
- return self._get_label(tup, axis=axis)
- except KeyError as ek:
- # raise KeyError if number of indexers match
- # else IndexingError will be raised
- if self.ndim < len(tup) <= self.obj.index.nlevels:
- raise ek
- raise IndexingError("No label returned") from ek
- def _getitem_axis(self, key, axis: AxisInt):
- key = item_from_zerodim(key)
- if is_iterator(key):
- key = list(key)
- if key is Ellipsis:
- key = slice(None)
- labels = self.obj._get_axis(axis)
- if isinstance(key, tuple) and isinstance(labels, MultiIndex):
- key = tuple(key)
- if isinstance(key, slice):
- self._validate_key(key, axis)
- return self._get_slice_axis(key, axis=axis)
- elif com.is_bool_indexer(key):
- return self._getbool_axis(key, axis=axis)
- elif is_list_like_indexer(key):
- # an iterable multi-selection
- if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
- if hasattr(key, "ndim") and key.ndim > 1:
- raise ValueError("Cannot index with multidimensional key")
- return self._getitem_iterable(key, axis=axis)
- # nested tuple slicing
- if is_nested_tuple(key, labels):
- locs = labels.get_locs(key)
- indexer = [slice(None)] * self.ndim
- indexer[axis] = locs
- return self.obj.iloc[tuple(indexer)]
- # fall thru to straight lookup
- self._validate_key(key, axis)
- return self._get_label(key, axis=axis)
- def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
- """
- This is pretty simple as we just have to deal with labels.
- """
- # caller is responsible for ensuring non-None axis
- obj = self.obj
- if not need_slice(slice_obj):
- return obj.copy(deep=False)
- labels = obj._get_axis(axis)
- indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
- if isinstance(indexer, slice):
- return self.obj._slice(indexer, axis=axis)
- else:
- # DatetimeIndex overrides Index.slice_indexer and may
- # return a DatetimeIndex instead of a slice object.
- return self.obj.take(indexer, axis=axis)
- def _convert_to_indexer(self, key, axis: AxisInt):
- """
- Convert indexing key into something we can use to do actual fancy
- indexing on a ndarray.
- Examples
- ix[:5] -> slice(0, 5)
- ix[[1,2,3]] -> [1,2,3]
- ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)
- Going by Zen of Python?
- 'In the face of ambiguity, refuse the temptation to guess.'
- raise AmbiguousIndexError with integer labels?
- - No, prefer label-based indexing
- """
- labels = self.obj._get_axis(axis)
- if isinstance(key, slice):
- return labels._convert_slice_indexer(key, kind="loc")
- if (
- isinstance(key, tuple)
- and not isinstance(labels, MultiIndex)
- and self.ndim < 2
- and len(key) > 1
- ):
- raise IndexingError("Too many indexers")
- if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
- # Otherwise get_loc will raise InvalidIndexError
- # if we are a label return me
- try:
- return labels.get_loc(key)
- except LookupError:
- if isinstance(key, tuple) and isinstance(labels, MultiIndex):
- if len(key) == labels.nlevels:
- return {"key": key}
- raise
- except InvalidIndexError:
- # GH35015, using datetime as column indices raises exception
- if not isinstance(labels, MultiIndex):
- raise
- except ValueError:
- if not is_integer(key):
- raise
- return {"key": key}
- if is_nested_tuple(key, labels):
- if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
- # GH#35349 Raise if tuple in tuple for series
- raise IndexingError("Too many indexers")
- return labels.get_locs(key)
- elif is_list_like_indexer(key):
- if is_iterator(key):
- key = list(key)
- if com.is_bool_indexer(key):
- key = check_bool_indexer(labels, key)
- return key
- else:
- return self._get_listlike_indexer(key, axis)[1]
- else:
- try:
- return labels.get_loc(key)
- except LookupError:
- # allow a not found key only if we are a setter
- if not is_list_like_indexer(key):
- return {"key": key}
- raise
- def _get_listlike_indexer(self, key, axis: AxisInt):
- """
- Transform a list-like of keys into a new index and an indexer.
- Parameters
- ----------
- key : list-like
- Targeted labels.
- axis: int
- Dimension on which the indexing is being made.
- Raises
- ------
- KeyError
- If at least one key was requested but none was found.
- Returns
- -------
- keyarr: Index
- New index (coinciding with 'key' if the axis is unique).
- values : array-like
- Indexer for the return object, -1 denotes keys not found.
- """
- ax = self.obj._get_axis(axis)
- axis_name = self.obj._get_axis_name(axis)
- keyarr, indexer = ax._get_indexer_strict(key, axis_name)
- return keyarr, indexer
- @doc(IndexingMixin.iloc)
- class _iLocIndexer(_LocationIndexer):
- _valid_types = (
- "integer, integer slice (START point is INCLUDED, END "
- "point is EXCLUDED), listlike of integers, boolean array"
- )
- _takeable = True
- # -------------------------------------------------------------------
- # Key Checks
- def _validate_key(self, key, axis: AxisInt):
- if com.is_bool_indexer(key):
- if hasattr(key, "index") and isinstance(key.index, Index):
- if key.index.inferred_type == "integer":
- raise NotImplementedError(
- "iLocation based boolean "
- "indexing on an integer type "
- "is not available"
- )
- raise ValueError(
- "iLocation based boolean indexing cannot use "
- "an indexable as a mask"
- )
- return
- if isinstance(key, slice):
- return
- elif is_integer(key):
- self._validate_integer(key, axis)
- elif isinstance(key, tuple):
- # a tuple should already have been caught by this point
- # so don't treat a tuple as a valid indexer
- raise IndexingError("Too many indexers")
- elif is_list_like_indexer(key):
- if isinstance(key, ABCSeries):
- arr = key._values
- elif is_array_like(key):
- arr = key
- else:
- arr = np.array(key)
- len_axis = len(self.obj._get_axis(axis))
- # check that the key has a numeric dtype
- if not is_numeric_dtype(arr.dtype):
- raise IndexError(f".iloc requires numeric indexers, got {arr}")
- # check that the key does not exceed the maximum size of the index
- if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
- raise IndexError("positional indexers are out-of-bounds")
- else:
- raise ValueError(f"Can only index by location with a [{self._valid_types}]")
- def _has_valid_setitem_indexer(self, indexer) -> bool:
- """
- Validate that a positional indexer cannot enlarge its target
- will raise if needed, does not modify the indexer externally.
- Returns
- -------
- bool
- """
- if isinstance(indexer, dict):
- raise IndexError("iloc cannot enlarge its target object")
- if isinstance(indexer, ABCDataFrame):
- raise TypeError(
- "DataFrame indexer for .iloc is not supported. "
- "Consider using .loc with a DataFrame indexer for automatic alignment.",
- )
- if not isinstance(indexer, tuple):
- indexer = _tuplify(self.ndim, indexer)
- for ax, i in zip(self.obj.axes, indexer):
- if isinstance(i, slice):
- # should check the stop slice?
- pass
- elif is_list_like_indexer(i):
- # should check the elements?
- pass
- elif is_integer(i):
- if i >= len(ax):
- raise IndexError("iloc cannot enlarge its target object")
- elif isinstance(i, dict):
- raise IndexError("iloc cannot enlarge its target object")
- return True
- def _is_scalar_access(self, key: tuple) -> bool:
- """
- Returns
- -------
- bool
- """
- # this is a shortcut accessor to both .loc and .iloc
- # that provide the equivalent access of .at and .iat
- # a) avoid getting things via sections and (to minimize dtype changes)
- # b) provide a performant path
- if len(key) != self.ndim:
- return False
- return all(is_integer(k) for k in key)
- def _validate_integer(self, key: int, axis: AxisInt) -> None:
- """
- Check that 'key' is a valid position in the desired axis.
- Parameters
- ----------
- key : int
- Requested position.
- axis : int
- Desired axis.
- Raises
- ------
- IndexError
- If 'key' is not a valid position in axis 'axis'.
- """
- len_axis = len(self.obj._get_axis(axis))
- if key >= len_axis or key < -len_axis:
- raise IndexError("single positional indexer is out-of-bounds")
- # -------------------------------------------------------------------
- def _getitem_tuple(self, tup: tuple):
- tup = self._validate_tuple_indexer(tup)
- with suppress(IndexingError):
- return self._getitem_lowerdim(tup)
- return self._getitem_tuple_same_dim(tup)
- def _get_list_axis(self, key, axis: AxisInt):
- """
- Return Series values by list or array of integers.
- Parameters
- ----------
- key : list-like positional indexer
- axis : int
- Returns
- -------
- Series object
- Notes
- -----
- `axis` can only be zero.
- """
- try:
- return self.obj._take_with_is_copy(key, axis=axis)
- except IndexError as err:
- # re-raise with different error message
- raise IndexError("positional indexers are out-of-bounds") from err
- def _getitem_axis(self, key, axis: AxisInt):
- if key is Ellipsis:
- key = slice(None)
- elif isinstance(key, ABCDataFrame):
- raise IndexError(
- "DataFrame indexer is not allowed for .iloc\n"
- "Consider using .loc for automatic alignment."
- )
- if isinstance(key, slice):
- return self._get_slice_axis(key, axis=axis)
- if is_iterator(key):
- key = list(key)
- if isinstance(key, list):
- key = np.asarray(key)
- if com.is_bool_indexer(key):
- self._validate_key(key, axis)
- return self._getbool_axis(key, axis=axis)
- # a list of integers
- elif is_list_like_indexer(key):
- return self._get_list_axis(key, axis=axis)
- # a single integer
- else:
- key = item_from_zerodim(key)
- if not is_integer(key):
- raise TypeError("Cannot index by location index with a non-integer key")
- # validate the location
- self._validate_integer(key, axis)
- return self.obj._ixs(key, axis=axis)
- def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
- # caller is responsible for ensuring non-None axis
- obj = self.obj
- if not need_slice(slice_obj):
- return obj.copy(deep=False)
- labels = obj._get_axis(axis)
- labels._validate_positional_slice(slice_obj)
- return self.obj._slice(slice_obj, axis=axis)
- def _convert_to_indexer(self, key, axis: AxisInt):
- """
- Much simpler as we only have to deal with our valid types.
- """
- return key
- def _get_setitem_indexer(self, key):
- # GH#32257 Fall through to let numpy do validation
- if is_iterator(key):
- key = list(key)
- if self.axis is not None:
- key = _tupleize_axis_indexer(self.ndim, self.axis, key)
- return key
- # -------------------------------------------------------------------
- def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
- """
- _setitem_with_indexer is for setting values on a Series/DataFrame
- using positional indexers.
- If the relevant keys are not present, the Series/DataFrame may be
- expanded.
- This method is currently broken when dealing with non-unique Indexes,
- since it goes from positional indexers back to labels when calling
- BlockManager methods, see GH#12991, GH#22046, GH#15686.
- """
- info_axis = self.obj._info_axis_number
- # maybe partial set
- take_split_path = not self.obj._mgr.is_single_block
- if not take_split_path and isinstance(value, ABCDataFrame):
- # Avoid cast of values
- take_split_path = not value._mgr.is_single_block
- # if there is only one block/type, still have to take split path
- # unless the block is one-dimensional or it can hold the value
- if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
- # in case of dict, keys are indices
- val = list(value.values()) if isinstance(value, dict) else value
- arr = self.obj._mgr.arrays[0]
- take_split_path = not can_hold_element(
- arr, extract_array(val, extract_numpy=True)
- )
- # if we have any multi-indexes that have non-trivial slices
- # (not null slices) then we must take the split path, xref
- # GH 10360, GH 27841
- if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
- for i, ax in zip(indexer, self.obj.axes):
- if isinstance(ax, MultiIndex) and not (
- is_integer(i) or com.is_null_slice(i)
- ):
- take_split_path = True
- break
- if isinstance(indexer, tuple):
- nindexer = []
- for i, idx in enumerate(indexer):
- if isinstance(idx, dict):
- # reindex the axis to the new value
- # and set inplace
- key, _ = convert_missing_indexer(idx)
- # if this is the items axes, then take the main missing
- # path first
- # this correctly sets the dtype and avoids cache issues
- # essentially this separates out the block that is needed
- # to possibly be modified
- if self.ndim > 1 and i == info_axis:
- # add the new item, and set the value
- # must have all defined axes if we have a scalar
- # or a list-like on the non-info axes if we have a
- # list-like
- if not len(self.obj):
- if not is_list_like_indexer(value):
- raise ValueError(
- "cannot set a frame with no "
- "defined index and a scalar"
- )
- self.obj[key] = value
- return
- # add a new item with the dtype setup
- if com.is_null_slice(indexer[0]):
- # We are setting an entire column
- self.obj[key] = value
- return
- elif is_array_like(value):
- # GH#42099
- arr = extract_array(value, extract_numpy=True)
- taker = -1 * np.ones(len(self.obj), dtype=np.intp)
- empty_value = algos.take_nd(arr, taker)
- if not isinstance(value, ABCSeries):
- # if not Series (in which case we need to align),
- # we can short-circuit
- if (
- isinstance(arr, np.ndarray)
- and arr.ndim == 1
- and len(arr) == 1
- ):
- # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
- arr = arr[0, ...]
- empty_value[indexer[0]] = arr
- self.obj[key] = empty_value
- return
- self.obj[key] = empty_value
- else:
- # FIXME: GH#42099#issuecomment-864326014
- self.obj[key] = infer_fill_value(value)
- new_indexer = convert_from_missing_indexer_tuple(
- indexer, self.obj.axes
- )
- self._setitem_with_indexer(new_indexer, value, name)
- return
- # reindex the axis
- # make sure to clear the cache because we are
- # just replacing the block manager here
- # so the object is the same
- index = self.obj._get_axis(i)
- labels = index.insert(len(index), key)
- # We are expanding the Series/DataFrame values to match
- # the length of thenew index `labels`. GH#40096 ensure
- # this is valid even if the index has duplicates.
- taker = np.arange(len(index) + 1, dtype=np.intp)
- taker[-1] = -1
- reindexers = {i: (labels, taker)}
- new_obj = self.obj._reindex_with_indexers(
- reindexers, allow_dups=True
- )
- self.obj._mgr = new_obj._mgr
- self.obj._maybe_update_cacher(clear=True)
- self.obj._is_copy = None
- nindexer.append(labels.get_loc(key))
- else:
- nindexer.append(idx)
- indexer = tuple(nindexer)
- else:
- indexer, missing = convert_missing_indexer(indexer)
- if missing:
- self._setitem_with_indexer_missing(indexer, value)
- return
- if name == "loc":
- # must come after setting of missing
- indexer, value = self._maybe_mask_setitem_value(indexer, value)
- # align and set the values
- if take_split_path:
- # We have to operate column-wise
- self._setitem_with_indexer_split_path(indexer, value, name)
- else:
- self._setitem_single_block(indexer, value, name)
- def _setitem_with_indexer_split_path(self, indexer, value, name: str):
- """
- Setitem column-wise.
- """
- # Above we only set take_split_path to True for 2D cases
- assert self.ndim == 2
- if not isinstance(indexer, tuple):
- indexer = _tuplify(self.ndim, indexer)
- if len(indexer) > self.ndim:
- raise IndexError("too many indices for array")
- if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
- raise ValueError(r"Cannot set values with ndim > 2")
- if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
- from pandas import Series
- value = self._align_series(indexer, Series(value))
- # Ensure we have something we can iterate over
- info_axis = indexer[1]
- ilocs = self._ensure_iterable_column_indexer(info_axis)
- pi = indexer[0]
- lplane_indexer = length_of_indexer(pi, self.obj.index)
- # lplane_indexer gives the expected length of obj[indexer[0]]
- # we need an iterable, with a ndim of at least 1
- # eg. don't pass through np.array(0)
- if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
- if isinstance(value, ABCDataFrame):
- self._setitem_with_indexer_frame_value(indexer, value, name)
- elif np.ndim(value) == 2:
- # TODO: avoid np.ndim call in case it isn't an ndarray, since
- # that will construct an ndarray, which will be wasteful
- self._setitem_with_indexer_2d_value(indexer, value)
- elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
- # We are setting multiple rows in a single column.
- self._setitem_single_column(ilocs[0], value, pi)
- elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
- # We are trying to set N values into M entries of a single
- # column, which is invalid for N != M
- # Exclude zero-len for e.g. boolean masking that is all-false
- if len(value) == 1 and not is_integer(info_axis):
- # This is a case like df.iloc[:3, [1]] = [0]
- # where we treat as df.iloc[:3, 1] = 0
- return self._setitem_with_indexer((pi, info_axis[0]), value[0])
- raise ValueError(
- "Must have equal len keys and value "
- "when setting with an iterable"
- )
- elif lplane_indexer == 0 and len(value) == len(self.obj.index):
- # We get here in one case via .loc with a all-False mask
- pass
- elif self._is_scalar_access(indexer) and is_object_dtype(
- self.obj.dtypes[ilocs[0]]
- ):
- # We are setting nested data, only possible for object dtype data
- self._setitem_single_column(indexer[1], value, pi)
- elif len(ilocs) == len(value):
- # We are setting multiple columns in a single row.
- for loc, v in zip(ilocs, value):
- self._setitem_single_column(loc, v, pi)
- elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
- # This is a setitem-with-expansion, see
- # test_loc_setitem_empty_append_expands_rows_mixed_dtype
- # e.g. df = DataFrame(columns=["x", "y"])
- # df["x"] = df["x"].astype(np.int64)
- # df.loc[:, "x"] = [1, 2, 3]
- self._setitem_single_column(ilocs[0], value, pi)
- else:
- raise ValueError(
- "Must have equal len keys and value "
- "when setting with an iterable"
- )
- else:
- # scalar value
- for loc in ilocs:
- self._setitem_single_column(loc, value, pi)
- def _setitem_with_indexer_2d_value(self, indexer, value):
- # We get here with np.ndim(value) == 2, excluding DataFrame,
- # which goes through _setitem_with_indexer_frame_value
- pi = indexer[0]
- ilocs = self._ensure_iterable_column_indexer(indexer[1])
- if not is_array_like(value):
- # cast lists to array
- value = np.array(value, dtype=object)
- if len(ilocs) != value.shape[1]:
- raise ValueError(
- "Must have equal len keys and value when setting with an ndarray"
- )
- for i, loc in enumerate(ilocs):
- value_col = value[:, i]
- if is_object_dtype(value_col.dtype):
- # casting to list so that we do type inference in setitem_single_column
- value_col = value_col.tolist()
- self._setitem_single_column(loc, value_col, pi)
- def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
- ilocs = self._ensure_iterable_column_indexer(indexer[1])
- sub_indexer = list(indexer)
- pi = indexer[0]
- multiindex_indexer = isinstance(self.obj.columns, MultiIndex)
- unique_cols = value.columns.is_unique
- # We do not want to align the value in case of iloc GH#37728
- if name == "iloc":
- for i, loc in enumerate(ilocs):
- val = value.iloc[:, i]
- self._setitem_single_column(loc, val, pi)
- elif not unique_cols and value.columns.equals(self.obj.columns):
- # We assume we are already aligned, see
- # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
- for loc in ilocs:
- item = self.obj.columns[loc]
- if item in value:
- sub_indexer[1] = item
- val = self._align_series(
- tuple(sub_indexer),
- value.iloc[:, loc],
- multiindex_indexer,
- )
- else:
- val = np.nan
- self._setitem_single_column(loc, val, pi)
- elif not unique_cols:
- raise ValueError("Setting with non-unique columns is not allowed.")
- else:
- for loc in ilocs:
- item = self.obj.columns[loc]
- if item in value:
- sub_indexer[1] = item
- val = self._align_series(
- tuple(sub_indexer), value[item], multiindex_indexer
- )
- else:
- val = np.nan
- self._setitem_single_column(loc, val, pi)
- def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
- """
- Parameters
- ----------
- loc : int
- Indexer for column position
- plane_indexer : int, slice, listlike[int]
- The indexer we use for setitem along axis=0.
- """
- pi = plane_indexer
- is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))
- is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0
- if is_null_setter:
- # no-op, don't cast dtype later
- return
- elif is_full_setter:
- try:
- self.obj._mgr.column_setitem(
- loc, plane_indexer, value, inplace_only=True
- )
- except (ValueError, TypeError, LossySetitemError):
- # If we're setting an entire column and we can't do it inplace,
- # then we can use value's dtype (or inferred dtype)
- # instead of object
- self.obj.isetitem(loc, value)
- else:
- # set value into the column (first attempting to operate inplace, then
- # falling back to casting if necessary)
- self.obj._mgr.column_setitem(loc, plane_indexer, value)
- self.obj._clear_item_cache()
- def _setitem_single_block(self, indexer, value, name: str) -> None:
- """
- _setitem_with_indexer for the case when we have a single Block.
- """
- from pandas import Series
- info_axis = self.obj._info_axis_number
- item_labels = self.obj._get_axis(info_axis)
- if isinstance(indexer, tuple):
- # if we are setting on the info axis ONLY
- # set using those methods to avoid block-splitting
- # logic here
- if (
- self.ndim == len(indexer) == 2
- and is_integer(indexer[1])
- and com.is_null_slice(indexer[0])
- ):
- col = item_labels[indexer[info_axis]]
- if len(item_labels.get_indexer_for([col])) == 1:
- # e.g. test_loc_setitem_empty_append_expands_rows
- loc = item_labels.get_loc(col)
- self._setitem_single_column(loc, value, indexer[0])
- return
- indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align
- if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
- # TODO(EA): ExtensionBlock.setitem this causes issues with
- # setting for extensionarrays that store dicts. Need to decide
- # if it's worth supporting that.
- value = self._align_series(indexer, Series(value))
- elif isinstance(value, ABCDataFrame) and name != "iloc":
- value = self._align_frame(indexer, value)._values
- # check for chained assignment
- self.obj._check_is_chained_assignment_possible()
- # actually do the set
- self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
- self.obj._maybe_update_cacher(clear=True, inplace=True)
- def _setitem_with_indexer_missing(self, indexer, value):
- """
- Insert new row(s) or column(s) into the Series or DataFrame.
- """
- from pandas import Series
- # reindex the axis to the new value
- # and set inplace
- if self.ndim == 1:
- index = self.obj.index
- new_index = index.insert(len(index), indexer)
- # we have a coerced indexer, e.g. a float
- # that matches in an int64 Index, so
- # we will not create a duplicate index, rather
- # index to that element
- # e.g. 0.0 -> 0
- # GH#12246
- if index.is_unique:
- # pass new_index[-1:] instead if [new_index[-1]]
- # so that we retain dtype
- new_indexer = index.get_indexer(new_index[-1:])
- if (new_indexer != -1).any():
- # We get only here with loc, so can hard code
- return self._setitem_with_indexer(new_indexer, value, "loc")
- # this preserves dtype of the value and of the object
- if not is_scalar(value):
- new_dtype = None
- elif is_valid_na_for_dtype(value, self.obj.dtype):
- if not is_object_dtype(self.obj.dtype):
- # Every NA value is suitable for object, no conversion needed
- value = na_value_for_dtype(self.obj.dtype, compat=False)
- new_dtype = maybe_promote(self.obj.dtype, value)[0]
- elif isna(value):
- new_dtype = None
- elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
- # We should not cast, if we have object dtype because we can
- # set timedeltas into object series
- curr_dtype = self.obj.dtype
- curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
- new_dtype = maybe_promote(curr_dtype, value)[0]
- else:
- new_dtype = None
- new_values = Series([value], dtype=new_dtype)._values
- if len(self.obj._values):
- # GH#22717 handle casting compatibility that np.concatenate
- # does incorrectly
- new_values = concat_compat([self.obj._values, new_values])
- self.obj._mgr = self.obj._constructor(
- new_values, index=new_index, name=self.obj.name
- )._mgr
- self.obj._maybe_update_cacher(clear=True)
- elif self.ndim == 2:
- if not len(self.obj.columns):
- # no columns and scalar
- raise ValueError("cannot set a frame with no defined columns")
- has_dtype = hasattr(value, "dtype")
- if isinstance(value, ABCSeries):
- # append a Series
- value = value.reindex(index=self.obj.columns, copy=True)
- value.name = indexer
- elif isinstance(value, dict):
- value = Series(
- value, index=self.obj.columns, name=indexer, dtype=object
- )
- else:
- # a list-list
- if is_list_like_indexer(value):
- # must have conforming columns
- if len(value) != len(self.obj.columns):
- raise ValueError("cannot set a row with mismatched columns")
- value = Series(value, index=self.obj.columns, name=indexer)
- if not len(self.obj):
- # We will ignore the existing dtypes instead of using
- # internals.concat logic
- df = value.to_frame().T
- idx = self.obj.index
- if isinstance(idx, MultiIndex):
- name = idx.names
- else:
- name = idx.name
- df.index = Index([indexer], name=name)
- if not has_dtype:
- # i.e. if we already had a Series or ndarray, keep that
- # dtype. But if we had a list or dict, then do inference
- df = df.infer_objects(copy=False)
- self.obj._mgr = df._mgr
- else:
- self.obj._mgr = self.obj._append(value)._mgr
- self.obj._maybe_update_cacher(clear=True)
- def _ensure_iterable_column_indexer(self, column_indexer):
- """
- Ensure that our column indexer is something that can be iterated over.
- """
- ilocs: Sequence[int] | np.ndarray
- if is_integer(column_indexer):
- ilocs = [column_indexer]
- elif isinstance(column_indexer, slice):
- ilocs = np.arange(len(self.obj.columns))[column_indexer]
- elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(
- column_indexer.dtype
- ):
- ilocs = np.arange(len(column_indexer))[column_indexer]
- else:
- ilocs = column_indexer
- return ilocs
- def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
- """
- Parameters
- ----------
- indexer : tuple, slice, scalar
- Indexer used to get the locations that will be set to `ser`.
- ser : pd.Series
- Values to assign to the locations specified by `indexer`.
- multiindex_indexer : bool, optional
- Defaults to False. Should be set to True if `indexer` was from
- a `pd.MultiIndex`, to avoid unnecessary broadcasting.
- Returns
- -------
- `np.array` of `ser` broadcast to the appropriate shape for assignment
- to the locations selected by `indexer`
- """
- if isinstance(indexer, (slice, np.ndarray, list, Index)):
- indexer = (indexer,)
- if isinstance(indexer, tuple):
- # flatten np.ndarray indexers
- def ravel(i):
- return i.ravel() if isinstance(i, np.ndarray) else i
- indexer = tuple(map(ravel, indexer))
- aligners = [not com.is_null_slice(idx) for idx in indexer]
- sum_aligners = sum(aligners)
- single_aligner = sum_aligners == 1
- is_frame = self.ndim == 2
- obj = self.obj
- # are we a single alignable value on a non-primary
- # dim (e.g. panel: 1,2, or frame: 0) ?
- # hence need to align to a single axis dimension
- # rather that find all valid dims
- # frame
- if is_frame:
- single_aligner = single_aligner and aligners[0]
- # we have a frame, with multiple indexers on both axes; and a
- # series, so need to broadcast (see GH5206)
- if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
- ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values
- # single indexer
- if len(indexer) > 1 and not multiindex_indexer:
- len_indexer = len(indexer[1])
- ser_values = (
- np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
- )
- return ser_values
- for i, idx in enumerate(indexer):
- ax = obj.axes[i]
- # multiple aligners (or null slices)
- if is_sequence(idx) or isinstance(idx, slice):
- if single_aligner and com.is_null_slice(idx):
- continue
- new_ix = ax[idx]
- if not is_list_like_indexer(new_ix):
- new_ix = Index([new_ix])
- else:
- new_ix = Index(new_ix)
- if ser.index.equals(new_ix) or not len(new_ix):
- return ser._values.copy()
- return ser.reindex(new_ix)._values
- # 2 dims
- elif single_aligner:
- # reindex along index
- ax = self.obj.axes[1]
- if ser.index.equals(ax) or not len(ax):
- return ser._values.copy()
- return ser.reindex(ax)._values
- elif is_integer(indexer) and self.ndim == 1:
- if is_object_dtype(self.obj):
- return ser
- ax = self.obj._get_axis(0)
- if ser.index.equals(ax):
- return ser._values.copy()
- return ser.reindex(ax)._values[indexer]
- elif is_integer(indexer):
- ax = self.obj._get_axis(1)
- if ser.index.equals(ax):
- return ser._values.copy()
- return ser.reindex(ax)._values
- raise ValueError("Incompatible indexer with Series")
- def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
- is_frame = self.ndim == 2
- if isinstance(indexer, tuple):
- idx, cols = None, None
- sindexers = []
- for i, ix in enumerate(indexer):
- ax = self.obj.axes[i]
- if is_sequence(ix) or isinstance(ix, slice):
- if isinstance(ix, np.ndarray):
- ix = ix.ravel()
- if idx is None:
- idx = ax[ix]
- elif cols is None:
- cols = ax[ix]
- else:
- break
- else:
- sindexers.append(i)
- if idx is not None and cols is not None:
- if df.index.equals(idx) and df.columns.equals(cols):
- val = df.copy()
- else:
- val = df.reindex(idx, columns=cols)
- return val
- elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
- ax = self.obj.index[indexer]
- if df.index.equals(ax):
- val = df.copy()
- else:
- # we have a multi-index and are trying to align
- # with a particular, level GH3738
- if (
- isinstance(ax, MultiIndex)
- and isinstance(df.index, MultiIndex)
- and ax.nlevels != df.index.nlevels
- ):
- raise TypeError(
- "cannot align on a multi-index with out "
- "specifying the join levels"
- )
- val = df.reindex(index=ax)
- return val
- raise ValueError("Incompatible indexer with DataFrame")
- class _ScalarAccessIndexer(NDFrameIndexerBase):
- """
- Access scalars quickly.
- """
- # sub-classes need to set _takeable
- _takeable: bool
- def _convert_key(self, key):
- raise AbstractMethodError(self)
- def __getitem__(self, key):
- if not isinstance(key, tuple):
- # we could have a convertible item here (e.g. Timestamp)
- if not is_list_like_indexer(key):
- key = (key,)
- else:
- raise ValueError("Invalid call for scalar access (getting)!")
- key = self._convert_key(key)
- return self.obj._get_value(*key, takeable=self._takeable)
- def __setitem__(self, key, value) -> None:
- if isinstance(key, tuple):
- key = tuple(com.apply_if_callable(x, self.obj) for x in key)
- else:
- # scalar callable may return tuple
- key = com.apply_if_callable(key, self.obj)
- if not isinstance(key, tuple):
- key = _tuplify(self.ndim, key)
- key = list(self._convert_key(key))
- if len(key) != self.ndim:
- raise ValueError("Not enough indexers for scalar access (setting)!")
- self.obj._set_value(*key, value=value, takeable=self._takeable)
- @doc(IndexingMixin.at)
- class _AtIndexer(_ScalarAccessIndexer):
- _takeable = False
- def _convert_key(self, key):
- """
- Require they keys to be the same type as the index. (so we don't
- fallback)
- """
- # GH 26989
- # For series, unpacking key needs to result in the label.
- # This is already the case for len(key) == 1; e.g. (1,)
- if self.ndim == 1 and len(key) > 1:
- key = (key,)
- return key
- @property
- def _axes_are_unique(self) -> bool:
- # Only relevant for self.ndim == 2
- assert self.ndim == 2
- return self.obj.index.is_unique and self.obj.columns.is_unique
- def __getitem__(self, key):
- if self.ndim == 2 and not self._axes_are_unique:
- # GH#33041 fall back to .loc
- if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
- raise ValueError("Invalid call for scalar access (getting)!")
- return self.obj.loc[key]
- return super().__getitem__(key)
- def __setitem__(self, key, value):
- if self.ndim == 2 and not self._axes_are_unique:
- # GH#33041 fall back to .loc
- if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
- raise ValueError("Invalid call for scalar access (setting)!")
- self.obj.loc[key] = value
- return
- return super().__setitem__(key, value)
- @doc(IndexingMixin.iat)
- class _iAtIndexer(_ScalarAccessIndexer):
- _takeable = True
- def _convert_key(self, key):
- """
- Require integer args. (and convert to label arguments)
- """
- for i in key:
- if not is_integer(i):
- raise ValueError("iAt based indexing can only have integer indexers")
- return key
- def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
- """
- Given an indexer for the first dimension, create an equivalent tuple
- for indexing over all dimensions.
- Parameters
- ----------
- ndim : int
- loc : object
- Returns
- -------
- tuple
- """
- _tup: list[Hashable | slice]
- _tup = [slice(None, None) for _ in range(ndim)]
- _tup[0] = loc
- return tuple(_tup)
- def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:
- """
- If we have an axis, adapt the given key to be axis-independent.
- """
- new_key = [slice(None)] * ndim
- new_key[axis] = key
- return tuple(new_key)
- def check_bool_indexer(index: Index, key) -> np.ndarray:
- """
- Check if key is a valid boolean indexer for an object with such index and
- perform reindexing or conversion if needed.
- This function assumes that is_bool_indexer(key) == True.
- Parameters
- ----------
- index : Index
- Index of the object on which the indexing is done.
- key : list-like
- Boolean indexer to check.
- Returns
- -------
- np.array
- Resulting key.
- Raises
- ------
- IndexError
- If the key does not have the same length as index.
- IndexingError
- If the index of the key is unalignable to index.
- """
- result = key
- if isinstance(key, ABCSeries) and not key.index.equals(index):
- indexer = result.index.get_indexer_for(index)
- if -1 in indexer:
- raise IndexingError(
- "Unalignable boolean Series provided as "
- "indexer (index of the boolean Series and of "
- "the indexed object do not match)."
- )
- result = result.take(indexer)
- # fall through for boolean
- if not is_extension_array_dtype(result.dtype):
- return result.astype(bool)._values
- if is_object_dtype(key):
- # key might be object-dtype bool, check_array_indexer needs bool array
- result = np.asarray(result, dtype=bool)
- elif not is_array_like(result):
- # GH 33924
- # key may contain nan elements, check_array_indexer needs bool array
- result = pd_array(result, dtype=bool)
- return check_array_indexer(index, result)
- def convert_missing_indexer(indexer):
- """
- Reverse convert a missing indexer, which is a dict
- return the scalar indexer and a boolean indicating if we converted
- """
- if isinstance(indexer, dict):
- # a missing key (but not a tuple indexer)
- indexer = indexer["key"]
- if isinstance(indexer, bool):
- raise KeyError("cannot use a single bool to index into setitem")
- return indexer, True
- return indexer, False
- def convert_from_missing_indexer_tuple(indexer, axes):
- """
- Create a filtered indexer that doesn't have any missing indexers.
- """
- def get_indexer(_i, _idx):
- return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx
- return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer))
- def maybe_convert_ix(*args):
- """
- We likely want to take the cross-product.
- """
- for arg in args:
- if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)):
- return args
- return np.ix_(*args)
- def is_nested_tuple(tup, labels) -> bool:
- """
- Returns
- -------
- bool
- """
- # check for a compatible nested tuple and multiindexes among the axes
- if not isinstance(tup, tuple):
- return False
- for k in tup:
- if is_list_like(k) or isinstance(k, slice):
- return isinstance(labels, MultiIndex)
- return False
- def is_label_like(key) -> bool:
- """
- Returns
- -------
- bool
- """
- # select a label or row
- return (
- not isinstance(key, slice)
- and not is_list_like_indexer(key)
- and key is not Ellipsis
- )
- def need_slice(obj: slice) -> bool:
- """
- Returns
- -------
- bool
- """
- return (
- obj.start is not None
- or obj.stop is not None
- or (obj.step is not None and obj.step != 1)
- )
- def check_dict_or_set_indexers(key) -> None:
- """
- Check if the indexer is or contains a dict or set, which is no longer allowed.
- """
- if (
- isinstance(key, set)
- or isinstance(key, tuple)
- and any(isinstance(x, set) for x in key)
- ):
- raise TypeError(
- "Passing a set as an indexer is not supported. Use a list instead."
- )
- if (
- isinstance(key, dict)
- or isinstance(key, tuple)
- and any(isinstance(x, dict) for x in key)
- ):
- raise TypeError(
- "Passing a dict as an indexer is not supported. Use a list instead."
- )
|