12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918 |
- from __future__ import annotations
- from functools import wraps
- from sys import getsizeof
- from typing import (
- TYPE_CHECKING,
- Any,
- Callable,
- Collection,
- Generator,
- Hashable,
- Iterable,
- List,
- Literal,
- Sequence,
- Tuple,
- cast,
- )
- import warnings
- import numpy as np
- from pandas._config import get_option
- from pandas._libs import (
- algos as libalgos,
- index as libindex,
- lib,
- )
- from pandas._libs.hashtable import duplicated
- from pandas._typing import (
- AnyAll,
- AnyArrayLike,
- Axis,
- DropKeep,
- DtypeObj,
- F,
- IgnoreRaise,
- IndexLabel,
- Scalar,
- Shape,
- npt,
- )
- from pandas.compat.numpy import function as nv
- from pandas.errors import (
- InvalidIndexError,
- PerformanceWarning,
- UnsortedIndexError,
- )
- from pandas.util._decorators import (
- Appender,
- cache_readonly,
- doc,
- )
- from pandas.util._exceptions import find_stack_level
- from pandas.core.dtypes.cast import coerce_indexer_dtype
- from pandas.core.dtypes.common import (
- ensure_int64,
- ensure_platform_int,
- is_categorical_dtype,
- is_extension_array_dtype,
- is_hashable,
- is_integer,
- is_iterator,
- is_list_like,
- is_object_dtype,
- is_scalar,
- pandas_dtype,
- )
- from pandas.core.dtypes.dtypes import ExtensionDtype
- from pandas.core.dtypes.generic import (
- ABCDataFrame,
- ABCDatetimeIndex,
- ABCTimedeltaIndex,
- )
- from pandas.core.dtypes.missing import (
- array_equivalent,
- isna,
- )
- import pandas.core.algorithms as algos
- from pandas.core.array_algos.putmask import validate_putmask
- from pandas.core.arrays import Categorical
- from pandas.core.arrays.categorical import factorize_from_iterables
- import pandas.core.common as com
- import pandas.core.indexes.base as ibase
- from pandas.core.indexes.base import (
- Index,
- _index_shared_docs,
- ensure_index,
- get_unanimous_names,
- )
- from pandas.core.indexes.frozen import FrozenList
- from pandas.core.ops.invalid import make_invalid_op
- from pandas.core.sorting import (
- get_group_index,
- indexer_from_factorized,
- lexsort_indexer,
- )
- from pandas.io.formats.printing import pprint_thing
- if TYPE_CHECKING:
- from pandas import (
- CategoricalIndex,
- DataFrame,
- Series,
- )
- _index_doc_kwargs = dict(ibase._index_doc_kwargs)
- _index_doc_kwargs.update(
- {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
- )
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Engine for a MultiIndex in which every label combination is mapped to a
    single positive integer.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 (each).

        The packing is strictly monotonic (i.e. it respects the lexicographic
        order of integer combinations): see BaseMultiIndexCodesEngine
        documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row). The array is shifted
            in place.

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Move every level to its pre-calculated bit offset. This is an
        # in-place shift, so the array handed in by the caller is modified.
        codes <<= self.offsets

        # After shifting, the significant bits of the levels are disjoint, so
        # OR-ing them (interchangeable with summing here) composes a single
        # positive integer. A single key is folded along its only axis,
        # multiple keys are folded row by row.
        reduce_axis = 0 if codes.ndim == 1 else 1
        return np.bitwise_or.reduce(codes, axis=reduce_axis)
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Engine for the (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers; it relies on an ObjectEngine
    holding Python integers instead.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python integer (each).

        The packing is strictly monotonic (i.e. it respects the lexicographic
        order of integer combinations): see BaseMultiIndexCodesEngine
        documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # The shift can overflow uint64, so switch to Python integers (object
        # dtype) first. ``astype`` returns a new array, which leaves the
        # caller's ``codes`` untouched.
        as_pyints = codes.astype("object")
        shifted = as_pyints << self.offsets

        # The shifted levels occupy disjoint bits, so OR-ing them
        # (interchangeable with summing here) composes one positive integer
        # per row. A single key is folded along its only axis, multiple keys
        # are folded row by row.
        reduce_axis = 0 if shifted.ndim == 1 else 1
        return np.bitwise_or.reduce(shifted, axis=reduce_axis)
def names_compat(meth: F) -> F:
    """
    Decorator that lets a method take ``name`` as an alias of ``names``.

    Only one of the two keywords may be given; ``name`` is forwarded to the
    wrapped method as ``names``. This makes it easier to share code with the
    base class.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        try:
            alias = kwargs.pop("name")
        except KeyError:
            # ``name`` was not supplied: forward the call unchanged
            pass
        else:
            if "names" in kwargs:
                raise TypeError("Can only provide one of `names` and `name`")
            kwargs["names"] = alias
        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)
- class MultiIndex(Index):
- """
- A multi-level, or hierarchical, index object for pandas objects.
- Parameters
- ----------
- levels : sequence of arrays
- The unique labels for each level.
- codes : sequence of arrays
- Integers for each level designating which label at each location.
- sortorder : optional int
- Level of sortedness (must be lexicographically sorted by that
- level).
- names : optional sequence of objects
- Names for each of the index levels. (name is accepted for compat).
- copy : bool, default False
- Copy the meta-data.
- verify_integrity : bool, default True
- Check that the levels/codes are consistent and valid.
- Attributes
- ----------
- names
- levels
- codes
- nlevels
- levshape
- dtypes
- Methods
- -------
- from_arrays
- from_tuples
- from_product
- from_frame
- set_levels
- set_codes
- to_frame
- to_flat_index
- sortlevel
- droplevel
- swaplevel
- reorder_levels
- remove_unused_levels
- get_level_values
- get_indexer
- get_loc
- get_locs
- get_loc_level
- drop
- See Also
- --------
- MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
- MultiIndex.from_product : Create a MultiIndex from the cartesian product
- of iterables.
- MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
- MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
- Index : The base pandas Index type.
- Notes
- -----
- See the `user guide
- <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
- for more.
- Examples
- --------
- A new ``MultiIndex`` is typically constructed using one of the helper
- methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
- and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
- >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
- >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
- MultiIndex([(1, 'red'),
- (1, 'blue'),
- (2, 'red'),
- (2, 'blue')],
- names=['number', 'color'])
- See further examples for how to construct a MultiIndex in the doc strings
- of the mentioned helper methods.
- """
- _hidden_attrs = Index._hidden_attrs | frozenset()
- # initialize to zero-length tuples to make everything work
- _typ = "multiindex"
- _names: list[Hashable | None] = []
- _levels = FrozenList()
- _codes = FrozenList()
- _comparables = ["names"]
- sortorder: int | None
- # --------------------------------------------------------------------
- # Constructors
- def __new__(
- cls,
- levels=None,
- codes=None,
- sortorder=None,
- names=None,
- dtype=None,
- copy: bool = False,
- name=None,
- verify_integrity: bool = True,
- ) -> MultiIndex:
- # compat with Index
- if name is not None:
- names = name
- if levels is None or codes is None:
- raise TypeError("Must pass both levels and codes")
- if len(levels) != len(codes):
- raise ValueError("Length of levels and codes must be the same.")
- if len(levels) == 0:
- raise ValueError("Must pass non-zero number of levels/codes")
- result = object.__new__(cls)
- result._cache = {}
- # we've already validated levels and codes, so shortcut here
- result._set_levels(levels, copy=copy, validate=False)
- result._set_codes(codes, copy=copy, validate=False)
- result._names = [None] * len(levels)
- if names is not None:
- # handles name validation
- result._set_names(names)
- if sortorder is not None:
- result.sortorder = int(sortorder)
- else:
- result.sortorder = sortorder
- if verify_integrity:
- new_codes = result._verify_integrity()
- result._codes = new_codes
- result._reset_identity()
- result._references = None
- return result
- def _validate_codes(self, level: list, code: list):
- """
- Reassign code values as -1 if their corresponding levels are NaN.
- Parameters
- ----------
- code : list
- Code to reassign.
- level : list
- Level to check for missing values (NaN, NaT, None).
- Returns
- -------
- new code where code value = -1 if it corresponds
- to a level with missing values (NaN, NaT, None).
- """
- null_mask = isna(level)
- if np.any(null_mask):
- # error: Incompatible types in assignment
- # (expression has type "ndarray[Any, dtype[Any]]",
- # variable has type "List[Any]")
- code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
- return code
- def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
- """
- Parameters
- ----------
- codes : optional list
- Codes to check for validity. Defaults to current codes.
- levels : optional list
- Levels to check for validity. Defaults to current levels.
- Raises
- ------
- ValueError
- If length of levels and codes don't match, if the codes for any
- level would exceed level bounds, or there are any duplicate levels.
- Returns
- -------
- new codes where code value = -1 if it corresponds to a
- NaN level.
- """
- # NOTE: Currently does not check, among other things, that cached
- # nlevels matches nor that sortorder matches actually sortorder.
- codes = codes or self.codes
- levels = levels or self.levels
- if len(levels) != len(codes):
- raise ValueError(
- "Length of levels and codes must match. NOTE: "
- "this index is in an inconsistent state."
- )
- codes_length = len(codes[0])
- for i, (level, level_codes) in enumerate(zip(levels, codes)):
- if len(level_codes) != codes_length:
- raise ValueError(
- f"Unequal code lengths: {[len(code_) for code_ in codes]}"
- )
- if len(level_codes) and level_codes.max() >= len(level):
- raise ValueError(
- f"On level {i}, code max ({level_codes.max()}) >= length of "
- f"level ({len(level)}). NOTE: this index is in an "
- "inconsistent state"
- )
- if len(level_codes) and level_codes.min() < -1:
- raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
- if not level.is_unique:
- raise ValueError(
- f"Level values must be unique: {list(level)} on level {i}"
- )
- if self.sortorder is not None:
- if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
- raise ValueError(
- "Value for sortorder must be inferior or equal to actual "
- f"lexsort_depth: sortorder {self.sortorder} "
- f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
- )
- codes = [
- self._validate_codes(level, code) for level, code in zip(levels, codes)
- ]
- new_codes = FrozenList(codes)
- return new_codes
@classmethod
def from_arrays(
    cls,
    arrays,
    sortorder=None,
    names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
) -> MultiIndex:
    """
    Convert arrays to MultiIndex.

    Parameters
    ----------
    arrays : list / sequence of array-likes
        Each array-like gives one level's value for each data point.
        len(arrays) is the number of levels.
    sortorder : int or None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list / sequence of str, optional
        Names for the levels in the index. When not given, each level's
        name is read from the ``name`` attribute of its array (None if the
        array has no such attribute).

    Returns
    -------
    MultiIndex

    Raises
    ------
    TypeError
        If ``arrays`` or any of its elements is not list-like.
    ValueError
        If the arrays do not all have the same length.

    See Also
    --------
    MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
    MultiIndex.from_product : Make a MultiIndex from cartesian product
                              of iterables.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

    Examples
    --------
    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex([(1,  'red'),
                (1, 'blue'),
                (2,  'red'),
                (2, 'blue')],
               names=['number', 'color'])
    """
    error_msg = "Input must be a list / sequence of array-likes."
    if not is_list_like(arrays):
        raise TypeError(error_msg)
    if is_iterator(arrays):
        # materialize so the input can be indexed and traversed repeatedly
        arrays = list(arrays)

    # every element must itself be list-like
    if any(not is_list_like(array) for array in arrays):
        raise TypeError(error_msg)

    # neighbouring arrays are compared pairwise; the first mismatch raises
    if any(len(arrays[i]) != len(arrays[i - 1]) for i in range(1, len(arrays))):
        raise ValueError("all arrays must be same length")

    codes, levels = factorize_from_iterables(arrays)
    if names is lib.no_default:
        names = [getattr(arr, "name", None) for arr in arrays]

    return cls(
        levels=levels,
        codes=codes,
        sortorder=sortorder,
        names=names,
        verify_integrity=False,
    )
@classmethod
@names_compat
def from_tuples(
    cls,
    tuples: Iterable[tuple[Hashable, ...]],
    sortorder: int | None = None,
    names: Sequence[Hashable] | Hashable = None,
) -> MultiIndex:
    """
    Convert list of tuples to MultiIndex.

    Parameters
    ----------
    tuples : list / sequence of tuple-likes
        Each tuple is the index of one row/column.
    sortorder : int or None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list / sequence of str, optional
        Names for the levels in the index.

    Returns
    -------
    MultiIndex

    Raises
    ------
    TypeError
        If ``tuples`` is not list-like, or if it is empty and no ``names``
        are given (the number of levels cannot be inferred).

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Make a MultiIndex from cartesian product
                              of iterables.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

    Examples
    --------
    >>> tuples = [(1, 'red'), (1, 'blue'),
    ...           (2, 'red'), (2, 'blue')]
    >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
    MultiIndex([(1,  'red'),
                (1, 'blue'),
                (2,  'red'),
                (2, 'blue')],
               names=['number', 'color'])
    """
    if not is_list_like(tuples):
        raise TypeError("Input must be a list / sequence of tuple-likes.")
    if is_iterator(tuples):
        tuples = list(tuples)
    tuples = cast(Collection[Tuple[Hashable, ...]], tuples)
    n_rows = len(tuples)

    # Non-empty input consisting solely of empty tuples: build one object
    # level holding the tuples themselves, with all-zero codes.
    if n_rows and all(isinstance(item, tuple) and not item for item in tuples):
        return cls(
            levels=[Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))],
            codes=[np.zeros(n_rows)],
            sortorder=sortorder,
            names=names,
            verify_integrity=False,
        )

    arrays: list[Sequence[Hashable]]
    if n_rows == 0:
        if names is None:
            raise TypeError("Cannot infer number of levels from empty list")
        # One empty array per requested name.
        # error: Argument 1 to "len" has incompatible type "Hashable";
        # expected "Sized"
        arrays = [[]] * len(names)  # type: ignore[arg-type]
    elif isinstance(tuples, Index):
        # go through the Index's underlying values
        unboxed = np.asarray(tuples._values)
        arrays = list(lib.tuples_to_object_array(unboxed).T)
    elif isinstance(tuples, np.ndarray):
        arrays = list(lib.tuples_to_object_array(tuples).T)
    elif isinstance(tuples, list):
        arrays = list(lib.to_object_array_tuples(tuples).T)
    else:
        # any other collection: transpose lazily; from_arrays accepts the
        # resulting iterator
        arrays = cast(List[Sequence[Hashable]], zip(*tuples))

    return cls.from_arrays(arrays, sortorder=sortorder, names=names)
@classmethod
def from_product(
    cls,
    iterables: Sequence[Iterable[Hashable]],
    sortorder: int | None = None,
    names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
) -> MultiIndex:
    """
    Make a MultiIndex from the cartesian product of multiple iterables.

    Parameters
    ----------
    iterables : list / sequence of iterables
        Each iterable has unique labels for each level of the index.
    sortorder : int or None
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list / sequence of str, optional
        Names for the levels in the index.
        If not explicitly provided, names will be inferred from the
        elements of iterables if an element has a name attribute.

    Returns
    -------
    MultiIndex

    Raises
    ------
    TypeError
        If ``iterables`` is not list-like.

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

    Examples
    --------
    >>> numbers = [0, 1, 2]
    >>> colors = ['green', 'purple']
    >>> pd.MultiIndex.from_product([numbers, colors],
    ...                            names=['number', 'color'])
    MultiIndex([(0,  'green'),
                (0, 'purple'),
                (1,  'green'),
                (1, 'purple'),
                (2,  'green'),
                (2, 'purple')],
               names=['number', 'color'])
    """
    from pandas.core.reshape.util import cartesian_product

    if not is_list_like(iterables):
        raise TypeError("Input must be a list / sequence of iterables.")
    if is_iterator(iterables):
        # materialize: the input is traversed more than once below
        iterables = list(iterables)

    level_codes, levels = factorize_from_iterables(iterables)
    if names is lib.no_default:
        names = [getattr(it, "name", None) for it in iterables]

    # codes are all ndarrays, so cartesian_product is lossless
    product_codes = cartesian_product(level_codes)
    return cls(levels, product_codes, sortorder=sortorder, names=names)
@classmethod
def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
    """
    Build a MultiIndex whose levels are the columns of a DataFrame.

    Parameters
    ----------
    df : DataFrame
        DataFrame to be converted to MultiIndex.
    sortorder : int, optional
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : list-like, optional
        If no names are provided, use the column names, or tuple of column
        names if the columns is a MultiIndex. If a sequence, overwrite
        names with the given sequence.

    Returns
    -------
    MultiIndex
        The MultiIndex representation of the given DataFrame.

    Raises
    ------
    TypeError
        If ``df`` is not a DataFrame.

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
    MultiIndex.from_product : Make a MultiIndex from cartesian product
                              of iterables.

    Examples
    --------
    >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
    ...                    ['NJ', 'Temp'], ['NJ', 'Precip']],
    ...                   columns=['a', 'b'])
    >>> pd.MultiIndex.from_frame(df)
    MultiIndex([('HI',   'Temp'),
                ('HI', 'Precip'),
                ('NJ',   'Temp'),
                ('NJ', 'Precip')],
               names=['a', 'b'])

    Using explicit names, instead of the column names

    >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
    MultiIndex([('HI',   'Temp'),
                ('HI', 'Precip'),
                ('NJ',   'Temp'),
                ('NJ', 'Precip')],
               names=['state', 'observation'])
    """
    if not isinstance(df, ABCDataFrame):
        raise TypeError("Input must be a DataFrame")

    # df.items() yields (label, column) pairs; transpose them into a tuple
    # of labels and a tuple of columns.
    column_names, columns = zip(*df.items())
    if names is None:
        names = column_names
    return cls.from_arrays(columns, sortorder=sortorder, names=names)
- # --------------------------------------------------------------------
- @cache_readonly
- def _values(self) -> np.ndarray:
- # We override here, since our parent uses _data, which we don't use.
- values = []
- for i in range(self.nlevels):
- index = self.levels[i]
- codes = self.codes[i]
- vals = index
- if is_categorical_dtype(vals.dtype):
- vals = cast("CategoricalIndex", vals)
- vals = vals._data._internal_get_values()
- if isinstance(vals.dtype, ExtensionDtype) or isinstance(
- vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
- ):
- vals = vals.astype(object)
- vals = np.array(vals, copy=False)
- vals = algos.take_nd(vals, codes, fill_value=index._na_value)
- values.append(vals)
- arr = lib.fast_zip(values)
- return arr
@property
def values(self) -> np.ndarray:
    """Public accessor that hands back whatever ``self._values`` holds."""
    materialized = self._values
    return materialized
@property
def array(self):
    """
    Always raise: a MultiIndex is not backed by one single array.

    Raises
    ------
    ValueError
        Unconditionally; the message points to ``MultiIndex.to_numpy()``.
    """
    message = (
        "MultiIndex has no single backing array. Use "
        "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
    )
    raise ValueError(message)
@cache_readonly
def dtypes(self) -> Series:
    """
    Return the dtype of every level as a Series.

    The Series is indexed by the level names after they have been passed
    through ``com.fill_missing_names``.
    """
    from pandas import Series

    levels = self.levels
    labels = com.fill_missing_names([lev.name for lev in levels])
    level_dtypes = [lev.dtype for lev in levels]
    return Series(level_dtypes, index=Index(labels))
- def __len__(self) -> int:
- return len(self.codes[0])
@property
def size(self) -> int:
    """
    Return the number of elements in the underlying data.
    """
    # Overrides Index.size: going through len() avoids materializing _values.
    n_entries = len(self)
    return n_entries
- # --------------------------------------------------------------------
- # Levels Methods
@cache_readonly
def levels(self) -> FrozenList:
    """
    Return the levels, each renamed to the matching entry of ``_names``.

    Every returned level has ``_no_setting_name`` set to True.
    """
    # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
    # create new IndexEngine
    # https://github.com/pandas-dev/pandas/issues/31648
    named_levels = []
    for raw_level, level_name in zip(self._levels, self._names):
        named = raw_level._rename(name=level_name)
        # disallow midx.levels[0].name = "foo"
        named._no_setting_name = True
        named_levels.append(named)
    return FrozenList(named_levels)
- def _set_levels(
- self,
- levels,
- *,
- level=None,
- copy: bool = False,
- validate: bool = True,
- verify_integrity: bool = False,
- ) -> None:
- # This is NOT part of the levels property because it should be
- # externally not allowed to set levels. User beware if you change
- # _levels directly
- if validate:
- if len(levels) == 0:
- raise ValueError("Must set non-zero number of levels.")
- if level is None and len(levels) != self.nlevels:
- raise ValueError("Length of levels must match number of levels.")
- if level is not None and len(levels) != len(level):
- raise ValueError("Length of levels must match length of level.")
- if level is None:
- new_levels = FrozenList(
- ensure_index(lev, copy=copy)._view() for lev in levels
- )
- else:
- level_numbers = [self._get_level_number(lev) for lev in level]
- new_levels_list = list(self._levels)
- for lev_num, lev in zip(level_numbers, levels):
- new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
- new_levels = FrozenList(new_levels_list)
- if verify_integrity:
- new_codes = self._verify_integrity(levels=new_levels)
- self._codes = new_codes
- names = self.names
- self._levels = new_levels
- if any(names):
- self._set_names(names)
- self._reset_cache()
def set_levels(
    self, levels, *, level=None, verify_integrity: bool = True
) -> MultiIndex:
    """
    Set new levels on MultiIndex. Defaults to returning new index.

    Parameters
    ----------
    levels : sequence or list of sequence
        New level(s) to apply.
    level : int, level name, or sequence of int/level names (default None)
        Level(s) to set (None for all levels).
    verify_integrity : bool, default True
        If True, checks that levels and codes are compatible.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples(
    ...     [
    ...         (1, "one"),
    ...         (1, "two"),
    ...         (2, "one"),
    ...         (2, "two"),
    ...         (3, "one"),
    ...         (3, "two")
    ...     ],
    ...     names=["foo", "bar"]
    ... )
    >>> idx
    MultiIndex([(1, 'one'),
                (1, 'two'),
                (2, 'one'),
                (2, 'two'),
                (3, 'one'),
                (3, 'two')],
               names=['foo', 'bar'])

    >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
    MultiIndex([('a', 1),
                ('a', 2),
                ('b', 1),
                ('b', 2),
                ('c', 1),
                ('c', 2)],
               names=['foo', 'bar'])
    >>> idx.set_levels(['a', 'b', 'c'], level=0)
    MultiIndex([('a', 'one'),
                ('a', 'two'),
                ('b', 'one'),
                ('b', 'two'),
                ('c', 'one'),
                ('c', 'two')],
               names=['foo', 'bar'])
    >>> idx.set_levels(['a', 'b'], level='bar')
    MultiIndex([(1, 'a'),
                (1, 'b'),
                (2, 'a'),
                (2, 'b'),
                (3, 'a'),
                (3, 'b')],
               names=['foo', 'bar'])

    If any of the levels passed to ``set_levels()`` exceeds the
    existing length, all of the values from that argument will
    be stored in the MultiIndex levels, though the values will
    be truncated in the MultiIndex output.

    >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
    MultiIndex([('a', 1),
                ('a', 2),
                ('b', 1),
                ('b', 2),
                ('c', 1),
                ('c', 2)],
               names=['foo', 'bar'])
    >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
    FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
    """
    # Any list-like other than an Index is converted to a plain list.
    if not isinstance(levels, Index) and is_list_like(levels):
        levels = list(levels)

    level, levels = _require_listlike(level, levels, "Levels")

    # Mutate a fresh view with its own identity rather than ``self``.
    result = self._view()
    result._reset_identity()
    result._set_levels(
        levels, level=level, validate=True, verify_integrity=verify_integrity
    )
    return result
@property
def nlevels(self) -> int:
    """
    Integer number of levels in this MultiIndex.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
    >>> mi
    MultiIndex([('a', 'b', 'c')],
               )
    >>> mi.nlevels
    3
    """
    stored_levels = self._levels
    return len(stored_levels)
@property
def levshape(self) -> Shape:
    """
    A tuple with the length of each level.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
    >>> mi
    MultiIndex([('a', 'b', 'c')],
               )
    >>> mi.levshape
    (1, 1, 1)
    """
    return tuple(map(len, self.levels))
- # --------------------------------------------------------------------
- # Codes Methods
@property
def codes(self):
    """Read-only accessor returning the object stored in ``self._codes``."""
    stored_codes = self._codes
    return stored_codes
- def _set_codes(
- self,
- codes,
- *,
- level=None,
- copy: bool = False,
- validate: bool = True,
- verify_integrity: bool = False,
- ) -> None:
- if validate:
- if level is None and len(codes) != self.nlevels:
- raise ValueError("Length of codes must match number of levels")
- if level is not None and len(codes) != len(level):
- raise ValueError("Length of codes must match length of levels.")
- if level is None:
- new_codes = FrozenList(
- _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
- for lev, level_codes in zip(self._levels, codes)
- )
- else:
- level_numbers = [self._get_level_number(lev) for lev in level]
- new_codes_list = list(self._codes)
- for lev_num, level_codes in zip(level_numbers, codes):
- lev = self.levels[lev_num]
- new_codes_list[lev_num] = _coerce_indexer_frozen(
- level_codes, lev, copy=copy
- )
- new_codes = FrozenList(new_codes_list)
- if verify_integrity:
- new_codes = self._verify_integrity(codes=new_codes)
- self._codes = new_codes
- self._reset_cache()
def set_codes(
    self, codes, *, level=None, verify_integrity: bool = True
) -> MultiIndex:
    """
    Set new codes on MultiIndex. Defaults to returning new index.

    Parameters
    ----------
    codes : sequence or list of sequence
        New codes to apply.
    level : int, level name, or sequence of int/level names (default None)
        Level(s) to set (None for all levels).
    verify_integrity : bool, default True
        If True, checks that levels and codes are compatible.

    Returns
    -------
    MultiIndex
        A new index carrying the requested codes. The change is applied to
        a view of the caller, not to the caller itself.

    Examples
    --------
    >>> idx = pd.MultiIndex.from_tuples(
    ...     [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
    ... )
    >>> idx
    MultiIndex([(1, 'one'),
                (1, 'two'),
                (2, 'one'),
                (2, 'two')],
               names=['foo', 'bar'])

    >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
    MultiIndex([(2, 'one'),
                (1, 'one'),
                (2, 'two'),
                (1, 'two')],
               names=['foo', 'bar'])
    >>> idx.set_codes([1, 0, 1, 0], level=0)
    MultiIndex([(2, 'one'),
                (1, 'two'),
                (2, 'one'),
                (1, 'two')],
               names=['foo', 'bar'])
    >>> idx.set_codes([0, 0, 1, 1], level='bar')
    MultiIndex([(1, 'one'),
                (1, 'one'),
                (2, 'two'),
                (2, 'two')],
               names=['foo', 'bar'])
    >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
    MultiIndex([(2, 'one'),
                (1, 'one'),
                (2, 'two'),
                (1, 'two')],
               names=['foo', 'bar'])
    """
    level, codes = _require_listlike(level, codes, "Codes")

    # Mutate a fresh view with its own identity rather than ``self``.
    idx = self._view()
    idx._reset_identity()
    idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
    return idx
- # --------------------------------------------------------------------
- # Index Internals
@cache_readonly
def _engine(self):
    """
    Build the lookup engine that packs each row's codes into one integer.

    Returns ``MultiIndexUIntEngine`` unless the summed bit count exceeds 64,
    in which case ``MultiIndexPyIntEngine`` is returned.
    """
    # Calculate the number of bits needed to represent labels in each
    # level, as log2 of their sizes:
    # NaN values are shifted to 1 and missing values in other while
    # calculating the indexer are shifted to 0
    nulls_shift = libindex.multiindex_nulls_shift  # type: ignore[attr-defined]
    shifted_sizes = [len(level) + nulls_shift for level in self.levels]
    bits_per_level = np.ceil(np.log2(shifted_sizes))

    # Sum bit counts, starting from the _right_....
    lev_bits = np.cumsum(bits_per_level[::-1])[::-1]

    # ... in order to obtain offsets such that sorting the combination of
    # shifted codes (one for each level, resulting in a unique integer) is
    # equivalent to sorting lexicographically the codes themselves. Notice
    # that each level needs to be shifted by the number of bits needed to
    # represent the _previous_ ones:
    offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

    # Check the total number of bits needed for our representation. Above
    # 64 bits the levels would overflow a 64 bit uint, so fall back to the
    # engine based on Python integers.
    if lev_bits[0] > 64:
        engine_cls = MultiIndexPyIntEngine
    else:
        engine_cls = MultiIndexUIntEngine
    return engine_cls(self.levels, self.codes, offsets)
- # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
- # type "Type[MultiIndex]" in supertype "Index"
- @property
- def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override]
- return type(self).from_tuples
- @doc(Index._shallow_copy)
- def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
- names = name if name is not lib.no_default else self.names
- return type(self).from_tuples(values, sortorder=None, names=names)
- def _view(self) -> MultiIndex:
- result = type(self)(
- levels=self.levels,
- codes=self.codes,
- sortorder=self.sortorder,
- names=self.names,
- verify_integrity=False,
- )
- result._cache = self._cache.copy()
- result._cache.pop("levels", None) # GH32669
- return result
- # --------------------------------------------------------------------
# error: Signature of "copy" incompatible with supertype "Index"
def copy(  # type: ignore[override]
    self,
    names=None,
    deep: bool = False,
    name=None,
):
    """
    Make a copy of this object.

    New names can be passed and will be set on the copy.

    Parameters
    ----------
    names : sequence, optional
    deep : bool, default False
    name : Label
        Kept for compatibility with 1-dimensional Index. Should not be used.

    Returns
    -------
    MultiIndex

    Notes
    -----
    In most cases, there should be no functional difference from using
    ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
    This could be potentially expensive on large MultiIndex objects.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
    >>> mi
    MultiIndex([('a', 'b', 'c')],
               )
    >>> mi.copy()
    MultiIndex([('a', 'b', 'c')],
               )
    """
    names = self._validate_names(name=name, names=names, deep=deep)

    if deep:
        from copy import deepcopy

        levels = deepcopy(self.levels)
        codes = deepcopy(self.codes)
    else:
        levels = self.levels
        codes = self.codes

    klass = type(self)
    new_index = klass(
        levels=levels,
        codes=codes,
        sortorder=self.sortorder,
        names=names,
        verify_integrity=False,
    )
    inherited_cache = self._cache.copy()
    inherited_cache.pop("levels", None)  # GH32669
    new_index._cache = inherited_cache

    # Only a shallow copy keeps the identity token of the original.
    if not deep:
        new_index._id = self._id
    return new_index
- def __array__(self, dtype=None) -> np.ndarray:
- """the array interface, return my values"""
- return self.values
def view(self, cls=None):
    """Return ``self.copy()`` with ``_id`` set to this object's ``_id``."""
    twin = self.copy()
    twin._id = self._id
    return twin
- @doc(Index.__contains__)
- def __contains__(self, key: Any) -> bool:
- hash(key)
- try:
- self.get_loc(key)
- return True
- except (LookupError, TypeError, ValueError):
- return False
@cache_readonly
def dtype(self) -> np.dtype:
    """Return the NumPy object dtype, regardless of the level dtypes."""
    object_dtype = np.dtype("O")
    return object_dtype
- def _is_memory_usage_qualified(self) -> bool:
- """return a boolean if we need a qualified .info display"""
- def f(level) -> bool:
- return "mixed" in level or "string" in level or "unicode" in level
- return any(f(level) for level in self._inferred_type_levels)
# Cannot determine type of "memory_usage"
@doc(Index.memory_usage)  # type: ignore[has-type]
def memory_usage(self, deep: bool = False) -> int:
    # Overrides the base class so that .values is not computed here, which
    # could materialize a tuple representation unnecessarily.
    total = self._nbytes(deep)
    return total
@cache_readonly
def nbytes(self) -> int:
    """return the number of bytes in the underlying data"""
    shallow_total = self._nbytes(False)
    return shallow_total
- def _nbytes(self, deep: bool = False) -> int:
- """
- return the number of bytes in the underlying data
- deeply introspect the level data if deep=True
- include the engine hashtable
- *this is in internal routine*
- """
- # for implementations with no useful getsizeof (PyPy)
- objsize = 24
- level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
- label_nbytes = sum(i.nbytes for i in self.codes)
- names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
- result = level_nbytes + label_nbytes + names_nbytes
- # include our engine hashtable
- result += self._engine.sizeof(deep=deep)
- return result
- # --------------------------------------------------------------------
- # Rendering Methods
- def _formatter_func(self, tup):
- """
- Formats each item in tup according to its level's formatter function.
- """
- formatter_funcs = [level._formatter_func for level in self.levels]
- return tuple(func(val) for func, val in zip(formatter_funcs, tup))
def _format_native_types(
    self, *, na_rep: str = "nan", **kwargs
) -> npt.NDArray[np.object_]:
    """
    Format every level natively and return the re-assembled values.

    Parameters
    ----------
    na_rep : str, default "nan"
        Text substituted for entries whose code is -1.
    **kwargs
        Forwarded to each level's ``_format_native_types``.
    """
    formatted_levels = []
    remapped_codes = []

    # go through the levels and format them
    for level, level_codes in zip(self.levels, self.codes):
        level_strs = level._format_native_types(na_rep=na_rep, **kwargs)

        # add nan values, if there are any
        missing = level_codes == -1
        if missing.any():
            # The NA text is appended as one extra label and every -1 code
            # is redirected to that new position.
            na_position = len(level_strs)
            # numpy 1.21 deprecated implicit string casting
            level_strs = np.append(level_strs.astype(str), na_rep)
            assert not level_codes.flags.writeable  # i.e. copy is needed
            level_codes = level_codes.copy()  # make writeable
            level_codes[missing] = na_position

        formatted_levels.append(level_strs)
        remapped_codes.append(level_codes)

    if len(formatted_levels) == 1:
        # a single-level multi-index
        only_level = formatted_levels[0].take(remapped_codes[0])
        return Index(only_level)._format_native_types()

    # reconstruct the multi-index
    rebuilt = MultiIndex(
        levels=formatted_levels,
        codes=remapped_codes,
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )
    return rebuilt._values
def format(
    self,
    name: bool | None = None,
    formatter: Callable | None = None,
    na_rep: str | None = None,
    names: bool = False,
    space: int = 2,
    sparsify=None,
    adjoin: bool = True,
) -> list:
    """
    Render the index as strings, level by level.

    Parameters
    ----------
    name : bool, optional
        When not None, overrides ``names``.
    formatter : callable, optional
        Forwarded to each level's ``format``.
    na_rep : str, optional
        Text for missing entries; defaults to ``_get_na_rep`` of the level
        dtype.
    names : bool, default False
        Prepend each level's name to its column of strings.
    space : int, default 2
        Passed to the adjustment object's ``adjoin``.
    sparsify : bool, optional
        Defaults to the "display.multi_sparse" option when None.
    adjoin : bool, default True
        Join the per-level columns into lines of text.

    Returns
    -------
    list
        Lines of text when ``adjoin`` is True, otherwise one list per level.
        An empty index yields an empty list.
    """
    if name is not None:
        names = name

    if len(self) == 0:
        return []

    escapes = ("\t", "\r", "\n")

    stringified_levels = []
    for lev, level_codes in zip(self.levels, self.codes):
        na = _get_na_rep(lev.dtype) if na_rep is None else na_rep

        if len(lev) > 0:
            formatted = lev.take(level_codes).format(formatter=formatter)

            # we have some NA
            missing = level_codes == -1
            if missing.any():
                as_objects = np.array(formatted, dtype=object)
                as_objects[missing] = na
                formatted = as_objects.tolist()
        else:
            # weird all NA case
            formatted = [
                pprint_thing(na if isna(x) else x, escape_chars=escapes)
                for x in algos.take_nd(lev._values, level_codes)
            ]
        stringified_levels.append(formatted)

    result_levels = []
    for lev, lev_name in zip(stringified_levels, self.names):
        column = []
        if names:
            if lev_name is not None:
                header = pprint_thing(lev_name, escape_chars=escapes)
            else:
                header = ""
            column.append(header)
        column.extend(np.array(lev, dtype=object))
        result_levels.append(column)

    if sparsify is None:
        sparsify = get_option("display.multi_sparse")

    if sparsify:
        sentinel: Literal[""] | bool | lib.NoDefault = ""
        # GH3547 use value of sparsify as sentinel if it's "Falsey"
        assert isinstance(sparsify, bool) or sparsify is lib.no_default
        if sparsify in [False, lib.no_default]:
            sentinel = sparsify
        # little bit of a kludge job for #1217
        result_levels = sparsify_labels(
            result_levels, start=int(names), sentinel=sentinel
        )

    if not adjoin:
        return result_levels

    from pandas.io.formats.format import get_adjustment

    adj = get_adjustment()
    return adj.adjoin(space, *result_levels).split("\n")
- # --------------------------------------------------------------------
- # Names Methods
- def _get_names(self) -> FrozenList:
- return FrozenList(self._names)
- def _set_names(self, names, *, level=None, validate: bool = True):
- """
- Set new names on index. Each name has to be a hashable type.
- Parameters
- ----------
- values : str or sequence
- name(s) to set
- level : int, level name, or sequence of int/level names (default None)
- If the index is a MultiIndex (hierarchical), level(s) to set (None
- for all levels). Otherwise level must be None
- validate : bool, default True
- validate that the names match level lengths
- Raises
- ------
- TypeError if each name is not hashable.
- Notes
- -----
- sets names on levels. WARNING: mutates!
- Note that you generally want to set this *after* changing levels, so
- that it only acts on copies
- """
- # GH 15110
- # Don't allow a single string for names in a MultiIndex
- if names is not None and not is_list_like(names):
- raise ValueError("Names should be list-like for a MultiIndex")
- names = list(names)
- if validate:
- if level is not None and len(names) != len(level):
- raise ValueError("Length of names must match length of level.")
- if level is None and len(names) != self.nlevels:
- raise ValueError(
- "Length of names must match number of levels in MultiIndex."
- )
- if level is None:
- level = range(self.nlevels)
- else:
- level = [self._get_level_number(lev) for lev in level]
- # set the name
- for lev, name in zip(level, names):
- if name is not None:
- # GH 20527
- # All items in 'names' need to be hashable:
- if not is_hashable(name):
- raise TypeError(
- f"{type(self).__name__}.name must be a hashable type"
- )
- self._names[lev] = name
- # If .levels has been accessed, the names in our cache will be stale.
- self._reset_cache()
# Public ``names`` attribute: reads go through _get_names, writes through
# _set_names.
names = property(
    fget=_get_names,
    fset=_set_names,
    doc="""
    Names of levels in MultiIndex.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays(
    ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
    >>> mi
    MultiIndex([(1, 3, 5),
                (2, 4, 6)],
               names=['x', 'y', 'z'])
    >>> mi.names
    FrozenList(['x', 'y', 'z'])
    """,
)
- # --------------------------------------------------------------------
@cache_readonly
def inferred_type(self) -> str:
    """Return the constant string "mixed"."""
    kind = "mixed"
    return kind
- def _get_level_number(self, level) -> int:
- count = self.names.count(level)
- if (count > 1) and not is_integer(level):
- raise ValueError(
- f"The name {level} occurs multiple times, use a level number"
- )
- try:
- level = self.names.index(level)
- except ValueError as err:
- if not is_integer(level):
- raise KeyError(f"Level {level} not found") from err
- if level < 0:
- level += self.nlevels
- if level < 0:
- orig_level = level - self.nlevels
- raise IndexError(
- f"Too many levels: Index has only {self.nlevels} levels, "
- f"{orig_level} is not a valid level number"
- ) from err
- # Note: levels are zero-based
- elif level >= self.nlevels:
- raise IndexError(
- f"Too many levels: Index has only {self.nlevels} levels, "
- f"not {level + 1}"
- ) from err
- return level
@cache_readonly
def is_monotonic_increasing(self) -> bool:
    """
    Return a boolean if the values are equal or increasing.
    """
    # Any -1 (missing) code means the answer is False.
    for level_codes in self.codes:
        if -1 in level_codes:
            return False

    if all(level.is_monotonic_increasing for level in self.levels):
        # If each level is sorted, we can operate on the codes directly. GH27495
        int64_codes = [x.astype("int64", copy=False) for x in self.codes]
        return libalgos.is_lexsorted(int64_codes)

    # reversed() because lexsort() wants the most significant key last.
    n_levels = len(self.levels)
    values = [
        self._get_level_values(i)._values for i in reversed(range(n_levels))
    ]
    try:
        # error: Argument 1 to "lexsort" has incompatible type
        # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
        # expected "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
        # int, float, complex, str, bytes, _NestedSequence[Union
        # [bool, int, float, complex, str, bytes]]]"
        sort_order = np.lexsort(values)  # type: ignore[arg-type]
        return Index(sort_order).is_monotonic_increasing
    except TypeError:
        # we have mixed types and np.lexsort is not happy
        return Index(self._values).is_monotonic_increasing
@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """
    Return a boolean if the values are equal or decreasing.
    """
    # monotonic decreasing if and only if reverse is monotonic increasing
    flipped = self[::-1]
    return flipped.is_monotonic_increasing
- @cache_readonly
- def _inferred_type_levels(self) -> list[str]:
- """return a list of the inferred types, one for each level"""
- return [i.inferred_type for i in self.levels]
@doc(Index.duplicated)
def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
    # Collapse each row's codes into one group id, then look for repeated
    # ids instead of comparing tuples.
    level_sizes = tuple(map(len, self.levels))
    group_ids = get_group_index(self.codes, level_sizes, sort=False, xnull=False)

    return duplicated(group_ids, keep)


# error: Cannot override final attribute "_duplicated"
# (previously declared in base class "IndexOpsMixin")
_duplicated = duplicated  # type: ignore[misc]
def fillna(self, value=None, downcast=None):
    """
    fillna is not implemented for MultiIndex

    Parameters
    ----------
    value : object, optional
        Not used.
    downcast : object, optional
        Not used.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # The message names this method; it used to say "isna".
    raise NotImplementedError("fillna is not defined for MultiIndex")
@doc(Index.dropna)
def dropna(self, how: AnyAll = "any") -> MultiIndex:
    # "any" drops a row when at least one level is missing, "all" only when
    # every level is missing.
    if how == "any":
        reducer = np.any
    elif how == "all":
        reducer = np.all
    else:
        raise ValueError(f"invalid how option: {how}")

    # code -1 marks a missing value in a level
    missing_per_level = [level_codes == -1 for level_codes in self.codes]
    keep = ~reducer(missing_per_level, axis=0)

    kept_codes = [level_codes[keep] for level_codes in self.codes]
    return self.set_codes(codes=kept_codes)
- def _get_level_values(self, level: int, unique: bool = False) -> Index:
- """
- Return vector of label values for requested level,
- equal to the length of the index
- **this is an internal method**
- Parameters
- ----------
- level : int
- unique : bool, default False
- if True, drop duplicated values
- Returns
- -------
- Index
- """
- lev = self.levels[level]
- level_codes = self.codes[level]
- name = self._names[level]
- if unique:
- level_codes = algos.unique(level_codes)
- filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
- return lev._shallow_copy(filled, name=name)
def get_level_values(self, level):
    """
    Return vector of label values for requested level.

    Length of returned vector is equal to the length of the index.

    Parameters
    ----------
    level : int or str
        ``level`` is either the integer position of the level in the
        MultiIndex, or the name of the level.

    Returns
    -------
    Index
        Values is a level of this MultiIndex converted to
        a single :class:`Index` (or subclass thereof).

    Notes
    -----
    If the level contains missing values, the result may be casted to
    ``float`` with missing values specified as ``NaN``. This is because
    the level is converted to a regular ``Index``.

    Examples
    --------
    Create a MultiIndex:

    >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
    >>> mi.names = ['level_1', 'level_2']

    Get level values by supplying level as either integer or name:

    >>> mi.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object', name='level_1')
    >>> mi.get_level_values('level_2')
    Index(['d', 'e', 'f'], dtype='object', name='level_2')

    If a level contains missing values, the return type of the level
    may be cast to ``float``.

    >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
    level_0    int64
    level_1    int64
    dtype: object
    >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
    Index([1.0, nan, 2.0], dtype='float64')
    """
    # Normalize a name or (possibly negative) position to a position first.
    position = self._get_level_number(level)
    return self._get_level_values(position)
@doc(Index.unique)
def unique(self, level=None):
    # Without a level, deduplicate whole entries.
    if level is None:
        return self.drop_duplicates()

    # With a level, return that level's distinct values.
    position = self._get_level_number(level)
    return self._get_level_values(level=position, unique=True)
def to_frame(
    self,
    index: bool = True,
    name=lib.no_default,
    allow_duplicates: bool = False,
) -> DataFrame:
    """
    Create a DataFrame with the levels of the MultiIndex as columns.

    Column ordering is determined by the DataFrame constructor with data as
    a dict.

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original MultiIndex.

    name : list / sequence of str, optional
        The passed names should substitute index level names.

    allow_duplicates : bool, optional default False
        Allow duplicate column labels to be created.

        .. versionadded:: 1.5.0

    Returns
    -------
    DataFrame

    Raises
    ------
    TypeError
        If ``name`` is given but is not list-like.
    ValueError
        If ``name`` has the wrong length, or duplicate column labels would
        be created while ``allow_duplicates`` is False.

    See Also
    --------
    DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
        tabular data.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
    >>> mi
    MultiIndex([('a', 'c'),
                ('b', 'd')],
               )

    >>> df = mi.to_frame()
    >>> df
         0  1
    a c  a  c
    b d  b  d

    >>> df = mi.to_frame(index=False)
    >>> df
       0  1
    0  a  c
    1  b  d

    >>> df = mi.to_frame(name=['x', 'y'])
    >>> df
         x  y
    a c  a  c
    b d  b  d
    """
    from pandas import DataFrame

    n_levels = len(self.levels)

    if name is lib.no_default:
        idx_names = self._get_level_names()
    else:
        if not is_list_like(name):
            raise TypeError("'name' must be a list / sequence of column names.")
        if len(name) != n_levels:
            raise ValueError(
                "'name' should have same length as number of levels on index."
            )
        idx_names = name

    has_duplicates = len(set(idx_names)) != len(idx_names)
    if not allow_duplicates and has_duplicates:
        raise ValueError(
            "Cannot create duplicate column labels if allow_duplicates is False"
        )

    # Guarantee resulting column order - PY36+ dict maintains insertion order
    # Columns are keyed by position first; the labels are assigned afterwards.
    columns_by_position = {
        position: self._get_level_values(position) for position in range(n_levels)
    }
    result = DataFrame(columns_by_position, copy=False)
    result.columns = idx_names

    if index:
        result.index = self
    return result
# error: Return type "Index" of "to_flat_index" incompatible with return type
# "MultiIndex" in supertype "Index"
def to_flat_index(self) -> Index:  # type: ignore[override]
    """
    Convert a MultiIndex to an Index of Tuples containing the level values.

    Returns
    -------
    pd.Index
        Index with the MultiIndex data represented in Tuples.

    See Also
    --------
    MultiIndex.from_tuples : Convert flat index back to MultiIndex.

    Notes
    -----
    This method will simply return the caller if called by anything other
    than a MultiIndex.

    Examples
    --------
    >>> index = pd.MultiIndex.from_product(
    ...     [['foo', 'bar'], ['baz', 'qux']],
    ...     names=['a', 'b'])
    >>> index.to_flat_index()
    Index([('foo', 'baz'), ('foo', 'qux'),
           ('bar', 'baz'), ('bar', 'qux')],
          dtype='object')
    """
    tuples = self._values
    return Index(tuples, tupleize_cols=False)
- def _is_lexsorted(self) -> bool:
- """
- Return True if the codes are lexicographically sorted.
- Returns
- -------
- bool
- Examples
- --------
- In the below examples, the first level of the MultiIndex is sorted because
- a<b<c, so there is no need to look at the next level.
- >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
- ... ['d', 'e', 'f']])._is_lexsorted()
- True
- >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
- ... ['d', 'f', 'e']])._is_lexsorted()
- True
- In case there is a tie, the lexicographical sorting looks
- at the next level of the MultiIndex.
- >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted()
- True
- >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted()
- False
- >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
- ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted()
- True
- >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
- ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted()
- False
- """
- return self._lexsort_depth == self.nlevels
- @cache_readonly
- def _lexsort_depth(self) -> int:
- """
- Compute and return the lexsort_depth, the number of levels of the
- MultiIndex that are sorted lexically
- Returns
- -------
- int
- """
- if self.sortorder is not None:
- return self.sortorder
- return _lexsort_depth(self.codes, self.nlevels)
- def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
- """
- This is an *internal* function.
- Create a new MultiIndex from the current to monotonically sorted
- items IN the levels. This does not actually make the entire MultiIndex
- monotonic, JUST the levels.
- The resulting MultiIndex will have the same outward
- appearance, meaning the same .values and ordering. It will also
- be .equals() to the original.
- Returns
- -------
- MultiIndex
- Examples
- --------
- >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
- ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
- >>> mi
- MultiIndex([('a', 'bb'),
- ('a', 'aa'),
- ('b', 'bb'),
- ('b', 'aa')],
- )
- >>> mi.sort_values()
- MultiIndex([('a', 'aa'),
- ('a', 'bb'),
- ('b', 'aa'),
- ('b', 'bb')],
- )
- """
- if self._is_lexsorted() and self.is_monotonic_increasing:
- return self
- new_levels = []
- new_codes = []
- for lev, level_codes in zip(self.levels, self.codes):
- if not lev.is_monotonic_increasing:
- try:
- # indexer to reorder the levels
- indexer = lev.argsort()
- except TypeError:
- if raise_if_incomparable:
- raise
- else:
- lev = lev.take(indexer)
- # indexer to reorder the level codes
- indexer = ensure_platform_int(indexer)
- ri = lib.get_reverse_indexer(indexer, len(indexer))
- level_codes = algos.take_nd(ri, level_codes)
- new_levels.append(lev)
- new_codes.append(level_codes)
- return MultiIndex(
- new_levels,
- new_codes,
- names=self.names,
- sortorder=self.sortorder,
- verify_integrity=False,
- )
def remove_unused_levels(self) -> MultiIndex:
    """
    Create new MultiIndex from current that removes unused levels.

    An unused level item is one that no code refers to.  The result looks
    the same from the outside: same ``.values``, same ordering, and it is
    ``.equals()`` to the original.

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
    >>> mi
    MultiIndex([(0, 'a'),
                (0, 'b'),
                (1, 'a'),
                (1, 'b')],
               )

    >>> mi[2:]
    MultiIndex([(1, 'a'),
                (1, 'b')],
               )

    The 0 from the first level is not represented
    and can be removed

    >>> mi2 = mi[2:].remove_unused_levels()
    >>> mi2.levels
    FrozenList([[1], ['a', 'b']])
    """
    kept_levels = []
    kept_codes = []
    any_pruned = False

    for lev, level_codes in zip(self.levels, self.codes):
        # Few items are typically unused, so bincount() is cheaper than
        # unique(); it needs non-negative input (hence the +1 / -1 shift)
        # and loses the order of first appearance.
        present = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
        has_na = int(len(present) and (present[0] == -1))

        if len(present) != len(lev) + has_na:
            if lev.isna().any() and len(present) == len(lev):
                # NOTE(review): this leaves the loop entirely, so the
                # remaining levels are never collected -- confirm intended.
                break

            # at least one item of this level is unused
            any_pruned = True

            # Recompute the used codes, this time in order of appearance.
            present = algos.unique(level_codes)
            if has_na:
                na_pos = np.where(present == -1)[0][0]
                # swap so that -1 sits in first position
                present[[0, na_pos]] = present[[na_pos, 0]]

            # Old code -> new code.  Index -1 addresses the last slot of
            # the table, which is why it has one extra entry when NA is
            # present; that slot is assigned the value -1.
            remap = np.zeros(len(lev) + has_na)
            remap[present] = np.arange(len(present)) - has_na

            level_codes = remap[level_codes]

            # the new level keeps only the used items (NA excluded)
            lev = lev.take(present[has_na:])

        kept_levels.append(lev)
        kept_codes.append(level_codes)

    result = self.view()

    if any_pruned:
        result._reset_identity()
        result._set_levels(kept_levels, validate=False)
        result._set_codes(kept_codes, validate=False)

    return result
- # --------------------------------------------------------------------
- # Pickling Methods
- def __reduce__(self):
- """Necessary for making this object picklable"""
- d = {
- "levels": list(self.levels),
- "codes": list(self.codes),
- "sortorder": self.sortorder,
- "names": list(self.names),
- }
- return ibase._new_Index, (type(self), d), None
- # --------------------------------------------------------------------
- def __getitem__(self, key):
- if is_scalar(key):
- key = com.cast_scalar_indexer(key)
- retval = []
- for lev, level_codes in zip(self.levels, self.codes):
- if level_codes[key] == -1:
- retval.append(np.nan)
- else:
- retval.append(lev[level_codes[key]])
- return tuple(retval)
- else:
- # in general cannot be sure whether the result will be sorted
- sortorder = None
- if com.is_bool_indexer(key):
- key = np.asarray(key, dtype=bool)
- sortorder = self.sortorder
- elif isinstance(key, slice):
- if key.step is None or key.step > 0:
- sortorder = self.sortorder
- elif isinstance(key, Index):
- key = np.asarray(key)
- new_codes = [level_codes[key] for level_codes in self.codes]
- return MultiIndex(
- levels=self.levels,
- codes=new_codes,
- names=self.names,
- sortorder=sortorder,
- verify_integrity=False,
- )
- def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
- """
- Fastpath for __getitem__ when we know we have a slice.
- """
- sortorder = None
- if slobj.step is None or slobj.step > 0:
- sortorder = self.sortorder
- new_codes = [level_codes[slobj] for level_codes in self.codes]
- return type(self)(
- levels=self.levels,
- codes=new_codes,
- names=self._names,
- sortorder=sortorder,
- verify_integrity=False,
- )
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(
    self: MultiIndex,
    indices,
    axis: Axis = 0,
    allow_fill: bool = True,
    fill_value=None,
    **kwargs,
) -> MultiIndex:
    # No docstring on purpose: the Appender decorator supplies it.
    nv.validate_take((), kwargs)
    indices = ensure_platform_int(indices)

    # only fill if we are passing a non-None fill_value
    allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

    picked = [level_codes.take(indices) for level_codes in self.codes]
    if allow_fill:
        fill_positions = indices == -1
        if fill_positions.any():
            # positions requested as -1 get the missing code on every level
            for level_codes in picked:
                level_codes[fill_positions] = -1
            picked = [np.asarray(level_codes) for level_codes in picked]

    return MultiIndex(
        levels=self.levels, codes=picked, names=self.names, verify_integrity=False
    )
def append(self, other):
    """
    Append a collection of Index options together.

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    Index
        The combined index.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
    >>> mi
    MultiIndex([('a', 'b')],
               )
    >>> mi.append(mi)
    MultiIndex([('a', 'b'),
                ('a', 'b')],
               )
    """
    if not isinstance(other, (list, tuple)):
        other = [other]

    all_deep_enough = all(
        isinstance(o, MultiIndex) and o.nlevels >= self.nlevels for o in other
    )
    if all_deep_enough:
        # Combine level by level; a level name survives only when every
        # appended index uses the same name for that level.
        arrays, names = [], []
        for i in range(self.nlevels):
            own = self._get_level_values(i)
            theirs = [o._get_level_values(i) for o in other]
            arrays.append(own.append(theirs))
            shared_name = all(own.name == x.name for x in theirs)
            names.append(own.name if shared_name else None)
        return MultiIndex.from_arrays(arrays, names=names)

    new_tuples = np.concatenate((self._values, *(k._values for k in other)))

    try:
        # We only get here if other contains at least one index with tuples,
        # setting names to None automatically
        return MultiIndex.from_tuples(new_tuples)
    except (TypeError, IndexError):
        return Index(new_tuples)
def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
    """
    Return the positions that would sort this MultiIndex.

    Any positional or keyword argument sends the call straight to
    ``self._values.argsort``; with no arguments the codes are lexsorted.
    """
    if args or kwargs:
        return self._values.argsort(*args, **kwargs)

    # lexsort is significantly faster than self._values.argsort()
    target = self._sort_levels_monotonic(raise_if_incomparable=True)
    return lexsort_indexer(target._get_codes_for_sorting())
@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
def repeat(self, repeats: int, axis=None) -> MultiIndex:
    # No docstring on purpose: the Appender decorator supplies it.
    nv.validate_repeat((), {"axis": axis})
    # error: Incompatible types in assignment (expression has type "ndarray",
    # variable has type "int")
    repeats = ensure_platform_int(repeats)  # type: ignore[assignment]

    repeated_codes = []
    for level_codes in self.codes:
        # plain intp ndarray first, then repeat each code
        as_intp = level_codes.view(np.ndarray).astype(np.intp, copy=False)
        repeated_codes.append(as_intp.repeat(repeats))

    return MultiIndex(
        levels=self.levels,
        codes=repeated_codes,
        names=self.names,
        sortorder=self.sortorder,
        verify_integrity=False,
    )
- # error: Signature of "drop" incompatible with supertype "Index"
- def drop( # type: ignore[override]
- self,
- codes,
- level: Index | np.ndarray | Iterable[Hashable] | None = None,
- errors: IgnoreRaise = "raise",
- ) -> MultiIndex:
- """
- Make new MultiIndex with passed list of codes deleted.
- Parameters
- ----------
- codes : array-like
- Must be a list of tuples when level is not specified.
- level : int or level name, default None
- errors : str, default 'raise'
- Returns
- -------
- MultiIndex
- """
- if level is not None:
- return self._drop_from_level(codes, level, errors)
- if not isinstance(codes, (np.ndarray, Index)):
- try:
- codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
- except ValueError:
- pass
- inds = []
- for level_codes in codes:
- try:
- loc = self.get_loc(level_codes)
- # get_loc returns either an integer, a slice, or a boolean
- # mask
- if isinstance(loc, int):
- inds.append(loc)
- elif isinstance(loc, slice):
- step = loc.step if loc.step is not None else 1
- inds.extend(range(loc.start, loc.stop, step))
- elif com.is_bool_indexer(loc):
- if self._lexsort_depth == 0:
- warnings.warn(
- "dropping on a non-lexsorted multi-index "
- "without a level parameter may impact performance.",
- PerformanceWarning,
- stacklevel=find_stack_level(),
- )
- loc = loc.nonzero()[0]
- inds.extend(loc)
- else:
- msg = f"unsupported indexer of type {type(loc)}"
- raise AssertionError(msg)
- except KeyError:
- if errors != "ignore":
- raise
- return self.delete(inds)
- def _drop_from_level(
- self, codes, level, errors: IgnoreRaise = "raise"
- ) -> MultiIndex:
- codes = com.index_labels_to_array(codes)
- i = self._get_level_number(level)
- index = self.levels[i]
- values = index.get_indexer(codes)
- # If nan should be dropped it will equal -1 here. We have to check which values
- # are not nan and equal -1, this means they are missing in the index
- nan_codes = isna(codes)
- values[(np.equal(nan_codes, False)) & (values == -1)] = -2
- if index.shape[0] == self.shape[0]:
- values[np.equal(nan_codes, True)] = -2
- not_found = codes[values == -2]
- if len(not_found) != 0 and errors != "ignore":
- raise KeyError(f"labels {not_found} not found in level")
- mask = ~algos.isin(self.codes[i], values)
- return self[mask]
def swaplevel(self, i=-2, j=-1) -> MultiIndex:
    """
    Swap level i with level j.

    Calling this method does not change the ordering of the values.

    Parameters
    ----------
    i : int, str, default -2
        First level of index to be swapped. Can pass level name as string.
        Type of parameters can be mixed.
    j : int, str, default -1
        Second level of index to be swapped. Can pass level name as string.
        Type of parameters can be mixed.

    Returns
    -------
    MultiIndex
        A new MultiIndex.

    See Also
    --------
    Series.swaplevel : Swap levels i and j in a MultiIndex.
    DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
        particular axis.

    Examples
    --------
    >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
    ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> mi
    MultiIndex([('a', 'bb'),
                ('a', 'aa'),
                ('b', 'bb'),
                ('b', 'aa')],
               )
    >>> mi.swaplevel(0, 1)
    MultiIndex([('bb', 'a'),
                ('aa', 'a'),
                ('bb', 'b'),
                ('aa', 'b')],
               )
    """
    i = self._get_level_number(i)
    j = self._get_level_number(j)

    new_levels = list(self.levels)
    new_codes = list(self.codes)
    new_names = list(self.names)

    # exchange positions i and j in each of the three parallel lists
    for parts in (new_levels, new_codes, new_names):
        parts[i], parts[j] = parts[j], parts[i]

    return MultiIndex(
        levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
    )
def reorder_levels(self, order) -> MultiIndex:
    """
    Rearrange levels using input order. May not drop or duplicate levels.

    Parameters
    ----------
    order : list of int or list of str
        List representing new level order. Reference level by number
        (position) or by key (label).

    Returns
    -------
    MultiIndex

    Raises
    ------
    AssertionError
        If ``order`` does not have exactly one entry per level.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
    >>> mi
    MultiIndex([(1, 3),
                (2, 4)],
               names=['x', 'y'])

    >>> mi.reorder_levels(order=[1, 0])
    MultiIndex([(3, 1),
                (4, 2)],
               names=['y', 'x'])

    >>> mi.reorder_levels(order=['y', 'x'])
    MultiIndex([(3, 1),
                (4, 2)],
               names=['y', 'x'])
    """
    # resolve names/negative positions to plain level numbers
    order = [self._get_level_number(i) for i in order]
    if len(order) != self.nlevels:
        raise AssertionError(
            f"Length of order must be same as number of levels ({self.nlevels}), "
            f"got {len(order)}"
        )

    return MultiIndex(
        levels=[self.levels[i] for i in order],
        codes=[self.codes[i] for i in order],
        names=[self.names[i] for i in order],
        verify_integrity=False,
    )
- def _get_codes_for_sorting(self) -> list[Categorical]:
- """
- we are categorizing our codes by using the
- available categories (all, not just observed)
- excluding any missing ones (-1); this is in preparation
- for sorting, where we need to disambiguate that -1 is not
- a valid valid
- """
- def cats(level_codes):
- return np.arange(
- np.array(level_codes).max() + 1 if len(level_codes) else 0,
- dtype=level_codes.dtype,
- )
- return [
- Categorical.from_codes(level_codes, cats(level_codes), ordered=True)
- for level_codes in self.codes
- ]
- def sortlevel(
- self,
- level: IndexLabel = 0,
- ascending: bool | list[bool] = True,
- sort_remaining: bool = True,
- ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
- """
- Sort MultiIndex at the requested level.
- The result will respect the original ordering of the associated
- factor at that level.
- Parameters
- ----------
- level : list-like, int or str, default 0
- If a string is given, must be a name of the level.
- If list-like must be names or ints of levels.
- ascending : bool, default True
- False to sort in descending order.
- Can also be a list to specify a directed ordering.
- sort_remaining : sort by the remaining levels after level
- Returns
- -------
- sorted_index : pd.MultiIndex
- Resulting index.
- indexer : np.ndarray[np.intp]
- Indices of output values in original index.
- Examples
- --------
- >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
- >>> mi
- MultiIndex([(0, 2),
- (0, 1)],
- )
- >>> mi.sortlevel()
- (MultiIndex([(0, 1),
- (0, 2)],
- ), array([1, 0]))
- >>> mi.sortlevel(sort_remaining=False)
- (MultiIndex([(0, 2),
- (0, 1)],
- ), array([0, 1]))
- >>> mi.sortlevel(1)
- (MultiIndex([(0, 1),
- (0, 2)],
- ), array([1, 0]))
- >>> mi.sortlevel(1, ascending=False)
- (MultiIndex([(0, 2),
- (0, 1)],
- ), array([0, 1]))
- """
- if not is_list_like(level):
- level = [level]
- # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
- # no attribute "__iter__" (not iterable)
- level = [
- self._get_level_number(lev) for lev in level # type: ignore[union-attr]
- ]
- sortorder = None
- # we have a directed ordering via ascending
- if isinstance(ascending, list):
- if not len(level) == len(ascending):
- raise ValueError("level must have same length as ascending")
- indexer = lexsort_indexer(
- [self.codes[lev] for lev in level], orders=ascending
- )
- # level ordering
- else:
- codes = list(self.codes)
- shape = list(self.levshape)
- # partition codes and shape
- primary = tuple(codes[lev] for lev in level)
- primshp = tuple(shape[lev] for lev in level)
- # Reverse sorted to retain the order of
- # smaller indices that needs to be removed
- for lev in sorted(level, reverse=True):
- codes.pop(lev)
- shape.pop(lev)
- if sort_remaining:
- primary += primary + tuple(codes)
- primshp += primshp + tuple(shape)
- else:
- sortorder = level[0]
- indexer = indexer_from_factorized(primary, primshp, compress=False)
- if not ascending:
- indexer = indexer[::-1]
- indexer = ensure_platform_int(indexer)
- new_codes = [level_codes.take(indexer) for level_codes in self.codes]
- new_index = MultiIndex(
- codes=new_codes,
- levels=self.levels,
- names=self.names,
- sortorder=sortorder,
- verify_integrity=False,
- )
- return new_index, indexer
- def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
- if not isinstance(target, MultiIndex):
- if indexer is None:
- target = self
- elif (indexer >= 0).all():
- target = self.take(indexer)
- else:
- try:
- target = MultiIndex.from_tuples(target)
- except TypeError:
- # not all tuples, see test_constructor_dict_multiindex_reindex_flat
- return target
- target = self._maybe_preserve_names(target, preserve_names)
- return target
- def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
- if (
- preserve_names
- and target.nlevels == self.nlevels
- and target.names != self.names
- ):
- target = target.copy(deep=False)
- target.names = self.names
- return target
- # --------------------------------------------------------------------
- # Indexing Methods
- def _check_indexing_error(self, key) -> None:
- if not is_hashable(key) or is_iterator(key):
- # We allow tuples if they are hashable, whereas other Index
- # subclasses require scalar.
- # We have to explicitly exclude generators, as these are hashable.
- raise InvalidIndexError(key)
@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    """
    Should integer key(s) be treated as positional?

    The answer is whatever the first level reports for itself.
    """
    # GH#33355
    first_level = self.levels[0]
    return first_level._should_fallback_to_positional
- def _get_indexer_strict(
- self, key, axis_name: str
- ) -> tuple[Index, npt.NDArray[np.intp]]:
- keyarr = key
- if not isinstance(keyarr, Index):
- keyarr = com.asarray_tuplesafe(keyarr)
- if len(keyarr) and not isinstance(keyarr[0], tuple):
- indexer = self._get_indexer_level_0(keyarr)
- self._raise_if_missing(key, indexer, axis_name)
- return self[indexer], indexer
- return super()._get_indexer_strict(key, axis_name)
- def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
- keyarr = key
- if not isinstance(key, Index):
- keyarr = com.asarray_tuplesafe(key)
- if len(keyarr) and not isinstance(keyarr[0], tuple):
- # i.e. same condition for special case in MultiIndex._get_indexer_strict
- mask = indexer == -1
- if mask.any():
- check = self.levels[0].get_indexer(keyarr)
- cmask = check == -1
- if cmask.any():
- raise KeyError(f"{keyarr[cmask]} not in index")
- # We get here when levels still contain values which are not
- # actually in Index anymore
- raise KeyError(f"{keyarr} not in index")
- else:
- return super()._raise_if_missing(key, indexer, axis_name)
- def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
- """
- Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
- """
- lev = self.levels[0]
- codes = self._codes[0]
- cat = Categorical.from_codes(codes=codes, categories=lev)
- ci = Index(cat)
- return ci.get_indexer_for(target)
- def get_slice_bound(
- self,
- label: Hashable | Sequence[Hashable],
- side: Literal["left", "right"],
- ) -> int:
- """
- For an ordered MultiIndex, compute slice bound
- that corresponds to given label.
- Returns leftmost (one-past-the-rightmost if `side=='right') position
- of given label.
- Parameters
- ----------
- label : object or tuple of objects
- side : {'left', 'right'}
- Returns
- -------
- int
- Index of label.
- Notes
- -----
- This method only works if level 0 index of the MultiIndex is lexsorted.
- Examples
- --------
- >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])
- Get the locations from the leftmost 'b' in the first level
- until the end of the multiindex:
- >>> mi.get_slice_bound('b', side="left")
- 1
- Like above, but if you get the locations from the rightmost
- 'b' in the first level and 'f' in the second level:
- >>> mi.get_slice_bound(('b','f'), side="right")
- 3
- See Also
- --------
- MultiIndex.get_loc : Get location for a label or a tuple of labels.
- MultiIndex.get_locs : Get location for a label/slice/list/mask or a
- sequence of such.
- """
- if not isinstance(label, tuple):
- label = (label,)
- return self._partial_tup_index(label, side=side)
- # pylint: disable-next=useless-parent-delegation
def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
    """
    For an ordered MultiIndex, compute the slice locations for input
    labels.

    The input labels can be tuples representing partial levels, e.g. for a
    MultiIndex with 3 levels, you can pass a single value (corresponding to
    the first level), or a 1-, 2-, or 3-tuple.

    Parameters
    ----------
    start : label or tuple, default None
        If None, defaults to the beginning
    end : label or tuple
        If None, defaults to the end
    step : int or None
        Slice step

    Returns
    -------
    (start, end) : (int, int)

    Notes
    -----
    This method only works if the MultiIndex is properly lexsorted. So,
    if only the first 2 levels of a 3-level MultiIndex are lexsorted,
    you can only pass two levels to ``.slice_locs``.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
    ...                                names=['A', 'B'])

    Get the slice locations from the beginning of 'b' in the first level
    until the end of the multiindex:

    >>> mi.slice_locs(start='b')
    (1, 4)

    Like above, but stop at the end of 'b' in the first level and 'f' in
    the second level:

    >>> mi.slice_locs(start='b', end=('b', 'f'))
    (1, 3)

    See Also
    --------
    MultiIndex.get_loc : Get location for a label or a tuple of labels.
    MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                          sequence of such.
    """
    # Pure delegation: this override exists for the MultiIndex-specific
    # documentation above, the real work happens in get_slice_bound.
    bounds = super().slice_locs(start, end, step)
    return bounds
- def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
- if len(tup) > self._lexsort_depth:
- raise UnsortedIndexError(
- f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
- f"({self._lexsort_depth})"
- )
- n = len(tup)
- start, end = 0, len(self)
- zipped = zip(tup, self.levels, self.codes)
- for k, (lab, lev, level_codes) in enumerate(zipped):
- section = level_codes[start:end]
- if lab not in lev and not isna(lab):
- # short circuit
- try:
- loc = algos.searchsorted(lev, lab, side=side)
- except TypeError as err:
- # non-comparable e.g. test_slice_locs_with_type_mismatch
- raise TypeError(f"Level type mismatch: {lab}") from err
- if not is_integer(loc):
- # non-comparable level, e.g. test_groupby_example
- raise TypeError(f"Level type mismatch: {lab}")
- if side == "right" and loc >= 0:
- loc -= 1
- return start + algos.searchsorted(section, loc, side=side)
- idx = self._get_loc_single_level_index(lev, lab)
- if isinstance(idx, slice) and k < n - 1:
- # Get start and end value from slice, necessary when a non-integer
- # interval is given as input GH#37707
- start = idx.start
- end = idx.stop
- elif k < n - 1:
- # error: Incompatible types in assignment (expression has type
- # "Union[ndarray[Any, dtype[signedinteger[Any]]]
- end = start + algos.searchsorted( # type: ignore[assignment]
- section, idx, side="right"
- )
- # error: Incompatible types in assignment (expression has type
- # "Union[ndarray[Any, dtype[signedinteger[Any]]]
- start = start + algos.searchsorted( # type: ignore[assignment]
- section, idx, side="left"
- )
- elif isinstance(idx, slice):
- idx = idx.start
- return start + algos.searchsorted(section, idx, side=side)
- else:
- return start + algos.searchsorted(section, idx, side=side)
- def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
- """
- If key is NA value, location of index unify as -1.
- Parameters
- ----------
- level_index: Index
- key : label
- Returns
- -------
- loc : int
- If key is NA value, loc is -1
- Else, location of key in index.
- See Also
- --------
- Index.get_loc : The get_loc method for (single-level) index.
- """
- if is_scalar(key) and isna(key):
- # TODO: need is_valid_na_for_dtype(key, level_index.dtype)
- return -1
- else:
- return level_index.get_loc(key)
def get_loc(self, key):
    """
    Get location for a label or a tuple of labels.

    The location is returned as an integer/slice or boolean
    mask.

    Parameters
    ----------
    key : label or tuple of labels (one for each level)

    Returns
    -------
    int, slice object or boolean mask
        If the key is past the lexsort depth, the return may be a
        boolean mask array, otherwise it is always a slice or int.

    Raises
    ------
    KeyError
        If the key has more elements than the index has levels, or if it
        is not found.

    See Also
    --------
    Index.get_loc : The get_loc method for (single-level) index.
    MultiIndex.slice_locs : Get slice location given start label(s) and
                            end label(s).
    MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                          sequence of such.

    Notes
    -----
    The key cannot be a slice, list of same-level labels, a boolean mask,
    or a sequence of such. If you want to use those, use
    :meth:`MultiIndex.get_locs` instead.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

    >>> mi.get_loc('b')
    slice(1, 3, None)

    >>> mi.get_loc(('b', 'e'))
    1
    """
    self._check_indexing_error(key)

    def _maybe_to_slice(loc):
        """convert integer indexer to boolean mask or slice if possible"""
        if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
            return loc

        loc = lib.maybe_indices_to_slice(loc, len(self))
        if isinstance(loc, slice):
            return loc

        mask = np.zeros(len(self), dtype="bool")
        mask[loc] = True
        return mask

    if not isinstance(key, tuple):
        return _maybe_to_slice(self._get_level_indexer(key, level=0))

    keylen = len(key)
    if self.nlevels < keylen:
        raise KeyError(
            f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
        )

    if keylen == self.nlevels and self.is_unique:
        # TODO: what if we have an IntervalIndex level?
        #  i.e. do we need _index_as_unique on that level?
        try:
            return self._engine.get_loc(key)
        except TypeError:
            # e.g. test_partial_slicing_with_multiindex partial string slicing
            loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
            return loc

    # -- partial selection or non-unique index
    # Split the key at the lexsort depth: the leading part selects one
    # contiguous block of positions, the trailing part has to be matched
    # by a linear scan inside that block.
    depth = self._lexsort_depth
    lead_key, follow_key = key[:depth], key[depth:]

    if lead_key:
        try:
            start, stop = self.slice_locs(lead_key, lead_key)
        except TypeError as err:
            # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
            #  when self has 5 integer levels
            raise KeyError(key) from err
    else:
        start, stop = 0, len(self)

    if start == stop:
        raise KeyError(key)

    if not follow_key:
        return slice(start, stop)

    warnings.warn(
        "indexing past lexsort depth may impact performance.",
        PerformanceWarning,
        stacklevel=find_stack_level(),
    )

    loc = np.arange(start, stop, dtype=np.intp)

    for i, k in enumerate(follow_key, len(lead_key)):
        wanted_code = self._get_loc_single_level_index(self.levels[i], k)
        mask = self.codes[i][loc] == wanted_code
        if not mask.all():
            loc = loc[mask]
        if not len(loc):
            raise KeyError(key)

    if len(loc) == stop - start:
        # nothing was filtered out: the whole block matches
        return slice(start, stop)
    return _maybe_to_slice(loc)
def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True):
    """
    Get location and sliced index for requested label(s)/level(s).

    Parameters
    ----------
    key : label or sequence of labels
    level : int/level name or list thereof, optional
    drop_level : bool, default True
        If ``False``, the resulting index will not drop any level.

    Returns
    -------
    tuple
        A 2-tuple where the elements :

        Element 0: int, slice object or boolean array.

        Element 1: The resulting sliced multiindex/index. If the key
        contains all levels, this will be ``None``.

    See Also
    --------
    MultiIndex.get_loc  : Get location for a label or a tuple of labels.
    MultiIndex.get_locs : Get location for a label/slice/list/mask or a
                          sequence of such.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
    ...                                names=['A', 'B'])

    >>> mi.get_loc_level('b')
    (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

    >>> mi.get_loc_level('e', level='B')
    (array([False,  True, False]), Index(['b'], dtype='object', name='A'))

    >>> mi.get_loc_level(['b', 'e'])
    (1, None)
    """
    # translate level names into positions before delegating
    if isinstance(level, (list, tuple)):
        level = [self._get_level_number(lev) for lev in level]
    else:
        level = self._get_level_number(level)

    loc, mi = self._get_loc_level(key, level=level)

    if not drop_level:
        # Re-select from self so that no level is dropped; an integer
        # location becomes a length-1 slice.
        mi = self[loc : loc + 1] if lib.is_integer(loc) else self[loc]

    return loc, mi
- def _get_loc_level(self, key, level: int | list[int] = 0):
- """
- get_loc_level but with `level` known to be positional, not name-based.
- """
- # different name to distinguish from maybe_droplevels
- def maybe_mi_droplevels(indexer, levels):
- """
- If level does not exist or all levels were dropped, the exception
- has to be handled outside.
- """
- new_index = self[indexer]
- for i in sorted(levels, reverse=True):
- new_index = new_index._drop_level_numbers([i])
- return new_index
- if isinstance(level, (tuple, list)):
- if len(key) != len(level):
- raise AssertionError(
- "Key for location must have same length as number of levels"
- )
- result = None
- for lev, k in zip(level, key):
- loc, new_index = self._get_loc_level(k, level=lev)
- if isinstance(loc, slice):
- mask = np.zeros(len(self), dtype=bool)
- mask[loc] = True
- loc = mask
- result = loc if result is None else result & loc
- try:
- # FIXME: we should be only dropping levels on which we are
- # scalar-indexing
- mi = maybe_mi_droplevels(result, level)
- except ValueError:
- # droplevel failed because we tried to drop all levels,
- # i.e. len(level) == self.nlevels
- mi = self[result]
- return result, mi
- # kludge for #1796
- if isinstance(key, list):
- key = tuple(key)
- if isinstance(key, tuple) and level == 0:
- try:
- # Check if this tuple is a single key in our first level
- if key in self.levels[0]:
- indexer = self._get_level_indexer(key, level=level)
- new_index = maybe_mi_droplevels(indexer, [0])
- return indexer, new_index
- except (TypeError, InvalidIndexError):
- pass
- if not any(isinstance(k, slice) for k in key):
- if len(key) == self.nlevels and self.is_unique:
- # Complete key in unique index -> standard get_loc
- try:
- return (self._engine.get_loc(key), None)
- except KeyError as err:
- raise KeyError(key) from err
- except TypeError:
- # e.g. partial string indexing
- # test_partial_string_timestamp_multiindex
- pass
- # partial selection
- indexer = self.get_loc(key)
- ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
- if len(ilevels) == self.nlevels:
- if is_integer(indexer):
- # we are dropping all levels
- return indexer, None
- # TODO: in some cases we still need to drop some levels,
- # e.g. test_multiindex_perf_warn
- # test_partial_string_timestamp_multiindex
- ilevels = [
- i
- for i in range(len(key))
- if (
- not isinstance(key[i], str)
- or not self.levels[i]._supports_partial_string_indexing
- )
- and key[i] != slice(None, None)
- ]
- if len(ilevels) == self.nlevels:
- # TODO: why?
- ilevels = []
- return indexer, maybe_mi_droplevels(indexer, ilevels)
- else:
- indexer = None
- for i, k in enumerate(key):
- if not isinstance(k, slice):
- loc_level = self._get_level_indexer(k, level=i)
- if isinstance(loc_level, slice):
- if com.is_null_slice(loc_level) or com.is_full_slice(
- loc_level, len(self)
- ):
- # everything
- continue
- # e.g. test_xs_IndexSlice_argument_not_implemented
- k_index = np.zeros(len(self), dtype=bool)
- k_index[loc_level] = True
- else:
- k_index = loc_level
- elif com.is_null_slice(k):
- # taking everything, does not affect `indexer` below
- continue
- else:
- # FIXME: this message can be inaccurate, e.g.
- # test_series_varied_multiindex_alignment
- raise TypeError(f"Expected label or tuple of labels, got {key}")
- if indexer is None:
- indexer = k_index
- else:
- indexer &= k_index
- if indexer is None:
- indexer = slice(None, None)
- ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
- return indexer, maybe_mi_droplevels(indexer, ilevels)
- else:
- indexer = self._get_level_indexer(key, level=level)
- if (
- isinstance(key, str)
- and self.levels[level]._supports_partial_string_indexing
- ):
- # check to see if we did an exact lookup vs sliced
- check = self.levels[level].get_loc(key)
- if not is_integer(check):
- # e.g. test_partial_string_timestamp_multiindex
- return indexer, self[indexer]
- try:
- result_index = maybe_mi_droplevels(indexer, [level])
- except ValueError:
- result_index = self[indexer]
- return indexer, result_index
def _get_level_indexer(
    self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
):
    """
    Locate ``key`` within a single level of this MultiIndex.

    Parameters
    ----------
    key : label or slice
        Label, or slice of labels, looked up in ``self.levels[level]``.
    level : int, default 0
        Position of the level to search.
    indexer : np.ndarray[bool], optional
        Existing boolean mask. It is only consulted by the nested
        ``convert_indexer`` helper (slice keys), where the comparison is
        restricted to positions at which the mask is True.

    Returns
    -------
    slice or np.ndarray[bool]
        A ``slice`` when the codes can be located with ``searchsorted``
        (``level == 0`` and a non-zero ``_lexsort_depth``), otherwise a
        boolean mask.

    Raises
    ------
    KeyError
        For a non-slice ``key`` whose code does not occur in
        ``self.codes[level]``.
    """
    # `level` kwarg is _always_ positional, never name
    # return a boolean array or slice showing where the key is
    # in the totality of values
    # if the indexer is provided, then use this

    level_index = self.levels[level]
    level_codes = self.codes[level]

    def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
        # Compute a bool indexer to identify the positions to take.
        # If we have an existing indexer, we only need to examine the
        # subset of positions where the existing indexer is True.
        if indexer is not None:
            # we only need to look at the subset of codes where the
            # existing indexer equals True
            codes = codes[indexer]

        if step is None or step == 1:
            # contiguous range of codes: half-open interval [start, stop)
            new_indexer = (codes >= start) & (codes < stop)
        else:
            # strided (or negative) step: enumerate the wanted codes explicitly
            r = np.arange(start, stop, step, dtype=codes.dtype)
            new_indexer = algos.isin(codes, r)

        if indexer is None:
            return new_indexer

        # Scatter the subset result back into a copy of the full-length mask
        # so the caller's array is left untouched.
        indexer = indexer.copy()
        indexer[indexer] = new_indexer
        return indexer

    if isinstance(key, slice):
        # handle a slice, returning a slice if we can
        # otherwise a boolean indexer
        step = key.step
        is_negative_step = step is not None and step < 0

        try:
            # Missing bounds default to the ends of the level; which end
            # depends on the direction of the step.
            if key.start is not None:
                start = level_index.get_loc(key.start)
            elif is_negative_step:
                start = len(level_index) - 1
            else:
                start = 0

            if key.stop is not None:
                stop = level_index.get_loc(key.stop)
            elif is_negative_step:
                stop = 0
            elif isinstance(start, slice):
                stop = len(level_index)
            else:
                stop = len(level_index) - 1
        except KeyError:
            # we have a partial slice (like looking up a partial date
            # string)
            start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
            step = start.step

        if isinstance(start, slice) or isinstance(stop, slice):
            # we have a slice for start and/or stop
            # a partial date slicer on a DatetimeIndex generates a slice
            # note that the stop ALREADY includes the stopped point (if
            # it was a string sliced)
            start = getattr(start, "start", start)
            stop = getattr(stop, "stop", stop)
            return convert_indexer(start, stop, step)

        elif level > 0 or self._lexsort_depth == 0 or step is not None:
            # need to have like semantics here to right
            # searching as when we are using a slice
            # so adjust the stop by 1 (so we include stop)
            stop = (stop - 1) if is_negative_step else (stop + 1)
            return convert_indexer(start, stop, step)
        else:
            # sorted, so can return slice object -> view
            i = algos.searchsorted(level_codes, start, side="left")
            j = algos.searchsorted(level_codes, stop, side="right")
            return slice(i, j, step)

    else:
        idx = self._get_loc_single_level_index(level_index, key)

        if level > 0 or self._lexsort_depth == 0:
            # Desired level is not sorted
            if isinstance(idx, slice):
                # test_get_loc_partial_timestamp_multiindex
                locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                return locs

            locs = np.array(level_codes == idx, dtype=bool, copy=False)

            if not locs.any():
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return locs

        # From here on the codes of this level are treated as sorted, so the
        # matching run can be found by binary search.
        if isinstance(idx, slice):
            # e.g. test_partial_string_timestamp_multiindex
            start = algos.searchsorted(level_codes, idx.start, side="left")
            # NB: "left" here bc of slice semantics
            end = algos.searchsorted(level_codes, idx.stop, side="left")
        else:
            start = algos.searchsorted(level_codes, idx, side="left")
            end = algos.searchsorted(level_codes, idx, side="right")

        if start == end:
            # The label is present in self.levels[level] but unused:
            raise KeyError(key)
        return slice(start, end)
def get_locs(self, seq):
    """
    Get location for a sequence of labels.

    Parameters
    ----------
    seq : label, slice, list, mask or a sequence of such
        You should use one of the above for each level.
        If a level should not be used, set it to ``slice(None)``.

    Returns
    -------
    numpy.ndarray
        NumPy array of integers suitable for passing to iloc.

    Raises
    ------
    UnsortedIndexError
        If a true slice is requested on a level at or beyond the lexsort
        depth of the index.
    NotImplementedError
        If ``seq`` contains ``Ellipsis``.
    ValueError
        If a boolean mask in ``seq`` does not have the length of the index.
    KeyError
        If combining a level's matches with the previous levels leaves
        nothing selected although that level matched on its own.

    See Also
    --------
    MultiIndex.get_loc : Get location for a label or a tuple of labels.
    MultiIndex.slice_locs : Get slice location given start label(s) and
                            end label(s).

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

    >>> mi.get_locs('b')  # doctest: +SKIP
    array([1, 2], dtype=int64)

    >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
    array([1, 2], dtype=int64)

    >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
    array([2], dtype=int64)
    """

    # must be lexsorted to at least as many levels
    true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
    if true_slices and true_slices[-1] >= self._lexsort_depth:
        raise UnsortedIndexError(
            "MultiIndex slicing requires the index to be lexsorted: slicing "
            f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
        )

    if any(x is Ellipsis for x in seq):
        raise NotImplementedError(
            "MultiIndex does not support indexing with Ellipsis"
        )

    n = len(self)

    def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
        # Expand a slice into a full-length boolean mask; anything that is
        # not a slice is passed through unchanged.
        if isinstance(indexer, slice):
            new_indexer = np.zeros(n, dtype=np.bool_)
            new_indexer[indexer] = True
            return new_indexer
        return indexer

    # a bool indexer for the positions we want to take
    indexer: npt.NDArray[np.bool_] | None = None

    for i, k in enumerate(seq):
        lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

        if com.is_bool_indexer(k):
            if len(k) != n:
                raise ValueError(
                    "cannot index with a boolean indexer that "
                    "is not the same length as the index"
                )
            lvl_indexer = np.asarray(k)

        elif is_list_like(k):
            # a collection of labels to include from this level (these are or'd)

            # GH#27591 check if this is a single tuple key in the level
            try:
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
            except (InvalidIndexError, TypeError, KeyError) as err:
                # InvalidIndexError e.g. non-hashable, fall back to treating
                # this as a sequence of labels
                # KeyError it can be ambiguous if this is a label or sequence
                # of labels
                # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                for x in k:
                    if not is_hashable(x):
                        # e.g. slice
                        raise err
                    # GH 39424: Ignore not founds
                    # GH 42351: No longer ignore not founds & enforced in 2.0
                    # TODO: how to handle IntervalIndex level? (no test cases)
                    item_indexer = self._get_level_indexer(
                        x, level=i, indexer=indexer
                    )
                    # OR the per-label result into the mask for this level
                    if lvl_indexer is None:
                        lvl_indexer = _to_bool_indexer(item_indexer)
                    elif isinstance(item_indexer, slice):
                        lvl_indexer[item_indexer] = True  # type: ignore[index]
                    else:
                        lvl_indexer |= item_indexer

            if lvl_indexer is None:
                # no matches we are done
                # test_loc_getitem_duplicates_multiindex_empty_indexer
                return np.array([], dtype=np.intp)

        elif com.is_null_slice(k):
            # empty slice
            if indexer is None and i == len(seq) - 1:
                # every entry of seq was a null slice: select all positions
                return np.arange(n, dtype=np.intp)
            continue

        else:
            # a slice or a single label
            lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

        # update indexer
        lvl_indexer = _to_bool_indexer(lvl_indexer)
        if indexer is None:
            indexer = lvl_indexer
        else:
            # AND this level's matches with those of the previous levels
            indexer &= lvl_indexer
            if not np.any(indexer) and np.any(lvl_indexer):
                raise KeyError(seq)

    # empty indexer
    if indexer is None:
        return np.array([], dtype=np.intp)

    pos_indexer = indexer.nonzero()[0]
    return self._reorder_indexer(seq, pos_indexer)
- # --------------------------------------------------------------------
def _reorder_indexer(
    self,
    seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
    indexer: npt.NDArray[np.intp],
) -> npt.NDArray[np.intp]:
    """
    Reorder an indexer of a MultiIndex (self) so that the labels are in the
    same order as given in seq

    Parameters
    ----------
    seq : label/slice/list/mask or a sequence of such
    indexer: a position indexer of self

    Returns
    -------
    indexer : a sorted position indexer of self ordered as seq
    """

    # check if sorting is necessary
    need_sort = False
    for i, k in enumerate(seq):
        if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
            # these key kinds never request a particular order
            pass
        elif is_list_like(k):
            if len(k) <= 1:  # type: ignore[arg-type]
                pass
            elif self._is_lexsorted():
                # If the index is lexsorted and the list_like label
                # in seq are sorted then we do not need to sort
                k_codes = self.levels[i].get_indexer(k)
                k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                # True if the given codes are not ordered
                need_sort = (k_codes[:-1] > k_codes[1:]).any()
            else:
                need_sort = True
        elif isinstance(k, slice):
            if self._is_lexsorted():
                # only a reversed slice changes the order of a sorted index
                need_sort = k.step is not None and k.step < 0
            else:
                need_sort = True
        else:
            need_sort = True
        if need_sort:
            break
    if not need_sort:
        return indexer

    n = len(self)
    keys: tuple[np.ndarray, ...] = ()
    # For each level of the sequence in seq, map the level codes with the
    # order they appears in a list-like sequence
    # This mapping is then use to reorder the indexer
    for i, k in enumerate(seq):
        if is_scalar(k):
            # GH#34603 we want to treat a scalar the same as an all equal list
            k = [k]
        if com.is_bool_indexer(k):
            new_order = np.arange(n)[indexer]
        elif is_list_like(k):
            # Generate a map with all level codes as sorted initially
            k = algos.unique(k)
            key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                self.levels[i]
            )
            # Set order as given in the indexer list
            level_indexer = self.levels[i].get_indexer(k)
            level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
            key_order_map[level_indexer] = np.arange(len(level_indexer))

            new_order = key_order_map[self.codes[i][indexer]]
        elif isinstance(k, slice) and k.step is not None and k.step < 0:
            # flip order for negative step
            new_order = np.arange(n)[::-1][indexer]
        elif isinstance(k, slice) and k.start is None and k.stop is None:
            # slice(None) should not determine order GH#31330
            new_order = np.ones((n,), dtype=np.intp)[indexer]
        else:
            # For all other case, use the same order as the level
            new_order = np.arange(n)[indexer]
        # np.lexsort treats the LAST key as the primary one, so prepending
        # keeps the first entry of seq as the most significant sort key.
        keys = (new_order,) + keys

    # Find the reordering using lexsort on the keys mapping
    ind = np.lexsort(keys)
    return indexer[ind]
def truncate(self, before=None, after=None) -> MultiIndex:
    """
    Slice index between two labels / tuples, return new MultiIndex.

    Parameters
    ----------
    before : label or tuple, can be partial. Default None
        None defaults to start.
    after : label or tuple, can be partial. Default None
        None defaults to end.

    Returns
    -------
    MultiIndex
        The truncated MultiIndex.

    Raises
    ------
    ValueError
        If both bounds are given and ``after < before``.

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
    >>> mi
    MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')],
               )
    >>> mi.truncate(before='a', after='b')
    MultiIndex([('a', 'x'), ('b', 'y')],
               )
    """
    # Test the bounds against None instead of relying on truthiness, so that
    # falsy labels such as 0 or "" still take part in the ordering check.
    if after is not None and before is not None and after < before:
        raise ValueError("after < before")

    # Bounds of the surviving entries inside the first level ...
    i, j = self.levels[0].slice_locs(before, after)
    # ... and of the surviving rows of the index itself.
    left, right = self.slice_locs(before, after)

    new_levels = list(self.levels)
    new_levels[0] = new_levels[0][i:j]

    new_codes = [level_codes[left:right] for level_codes in self.codes]
    # The first level lost its leading ``i`` entries, so shift its codes.
    new_codes[0] = new_codes[0] - i

    return MultiIndex(
        levels=new_levels,
        codes=new_codes,
        names=self._names,
        verify_integrity=False,
    )
def equals(self, other: object) -> bool:
    """
    Determines if two MultiIndex objects have the same labeling information
    (the levels themselves do not necessarily have to be the same)

    See Also
    --------
    equal_levels
    """
    if self.is_(other):
        return True

    if not isinstance(other, Index) or len(self) != len(other):
        return False

    if not isinstance(other, MultiIndex):
        # d-level MultiIndex can equal d-tuple Index
        if self._should_compare(other):
            return array_equivalent(self._values, other._values)
        # object Index or Categorical[object] may contain tuples
        return False

    if self.nlevels != other.nlevels:
        return False

    for lvl in range(self.nlevels):
        left_codes = self.codes[lvl]
        right_codes = other.codes[lvl]

        # code -1 marks a missing entry; both sides must be missing in the
        # same positions
        left_missing = left_codes == -1
        right_missing = right_codes == -1
        if not np.array_equal(left_missing, right_missing):
            return False

        left_values = self.levels[lvl]._values.take(left_codes[~left_missing])
        right_values = other.levels[lvl]._values.take(right_codes[~right_missing])

        # since we use NaT both datetime64 and timedelta64 we can have a
        # situation where a level is typed say timedelta64 in self (IOW it
        # has other values than NaT) but types datetime64 in other (where
        # its all NaT) but these are equivalent
        if len(left_values) == 0 and len(right_values) == 0:
            continue

        if not isinstance(left_values, np.ndarray):
            # i.e. ExtensionArray
            matches = left_values.equals(right_values)
        elif not isinstance(right_values, np.ndarray):
            # i.e. other is ExtensionArray
            matches = right_values.equals(left_values)
        else:
            matches = array_equivalent(left_values, right_values)

        if not matches:
            return False

    return True
def equal_levels(self, other: MultiIndex) -> bool:
    """
    Return True if the levels of both MultiIndex objects are the same
    """
    if self.nlevels != other.nlevels:
        return False
    # Compare level by level; stops at the first mismatch.
    return all(
        self.levels[pos].equals(other.levels[pos]) for pos in range(self.nlevels)
    )
- # --------------------------------------------------------------------
- # Set Methods
def _union(self, other, sort) -> MultiIndex:
    """
    Union of ``self`` and ``other``, optionally sorted.

    ``sort=False`` keeps the concatenation order, ``sort=True`` re-raises if
    the values cannot be ordered, any other value warns and returns the
    unsorted result in that case.
    """
    other, result_names = self._convert_can_do_setop(other)

    if not other.has_duplicates:
        # Without duplicates in other, appending the difference is faster
        # than the generic implementation.
        right_missing = other.difference(self, sort=False)
        if len(right_missing):
            combined = self.append(right_missing)
        else:
            combined = self._get_reconciled_name_object(other)

        if sort is False:
            return combined

        try:
            combined = combined.sort_values()
        except TypeError:
            if sort is True:
                raise
            warnings.warn(
                "The values in the array are unorderable. "
                "Pass `sort=False` to suppress this warning.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
        return combined

    # This is only necessary if other has dupes,
    # otherwise difference is faster
    combined = super()._union(other, sort)
    if isinstance(combined, MultiIndex):
        return combined
    return MultiIndex.from_arrays(zip(*combined), sortorder=None, names=result_names)
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
    """
    Return whether ``dtype`` is comparable with this index, which is the case
    exactly when it is an object dtype.
    """
    comparable = is_object_dtype(dtype)
    return comparable
def _get_reconciled_name_object(self, other) -> MultiIndex:
    """
    Return the object to use when a set operation's result is ``self``.

    ``self`` is returned as-is when its names already agree with the names
    reconciled against ``other``; otherwise a renamed shallow copy is
    returned.
    """
    reconciled = self._maybe_match_names(other)
    # error: Cannot determine type of "rename"
    return (
        self.rename(reconciled)  # type: ignore[has-type]
        if self.names != reconciled
        else self
    )
def _maybe_match_names(self, other):
    """
    Try to find common names to attach to the result of an operation between
    a and b. Return a consensus list of names if they match at least partly
    or list of None if they have completely different names.
    """
    own_names = self.names
    if len(own_names) != len(other.names):
        return [None] * len(own_names)

    # Keep a name only where both sides agree on it.
    # TODO: what if they both have np.nan for their names?
    return [
        mine if mine == theirs else None
        for mine, theirs in zip(own_names, other.names)
    ]
def _wrap_intersection_result(self, other, result) -> MultiIndex:
    """
    Attach the names reconciled between ``self`` and ``other`` to ``result``.
    """
    reconciled_names = self._convert_can_do_setop(other)[1]
    return result.set_names(reconciled_names)
def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex:
    """
    Attach the names reconciled between ``self`` and ``other`` to ``result``.

    An empty result additionally has its unused levels removed first.
    """
    reconciled_names = self._convert_can_do_setop(other)[1]
    trimmed = result.remove_unused_levels() if len(result) == 0 else result
    return trimmed.set_names(reconciled_names)
def _convert_can_do_setop(self, other):
    """
    Normalise ``other`` for a set operation and work out the result names.

    Returns
    -------
    tuple
        ``(other, result_names)``. An ``Index`` is passed through with the
        names agreed between both operands; an empty non-Index yields an
        empty slice of ``self``; any other input is converted with
        ``MultiIndex.from_tuples`` and keeps the names of ``self``.

    Raises
    ------
    TypeError
        If a non-Index ``other`` cannot be converted from tuples.
    """
    if isinstance(other, Index):
        return other, get_unanimous_names(self, other)

    if len(other) == 0:
        return self[:0], self.names

    try:
        converted = MultiIndex.from_tuples(other, names=self.names)
    except (ValueError, TypeError) as err:
        # ValueError raised by tuples_to_object_array if we
        # have non-object dtype
        raise TypeError("other must be a MultiIndex or a list of tuples") from err
    return converted, self.names
- # --------------------------------------------------------------------
@doc(Index.astype)
def astype(self, dtype, copy: bool = True):
    # NOTE: no docstring here on purpose; the text comes from ``Index.astype``
    # through the ``doc`` decorator.
    target = pandas_dtype(dtype)

    if is_categorical_dtype(target):
        raise NotImplementedError(
            "> 1 ndim Categorical are not supported at this time"
        )

    if is_object_dtype(target):
        # object is the only dtype accepted; hand back a view when a copy
        # was requested, otherwise the index itself
        return self._view() if copy is True else self

    raise TypeError(
        "Setting a MultiIndex dtype to anything other than object "
        "is not supported"
    )
def _validate_fill_value(self, item):
    """
    Coerce ``item`` into something that fits the number of levels.

    A MultiIndex yields its ``_values``, a tuple is returned unchanged, and
    any other object becomes the first entry of a tuple padded with empty
    strings.

    Raises
    ------
    ValueError
        If a MultiIndex or tuple does not match ``self.nlevels``.
    """
    if isinstance(item, MultiIndex):
        # GH#43212
        if item.nlevels == self.nlevels:
            return item._values
        raise ValueError("Item must have length equal to number of levels.")

    if isinstance(item, tuple):
        if len(item) != self.nlevels:
            raise ValueError("Item must have length equal to number of levels.")
        return item

    # Pad the key with empty strings if lower levels of the key
    # aren't specified:
    padding = ("",) * (self.nlevels - 1)
    return (item,) + padding
def putmask(self, mask, value: MultiIndex) -> MultiIndex:
    """
    Return a new MultiIndex of the values set with the mask.

    Parameters
    ----------
    mask : array like
    value : MultiIndex
        Must either be the same length as self or length one

    Returns
    -------
    MultiIndex
    """
    mask, noop = validate_putmask(self, mask)
    if noop:
        return self.copy()

    # A value as long as the mask is filtered by it; otherwise it is used whole.
    source = value[mask] if len(mask) == len(value) else value
    subset = source.remove_unused_levels()

    merged_levels = []
    merged_codes = []
    for pos, (incoming_level, own_level, own_codes) in enumerate(
        zip(subset.levels, self.levels, self.codes)
    ):
        # Extend the existing level with any labels that only occur in value,
        # then express the replacement labels as codes into that level.
        combined_level = own_level.union(incoming_level, sort=False)
        replacement_codes = combined_level.get_indexer_for(
            subset.get_level_values(pos)
        )
        updated_codes = ensure_int64(own_codes)
        updated_codes[mask] = replacement_codes

        merged_levels.append(combined_level)
        merged_codes.append(updated_codes)

    return MultiIndex(
        levels=merged_levels,
        codes=merged_codes,
        names=self.names,
        verify_integrity=False,
    )
def insert(self, loc: int, item) -> MultiIndex:
    """
    Make new MultiIndex inserting new item at location

    Parameters
    ----------
    loc : int
    item : tuple
        Must be same length as number of levels in the MultiIndex

    Returns
    -------
    new_index : Index
    """
    item = self._validate_fill_value(item)

    grown_levels = []
    grown_codes = []
    for label, lev, codes in zip(item, self.levels, self.codes):
        if label in lev:
            code = lev.get_loc(label)
        else:
            # have to insert into level
            # must insert at end otherwise you have to recompute all the
            # other codes
            code = len(lev)
            lev = lev.insert(code, label)

        grown_levels.append(lev)
        grown_codes.append(np.insert(ensure_int64(codes), loc, code))

    return MultiIndex(
        levels=grown_levels,
        codes=grown_codes,
        names=self.names,
        verify_integrity=False,
    )
def delete(self, loc) -> MultiIndex:
    """
    Make new index with passed location deleted

    Returns
    -------
    new_index : MultiIndex
    """
    # The levels are reused untouched; only the codes lose the entries at loc.
    remaining_codes = []
    for codes in self.codes:
        remaining_codes.append(np.delete(codes, loc))

    return MultiIndex(
        levels=self.levels,
        codes=remaining_codes,
        names=self.names,
        verify_integrity=False,
    )
@doc(Index.isin)
def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
    # NOTE: no docstring here on purpose; the text comes from ``Index.isin``
    # through the ``doc`` decorator.
    if isinstance(values, Generator):
        # a generator can only be consumed once and has no len()
        values = list(values)

    if level is not None:
        # membership test restricted to the labels of one level
        level_number = self._get_level_number(level)
        level_values = self.get_level_values(level_number)
        if level_values.size == 0:
            return np.zeros(len(level_values), dtype=np.bool_)
        return level_values.isin(values)

    if len(values) == 0:
        return np.zeros((len(self),), dtype=np.bool_)

    candidates = (
        values if isinstance(values, MultiIndex) else MultiIndex.from_tuples(values)
    )
    return candidates.unique().get_indexer_for(self) != -1
# ``rename`` is bound to the very same function object as ``Index.set_names``.
# error: Incompatible types in assignment (expression has type overloaded function,
# base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
rename = Index.set_names  # type: ignore[assignment]

# ---------------------------------------------------------------
# Arithmetic/Numeric Methods - Disabled
# Every operator below is replaced by whatever ``make_invalid_op`` returns for
# that operator name.
# NOTE(review): presumably a stub that rejects the operation when called;
# confirm against the definition of ``make_invalid_op``.

__add__ = make_invalid_op("__add__")
__radd__ = make_invalid_op("__radd__")
__iadd__ = make_invalid_op("__iadd__")
__sub__ = make_invalid_op("__sub__")
__rsub__ = make_invalid_op("__rsub__")
__isub__ = make_invalid_op("__isub__")
__pow__ = make_invalid_op("__pow__")
__rpow__ = make_invalid_op("__rpow__")
__mul__ = make_invalid_op("__mul__")
__rmul__ = make_invalid_op("__rmul__")
__floordiv__ = make_invalid_op("__floordiv__")
__rfloordiv__ = make_invalid_op("__rfloordiv__")
__truediv__ = make_invalid_op("__truediv__")
__rtruediv__ = make_invalid_op("__rtruediv__")
__mod__ = make_invalid_op("__mod__")
__rmod__ = make_invalid_op("__rmod__")
__divmod__ = make_invalid_op("__divmod__")
__rdivmod__ = make_invalid_op("__rdivmod__")
# Unary methods disabled
__neg__ = make_invalid_op("__neg__")
__pos__ = make_invalid_op("__pos__")
__abs__ = make_invalid_op("__abs__")
__invert__ = make_invalid_op("__invert__")
def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
    """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
    as_int64 = [ensure_int64(level_codes) for level_codes in codes]
    # Try the deepest prefix first and shrink until one is lexsorted.
    return next(
        (
            depth
            for depth in range(nlevels, 0, -1)
            if libalgos.is_lexsorted(as_int64[:depth])
        ),
        0,
    )
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """
    Blank out labels that repeat the row above, for sparse display.

    Parameters
    ----------
    label_list : sequence of sequences
        One sequence of labels per level, all describing the same rows.
    start : int, default 0
        Rows up to and including this position are kept verbatim.
    sentinel : object, default ""
        Value written in place of a label equal to the one in the row above.

    Returns
    -------
    list of tuple
        One tuple per level, with repeated leading labels replaced by
        ``sentinel``. The label of the last level is always kept.
    """
    pivoted = list(zip(*label_list))
    k = len(label_list)

    if not pivoted:
        # No rows at all: there is nothing to compare, and indexing
        # ``pivoted[start]`` below would raise IndexError.
        return [() for _ in label_list]
    if start >= len(pivoted):
        # Every row lies inside the verbatim prefix.
        return list(zip(*pivoted))

    result = pivoted[: start + 1]
    prev = pivoted[start]

    for cur in pivoted[start + 1 :]:
        sparse_cur = []

        for i, (p, t) in enumerate(zip(prev, cur)):
            if i == k - 1:
                # the innermost level is never blanked out
                sparse_cur.append(t)
                result.append(sparse_cur)
                break

            if p == t:
                sparse_cur.append(sentinel)
            else:
                # first level that differs: keep it and everything below it
                sparse_cur.extend(cur[i:])
                result.append(sparse_cur)
                break

        prev = cur

    return list(zip(*result))
def _get_na_rep(dtype) -> str:
    """
    Return the text used to display a missing value of ``dtype``.

    Extension dtypes use the string form of their ``na_value``; datetime64
    and timedelta64 give "NaT"; everything else gives "NaN".
    """
    if is_extension_array_dtype(dtype):
        return f"{dtype.na_value}"

    scalar_type = dtype.type
    if scalar_type in (np.datetime64, np.timedelta64):
        return "NaT"
    return "NaN"
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    # One leading level is dropped per entry of a tuple key, exactly one for
    # any other key.
    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.
    drops = len(key) if isinstance(key, tuple) else 1

    trimmed = index
    for _ in range(drops):
        try:
            trimmed = trimmed._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out
            return index
    return trimmed
def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
    """
    Coerce the array-like indexer to the smallest integer dtype that can encode all
    of the given categories.

    Parameters
    ----------
    array_like : array-like
    categories : array-like
    copy : bool

    Returns
    -------
    np.ndarray
        Non-writeable.
    """
    coerced = coerce_indexer_dtype(array_like, categories)
    frozen = coerced.copy() if copy else coerced
    # Flip the write flag on whichever array is handed back.
    frozen.flags.writeable = False
    return frozen
def _require_listlike(level, arr, arrname: str):
    """
    Ensure that level is either None or listlike, and arr is list-of-listlike.
    """
    if level is not None and not is_list_like(level):
        # A single level was given, so arr must be one flat list-like.
        if not is_list_like(arr) or (len(arr) > 0 and is_list_like(arr[0])):
            raise TypeError(f"{arrname} must be list-like")
        return [level], [arr]

    # level is None or list-like here, so arr must hold one list-like per level.
    if not is_list_like(arr) or not is_list_like(arr[0]):
        raise TypeError(f"{arrname} must be list of lists-like")
    return level, arr
|