base.py

from __future__ import annotations

from datetime import datetime
import functools
from itertools import zip_longest
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Hashable,
    Iterable,
    Literal,
    NoReturn,
    Sequence,
    TypeVar,
    cast,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._config import get_option

from pandas._libs import (
    NaT,
    algos as libalgos,
    index as libindex,
    lib,
)
from pandas._libs.internals import BlockValuesRefs
import pandas._libs.join as libjoin
from pandas._libs.lib import (
    is_datetime_array,
    no_default,
)
from pandas._libs.missing import is_float_nan
from pandas._libs.tslibs import (
    IncompatibleFrequency,
    OutOfBoundsDatetime,
    Timestamp,
    tz_compare,
)
from pandas._typing import (
    AnyAll,
    ArrayLike,
    Axes,
    Axis,
    DropKeep,
    DtypeObj,
    F,
    IgnoreRaise,
    IndexLabel,
    JoinHow,
    Level,
    Shape,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
    DuplicateLabelError,
    InvalidIndexError,
)
from pandas.util._decorators import (
    Appender,
    cache_readonly,
    doc,
)
from pandas.util._exceptions import (
    find_stack_level,
    rewrite_exception,
)

from pandas.core.dtypes.astype import (
    astype_array,
    astype_is_view,
)
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    common_dtype_categorical_compat,
    find_result_type,
    infer_dtype_from,
    maybe_cast_pointwise_result,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    ensure_int64,
    ensure_object,
    ensure_platform_int,
    is_any_real_numeric_dtype,
    is_bool_dtype,
    is_categorical_dtype,
    is_dtype_equal,
    is_ea_or_datetimelike_dtype,
    is_extension_array_dtype,
    is_float,
    is_float_dtype,
    is_hashable,
    is_integer,
    is_integer_dtype,
    is_interval_dtype,
    is_iterator,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
    is_scalar,
    is_signed_integer_dtype,
    is_string_dtype,
    needs_i8_conversion,
    pandas_dtype,
    validate_all_hashable,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    PeriodDtype,
)
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCDatetimeIndex,
    ABCMultiIndex,
    ABCPeriodIndex,
    ABCSeries,
    ABCTimedeltaIndex,
)
from pandas.core.dtypes.inference import is_dict_like
from pandas.core.dtypes.missing import (
    array_equivalent,
    is_valid_na_for_dtype,
    isna,
)

from pandas.core import (
    arraylike,
    ops,
)
from pandas.core.accessor import CachedAccessor
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.arrays import (
    ArrowExtensionArray,
    BaseMaskedArray,
    Categorical,
    ExtensionArray,
)
from pandas.core.arrays.string_ import StringArray
from pandas.core.base import (
    IndexOpsMixin,
    PandasObject,
)
import pandas.core.common as com
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
    sanitize_array,
)
from pandas.core.indexers import disallow_ndim_indexing
from pandas.core.indexes.frozen import FrozenList
from pandas.core.missing import clean_reindex_fill_method
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
from pandas.core.sorting import (
    ensure_key_mapped,
    get_group_index_sorter,
    nargsort,
)
from pandas.core.strings.accessor import StringMethods

from pandas.io.formats.printing import (
    PrettyDict,
    default_pprint,
    format_object_summary,
    pprint_thing,
)

if TYPE_CHECKING:
    from pandas import (
        CategoricalIndex,
        DataFrame,
        MultiIndex,
        Series,
    )
    from pandas.core.arrays import PeriodArray

__all__ = ["Index"]
_unsortable_types = frozenset(("mixed", "mixed-integer"))

_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
_index_shared_docs: dict[str, str] = {}
str_t = str

_dtype_obj = np.dtype("object")

_masked_engines = {
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}
def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
        if not return_indexers:
            return join_index

        if lidx is not None:
            lidx = ensure_platform_int(lidx)
        if ridx is not None:
            ridx = ensure_platform_int(ridx)
        return join_index, lidx, ridx

    return cast(F, join)
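
# Illustrative sketch of how a ``join`` decorated this way behaves with and
# without ``return_indexers`` (toy inputs; exact indexer dtypes and reprs may
# vary by pandas version):
#
# >>> left = pd.Index([1, 2, 3])
# >>> right = pd.Index([2, 3, 4])
# >>> left.join(right, how="inner")
# Index([2, 3], dtype='int64')
# >>> left.join(right, how="inner", return_indexers=True)
# (Index([2, 3], dtype='int64'), array([1, 2]), array([0, 1]))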
def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)
_IndexT = TypeVar("_IndexT", bound="Index")


class Index(IndexOpsMixin, PandasObject):
    """
    Immutable sequence used for indexing and alignment.

    The basic object storing axis labels for all pandas objects.

    .. versionchanged:: 2.0.0

       Index can hold all numpy numeric dtypes (except float16). Previously only
       int64/uint64/float64 dtypes were accepted.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: object)
        If dtype is None, we find the dtype that best fits the data.
        If an actual dtype is provided, we coerce to that dtype if it's safe.
        Otherwise, an error will be raised.
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.
    tupleize_cols : bool (default: True)
        When True, attempt to create a MultiIndex if possible.

    See Also
    --------
    RangeIndex : Index implementing a monotonic integer range.
    CategoricalIndex : Index of :class:`Categorical` s.
    MultiIndex : A multi-level, or hierarchical Index.
    IntervalIndex : An Index of :class:`Interval` s.
    DatetimeIndex : Index of datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.

    Notes
    -----
    An Index instance can **only** contain hashable objects.
    An Index instance *can not* hold numpy float16 dtype.

    Examples
    --------
    >>> pd.Index([1, 2, 3])
    Index([1, 2, 3], dtype='int64')

    >>> pd.Index(list('abc'))
    Index(['a', 'b', 'c'], dtype='object')

    >>> pd.Index([1, 2, 3], dtype="uint8")
    Index([1, 2, 3], dtype='uint8')
    """
    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    #  for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    #  given the dtypes of the passed arguments
    @final
    def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        # similar but not identical to ov.searchsorted(sv)
        return libjoin.left_join_indexer_unique(sv, ov)

    @final
    def _left_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _inner_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _outer_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    _typ: str = "index"
    _data: ExtensionArray | np.ndarray
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]

    @cache_readonly
    def _can_hold_strings(self) -> bool:
        return not is_numeric_dtype(self)

    _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
        np.dtype(np.complex64): libindex.Complex64Engine,
        np.dtype(np.complex128): libindex.Complex128Engine,
    }

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
        return self._engine_types.get(self.dtype, libindex.ObjectEngine)

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    str = CachedAccessor("str", StringMethods)

    _references = None

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Index:
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in ["i", "u", "f", "b", "c", "m", "M"]:
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            #  builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            if tupleize_cols:
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(data, names=name)
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                #  cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        return klass._simple_new(arr, name, refs=refs)
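
    # Illustrative sketch of the constructor dispatch above (toy inputs; exact
    # reprs may vary by pandas version):
    #
    # >>> pd.Index(range(5))                  # range fastpath -> RangeIndex
    # RangeIndex(start=0, stop=5, step=1)
    # >>> pd.Index([("a", 1), ("b", 2)])      # all-tuple data -> MultiIndex
    # MultiIndex([('a', 1),
    #             ('b', 2)],
    #            )
    # >>> pd.Index(np.array([1, 2, 3]), dtype="float64")
    # Index([1.0, 2.0, 3.0], dtype='float64')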
    @classmethod
    def _ensure_array(cls, data, dtype, copy: bool):
        """
        Ensure we have a valid array to pass to _simple_new.
        """
        if data.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")
        elif dtype == np.float16:
            # float16 not supported (no indexing engine)
            raise NotImplementedError("float16 indexes are not supported")

        if copy:
            # asarray_tuplesafe does not always copy underlying data,
            #  so need to make sure that this happens
            data = data.copy()
        return data

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex

                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
                from pandas import CategoricalIndex

                return CategoricalIndex
            elif isinstance(dtype, IntervalDtype):
                from pandas import IntervalIndex

                return IntervalIndex
            elif isinstance(dtype, PeriodDtype):
                from pandas import PeriodIndex

                return PeriodIndex

            return Index

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif dtype.kind == "O":
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
            return Index

        raise NotImplementedError(dtype)
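
    # Illustrative sketch of the dtype -> subclass mapping implemented above
    # (module paths shown are indicative; exact reprs may vary by version):
    #
    # >>> Index._dtype_to_subclass(np.dtype("datetime64[ns]"))
    # <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
    # >>> Index._dtype_to_subclass(np.dtype("timedelta64[ns]"))
    # <class 'pandas.core.indexes.timedeltas.TimedeltaIndex'>
    # >>> Index._dtype_to_subclass(np.dtype("float64"))
    # <class 'pandas.core.indexes.base.Index'>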
    # NOTE for new Index creation:

    # - _simple_new: It returns new Index with the same type as the caller.
    #   All metadata (such as name) must be provided by caller's responsibility.
    #   Using _shallow_copy is recommended because it fills these metadata
    #   otherwise specified.

    # - _shallow_copy: It returns new Index with the same type (using
    #   _simple_new), but fills caller's metadata otherwise specified. Passed
    #   kwargs will overwrite corresponding metadata.

    # See each method's docstring.

    @classmethod
    def _simple_new(
        cls: type[_IndexT], values: ArrayLike, name: Hashable = None, refs=None
    ) -> _IndexT:
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.
        """
        assert isinstance(values, cls._data_cls), type(values)

        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._cache = {}
        result._reset_identity()
        if refs is not None:
            result._references = refs
        else:
            result._references = BlockValuesRefs()
        result._references.add_index_reference(result)

        return result

    @classmethod
    def _with_infer(cls, *args, **kwargs):
        """
        Constructor that uses the 1.0.x behavior inferring numeric dtypes
        for ndarray[object] inputs.
        """
        result = cls(*args, **kwargs)

        if result.dtype == _dtype_obj and not result._is_multi:
            # error: Argument 1 to "maybe_convert_objects" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
            # "ndarray[Any, Any]"
            values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
            if values.dtype.kind in ["i", "u", "f", "b"]:
                return Index(values, name=result.name)

        return result

    @cache_readonly
    def _constructor(self: _IndexT) -> type[_IndexT]:
        return type(self)

    @final
    def _maybe_check_unique(self) -> None:
        """
        Check that an Index has no duplicates.

        This is typically only called via
        `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
        True (duplicates aren't allowed).

        Raises
        ------
        DuplicateLabelError
            When the index is not unique.
        """
        if not self.is_unique:
            msg = """Index has duplicates."""
            duplicates = self._format_duplicate_message()
            msg += f"\n{duplicates}"

            raise DuplicateLabelError(msg)
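
    # Illustrative sketch of how this check surfaces to users, via the
    # ``allows_duplicate_labels`` flag (exact error text may vary by version):
    #
    # >>> pd.Series([1, 2], index=["a", "a"]).set_flags(allows_duplicate_labels=False)
    # Traceback (most recent call last):
    #     ...
    # pandas.errors.DuplicateLabelError: Index has duplicates.
    #       positions
    # label
    # a        [0, 1]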
    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
            positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")
    # --------------------------------------------------------------------
    # Index Internals Methods

    def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
        """
        Create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        name : Label, defaults to self.name
        """
        name = self._name if name is no_default else name

        return self._simple_new(values, name=name, refs=self._references)

    def _view(self: _IndexT) -> _IndexT:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name, refs=self._references)

        result._cache = self._cache
        return result

    @final
    def _rename(self: _IndexT, name: Hashable) -> _IndexT:
        """
        fastpath for rename if new name is already validated.
        """
        result = self._view()
        result._name = name
        return result

    @final
    def is_(self, other) -> bool:
        """
        More flexible, faster check like ``is`` but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            Other object to compare against.

        Returns
        -------
        bool
            True if both have same underlying data, False otherwise.

        See Also
        --------
        Index.identical : Works like ``Index.is_`` but also checks metadata.
        """
        if self is other:
            return True
        elif not hasattr(other, "_id"):
            return False
        elif self._id is None or other._id is None:
            return False
        else:
            return self._id is other._id
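
    # Illustrative sketch of ``is_`` versus ``identical`` (toy inputs; behavior
    # as documented above):
    #
    # >>> idx = pd.Index(["a", "b", "c"])
    # >>> idx.is_(idx.view())     # a view shares the underlying data/identity
    # True
    # >>> idx.is_(idx.copy())     # a copy gets a fresh identity
    # False
    # >>> idx.identical(idx.view().rename("x"))   # identical also compares names
    # False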
    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        self._id = object()

    @final
    def _cleanup(self) -> None:
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)

        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]

    @final
    @cache_readonly
    def _dir_additions_for_owner(self) -> set[str_t]:
        """
        Add the string-like labels to the owner dataframe/series dir output.

        If this is a MultiIndex, its first-level values are used.
        """
        return {
            c
            for c in self.unique(level=0)[: get_option("display.max_dir_items")]
            if isinstance(c, str) and c.isidentifier()
        }
  739. # --------------------------------------------------------------------
  740. # Array-Like Methods
  741. # ndarray compat
  742. def __len__(self) -> int:
  743. """
  744. Return the length of the Index.
  745. """
  746. return len(self._data)
  747. def __array__(self, dtype=None) -> np.ndarray:
  748. """
  749. The array interface, return my values.
  750. """
  751. return np.asarray(self._data, dtype=dtype)
  752. def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
  753. if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
  754. return NotImplemented
  755. result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
  756. self, ufunc, method, *inputs, **kwargs
  757. )
  758. if result is not NotImplemented:
  759. return result
  760. if "out" in kwargs:
  761. # e.g. test_dti_isub_tdi
  762. return arraylike.dispatch_ufunc_with_out(
  763. self, ufunc, method, *inputs, **kwargs
  764. )
  765. if method == "reduce":
  766. result = arraylike.dispatch_reduction_ufunc(
  767. self, ufunc, method, *inputs, **kwargs
  768. )
  769. if result is not NotImplemented:
  770. return result
  771. new_inputs = [x if x is not self else x._values for x in inputs]
  772. result = getattr(ufunc, method)(*new_inputs, **kwargs)
  773. if ufunc.nout == 2:
  774. # i.e. np.divmod, np.modf, np.frexp
  775. return tuple(self.__array_wrap__(x) for x in result)
  776. if result.dtype == np.float16:
  777. result = result.astype(np.float32)
  778. return self.__array_wrap__(result)
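# Illustrative sketch (not part of the original source), assuming
# ``import numpy as np; import pandas as pd`` as in the surrounding doctests:
# NumPy ufuncs applied to an Index are dispatched through __array_ufunc__
# above and re-wrapped as an Index by __array_wrap__.
# >>> np.sqrt(pd.Index([1, 4, 9]))
# Index([1.0, 2.0, 3.0], dtype='float64')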
  779. def __array_wrap__(self, result, context=None):
  780. """
  781. Gets called after a ufunc and other functions e.g. np.split.
  782. """
  783. result = lib.item_from_zerodim(result)
  784. if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
  785. return result
  786. return Index(result, name=self.name)
  787. @cache_readonly
  788. def dtype(self) -> DtypeObj:
  789. """
  790. Return the dtype object of the underlying data.
  791. """
  792. return self._data.dtype
  793. @final
  794. def ravel(self, order: str_t = "C") -> Index:
  795. """
  796. Return a view on self.
  797. Returns
  798. -------
  799. Index
  800. See Also
  801. --------
  802. numpy.ndarray.ravel : Return a flattened array.
  803. """
  804. return self[:]
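# Illustrative sketch (not part of the original source): ravel is equivalent
# to ``self[:]``, so the result is a new Index equal to the caller.
# >>> idx = pd.Index([1, 2, 3])
# >>> idx.ravel().equals(idx)
# True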
  805. def view(self, cls=None):
  806. # we need to see if we are subclassing an
  807. # index type here
  808. if cls is not None and not hasattr(cls, "_typ"):
  809. dtype = cls
  810. if isinstance(cls, str):
  811. dtype = pandas_dtype(cls)
  812. if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
  813. dtype
  814. ):
  815. if dtype.kind == "m" and dtype != "m8[ns]":
  816. # e.g. m8[s]
  817. return self._data.view(cls)
  818. idx_cls = self._dtype_to_subclass(dtype)
  819. # NB: we only get here for subclasses that override
  820. # _data_cls such that it is a type and not a tuple
  821. # of types.
  822. arr_cls = idx_cls._data_cls
  823. arr = arr_cls(self._data.view("i8"), dtype=dtype)
  824. return idx_cls._simple_new(arr, name=self.name, refs=self._references)
  825. result = self._data.view(cls)
  826. else:
  827. result = self._view()
  828. if isinstance(result, Index):
  829. result._id = self._id
  830. return result
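# Illustrative sketch (not part of the original source): a bare ``view()``
# returns a new Index object backed by the same underlying data.
# >>> idx = pd.Index([1, 2, 3], name="a")
# >>> idx.view() is idx
# False
# >>> idx.view().equals(idx)
# True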
  831. def astype(self, dtype, copy: bool = True):
  832. """
  833. Create an Index with values cast to dtypes.
  834. The class of a new Index is determined by dtype. When conversion is
  835. impossible, a TypeError exception is raised.
  836. Parameters
  837. ----------
  838. dtype : numpy dtype or pandas type
  839. Note that any signed integer `dtype` is treated as ``'int64'``,
  840. and any unsigned integer `dtype` is treated as ``'uint64'``,
  841. regardless of the size.
  842. copy : bool, default True
  843. By default, astype always returns a newly allocated object.
  844. If copy is set to False and internal requirements on dtype are
  845. satisfied, the original data is used to create a new Index
  846. or the original Index is returned.
  847. Returns
  848. -------
  849. Index
  850. Index with values cast to specified dtype.
  851. """
  852. if dtype is not None:
  853. dtype = pandas_dtype(dtype)
  854. if is_dtype_equal(self.dtype, dtype):
  855. # Ensure that self.astype(self.dtype) is self
  856. return self.copy() if copy else self
  857. values = self._data
  858. if isinstance(values, ExtensionArray):
  859. with rewrite_exception(type(values).__name__, type(self).__name__):
  860. new_values = values.astype(dtype, copy=copy)
  861. elif isinstance(dtype, ExtensionDtype):
  862. cls = dtype.construct_array_type()
  863. # Note: for RangeIndex and CategoricalDtype self vs self._values
  864. # behaves differently here.
  865. new_values = cls._from_sequence(self, dtype=dtype, copy=copy)
  866. else:
  867. # GH#13149 specifically use astype_array instead of astype
  868. new_values = astype_array(values, dtype=dtype, copy=copy)
  869. # pass copy=False because any copying will be done in the astype above
  870. result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
  871. if (
  872. not copy
  873. and self._references is not None
  874. and astype_is_view(self.dtype, dtype)
  875. ):
  876. result._references = self._references
  877. result._references.add_index_reference(result)
  878. return result
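# Illustrative sketch (not part of the original source): casting an integer
# Index to float allocates a new Index with the requested dtype.
# >>> pd.Index([1, 2, 3]).astype("float64")
# Index([1.0, 2.0, 3.0], dtype='float64')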
  879. _index_shared_docs[
  880. "take"
  881. ] = """
  882. Return a new %(klass)s of the values selected by the indices.
  883. For internal compatibility with numpy arrays.
  884. Parameters
  885. ----------
  886. indices : array-like
  887. Indices to be taken.
  888. axis : int, optional
  889. The axis over which to select values, always 0.
  890. allow_fill : bool, default True
  891. fill_value : scalar, default None
  892. If allow_fill=True and fill_value is not None, indices specified by
  893. -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.
  894. Returns
  895. -------
  896. Index
  897. An index formed of elements at the given indices. Will be the same
  898. type as self, except for RangeIndex.
  899. See Also
  900. --------
  901. numpy.ndarray.take: Return an array formed from the
  902. elements of a at the given indices.
  903. """
  904. @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
  905. def take(
  906. self,
  907. indices,
  908. axis: Axis = 0,
  909. allow_fill: bool = True,
  910. fill_value=None,
  911. **kwargs,
  912. ):
  913. if kwargs:
  914. nv.validate_take((), kwargs)
  915. if is_scalar(indices):
  916. raise TypeError("Expected indices to be array-like")
  917. indices = ensure_platform_int(indices)
  918. allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)
  919. # Note: we discard fill_value and use self._na_value, only relevant
  920. # in the case where allow_fill is True and fill_value is not None
  921. values = self._values
  922. if isinstance(values, np.ndarray):
  923. taken = algos.take(
  924. values, indices, allow_fill=allow_fill, fill_value=self._na_value
  925. )
  926. else:
  927. # algos.take passes 'axis' keyword which not all EAs accept
  928. taken = values.take(
  929. indices, allow_fill=allow_fill, fill_value=self._na_value
  930. )
  931. # _constructor so RangeIndex-> Index with an int64 dtype
  932. return self._constructor._simple_new(taken, name=self.name)
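# Illustrative sketch (not part of the original source): take selects labels
# by position; with allow_fill=True and a non-None fill_value, -1 marks
# positions to fill.
# >>> pd.Index(['a', 'b', 'c']).take([2, 0, 1])
# Index(['c', 'a', 'b'], dtype='object')
# >>> pd.Index([1.0, 2.0]).take([0, -1], allow_fill=True, fill_value=np.nan)
# Index([1.0, nan], dtype='float64')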
  933. @final
  934. def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
  935. """
  936. We only use pandas-style take when allow_fill is True _and_
  937. fill_value is not None.
  938. """
  939. if allow_fill and fill_value is not None:
  940. # only fill if we are passing a non-None fill_value
  941. if self._can_hold_na:
  942. if (indices < -1).any():
  943. raise ValueError(
  944. "When allow_fill=True and fill_value is not None, "
  945. "all indices must be >= -1"
  946. )
  947. else:
  948. cls_name = type(self).__name__
  949. raise ValueError(
  950. f"Unable to fill values because {cls_name} cannot contain NA"
  951. )
  952. else:
  953. allow_fill = False
  954. return allow_fill
  955. _index_shared_docs[
  956. "repeat"
  957. ] = """
  958. Repeat elements of a %(klass)s.
  959. Returns a new %(klass)s where each element of the current %(klass)s
  960. is repeated consecutively a given number of times.
  961. Parameters
  962. ----------
  963. repeats : int or array of ints
  964. The number of repetitions for each element. This should be a
  965. non-negative integer. Repeating 0 times will return an empty
  966. %(klass)s.
  967. axis : None
  968. Must be ``None``. Has no effect but is accepted for compatibility
  969. with numpy.
  970. Returns
  971. -------
  972. %(klass)s
  973. Newly created %(klass)s with repeated elements.
  974. See Also
  975. --------
  976. Series.repeat : Equivalent function for Series.
  977. numpy.repeat : Similar method for :class:`numpy.ndarray`.
  978. Examples
  979. --------
  980. >>> idx = pd.Index(['a', 'b', 'c'])
  981. >>> idx
  982. Index(['a', 'b', 'c'], dtype='object')
  983. >>> idx.repeat(2)
  984. Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
  985. >>> idx.repeat([1, 2, 3])
  986. Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
  987. """
  988. @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
  989. def repeat(self, repeats, axis=None):
  990. repeats = ensure_platform_int(repeats)
  991. nv.validate_repeat((), {"axis": axis})
  992. res_values = self._values.repeat(repeats)
  993. # _constructor so RangeIndex-> Index with an int64 dtype
  994. return self._constructor._simple_new(res_values, name=self.name)
  995. # --------------------------------------------------------------------
  996. # Copying Methods
  997. def copy(
  998. self: _IndexT,
  999. name: Hashable | None = None,
  1000. deep: bool = False,
  1001. ) -> _IndexT:
  1002. """
  1003. Make a copy of this object.
  1004. Name is set on the new object.
  1005. Parameters
  1006. ----------
  1007. name : Label, optional
  1008. Set name for new object.
  1009. deep : bool, default False
  1010. Returns
  1011. -------
  1012. Index
1013. Index refers to a new object which is a copy of this object.
  1014. Notes
  1015. -----
  1016. In most cases, there should be no functional difference from using
  1017. ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
  1018. """
  1019. name = self._validate_names(name=name, deep=deep)[0]
  1020. if deep:
  1021. new_data = self._data.copy()
  1022. new_index = type(self)._simple_new(new_data, name=name)
  1023. else:
  1024. new_index = self._rename(name=name)
  1025. return new_index
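# Illustrative sketch (not part of the original source): copy returns a new
# Index; passing ``name`` sets it on the copy without touching the original.
# >>> idx = pd.Index(['a', 'b', 'c'])
# >>> idx.copy(name='letters').name, idx.name
# ('letters', None)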
  1026. @final
  1027. def __copy__(self: _IndexT, **kwargs) -> _IndexT:
  1028. return self.copy(**kwargs)
  1029. @final
  1030. def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
  1031. """
  1032. Parameters
  1033. ----------
  1034. memo, default None
  1035. Standard signature. Unused
  1036. """
  1037. return self.copy(deep=True)
  1038. # --------------------------------------------------------------------
  1039. # Rendering Methods
  1040. @final
  1041. def __repr__(self) -> str_t:
  1042. """
  1043. Return a string representation for this object.
  1044. """
  1045. klass_name = type(self).__name__
  1046. data = self._format_data()
  1047. attrs = self._format_attrs()
  1048. space = self._format_space()
  1049. attrs_str = [f"{k}={v}" for k, v in attrs]
  1050. prepr = f",{space}".join(attrs_str)
  1051. # no data provided, just attributes
  1052. if data is None:
  1053. data = ""
  1054. return f"{klass_name}({data}{prepr})"
  1055. def _format_space(self) -> str_t:
  1056. # using space here controls if the attributes
  1057. # are line separated or not (the default)
  1058. # max_seq_items = get_option('display.max_seq_items')
  1059. # if len(self) > max_seq_items:
  1060. # space = "\n%s" % (' ' * (len(klass) + 1))
  1061. return " "
  1062. @property
  1063. def _formatter_func(self):
  1064. """
  1065. Return the formatter function.
  1066. """
  1067. return default_pprint
  1068. def _format_data(self, name=None) -> str_t:
  1069. """
  1070. Return the formatted data as a unicode string.
  1071. """
  1072. # do we want to justify (only do so for non-objects)
  1073. is_justify = True
  1074. if self.inferred_type == "string":
  1075. is_justify = False
  1076. elif self.inferred_type == "categorical":
  1077. self = cast("CategoricalIndex", self)
  1078. if is_object_dtype(self.categories):
  1079. is_justify = False
  1080. return format_object_summary(
  1081. self,
  1082. self._formatter_func,
  1083. is_justify=is_justify,
  1084. name=name,
  1085. line_break_each_value=self._is_multi,
  1086. )
  1087. def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
  1088. """
  1089. Return a list of tuples of the (attr,formatted_value).
  1090. """
  1091. attrs: list[tuple[str_t, str_t | int | bool | None]] = []
  1092. if not self._is_multi:
  1093. attrs.append(("dtype", f"'{self.dtype}'"))
  1094. if self.name is not None:
  1095. attrs.append(("name", default_pprint(self.name)))
  1096. elif self._is_multi and any(x is not None for x in self.names):
  1097. attrs.append(("names", default_pprint(self.names)))
  1098. max_seq_items = get_option("display.max_seq_items") or len(self)
  1099. if len(self) > max_seq_items:
  1100. attrs.append(("length", len(self)))
  1101. return attrs
  1102. @final
  1103. def _get_level_names(self) -> Hashable | Sequence[Hashable]:
  1104. """
  1105. Return a name or list of names with None replaced by the level number.
  1106. """
  1107. if self._is_multi:
  1108. return [
  1109. level if name is None else name for level, name in enumerate(self.names)
  1110. ]
  1111. else:
  1112. return 0 if self.name is None else self.name
  1113. @final
  1114. def _mpl_repr(self) -> np.ndarray:
  1115. # how to represent ourselves to matplotlib
  1116. if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
  1117. return cast(np.ndarray, self.values)
  1118. return self.astype(object, copy=False)._values
  1119. def format(
  1120. self,
  1121. name: bool = False,
  1122. formatter: Callable | None = None,
  1123. na_rep: str_t = "NaN",
  1124. ) -> list[str_t]:
  1125. """
  1126. Render a string representation of the Index.
  1127. """
  1128. header = []
  1129. if name:
  1130. header.append(
  1131. pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
  1132. if self.name is not None
  1133. else ""
  1134. )
  1135. if formatter is not None:
  1136. return header + list(self.map(formatter))
  1137. return self._format_with_header(header, na_rep=na_rep)
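# Illustrative sketch (not part of the original source): format renders the
# labels as a list of strings (with a header entry first if ``name=True``).
# >>> pd.Index(['a', 'b', 'c']).format()
# ['a', 'b', 'c']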
  1138. def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
  1139. from pandas.io.formats.format import format_array
  1140. values = self._values
  1141. if is_object_dtype(values.dtype):
  1142. values = cast(np.ndarray, values)
  1143. values = lib.maybe_convert_objects(values, safe=True)
  1144. result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
  1145. # could have nans
  1146. mask = is_float_nan(values)
  1147. if mask.any():
  1148. result_arr = np.array(result)
  1149. result_arr[mask] = na_rep
  1150. result = result_arr.tolist()
  1151. else:
  1152. result = trim_front(format_array(values, None, justify="left"))
  1153. return header + result
  1154. def _format_native_types(
  1155. self,
  1156. *,
  1157. na_rep: str_t = "",
  1158. decimal: str_t = ".",
  1159. float_format=None,
  1160. date_format=None,
  1161. quoting=None,
  1162. ) -> npt.NDArray[np.object_]:
  1163. """
  1164. Actually format specific types of the index.
  1165. """
  1166. from pandas.io.formats.format import FloatArrayFormatter
  1167. if is_float_dtype(self.dtype) and not is_extension_array_dtype(self.dtype):
  1168. formatter = FloatArrayFormatter(
  1169. self._values,
  1170. na_rep=na_rep,
  1171. float_format=float_format,
  1172. decimal=decimal,
  1173. quoting=quoting,
  1174. fixed_width=False,
  1175. )
  1176. return formatter.get_result_as_array()
  1177. mask = isna(self)
  1178. if not is_object_dtype(self) and not quoting:
  1179. values = np.asarray(self).astype(str)
  1180. else:
  1181. values = np.array(self, dtype=object, copy=True)
  1182. values[mask] = na_rep
  1183. return values
  1184. def _summary(self, name=None) -> str_t:
  1185. """
  1186. Return a summarized representation.
  1187. Parameters
  1188. ----------
  1189. name : str
  1190. name to use in the summary representation
  1191. Returns
  1192. -------
  1193. String with a summarized representation of the index
  1194. """
  1195. if len(self) > 0:
  1196. head = self[0]
  1197. if hasattr(head, "format") and not isinstance(head, str):
  1198. head = head.format()
  1199. elif needs_i8_conversion(self.dtype):
  1200. # e.g. Timedelta, display as values, not quoted
  1201. head = self._formatter_func(head).replace("'", "")
  1202. tail = self[-1]
  1203. if hasattr(tail, "format") and not isinstance(tail, str):
  1204. tail = tail.format()
  1205. elif needs_i8_conversion(self.dtype):
  1206. # e.g. Timedelta, display as values, not quoted
  1207. tail = self._formatter_func(tail).replace("'", "")
  1208. index_summary = f", {head} to {tail}"
  1209. else:
  1210. index_summary = ""
  1211. if name is None:
  1212. name = type(self).__name__
  1213. return f"{name}: {len(self)} entries{index_summary}"
  1214. # --------------------------------------------------------------------
  1215. # Conversion Methods
  1216. def to_flat_index(self: _IndexT) -> _IndexT:
  1217. """
  1218. Identity method.
  1219. This is implemented for compatibility with subclass implementations
  1220. when chaining.
  1221. Returns
  1222. -------
  1223. pd.Index
  1224. Caller.
  1225. See Also
  1226. --------
  1227. MultiIndex.to_flat_index : Subclass implementation.
  1228. """
  1229. return self
  1230. @final
  1231. def to_series(self, index=None, name: Hashable = None) -> Series:
  1232. """
  1233. Create a Series with both index and values equal to the index keys.
  1234. Useful with map for returning an indexer based on an index.
  1235. Parameters
  1236. ----------
  1237. index : Index, optional
  1238. Index of resulting Series. If None, defaults to original index.
  1239. name : str, optional
  1240. Name of resulting Series. If None, defaults to name of original
  1241. index.
  1242. Returns
  1243. -------
  1244. Series
  1245. The dtype will be based on the type of the Index values.
  1246. See Also
  1247. --------
  1248. Index.to_frame : Convert an Index to a DataFrame.
  1249. Series.to_frame : Convert Series to DataFrame.
  1250. Examples
  1251. --------
  1252. >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1253. By default, the original Index and original name are reused.
  1254. >>> idx.to_series()
  1255. animal
  1256. Ant Ant
  1257. Bear Bear
  1258. Cow Cow
  1259. Name: animal, dtype: object
  1260. To enforce a new Index, specify new labels to ``index``:
  1261. >>> idx.to_series(index=[0, 1, 2])
  1262. 0 Ant
  1263. 1 Bear
  1264. 2 Cow
  1265. Name: animal, dtype: object
  1266. To override the name of the resulting column, specify `name`:
  1267. >>> idx.to_series(name='zoo')
  1268. animal
  1269. Ant Ant
  1270. Bear Bear
  1271. Cow Cow
  1272. Name: zoo, dtype: object
  1273. """
  1274. from pandas import Series
  1275. if index is None:
  1276. index = self._view()
  1277. if name is None:
  1278. name = self.name
  1279. return Series(self._values.copy(), index=index, name=name)
  1280. def to_frame(
  1281. self, index: bool = True, name: Hashable = lib.no_default
  1282. ) -> DataFrame:
  1283. """
  1284. Create a DataFrame with a column containing the Index.
  1285. Parameters
  1286. ----------
  1287. index : bool, default True
  1288. Set the index of the returned DataFrame as the original Index.
  1289. name : object, defaults to index.name
  1290. The passed name should substitute for the index name (if it has
  1291. one).
  1292. Returns
  1293. -------
  1294. DataFrame
  1295. DataFrame containing the original Index data.
  1296. See Also
  1297. --------
  1298. Index.to_series : Convert an Index to a Series.
  1299. Series.to_frame : Convert Series to DataFrame.
  1300. Examples
  1301. --------
  1302. >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
  1303. >>> idx.to_frame()
  1304. animal
  1305. animal
  1306. Ant Ant
  1307. Bear Bear
  1308. Cow Cow
  1309. By default, the original Index is reused. To enforce a new Index:
  1310. >>> idx.to_frame(index=False)
  1311. animal
  1312. 0 Ant
  1313. 1 Bear
  1314. 2 Cow
  1315. To override the name of the resulting column, specify `name`:
  1316. >>> idx.to_frame(index=False, name='zoo')
  1317. zoo
  1318. 0 Ant
  1319. 1 Bear
  1320. 2 Cow
  1321. """
  1322. from pandas import DataFrame
  1323. if name is lib.no_default:
  1324. name = self._get_level_names()
  1325. result = DataFrame({name: self._values.copy()})
  1326. if index:
  1327. result.index = self
  1328. return result
  1329. # --------------------------------------------------------------------
  1330. # Name-Centric Methods
  1331. @property
  1332. def name(self) -> Hashable:
  1333. """
  1334. Return Index or MultiIndex name.
  1335. """
  1336. return self._name
  1337. @name.setter
  1338. def name(self, value: Hashable) -> None:
  1339. if self._no_setting_name:
  1340. # Used in MultiIndex.levels to avoid silently ignoring name updates.
  1341. raise RuntimeError(
  1342. "Cannot set name on a level of a MultiIndex. Use "
  1343. "'MultiIndex.set_names' instead."
  1344. )
  1345. maybe_extract_name(value, None, type(self))
  1346. self._name = value
  1347. @final
  1348. def _validate_names(
  1349. self, name=None, names=None, deep: bool = False
  1350. ) -> list[Hashable]:
  1351. """
  1352. Handles the quirks of having a singular 'name' parameter for general
  1353. Index and plural 'names' parameter for MultiIndex.
  1354. """
  1355. from copy import deepcopy
  1356. if names is not None and name is not None:
  1357. raise TypeError("Can only provide one of `names` and `name`")
  1358. if names is None and name is None:
  1359. new_names = deepcopy(self.names) if deep else self.names
  1360. elif names is not None:
  1361. if not is_list_like(names):
  1362. raise TypeError("Must pass list-like as `names`.")
  1363. new_names = names
  1364. elif not is_list_like(name):
  1365. new_names = [name]
  1366. else:
  1367. new_names = name
  1368. if len(new_names) != len(self.names):
  1369. raise ValueError(
  1370. f"Length of new names must be {len(self.names)}, got {len(new_names)}"
  1371. )
  1372. # All items in 'new_names' need to be hashable
  1373. validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")
  1374. return new_names
  1375. def _get_default_index_names(
  1376. self, names: Hashable | Sequence[Hashable] | None = None, default=None
  1377. ) -> list[Hashable]:
  1378. """
  1379. Get names of index.
  1380. Parameters
  1381. ----------
  1382. names : int, str or 1-dimensional list, default None
  1383. Index names to set.
  1384. default : str
  1385. Default name of index.
  1386. Raises
  1387. ------
  1388. TypeError
1389. If `names` is not a str or list-like.
  1390. """
  1391. from pandas.core.indexes.multi import MultiIndex
  1392. if names is not None:
  1393. if isinstance(names, (int, str)):
  1394. names = [names]
  1395. if not isinstance(names, list) and names is not None:
  1396. raise ValueError("Index names must be str or 1-dimensional list")
  1397. if not names:
  1398. if isinstance(self, MultiIndex):
  1399. names = com.fill_missing_names(self.names)
  1400. else:
  1401. names = [default] if self.name is None else [self.name]
  1402. return names
  1403. def _get_names(self) -> FrozenList:
  1404. return FrozenList((self.name,))
  1405. def _set_names(self, values, *, level=None) -> None:
  1406. """
  1407. Set new names on index. Each name has to be a hashable type.
  1408. Parameters
  1409. ----------
  1410. values : str or sequence
  1411. name(s) to set
  1412. level : int, level name, or sequence of int/level names (default None)
  1413. If the index is a MultiIndex (hierarchical), level(s) to set (None
  1414. for all levels). Otherwise level must be None
  1415. Raises
  1416. ------
  1417. TypeError if each name is not hashable.
  1418. """
  1419. if not is_list_like(values):
  1420. raise ValueError("Names must be a list-like")
  1421. if len(values) != 1:
  1422. raise ValueError(f"Length of new names must be 1, got {len(values)}")
  1423. # GH 20527
  1424. # All items in 'name' need to be hashable:
  1425. validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")
  1426. self._name = values[0]
  1427. names = property(fset=_set_names, fget=_get_names)
  1428. @overload
  1429. def set_names(
  1430. self: _IndexT, names, *, level=..., inplace: Literal[False] = ...
  1431. ) -> _IndexT:
  1432. ...
  1433. @overload
  1434. def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
  1435. ...
  1436. @overload
  1437. def set_names(
  1438. self: _IndexT, names, *, level=..., inplace: bool = ...
  1439. ) -> _IndexT | None:
  1440. ...
  1441. def set_names(
  1442. self: _IndexT, names, *, level=None, inplace: bool = False
  1443. ) -> _IndexT | None:
  1444. """
  1445. Set Index or MultiIndex name.
  1446. Able to set new names partially and by level.
  1447. Parameters
  1448. ----------
  1449. names : label or list of label or dict-like for MultiIndex
  1450. Name(s) to set.
  1451. .. versionchanged:: 1.3.0
  1452. level : int, label or list of int or label, optional
  1453. If the index is a MultiIndex and names is not dict-like, level(s) to set
  1454. (None for all levels). Otherwise level must be None.
  1455. .. versionchanged:: 1.3.0
  1456. inplace : bool, default False
  1457. Modifies the object directly, instead of creating a new Index or
  1458. MultiIndex.
  1459. Returns
  1460. -------
  1461. Index or None
  1462. The same type as the caller or None if ``inplace=True``.
  1463. See Also
  1464. --------
  1465. Index.rename : Able to set new names without level.
  1466. Examples
  1467. --------
  1468. >>> idx = pd.Index([1, 2, 3, 4])
  1469. >>> idx
  1470. Index([1, 2, 3, 4], dtype='int64')
  1471. >>> idx.set_names('quarter')
  1472. Index([1, 2, 3, 4], dtype='int64', name='quarter')
  1473. >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
  1474. ... [2018, 2019]])
  1475. >>> idx
  1476. MultiIndex([('python', 2018),
  1477. ('python', 2019),
  1478. ( 'cobra', 2018),
  1479. ( 'cobra', 2019)],
  1480. )
  1481. >>> idx = idx.set_names(['kind', 'year'])
  1482. >>> idx.set_names('species', level=0)
  1483. MultiIndex([('python', 2018),
  1484. ('python', 2019),
  1485. ( 'cobra', 2018),
  1486. ( 'cobra', 2019)],
  1487. names=['species', 'year'])
1488. When renaming levels with a dict, levels cannot be passed.
  1489. >>> idx.set_names({'kind': 'snake'})
  1490. MultiIndex([('python', 2018),
  1491. ('python', 2019),
  1492. ( 'cobra', 2018),
  1493. ( 'cobra', 2019)],
  1494. names=['snake', 'year'])
  1495. """
  1496. if level is not None and not isinstance(self, ABCMultiIndex):
  1497. raise ValueError("Level must be None for non-MultiIndex")
  1498. if level is not None and not is_list_like(level) and is_list_like(names):
  1499. raise TypeError("Names must be a string when a single level is provided.")
  1500. if not is_list_like(names) and level is None and self.nlevels > 1:
  1501. raise TypeError("Must pass list-like as `names`.")
  1502. if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
  1503. raise TypeError("Can only pass dict-like as `names` for MultiIndex.")
  1504. if is_dict_like(names) and level is not None:
  1505. raise TypeError("Can not pass level for dictlike `names`.")
  1506. if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
  1507. # Transform dict to list of new names and corresponding levels
  1508. level, names_adjusted = [], []
  1509. for i, name in enumerate(self.names):
  1510. if name in names.keys():
  1511. level.append(i)
  1512. names_adjusted.append(names[name])
  1513. names = names_adjusted
  1514. if not is_list_like(names):
  1515. names = [names]
  1516. if level is not None and not is_list_like(level):
  1517. level = [level]
  1518. if inplace:
  1519. idx = self
  1520. else:
  1521. idx = self._view()
  1522. idx._set_names(names, level=level)
  1523. if not inplace:
  1524. return idx
  1525. return None
  1526. def rename(self, name, inplace: bool = False):
  1527. """
  1528. Alter Index or MultiIndex name.
  1529. Able to set new names without level. Defaults to returning new index.
  1530. Length of names must match number of levels in MultiIndex.
  1531. Parameters
  1532. ----------
  1533. name : label or list of labels
  1534. Name(s) to set.
  1535. inplace : bool, default False
  1536. Modifies the object directly, instead of creating a new Index or
  1537. MultiIndex.
  1538. Returns
  1539. -------
  1540. Index or None
  1541. The same type as the caller or None if ``inplace=True``.
  1542. See Also
  1543. --------
  1544. Index.set_names : Able to set new names partially and by level.
  1545. Examples
  1546. --------
  1547. >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
  1548. >>> idx.rename('grade')
  1549. Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
  1550. >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
  1551. ... [2018, 2019]],
  1552. ... names=['kind', 'year'])
  1553. >>> idx
  1554. MultiIndex([('python', 2018),
  1555. ('python', 2019),
  1556. ( 'cobra', 2018),
  1557. ( 'cobra', 2019)],
  1558. names=['kind', 'year'])
  1559. >>> idx.rename(['species', 'year'])
  1560. MultiIndex([('python', 2018),
  1561. ('python', 2019),
  1562. ( 'cobra', 2018),
  1563. ( 'cobra', 2019)],
  1564. names=['species', 'year'])
  1565. >>> idx.rename('species')
  1566. Traceback (most recent call last):
  1567. TypeError: Must pass list-like as `names`.
  1568. """
  1569. return self.set_names([name], inplace=inplace)
  1570. # --------------------------------------------------------------------
  1571. # Level-Centric Methods
  1572. @property
  1573. def nlevels(self) -> int:
  1574. """
  1575. Number of levels.
  1576. """
  1577. return 1
  1578. def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
  1579. """
  1580. Compat with MultiIndex.
  1581. """
  1582. return self
  1583. @final
  1584. def _validate_index_level(self, level) -> None:
  1585. """
  1586. Validate index level.
  1587. For single-level Index getting level number is a no-op, but some
  1588. verification must be done like in MultiIndex.
  1589. """
  1590. if isinstance(level, int):
  1591. if level < 0 and level != -1:
  1592. raise IndexError(
  1593. "Too many levels: Index has only 1 level, "
  1594. f"{level} is not a valid level number"
  1595. )
  1596. if level > 0:
  1597. raise IndexError(
  1598. f"Too many levels: Index has only 1 level, not {level + 1}"
  1599. )
  1600. elif level != self.name:
  1601. raise KeyError(
  1602. f"Requested level ({level}) does not match index name ({self.name})"
  1603. )
  1604. def _get_level_number(self, level) -> int:
  1605. self._validate_index_level(level)
  1606. return 0
  1607. def sortlevel(
  1608. self, level=None, ascending: bool | list[bool] = True, sort_remaining=None
  1609. ):
  1610. """
  1611. For internal compatibility with the Index API.
  1612. Sort the Index. This is for compat with MultiIndex
  1613. Parameters
  1614. ----------
  1615. ascending : bool, default True
  1616. False to sort in descending order
  1617. level, sort_remaining are compat parameters
  1618. Returns
  1619. -------
  1620. Index
  1621. """
  1622. if not isinstance(ascending, (list, bool)):
  1623. raise TypeError(
  1624. "ascending must be a single bool value or"
  1625. "a list of bool values of length 1"
  1626. )
  1627. if isinstance(ascending, list):
  1628. if len(ascending) != 1:
  1629. raise TypeError("ascending must be a list of bool values of length 1")
  1630. ascending = ascending[0]
  1631. if not isinstance(ascending, bool):
  1632. raise TypeError("ascending must be a bool value")
  1633. return self.sort_values(return_indexer=True, ascending=ascending)
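# Illustrative sketch (not part of the original source): on a flat Index,
# sortlevel amounts to ``sort_values(return_indexer=True)``.
# >>> pd.Index([3, 1, 2]).sortlevel()
# (Index([1, 2, 3], dtype='int64'), array([1, 2, 0]))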
  1634. def _get_level_values(self, level) -> Index:
  1635. """
  1636. Return an Index of values for requested level.
  1637. This is primarily useful to get an individual level of values from a
  1638. MultiIndex, but is provided on Index as well for compatibility.
  1639. Parameters
  1640. ----------
  1641. level : int or str
  1642. It is either the integer position or the name of the level.
  1643. Returns
  1644. -------
  1645. Index
  1646. Calling object, as there is only one level in the Index.
  1647. See Also
  1648. --------
  1649. MultiIndex.get_level_values : Get values for a level of a MultiIndex.
  1650. Notes
  1651. -----
  1652. For Index, level should be 0, since there are no multiple levels.
  1653. Examples
  1654. --------
  1655. >>> idx = pd.Index(list('abc'))
  1656. >>> idx
  1657. Index(['a', 'b', 'c'], dtype='object')
  1658. Get level values by supplying `level` as integer:
  1659. >>> idx.get_level_values(0)
  1660. Index(['a', 'b', 'c'], dtype='object')
  1661. """
  1662. self._validate_index_level(level)
  1663. return self
  1664. get_level_values = _get_level_values
  1665. @final
  1666. def droplevel(self, level: IndexLabel = 0):
  1667. """
  1668. Return index with requested level(s) removed.
  1669. If resulting index has only 1 level left, the result will be
  1670. of Index type, not MultiIndex. The original index is not modified inplace.
  1671. Parameters
  1672. ----------
  1673. level : int, str, or list-like, default 0
  1674. If a string is given, must be the name of a level
  1675. If list-like, elements must be names or indexes of levels.
  1676. Returns
  1677. -------
  1678. Index or MultiIndex
  1679. Examples
  1680. --------
  1681. >>> mi = pd.MultiIndex.from_arrays(
  1682. ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
  1683. >>> mi
  1684. MultiIndex([(1, 3, 5),
  1685. (2, 4, 6)],
  1686. names=['x', 'y', 'z'])
  1687. >>> mi.droplevel()
  1688. MultiIndex([(3, 5),
  1689. (4, 6)],
  1690. names=['y', 'z'])
  1691. >>> mi.droplevel(2)
  1692. MultiIndex([(1, 3),
  1693. (2, 4)],
  1694. names=['x', 'y'])
  1695. >>> mi.droplevel('z')
  1696. MultiIndex([(1, 3),
  1697. (2, 4)],
  1698. names=['x', 'y'])
  1699. >>> mi.droplevel(['x', 'y'])
  1700. Index([5, 6], dtype='int64', name='z')
  1701. """
  1702. if not isinstance(level, (tuple, list)):
  1703. level = [level]
  1704. levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
  1705. return self._drop_level_numbers(levnums)
  1706. @final
  1707. def _drop_level_numbers(self, levnums: list[int]):
  1708. """
  1709. Drop MultiIndex levels by level _number_, not name.
  1710. """
  1711. if not levnums and not isinstance(self, ABCMultiIndex):
  1712. return self
  1713. if len(levnums) >= self.nlevels:
  1714. raise ValueError(
  1715. f"Cannot remove {len(levnums)} levels from an index with "
  1716. f"{self.nlevels} levels: at least one level must be left."
  1717. )
  1718. # The two checks above guarantee that here self is a MultiIndex
  1719. self = cast("MultiIndex", self)
  1720. new_levels = list(self.levels)
  1721. new_codes = list(self.codes)
  1722. new_names = list(self.names)
  1723. for i in levnums:
  1724. new_levels.pop(i)
  1725. new_codes.pop(i)
  1726. new_names.pop(i)
  1727. if len(new_levels) == 1:
  1728. lev = new_levels[0]
  1729. if len(lev) == 0:
  1730. # If lev is empty, lev.take will fail GH#42055
  1731. if len(new_codes[0]) == 0:
  1732. # GH#45230 preserve RangeIndex here
  1733. # see test_reset_index_empty_rangeindex
  1734. result = lev[:0]
  1735. else:
  1736. res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
  1737. # _constructor instead of type(lev) for RangeIndex compat GH#35230
  1738. result = lev._constructor._simple_new(res_values, name=new_names[0])
  1739. else:
  1740. # set nan if needed
  1741. mask = new_codes[0] == -1
  1742. result = new_levels[0].take(new_codes[0])
  1743. if mask.any():
  1744. result = result.putmask(mask, np.nan)
  1745. result._name = new_names[0]
  1746. return result
  1747. else:
  1748. from pandas.core.indexes.multi import MultiIndex
  1749. return MultiIndex(
  1750. levels=new_levels,
  1751. codes=new_codes,
  1752. names=new_names,
  1753. verify_integrity=False,
  1754. )
  1755. # --------------------------------------------------------------------
  1756. # Introspection Methods
  1757. @cache_readonly
  1758. @final
  1759. def _can_hold_na(self) -> bool:
  1760. if isinstance(self.dtype, ExtensionDtype):
  1761. if isinstance(self.dtype, IntervalDtype):
  1762. # FIXME(GH#45720): this is inaccurate for integer-backed
  1763. # IntervalArray, but without it other.categories.take raises
  1764. # in IntervalArray._cmp_method
  1765. return True
  1766. return self.dtype._can_hold_na
  1767. if self.dtype.kind in ["i", "u", "b"]:
  1768. return False
  1769. return True
  1770. @property
  1771. def is_monotonic_increasing(self) -> bool:
  1772. """
  1773. Return a boolean if the values are equal or increasing.
  1774. Returns
  1775. -------
  1776. bool
  1777. See Also
  1778. --------
  1779. Index.is_monotonic_decreasing : Check if the values are equal or decreasing.
  1780. Examples
  1781. --------
  1782. >>> pd.Index([1, 2, 3]).is_monotonic_increasing
  1783. True
  1784. >>> pd.Index([1, 2, 2]).is_monotonic_increasing
  1785. True
  1786. >>> pd.Index([1, 3, 2]).is_monotonic_increasing
  1787. False
  1788. """
  1789. return self._engine.is_monotonic_increasing
  1790. @property
  1791. def is_monotonic_decreasing(self) -> bool:
  1792. """
  1793. Return a boolean if the values are equal or decreasing.
  1794. Returns
  1795. -------
  1796. bool
  1797. See Also
  1798. --------
  1799. Index.is_monotonic_increasing : Check if the values are equal or increasing.
  1800. Examples
  1801. --------
  1802. >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
  1803. True
  1804. >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
  1805. True
  1806. >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
  1807. False
  1808. """
  1809. return self._engine.is_monotonic_decreasing
  1810. @final
  1811. @property
  1812. def _is_strictly_monotonic_increasing(self) -> bool:
  1813. """
  1814. Return if the index is strictly monotonic increasing
  1815. (only increasing) values.
  1816. Examples
  1817. --------
  1818. >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
  1819. True
  1820. >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
  1821. False
  1822. >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
  1823. False
  1824. """
  1825. return self.is_unique and self.is_monotonic_increasing
  1826. @final
  1827. @property
  1828. def _is_strictly_monotonic_decreasing(self) -> bool:
  1829. """
  1830. Return if the index is strictly monotonic decreasing
  1831. (only decreasing) values.
  1832. Examples
  1833. --------
  1834. >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
  1835. True
  1836. >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
  1837. False
  1838. >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
  1839. False
  1840. """
  1841. return self.is_unique and self.is_monotonic_decreasing
  1842. @cache_readonly
  1843. def is_unique(self) -> bool:
  1844. """
  1845. Return if the index has unique values.
  1846. Returns
  1847. -------
  1848. bool
  1849. See Also
  1850. --------
  1851. Index.has_duplicates : Inverse method that checks if it has duplicate values.
  1852. Examples
  1853. --------
  1854. >>> idx = pd.Index([1, 5, 7, 7])
  1855. >>> idx.is_unique
  1856. False
  1857. >>> idx = pd.Index([1, 5, 7])
  1858. >>> idx.is_unique
  1859. True
  1860. >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
  1861. ... "Watermelon"]).astype("category")
  1862. >>> idx.is_unique
  1863. False
  1864. >>> idx = pd.Index(["Orange", "Apple",
  1865. ... "Watermelon"]).astype("category")
  1866. >>> idx.is_unique
  1867. True
  1868. """
  1869. return self._engine.is_unique
  1870. @final
  1871. @property
  1872. def has_duplicates(self) -> bool:
  1873. """
  1874. Check if the Index has duplicate values.
  1875. Returns
  1876. -------
  1877. bool
  1878. Whether or not the Index has duplicate values.
  1879. See Also
  1880. --------
  1881. Index.is_unique : Inverse method that checks if it has unique values.
  1882. Examples
  1883. --------
  1884. >>> idx = pd.Index([1, 5, 7, 7])
  1885. >>> idx.has_duplicates
  1886. True
  1887. >>> idx = pd.Index([1, 5, 7])
  1888. >>> idx.has_duplicates
  1889. False
  1890. >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
  1891. ... "Watermelon"]).astype("category")
  1892. >>> idx.has_duplicates
  1893. True
  1894. >>> idx = pd.Index(["Orange", "Apple",
  1895. ... "Watermelon"]).astype("category")
  1896. >>> idx.has_duplicates
  1897. False
  1898. """
  1899. return not self.is_unique
  1900. @final
  1901. def is_boolean(self) -> bool:
  1902. """
  1903. Check if the Index only consists of booleans.
  1904. .. deprecated:: 2.0.0
  1905. Use `pandas.api.types.is_bool_dtype` instead.
  1906. Returns
  1907. -------
  1908. bool
  1909. Whether or not the Index only consists of booleans.
  1910. See Also
  1911. --------
  1912. is_integer : Check if the Index only consists of integers (deprecated).
  1913. is_floating : Check if the Index is a floating type (deprecated).
  1914. is_numeric : Check if the Index only consists of numeric data (deprecated).
  1915. is_object : Check if the Index is of the object dtype (deprecated).
  1916. is_categorical : Check if the Index holds categorical data.
  1917. is_interval : Check if the Index holds Interval objects (deprecated).
  1918. Examples
  1919. --------
  1920. >>> idx = pd.Index([True, False, True])
  1921. >>> idx.is_boolean() # doctest: +SKIP
  1922. True
  1923. >>> idx = pd.Index(["True", "False", "True"])
  1924. >>> idx.is_boolean() # doctest: +SKIP
  1925. False
  1926. >>> idx = pd.Index([True, False, "True"])
  1927. >>> idx.is_boolean() # doctest: +SKIP
  1928. False
  1929. """
  1930. warnings.warn(
  1931. f"{type(self).__name__}.is_boolean is deprecated. "
  1932. "Use pandas.api.types.is_bool_type instead.",
  1933. FutureWarning,
  1934. stacklevel=find_stack_level(),
  1935. )
  1936. return self.inferred_type in ["boolean"]
  1937. @final
  1938. def is_integer(self) -> bool:
  1939. """
  1940. Check if the Index only consists of integers.
  1941. .. deprecated:: 2.0.0
  1942. Use `pandas.api.types.is_integer_dtype` instead.
  1943. Returns
  1944. -------
  1945. bool
  1946. Whether or not the Index only consists of integers.
  1947. See Also
  1948. --------
  1949. is_boolean : Check if the Index only consists of booleans (deprecated).
  1950. is_floating : Check if the Index is a floating type (deprecated).
  1951. is_numeric : Check if the Index only consists of numeric data (deprecated).
1952. is_object : Check if the Index is of the object dtype (deprecated).
  1953. is_categorical : Check if the Index holds categorical data (deprecated).
  1954. is_interval : Check if the Index holds Interval objects (deprecated).
  1955. Examples
  1956. --------
  1957. >>> idx = pd.Index([1, 2, 3, 4])
  1958. >>> idx.is_integer() # doctest: +SKIP
  1959. True
  1960. >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
  1961. >>> idx.is_integer() # doctest: +SKIP
  1962. False
  1963. >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
  1964. >>> idx.is_integer() # doctest: +SKIP
  1965. False
  1966. """
  1967. warnings.warn(
  1968. f"{type(self).__name__}.is_integer is deprecated. "
  1969. "Use pandas.api.types.is_integer_dtype instead.",
  1970. FutureWarning,
  1971. stacklevel=find_stack_level(),
  1972. )
  1973. return self.inferred_type in ["integer"]
  1974. @final
  1975. def is_floating(self) -> bool:
  1976. """
  1977. Check if the Index is a floating type.
  1978. .. deprecated:: 2.0.0
  1979. Use `pandas.api.types.is_float_dtype` instead
  1980. The Index may consist of only floats, NaNs, or a mix of floats,
  1981. integers, or NaNs.
  1982. Returns
  1983. -------
  1984. bool
1985. Whether or not the Index only consists of floats, NaNs, or
  1986. a mix of floats, integers, or NaNs.
  1987. See Also
  1988. --------
  1989. is_boolean : Check if the Index only consists of booleans (deprecated).
  1990. is_integer : Check if the Index only consists of integers (deprecated).
  1991. is_numeric : Check if the Index only consists of numeric data (deprecated).
1992. is_object : Check if the Index is of the object dtype (deprecated).
  1993. is_categorical : Check if the Index holds categorical data (deprecated).
  1994. is_interval : Check if the Index holds Interval objects (deprecated).
  1995. Examples
  1996. --------
  1997. >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
  1998. >>> idx.is_floating() # doctest: +SKIP
  1999. True
  2000. >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
  2001. >>> idx.is_floating() # doctest: +SKIP
  2002. True
  2003. >>> idx = pd.Index([1, 2, 3, 4, np.nan])
  2004. >>> idx.is_floating() # doctest: +SKIP
  2005. True
  2006. >>> idx = pd.Index([1, 2, 3, 4])
  2007. >>> idx.is_floating() # doctest: +SKIP
  2008. False
  2009. """
  2010. warnings.warn(
  2011. f"{type(self).__name__}.is_floating is deprecated. "
  2012. "Use pandas.api.types.is_float_dtype instead.",
  2013. FutureWarning,
  2014. stacklevel=find_stack_level(),
  2015. )
  2016. return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
  2017. @final
  2018. def is_numeric(self) -> bool:
  2019. """
  2020. Check if the Index only consists of numeric data.
  2021. .. deprecated:: 2.0.0
  2022. Use `pandas.api.types.is_numeric_dtype` instead.
  2023. Returns
  2024. -------
  2025. bool
  2026. Whether or not the Index only consists of numeric data.
  2027. See Also
  2028. --------
  2029. is_boolean : Check if the Index only consists of booleans (deprecated).
  2030. is_integer : Check if the Index only consists of integers (deprecated).
  2031. is_floating : Check if the Index is a floating type (deprecated).
2032. is_object : Check if the Index is of the object dtype (deprecated).
  2033. is_categorical : Check if the Index holds categorical data (deprecated).
  2034. is_interval : Check if the Index holds Interval objects (deprecated).
  2035. Examples
  2036. --------
  2037. >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
  2038. >>> idx.is_numeric() # doctest: +SKIP
  2039. True
  2040. >>> idx = pd.Index([1, 2, 3, 4.0])
  2041. >>> idx.is_numeric() # doctest: +SKIP
  2042. True
  2043. >>> idx = pd.Index([1, 2, 3, 4])
  2044. >>> idx.is_numeric() # doctest: +SKIP
  2045. True
  2046. >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
  2047. >>> idx.is_numeric() # doctest: +SKIP
  2048. True
  2049. >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
  2050. >>> idx.is_numeric() # doctest: +SKIP
  2051. False
  2052. """
  2053. warnings.warn(
  2054. f"{type(self).__name__}.is_numeric is deprecated. "
  2055. "Use pandas.api.types.is_any_real_numeric_dtype instead",
  2056. FutureWarning,
  2057. stacklevel=find_stack_level(),
  2058. )
  2059. return self.inferred_type in ["integer", "floating"]
  2060. @final
  2061. def is_object(self) -> bool:
  2062. """
  2063. Check if the Index is of the object dtype.
  2064. .. deprecated:: 2.0.0
  2065. Use `pandas.api.types.is_object_dtype` instead.
  2066. Returns
  2067. -------
  2068. bool
  2069. Whether or not the Index is of the object dtype.
  2070. See Also
  2071. --------
  2072. is_boolean : Check if the Index only consists of booleans (deprecated).
  2073. is_integer : Check if the Index only consists of integers (deprecated).
  2074. is_floating : Check if the Index is a floating type (deprecated).
  2075. is_numeric : Check if the Index only consists of numeric data (deprecated).
  2076. is_categorical : Check if the Index holds categorical data (deprecated).
  2077. is_interval : Check if the Index holds Interval objects (deprecated).
  2078. Examples
  2079. --------
  2080. >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
  2081. >>> idx.is_object() # doctest: +SKIP
  2082. True
  2083. >>> idx = pd.Index(["Apple", "Mango", 2.0])
  2084. >>> idx.is_object() # doctest: +SKIP
  2085. True
  2086. >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
  2087. ... "Watermelon"]).astype("category")
  2088. >>> idx.is_object() # doctest: +SKIP
  2089. False
  2090. >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
  2091. >>> idx.is_object() # doctest: +SKIP
  2092. False
  2093. """
  2094. warnings.warn(
  2095. f"{type(self).__name__}.is_object is deprecated."
  2096. "Use pandas.api.types.is_object_dtype instead",
  2097. FutureWarning,
  2098. stacklevel=find_stack_level(),
  2099. )
  2100. return is_object_dtype(self.dtype)
  2101. @final
  2102. def is_categorical(self) -> bool:
  2103. """
  2104. Check if the Index holds categorical data.
  2105. .. deprecated:: 2.0.0
  2106. Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.
  2107. Returns
  2108. -------
  2109. bool
  2110. True if the Index is categorical.
  2111. See Also
  2112. --------
  2113. CategoricalIndex : Index for categorical data.
  2114. is_boolean : Check if the Index only consists of booleans (deprecated).
  2115. is_integer : Check if the Index only consists of integers (deprecated).
  2116. is_floating : Check if the Index is a floating type (deprecated).
  2117. is_numeric : Check if the Index only consists of numeric data (deprecated).
2118. is_object : Check if the Index is of the object dtype (deprecated).
  2119. is_interval : Check if the Index holds Interval objects (deprecated).
  2120. Examples
  2121. --------
  2122. >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
  2123. ... "Watermelon"]).astype("category")
  2124. >>> idx.is_categorical() # doctest: +SKIP
  2125. True
  2126. >>> idx = pd.Index([1, 3, 5, 7])
  2127. >>> idx.is_categorical() # doctest: +SKIP
  2128. False
  2129. >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
  2130. >>> s
  2131. 0 Peter
  2132. 1 Victor
  2133. 2 Elisabeth
  2134. 3 Mar
  2135. dtype: object
  2136. >>> s.index.is_categorical() # doctest: +SKIP
  2137. False
  2138. """
  2139. warnings.warn(
  2140. f"{type(self).__name__}.is_categorical is deprecated."
  2141. "Use pandas.api.types.is_categorical_dtype instead",
  2142. FutureWarning,
  2143. stacklevel=find_stack_level(),
  2144. )
  2145. return self.inferred_type in ["categorical"]
  2146. @final
  2147. def is_interval(self) -> bool:
  2148. """
  2149. Check if the Index holds Interval objects.
  2150. .. deprecated:: 2.0.0
  2151. Use `isinstance(index.dtype, pd.IntervalDtype)` instead.
  2152. Returns
  2153. -------
  2154. bool
  2155. Whether or not the Index holds Interval objects.
  2156. See Also
  2157. --------
  2158. IntervalIndex : Index for Interval objects.
  2159. is_boolean : Check if the Index only consists of booleans (deprecated).
  2160. is_integer : Check if the Index only consists of integers (deprecated).
  2161. is_floating : Check if the Index is a floating type (deprecated).
  2162. is_numeric : Check if the Index only consists of numeric data (deprecated).
2163. is_object : Check if the Index is of the object dtype (deprecated).
  2164. is_categorical : Check if the Index holds categorical data (deprecated).
  2165. Examples
  2166. --------
  2167. >>> idx = pd.Index([pd.Interval(left=0, right=5),
  2168. ... pd.Interval(left=5, right=10)])
  2169. >>> idx.is_interval() # doctest: +SKIP
  2170. True
  2171. >>> idx = pd.Index([1, 3, 5, 7])
  2172. >>> idx.is_interval() # doctest: +SKIP
  2173. False
  2174. """
  2175. warnings.warn(
  2176. f"{type(self).__name__}.is_interval is deprecated."
  2177. "Use pandas.api.types.is_interval_dtype instead",
  2178. FutureWarning,
  2179. stacklevel=find_stack_level(),
  2180. )
  2181. return self.inferred_type in ["interval"]
  2182. @final
  2183. def _holds_integer(self) -> bool:
  2184. """
  2185. Whether the type is an integer type.
  2186. """
  2187. return self.inferred_type in ["integer", "mixed-integer"]
  2188. @final
  2189. def holds_integer(self) -> bool:
  2190. """
  2191. Whether the type is an integer type.
  2192. .. deprecated:: 2.0.0
  2193. Use `pandas.api.types.infer_dtype` instead
  2194. """
  2195. warnings.warn(
  2196. f"{type(self).__name__}.holds_integer is deprecated. "
  2197. "Use pandas.api.types.infer_dtype instead.",
  2198. FutureWarning,
  2199. stacklevel=find_stack_level(),
  2200. )
  2201. return self._holds_integer()
  2202. @cache_readonly
  2203. def inferred_type(self) -> str_t:
  2204. """
  2205. Return a string of the type inferred from the values.
  2206. """
  2207. return lib.infer_dtype(self._values, skipna=False)
  2208. @cache_readonly
  2209. @final
  2210. def _is_all_dates(self) -> bool:
  2211. """
  2212. Whether or not the index values only consist of dates.
  2213. """
  2214. if needs_i8_conversion(self.dtype):
  2215. return True
  2216. elif self.dtype != _dtype_obj:
  2217. # TODO(ExtensionIndex): 3rd party EA might override?
  2218. # Note: this includes IntervalIndex, even when the left/right
  2219. # contain datetime-like objects.
  2220. return False
  2221. elif self._is_multi:
  2222. return False
  2223. return is_datetime_array(ensure_object(self._values))
  2224. @final
  2225. @cache_readonly
  2226. def _is_multi(self) -> bool:
  2227. """
  2228. Cached check equivalent to isinstance(self, MultiIndex)
  2229. """
  2230. return isinstance(self, ABCMultiIndex)
  2231. # --------------------------------------------------------------------
  2232. # Pickle Methods
  2233. def __reduce__(self):
  2234. d = {"data": self._data, "name": self.name}
  2235. return _new_Index, (type(self), d), None
  2236. # --------------------------------------------------------------------
  2237. # Null Handling Methods
  2238. @cache_readonly
  2239. def _na_value(self):
  2240. """The expected NA value to use with this index."""
  2241. dtype = self.dtype
  2242. if isinstance(dtype, np.dtype):
  2243. if dtype.kind in ["m", "M"]:
  2244. return NaT
  2245. return np.nan
  2246. return dtype.na_value
  2247. @cache_readonly
  2248. def _isnan(self) -> npt.NDArray[np.bool_]:
  2249. """
  2250. Return if each value is NaN.
  2251. """
  2252. if self._can_hold_na:
  2253. return isna(self)
  2254. else:
2255. # shouldn't reach this condition; hasnans should be checked beforehand
  2256. values = np.empty(len(self), dtype=np.bool_)
  2257. values.fill(False)
  2258. return values
  2259. @cache_readonly
  2260. def hasnans(self) -> bool:
  2261. """
  2262. Return True if there are any NaNs.
  2263. Enables various performance speedups.
  2264. Returns
  2265. -------
  2266. bool
  2267. """
  2268. if self._can_hold_na:
  2269. return bool(self._isnan.any())
  2270. else:
  2271. return False
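# Illustrative sketch (not part of the original source): hasnans is a cached
# flag used to short-circuit NA handling elsewhere in this class.
# >>> pd.Index([1.0, 2.0, np.nan]).hasnans
# True
# >>> pd.Index([1, 2, 3]).hasnans
# False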
  2272. @final
  2273. def isna(self) -> npt.NDArray[np.bool_]:
  2274. """
  2275. Detect missing values.
  2276. Return a boolean same-sized object indicating if the values are NA.
  2277. NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
  2278. mapped to ``True`` values.
2279. Everything else gets mapped to ``False`` values. Characters such as
  2280. empty strings `''` or :attr:`numpy.inf` are not considered NA values
  2281. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  2282. Returns
  2283. -------
  2284. numpy.ndarray[bool]
  2285. A boolean array of whether my values are NA.
  2286. See Also
  2287. --------
  2288. Index.notna : Boolean inverse of isna.
  2289. Index.dropna : Omit entries with missing values.
  2290. isna : Top-level isna.
  2291. Series.isna : Detect missing values in Series object.
  2292. Examples
  2293. --------
  2294. Show which entries in a pandas.Index are NA. The result is an
  2295. array.
  2296. >>> idx = pd.Index([5.2, 6.0, np.NaN])
  2297. >>> idx
  2298. Index([5.2, 6.0, nan], dtype='float64')
  2299. >>> idx.isna()
  2300. array([False, False, True])
  2301. Empty strings are not considered NA values. None is considered an NA
  2302. value.
  2303. >>> idx = pd.Index(['black', '', 'red', None])
  2304. >>> idx
  2305. Index(['black', '', 'red', None], dtype='object')
  2306. >>> idx.isna()
  2307. array([False, False, False, True])
  2308. For datetimes, `NaT` (Not a Time) is considered as an NA value.
  2309. >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
  2310. ... pd.Timestamp(''), None, pd.NaT])
  2311. >>> idx
  2312. DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
  2313. dtype='datetime64[ns]', freq=None)
  2314. >>> idx.isna()
  2315. array([False, True, True, True])
  2316. """
  2317. return self._isnan
  2318. isnull = isna
  2319. @final
  2320. def notna(self) -> npt.NDArray[np.bool_]:
  2321. """
  2322. Detect existing (non-missing) values.
  2323. Return a boolean same-sized object indicating if the values are not NA.
  2324. Non-missing values get mapped to ``True``. Characters such as empty
  2325. strings ``''`` or :attr:`numpy.inf` are not considered NA values
  2326. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  2327. NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
  2328. values.
  2329. Returns
  2330. -------
  2331. numpy.ndarray[bool]
  2332. Boolean array to indicate which entries are not NA.
  2333. See Also
  2334. --------
  2335. Index.notnull : Alias of notna.
  2336. Index.isna: Inverse of notna.
  2337. notna : Top-level notna.
  2338. Examples
  2339. --------
  2340. Show which entries in an Index are not NA. The result is an
  2341. array.
  2342. >>> idx = pd.Index([5.2, 6.0, np.NaN])
  2343. >>> idx
  2344. Index([5.2, 6.0, nan], dtype='float64')
  2345. >>> idx.notna()
  2346. array([ True, True, False])
2347. Empty strings are not considered NA values. None is considered an NA
  2348. value.
  2349. >>> idx = pd.Index(['black', '', 'red', None])
  2350. >>> idx
  2351. Index(['black', '', 'red', None], dtype='object')
  2352. >>> idx.notna()
  2353. array([ True, True, True, False])
  2354. """
  2355. return ~self.isna()
  2356. notnull = notna
  2357. def fillna(self, value=None, downcast=None):
  2358. """
  2359. Fill NA/NaN values with the specified value.
  2360. Parameters
  2361. ----------
  2362. value : scalar
  2363. Scalar value to use to fill holes (e.g. 0).
2364. This value cannot be a list-like.
2365. downcast : dict, default None
  2366. A dict of item->dtype of what to downcast if possible,
  2367. or the string 'infer' which will try to downcast to an appropriate
  2368. equal type (e.g. float64 to int64 if possible).
  2369. Returns
  2370. -------
  2371. Index
  2372. See Also
  2373. --------
  2374. DataFrame.fillna : Fill NaN values of a DataFrame.
2375. Series.fillna : Fill NaN values of a Series.
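Examples
--------
A small sketch, assuming a float64 Index (the scalar fill value is cast to float):
>>> idx = pd.Index([np.NaN, np.NaN, 3.0])
>>> idx.fillna(0)
Index([0.0, 0.0, 3.0], dtype='float64')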
  2376. """
  2377. value = self._require_scalar(value)
  2378. if self.hasnans:
  2379. result = self.putmask(self._isnan, value)
  2380. if downcast is None:
  2381. # no need to care metadata other than name
  2382. # because it can't have freq if it has NaTs
  2383. # _with_infer needed for test_fillna_categorical
  2384. return Index._with_infer(result, name=self.name)
  2385. raise NotImplementedError(
  2386. f"{type(self).__name__}.fillna does not support 'downcast' "
  2387. "argument values other than 'None'."
  2388. )
  2389. return self._view()
  2390. def dropna(self: _IndexT, how: AnyAll = "any") -> _IndexT:
  2391. """
  2392. Return Index without NA/NaN values.
  2393. Parameters
  2394. ----------
  2395. how : {'any', 'all'}, default 'any'
  2396. If the Index is a MultiIndex, drop the value when any or all levels
  2397. are NaN.
  2398. Returns
  2399. -------
  2400. Index
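Examples
--------
A small illustration on a float64 Index; NaN entries are dropped:
>>> idx = pd.Index([1.0, np.NaN, 3.0])
>>> idx.dropna()
Index([1.0, 3.0], dtype='float64')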
  2401. """
  2402. if how not in ("any", "all"):
  2403. raise ValueError(f"invalid how option: {how}")
  2404. if self.hasnans:
  2405. res_values = self._values[~self._isnan]
  2406. return type(self)._simple_new(res_values, name=self.name)
  2407. return self._view()
  2408. # --------------------------------------------------------------------
  2409. # Uniqueness Methods
  2410. def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
  2411. """
  2412. Return unique values in the index.
2413. Unique values are returned in order of appearance; this does NOT sort.
  2414. Parameters
  2415. ----------
  2416. level : int or hashable, optional
  2417. Only return values from specified level (for MultiIndex).
  2418. If int, gets the level by integer position, else by level name.
  2419. Returns
  2420. -------
  2421. Index
  2422. See Also
  2423. --------
  2424. unique : Numpy array of unique values in that column.
  2425. Series.unique : Return unique values of Series object.
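Examples
--------
A brief illustration; duplicates are dropped while keeping first-appearance order:
>>> idx = pd.Index([1, 1, 2, 3, 3])
>>> idx.unique()
Index([1, 2, 3], dtype='int64')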
  2426. """
  2427. if level is not None:
  2428. self._validate_index_level(level)
  2429. if self.is_unique:
  2430. return self._view()
  2431. result = super().unique()
  2432. return self._shallow_copy(result)
  2433. def drop_duplicates(self: _IndexT, *, keep: DropKeep = "first") -> _IndexT:
  2434. """
  2435. Return Index with duplicate values removed.
  2436. Parameters
  2437. ----------
  2438. keep : {'first', 'last', ``False``}, default 'first'
  2439. - 'first' : Drop duplicates except for the first occurrence.
  2440. - 'last' : Drop duplicates except for the last occurrence.
  2441. - ``False`` : Drop all duplicates.
  2442. Returns
  2443. -------
  2444. Index
  2445. See Also
  2446. --------
  2447. Series.drop_duplicates : Equivalent method on Series.
  2448. DataFrame.drop_duplicates : Equivalent method on DataFrame.
  2449. Index.duplicated : Related method on Index, indicating duplicate
  2450. Index values.
  2451. Examples
  2452. --------
2453. Generate a pandas.Index with duplicate values.
  2454. >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
  2455. The `keep` parameter controls which duplicate values are removed.
  2456. The value 'first' keeps the first occurrence for each
  2457. set of duplicated entries. The default value of keep is 'first'.
  2458. >>> idx.drop_duplicates(keep='first')
  2459. Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
  2460. The value 'last' keeps the last occurrence for each set of duplicated
  2461. entries.
  2462. >>> idx.drop_duplicates(keep='last')
  2463. Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
  2464. The value ``False`` discards all sets of duplicated entries.
  2465. >>> idx.drop_duplicates(keep=False)
  2466. Index(['cow', 'beetle', 'hippo'], dtype='object')
  2467. """
  2468. if self.is_unique:
  2469. return self._view()
  2470. return super().drop_duplicates(keep=keep)
  2471. def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
  2472. """
  2473. Indicate duplicate index values.
  2474. Duplicated values are indicated as ``True`` values in the resulting
  2475. array. Either all duplicates, all except the first, or all except the
  2476. last occurrence of duplicates can be indicated.
  2477. Parameters
  2478. ----------
  2479. keep : {'first', 'last', False}, default 'first'
2480. How to mark the values in each set of duplicates.
  2481. - 'first' : Mark duplicates as ``True`` except for the first
  2482. occurrence.
  2483. - 'last' : Mark duplicates as ``True`` except for the last
  2484. occurrence.
  2485. - ``False`` : Mark all duplicates as ``True``.
  2486. Returns
  2487. -------
  2488. np.ndarray[bool]
  2489. See Also
  2490. --------
  2491. Series.duplicated : Equivalent method on pandas.Series.
  2492. DataFrame.duplicated : Equivalent method on pandas.DataFrame.
  2493. Index.drop_duplicates : Remove duplicate values from Index.
  2494. Examples
  2495. --------
  2496. By default, for each set of duplicated values, the first occurrence is
  2497. set to False and all others to True:
  2498. >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
  2499. >>> idx.duplicated()
  2500. array([False, False, True, False, True])
  2501. which is equivalent to
  2502. >>> idx.duplicated(keep='first')
  2503. array([False, False, True, False, True])
  2504. By using 'last', the last occurrence of each set of duplicated values
2505. is set to False and all others to True:
  2506. >>> idx.duplicated(keep='last')
  2507. array([ True, False, True, False, False])
2508. By setting keep to ``False``, all duplicates are True:
  2509. >>> idx.duplicated(keep=False)
  2510. array([ True, False, True, False, True])
  2511. """
  2512. if self.is_unique:
  2513. # fastpath available bc we are immutable
  2514. return np.zeros(len(self), dtype=bool)
  2515. return self._duplicated(keep=keep)
  2516. # --------------------------------------------------------------------
  2517. # Arithmetic & Logical Methods
  2518. def __iadd__(self, other):
  2519. # alias for __add__
  2520. return self + other
  2521. @final
  2522. def __nonzero__(self) -> NoReturn:
  2523. raise ValueError(
  2524. f"The truth value of a {type(self).__name__} is ambiguous. "
  2525. "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
  2526. )
  2527. __bool__ = __nonzero__
  2528. # --------------------------------------------------------------------
  2529. # Set Operation Methods
  2530. def _get_reconciled_name_object(self, other):
  2531. """
  2532. If the result of a set operation will be self,
  2533. return self, unless the name changes, in which
  2534. case make a shallow copy of self.
  2535. """
  2536. name = get_op_result_name(self, other)
  2537. if self.name is not name:
  2538. return self.rename(name)
  2539. return self
  2540. @final
  2541. def _validate_sort_keyword(self, sort):
  2542. if sort not in [None, False, True]:
  2543. raise ValueError(
  2544. "The 'sort' keyword only takes the values of "
  2545. f"None, True, or False; {sort} was passed."
  2546. )
  2547. @final
  2548. def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
  2549. """
  2550. With mismatched timezones, cast both to UTC.
  2551. """
2552. # Caller is responsible for checking
  2553. # `not is_dtype_equal(self.dtype, other.dtype)`
  2554. if (
  2555. isinstance(self, ABCDatetimeIndex)
  2556. and isinstance(other, ABCDatetimeIndex)
  2557. and self.tz is not None
  2558. and other.tz is not None
  2559. ):
  2560. # GH#39328, GH#45357
  2561. left = self.tz_convert("UTC")
  2562. right = other.tz_convert("UTC")
  2563. return left, right
  2564. return self, other
  2565. @final
  2566. def union(self, other, sort=None):
  2567. """
  2568. Form the union of two Index objects.
  2569. If the Index objects are incompatible, both Index objects will be
  2570. cast to dtype('object') first.
  2571. Parameters
  2572. ----------
  2573. other : Index or array-like
  2574. sort : bool or None, default None
  2575. Whether to sort the resulting Index.
  2576. * None : Sort the result, except when
  2577. 1. `self` and `other` are equal.
  2578. 2. `self` or `other` has length 0.
  2579. 3. Some values in `self` or `other` cannot be compared.
  2580. A RuntimeWarning is issued in this case.
  2581. * False : do not sort the result.
  2582. * True : Sort the result (which may raise TypeError).
  2583. Returns
  2584. -------
  2585. Index
  2586. Examples
  2587. --------
  2588. Union matching dtypes
  2589. >>> idx1 = pd.Index([1, 2, 3, 4])
  2590. >>> idx2 = pd.Index([3, 4, 5, 6])
  2591. >>> idx1.union(idx2)
  2592. Index([1, 2, 3, 4, 5, 6], dtype='int64')
  2593. Union mismatched dtypes
  2594. >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
  2595. >>> idx2 = pd.Index([1, 2, 3, 4])
  2596. >>> idx1.union(idx2)
  2597. Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
  2598. MultiIndex case
  2599. >>> idx1 = pd.MultiIndex.from_arrays(
  2600. ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
  2601. ... )
  2602. >>> idx1
  2603. MultiIndex([(1, 'Red'),
  2604. (1, 'Blue'),
  2605. (2, 'Red'),
  2606. (2, 'Blue')],
  2607. )
  2608. >>> idx2 = pd.MultiIndex.from_arrays(
  2609. ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
  2610. ... )
  2611. >>> idx2
  2612. MultiIndex([(3, 'Red'),
  2613. (3, 'Green'),
  2614. (2, 'Red'),
  2615. (2, 'Green')],
  2616. )
  2617. >>> idx1.union(idx2)
  2618. MultiIndex([(1, 'Blue'),
  2619. (1, 'Red'),
  2620. (2, 'Blue'),
  2621. (2, 'Green'),
  2622. (2, 'Red'),
  2623. (3, 'Green'),
  2624. (3, 'Red')],
  2625. )
  2626. >>> idx1.union(idx2, sort=False)
  2627. MultiIndex([(1, 'Red'),
  2628. (1, 'Blue'),
  2629. (2, 'Red'),
  2630. (2, 'Blue'),
  2631. (3, 'Red'),
  2632. (3, 'Green'),
  2633. (2, 'Green')],
  2634. )
  2635. """
  2636. self._validate_sort_keyword(sort)
  2637. self._assert_can_do_setop(other)
  2638. other, result_name = self._convert_can_do_setop(other)
  2639. if not is_dtype_equal(self.dtype, other.dtype):
  2640. if (
  2641. isinstance(self, ABCMultiIndex)
  2642. and not is_object_dtype(_unpack_nested_dtype(other))
  2643. and len(other) > 0
  2644. ):
  2645. raise NotImplementedError(
  2646. "Can only union MultiIndex with MultiIndex or Index of tuples, "
  2647. "try mi.to_flat_index().union(other) instead."
  2648. )
  2649. self, other = self._dti_setop_align_tzs(other, "union")
  2650. dtype = self._find_common_type_compat(other)
  2651. left = self.astype(dtype, copy=False)
  2652. right = other.astype(dtype, copy=False)
  2653. return left.union(right, sort=sort)
  2654. elif not len(other) or self.equals(other):
  2655. # NB: whether this (and the `if not len(self)` check below) come before
  2656. # or after the is_dtype_equal check above affects the returned dtype
  2657. result = self._get_reconciled_name_object(other)
  2658. if sort is True:
  2659. return result.sort_values()
  2660. return result
  2661. elif not len(self):
  2662. result = other._get_reconciled_name_object(self)
  2663. if sort is True:
  2664. return result.sort_values()
  2665. return result
  2666. result = self._union(other, sort=sort)
  2667. return self._wrap_setop_result(other, result)
  2668. def _union(self, other: Index, sort: bool | None):
  2669. """
  2670. Specific union logic should go here. In subclasses, union behavior
  2671. should be overwritten here rather than in `self.union`.
  2672. Parameters
  2673. ----------
  2674. other : Index or array-like
2675. sort : bool or None
  2676. Whether to sort the resulting index.
  2677. * True : sort the result
  2678. * False : do not sort the result.
  2679. * None : sort the result, except when `self` and `other` are equal
  2680. or when the values cannot be compared.
  2681. Returns
  2682. -------
  2683. Index
  2684. """
  2685. lvals = self._values
  2686. rvals = other._values
  2687. if (
  2688. sort in (None, True)
  2689. and self.is_monotonic_increasing
  2690. and other.is_monotonic_increasing
  2691. and not (self.has_duplicates and other.has_duplicates)
  2692. and self._can_use_libjoin
  2693. ):
  2694. # Both are monotonic and at least one is unique, so can use outer join
  2695. # (actually don't need either unique, but without this restriction
  2696. # test_union_same_value_duplicated_in_both fails)
  2697. try:
  2698. return self._outer_indexer(other)[0]
  2699. except (TypeError, IncompatibleFrequency):
  2700. # incomparable objects; should only be for object dtype
  2701. value_list = list(lvals)
  2702. # worth making this faster? a very unusual case
  2703. value_set = set(lvals)
  2704. value_list.extend([x for x in rvals if x not in value_set])
  2705. # If objects are unorderable, we must have object dtype.
  2706. return np.array(value_list, dtype=object)
  2707. elif not other.is_unique:
  2708. # other has duplicates
  2709. result_dups = algos.union_with_duplicates(self, other)
  2710. return _maybe_try_sort(result_dups, sort)
  2711. # The rest of this method is analogous to Index._intersection_via_get_indexer
  2712. # Self may have duplicates; other already checked as unique
  2713. # find indexes of things in "other" that are not in "self"
  2714. if self._index_as_unique:
  2715. indexer = self.get_indexer(other)
  2716. missing = (indexer == -1).nonzero()[0]
  2717. else:
  2718. missing = algos.unique1d(self.get_indexer_non_unique(other)[1])
  2719. result: Index | MultiIndex | ArrayLike
  2720. if self._is_multi:
  2721. # Preserve MultiIndex to avoid losing dtypes
  2722. result = self.append(other.take(missing))
  2723. else:
  2724. if len(missing) > 0:
  2725. other_diff = rvals.take(missing)
  2726. result = concat_compat((lvals, other_diff))
  2727. else:
  2728. result = lvals
  2729. if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
  2730. # if both are monotonic then result should already be sorted
  2731. result = _maybe_try_sort(result, sort)
  2732. return result
  2733. @final
  2734. def _wrap_setop_result(self, other: Index, result) -> Index:
  2735. name = get_op_result_name(self, other)
  2736. if isinstance(result, Index):
  2737. if result.name != name:
  2738. result = result.rename(name)
  2739. else:
  2740. result = self._shallow_copy(result, name=name)
  2741. return result
  2742. @final
  2743. def intersection(self, other, sort: bool = False):
  2744. """
  2745. Form the intersection of two Index objects.
  2746. This returns a new Index with elements common to the index and `other`.
  2747. Parameters
  2748. ----------
  2749. other : Index or array-like
  2750. sort : True, False or None, default False
  2751. Whether to sort the resulting index.
  2752. * None : sort the result, except when `self` and `other` are equal
  2753. or when the values cannot be compared.
  2754. * False : do not sort the result.
  2755. * True : Sort the result (which may raise TypeError).
  2756. Returns
  2757. -------
  2758. Index
  2759. Examples
  2760. --------
  2761. >>> idx1 = pd.Index([1, 2, 3, 4])
  2762. >>> idx2 = pd.Index([3, 4, 5, 6])
  2763. >>> idx1.intersection(idx2)
  2764. Index([3, 4], dtype='int64')
  2765. """
  2766. self._validate_sort_keyword(sort)
  2767. self._assert_can_do_setop(other)
  2768. other, result_name = self._convert_can_do_setop(other)
  2769. if not is_dtype_equal(self.dtype, other.dtype):
  2770. self, other = self._dti_setop_align_tzs(other, "intersection")
  2771. if self.equals(other):
  2772. if self.has_duplicates:
  2773. result = self.unique()._get_reconciled_name_object(other)
  2774. else:
  2775. result = self._get_reconciled_name_object(other)
  2776. if sort is True:
  2777. result = result.sort_values()
  2778. return result
  2779. if len(self) == 0 or len(other) == 0:
2780. # fastpath; we need to be careful to preserve commutativity
  2781. if self._is_multi or other._is_multi:
  2782. # _convert_can_do_setop ensures that we have both or neither
  2783. # We retain self.levels
  2784. return self[:0].rename(result_name)
  2785. dtype = self._find_common_type_compat(other)
  2786. if is_dtype_equal(self.dtype, dtype):
  2787. # Slicing allows us to retain DTI/TDI.freq, RangeIndex
  2788. # Note: self[:0] vs other[:0] affects
  2789. # 1) which index's `freq` we get in DTI/TDI cases
  2790. # This may be a historical artifact, i.e. no documented
  2791. # reason for this choice.
  2792. # 2) The `step` we get in RangeIndex cases
  2793. if len(self) == 0:
  2794. return self[:0].rename(result_name)
  2795. else:
  2796. return other[:0].rename(result_name)
  2797. return Index([], dtype=dtype, name=result_name)
  2798. elif not self._should_compare(other):
  2799. # We can infer that the intersection is empty.
  2800. if isinstance(self, ABCMultiIndex):
  2801. return self[:0].rename(result_name)
  2802. return Index([], name=result_name)
  2803. elif not is_dtype_equal(self.dtype, other.dtype):
  2804. dtype = self._find_common_type_compat(other)
  2805. this = self.astype(dtype, copy=False)
  2806. other = other.astype(dtype, copy=False)
  2807. return this.intersection(other, sort=sort)
  2808. result = self._intersection(other, sort=sort)
  2809. return self._wrap_intersection_result(other, result)
  2810. def _intersection(self, other: Index, sort: bool = False):
  2811. """
  2812. intersection specialized to the case with matching dtypes.
  2813. """
  2814. if (
  2815. self.is_monotonic_increasing
  2816. and other.is_monotonic_increasing
  2817. and self._can_use_libjoin
  2818. and not isinstance(self, ABCMultiIndex)
  2819. ):
  2820. try:
  2821. res_indexer, indexer, _ = self._inner_indexer(other)
  2822. except TypeError:
  2823. # non-comparable; should only be for object dtype
  2824. pass
  2825. else:
  2826. # TODO: algos.unique1d should preserve DTA/TDA
  2827. if is_numeric_dtype(self):
  2828. # This is faster, because Index.unique() checks for uniqueness
  2829. # before calculating the unique values.
  2830. res = algos.unique1d(res_indexer)
  2831. else:
  2832. result = self.take(indexer)
  2833. res = result.drop_duplicates()
  2834. return ensure_wrapped_if_datetimelike(res)
  2835. res_values = self._intersection_via_get_indexer(other, sort=sort)
  2836. res_values = _maybe_try_sort(res_values, sort)
  2837. return res_values
  2838. def _wrap_intersection_result(self, other, result):
  2839. # We will override for MultiIndex to handle empty results
  2840. return self._wrap_setop_result(other, result)
  2841. @final
  2842. def _intersection_via_get_indexer(
  2843. self, other: Index | MultiIndex, sort
  2844. ) -> ArrayLike | MultiIndex:
  2845. """
  2846. Find the intersection of two Indexes using get_indexer.
  2847. Returns
  2848. -------
  2849. np.ndarray or ExtensionArray
  2850. The returned array will be unique.
  2851. """
  2852. left_unique = self.unique()
  2853. right_unique = other.unique()
  2854. # even though we are unique, we need get_indexer_for for IntervalIndex
  2855. indexer = left_unique.get_indexer_for(right_unique)
  2856. mask = indexer != -1
  2857. taker = indexer.take(mask.nonzero()[0])
  2858. if sort is False:
  2859. # sort bc we want the elements in the same order they are in self
  2860. # unnecessary in the case with sort=None bc we will sort later
  2861. taker = np.sort(taker)
  2862. if isinstance(left_unique, ABCMultiIndex):
  2863. result = left_unique.take(taker)
  2864. else:
  2865. result = left_unique.take(taker)._values
  2866. return result
  2867. @final
  2868. def difference(self, other, sort=None):
  2869. """
  2870. Return a new Index with elements of index not in `other`.
  2871. This is the set difference of two Index objects.
  2872. Parameters
  2873. ----------
  2874. other : Index or array-like
  2875. sort : bool or None, default None
2876. Whether to sort the resulting index. By default, pandas attempts
2877. to sort the values, but any TypeError raised when comparing
2878. incomparable elements is caught.
  2879. * None : Attempt to sort the result, but catch any TypeErrors
  2880. from comparing incomparable elements.
  2881. * False : Do not sort the result.
  2882. * True : Sort the result (which may raise TypeError).
  2883. Returns
  2884. -------
  2885. Index
  2886. Examples
  2887. --------
  2888. >>> idx1 = pd.Index([2, 1, 3, 4])
  2889. >>> idx2 = pd.Index([3, 4, 5, 6])
  2890. >>> idx1.difference(idx2)
  2891. Index([1, 2], dtype='int64')
  2892. >>> idx1.difference(idx2, sort=False)
  2893. Index([2, 1], dtype='int64')
  2894. """
  2895. self._validate_sort_keyword(sort)
  2896. self._assert_can_do_setop(other)
  2897. other, result_name = self._convert_can_do_setop(other)
  2898. # Note: we do NOT call _dti_setop_align_tzs here, as there
  2899. # is no requirement that .difference be commutative, so it does
  2900. # not cast to object.
  2901. if self.equals(other):
  2902. # Note: we do not (yet) sort even if sort=None GH#24959
  2903. return self[:0].rename(result_name)
  2904. if len(other) == 0:
  2905. # Note: we do not (yet) sort even if sort=None GH#24959
  2906. result = self.rename(result_name)
  2907. if sort is True:
  2908. return result.sort_values()
  2909. return result
  2910. if not self._should_compare(other):
  2911. # Nothing matches -> difference is everything
  2912. result = self.rename(result_name)
  2913. if sort is True:
  2914. return result.sort_values()
  2915. return result
  2916. result = self._difference(other, sort=sort)
  2917. return self._wrap_difference_result(other, result)
  2918. def _difference(self, other, sort):
  2919. # overridden by RangeIndex
  2920. this = self.unique()
  2921. indexer = this.get_indexer_for(other)
  2922. indexer = indexer.take((indexer != -1).nonzero()[0])
  2923. label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
  2924. the_diff: MultiIndex | ArrayLike
  2925. if isinstance(this, ABCMultiIndex):
  2926. the_diff = this.take(label_diff)
  2927. else:
  2928. the_diff = this._values.take(label_diff)
  2929. the_diff = _maybe_try_sort(the_diff, sort)
  2930. return the_diff
  2931. def _wrap_difference_result(self, other, result):
  2932. # We will override for MultiIndex to handle empty results
  2933. return self._wrap_setop_result(other, result)
  2934. def symmetric_difference(self, other, result_name=None, sort=None):
  2935. """
  2936. Compute the symmetric difference of two Index objects.
  2937. Parameters
  2938. ----------
  2939. other : Index or array-like
2940. result_name : str, optional
  2941. sort : bool or None, default None
2942. Whether to sort the resulting index. By default, pandas attempts
2943. to sort the values, but any TypeError raised when comparing
2944. incomparable elements is caught.
  2945. * None : Attempt to sort the result, but catch any TypeErrors
  2946. from comparing incomparable elements.
  2947. * False : Do not sort the result.
  2948. * True : Sort the result (which may raise TypeError).
  2949. Returns
  2950. -------
  2951. Index
  2952. Notes
  2953. -----
  2954. ``symmetric_difference`` contains elements that appear in either
  2955. ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
  2956. ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
  2957. dropped.
  2958. Examples
  2959. --------
  2960. >>> idx1 = pd.Index([1, 2, 3, 4])
  2961. >>> idx2 = pd.Index([2, 3, 4, 5])
  2962. >>> idx1.symmetric_difference(idx2)
  2963. Index([1, 5], dtype='int64')
  2964. """
  2965. self._validate_sort_keyword(sort)
  2966. self._assert_can_do_setop(other)
  2967. other, result_name_update = self._convert_can_do_setop(other)
  2968. if result_name is None:
  2969. result_name = result_name_update
  2970. if not is_dtype_equal(self.dtype, other.dtype):
  2971. self, other = self._dti_setop_align_tzs(other, "symmetric_difference")
  2972. if not self._should_compare(other):
  2973. return self.union(other, sort=sort).rename(result_name)
  2974. elif not is_dtype_equal(self.dtype, other.dtype):
  2975. dtype = self._find_common_type_compat(other)
  2976. this = self.astype(dtype, copy=False)
  2977. that = other.astype(dtype, copy=False)
  2978. return this.symmetric_difference(that, sort=sort).rename(result_name)
  2979. this = self.unique()
  2980. other = other.unique()
  2981. indexer = this.get_indexer_for(other)
  2982. # {this} minus {other}
  2983. common_indexer = indexer.take((indexer != -1).nonzero()[0])
  2984. left_indexer = np.setdiff1d(
  2985. np.arange(this.size), common_indexer, assume_unique=True
  2986. )
  2987. left_diff = this.take(left_indexer)
  2988. # {other} minus {this}
  2989. right_indexer = (indexer == -1).nonzero()[0]
  2990. right_diff = other.take(right_indexer)
  2991. res_values = left_diff.append(right_diff)
  2992. result = _maybe_try_sort(res_values, sort)
  2993. if not self._is_multi:
  2994. return Index(result, name=result_name, dtype=res_values.dtype)
  2995. else:
  2996. left_diff = cast("MultiIndex", left_diff)
  2997. if len(result) == 0:
  2998. # result might be an Index, if other was an Index
  2999. return left_diff.remove_unused_levels().set_names(result_name)
  3000. return result.set_names(result_name)
  3001. @final
  3002. def _assert_can_do_setop(self, other) -> bool:
  3003. if not is_list_like(other):
  3004. raise TypeError("Input must be Index or array-like")
  3005. return True
  3006. def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
  3007. if not isinstance(other, Index):
  3008. other = Index(other, name=self.name)
  3009. result_name = self.name
  3010. else:
  3011. result_name = get_op_result_name(self, other)
  3012. return other, result_name
  3013. # --------------------------------------------------------------------
  3014. # Indexing Methods
  3015. def get_loc(self, key):
  3016. """
  3017. Get integer location, slice or boolean mask for requested label.
  3018. Parameters
  3019. ----------
  3020. key : label
  3021. Returns
  3022. -------
  3023. int if unique index, slice if monotonic index, else mask
  3024. Examples
  3025. --------
  3026. >>> unique_index = pd.Index(list('abc'))
  3027. >>> unique_index.get_loc('b')
  3028. 1
  3029. >>> monotonic_index = pd.Index(list('abbc'))
  3030. >>> monotonic_index.get_loc('b')
  3031. slice(1, 3, None)
  3032. >>> non_monotonic_index = pd.Index(list('abcb'))
  3033. >>> non_monotonic_index.get_loc('b')
  3034. array([False, True, False, True])
  3035. """
  3036. casted_key = self._maybe_cast_indexer(key)
  3037. try:
  3038. return self._engine.get_loc(casted_key)
  3039. except KeyError as err:
  3040. raise KeyError(key) from err
  3041. except TypeError:
  3042. # If we have a listlike key, _check_indexing_error will raise
  3043. # InvalidIndexError. Otherwise we fall through and re-raise
  3044. # the TypeError.
  3045. self._check_indexing_error(key)
  3046. raise
  3047. _index_shared_docs[
  3048. "get_indexer"
  3049. ] = """
  3050. Compute indexer and mask for new index given the current index.
3051. The indexer should then be used as an input to ndarray.take to align the
  3052. current data to the new index.
  3053. Parameters
  3054. ----------
  3055. target : %(target_klass)s
  3056. method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
  3057. * default: exact matches only.
  3058. * pad / ffill: find the PREVIOUS index value if no exact match.
3059. * backfill / bfill: use NEXT index value if no exact match.
  3060. * nearest: use the NEAREST index value if no exact match. Tied
  3061. distances are broken by preferring the larger index value.
  3062. limit : int, optional
  3063. Maximum number of consecutive labels in ``target`` to match for
  3064. inexact matches.
  3065. tolerance : optional
  3066. Maximum distance between original and new labels for inexact
  3067. matches. The values of the index at the matching locations must
  3068. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3069. Tolerance may be a scalar value, which applies the same tolerance
  3070. to all values, or list-like, which applies variable tolerance per
3071. element. List-like includes list, tuple, array, and Series; it must be
3072. the same size as the index, and its dtype must exactly match the
3073. index's type.
  3074. Returns
  3075. -------
  3076. np.ndarray[np.intp]
  3077. Integers from 0 to n - 1 indicating that the index at these
  3078. positions matches the corresponding target values. Missing values
  3079. in the target are marked by -1.
  3080. %(raises_section)s
  3081. Notes
  3082. -----
3083. Returns -1 for unmatched values; for further explanation see the
  3084. example below.
  3085. Examples
  3086. --------
  3087. >>> index = pd.Index(['c', 'a', 'b'])
  3088. >>> index.get_indexer(['a', 'b', 'x'])
  3089. array([ 1, 2, -1])
  3090. Notice that the return value is an array of locations in ``index``
  3091. and ``x`` is marked by -1, as it is not in ``index``.
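With ``method='pad'``, unmatched labels map to the position of the previous
index value (an illustrative sketch on a monotonic index):
>>> idx = pd.Index([10, 20, 30])
>>> idx.get_indexer([12, 20, 35], method='pad')
array([0, 1, 2])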
  3092. """
  3093. @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
  3094. @final
  3095. def get_indexer(
  3096. self,
  3097. target,
  3098. method: str_t | None = None,
  3099. limit: int | None = None,
  3100. tolerance=None,
  3101. ) -> npt.NDArray[np.intp]:
  3102. method = clean_reindex_fill_method(method)
  3103. orig_target = target
  3104. target = self._maybe_cast_listlike_indexer(target)
  3105. self._check_indexing_method(method, limit, tolerance)
  3106. if not self._index_as_unique:
  3107. raise InvalidIndexError(self._requires_unique_msg)
  3108. if len(target) == 0:
  3109. return np.array([], dtype=np.intp)
  3110. if not self._should_compare(target) and not self._should_partial_index(target):
  3111. # IntervalIndex get special treatment bc numeric scalars can be
  3112. # matched to Interval scalars
  3113. return self._get_indexer_non_comparable(target, method=method, unique=True)
  3114. if is_categorical_dtype(self.dtype):
  3115. # _maybe_cast_listlike_indexer ensures target has our dtype
  3116. # (could improve perf by doing _should_compare check earlier?)
  3117. assert is_dtype_equal(self.dtype, target.dtype)
  3118. indexer = self._engine.get_indexer(target.codes)
  3119. if self.hasnans and target.hasnans:
  3120. # After _maybe_cast_listlike_indexer, target elements which do not
  3121. # belong to some category are changed to NaNs
  3122. # Mask to track actual NaN values compared to inserted NaN values
  3123. # GH#45361
  3124. target_nans = isna(orig_target)
  3125. loc = self.get_loc(np.nan)
  3126. mask = target.isna()
  3127. indexer[target_nans] = loc
  3128. indexer[mask & ~target_nans] = -1
  3129. return indexer
  3130. if is_categorical_dtype(target.dtype):
  3131. # potential fastpath
  3132. # get an indexer for unique categories then propagate to codes via take_nd
  3133. # get_indexer instead of _get_indexer needed for MultiIndex cases
  3134. # e.g. test_append_different_columns_types
  3135. categories_indexer = self.get_indexer(target.categories)
  3136. indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
  3137. if (not self._is_multi and self.hasnans) and target.hasnans:
  3138. # Exclude MultiIndex because hasnans raises NotImplementedError
  3139. # we should only get here if we are unique, so loc is an integer
  3140. # GH#41934
  3141. loc = self.get_loc(np.nan)
  3142. mask = target.isna()
  3143. indexer[mask] = loc
  3144. return ensure_platform_int(indexer)
  3145. pself, ptarget = self._maybe_promote(target)
  3146. if pself is not self or ptarget is not target:
  3147. return pself.get_indexer(
  3148. ptarget, method=method, limit=limit, tolerance=tolerance
  3149. )
  3150. if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
  3151. # Only call equals if we have same dtype to avoid inference/casting
  3152. return np.arange(len(target), dtype=np.intp)
  3153. if not is_dtype_equal(
  3154. self.dtype, target.dtype
  3155. ) and not self._should_partial_index(target):
  3156. # _should_partial_index e.g. IntervalIndex with numeric scalars
  3157. # that can be matched to Interval scalars.
  3158. dtype = self._find_common_type_compat(target)
  3159. this = self.astype(dtype, copy=False)
  3160. target = target.astype(dtype, copy=False)
  3161. return this._get_indexer(
  3162. target, method=method, limit=limit, tolerance=tolerance
  3163. )
  3164. return self._get_indexer(target, method, limit, tolerance)
  3165. def _get_indexer(
  3166. self,
  3167. target: Index,
  3168. method: str_t | None = None,
  3169. limit: int | None = None,
  3170. tolerance=None,
  3171. ) -> npt.NDArray[np.intp]:
  3172. if tolerance is not None:
  3173. tolerance = self._convert_tolerance(tolerance, target)
  3174. if method in ["pad", "backfill"]:
  3175. indexer = self._get_fill_indexer(target, method, limit, tolerance)
  3176. elif method == "nearest":
  3177. indexer = self._get_nearest_indexer(target, limit, tolerance)
  3178. else:
  3179. if target._is_multi and self._is_multi:
  3180. engine = self._engine
  3181. # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
  3182. # has no attribute "_extract_level_codes"
  3183. tgt_values = engine._extract_level_codes( # type: ignore[union-attr]
  3184. target
  3185. )
  3186. else:
  3187. tgt_values = target._get_engine_target()
  3188. indexer = self._engine.get_indexer(tgt_values)
  3189. return ensure_platform_int(indexer)
  3190. @final
  3191. def _should_partial_index(self, target: Index) -> bool:
  3192. """
  3193. Should we attempt partial-matching indexing?
  3194. """
  3195. if is_interval_dtype(self.dtype):
  3196. if is_interval_dtype(target.dtype):
  3197. return False
3198. # See https://github.com/pandas-dev/pandas/issues/47772; the commented
  3199. # out code can be restored (instead of hardcoding `return True`)
  3200. # once that issue is fixed
  3201. # "Index" has no attribute "left"
  3202. # return self.left._should_compare(target) # type: ignore[attr-defined]
  3203. return True
  3204. return False
  3205. @final
  3206. def _check_indexing_method(
  3207. self,
  3208. method: str_t | None,
  3209. limit: int | None = None,
  3210. tolerance=None,
  3211. ) -> None:
  3212. """
  3213. Raise if we have a get_indexer `method` that is not supported or valid.
  3214. """
  3215. if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
  3216. # in practice the clean_reindex_fill_method call would raise
  3217. # before we get here
  3218. raise ValueError("Invalid fill method") # pragma: no cover
  3219. if self._is_multi:
  3220. if method == "nearest":
  3221. raise NotImplementedError(
  3222. "method='nearest' not implemented yet "
  3223. "for MultiIndex; see GitHub issue 9365"
  3224. )
  3225. if method in ("pad", "backfill"):
  3226. if tolerance is not None:
  3227. raise NotImplementedError(
  3228. "tolerance not implemented yet for MultiIndex"
  3229. )
  3230. if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
  3231. # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
  3232. if method is not None:
  3233. raise NotImplementedError(
  3234. f"method {method} not yet implemented for {type(self).__name__}"
  3235. )
  3236. if method is None:
  3237. if tolerance is not None:
  3238. raise ValueError(
  3239. "tolerance argument only valid if doing pad, "
  3240. "backfill or nearest reindexing"
  3241. )
  3242. if limit is not None:
  3243. raise ValueError(
  3244. "limit argument only valid if doing pad, "
  3245. "backfill or nearest reindexing"
  3246. )
  3247. def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
  3248. # override this method on subclasses
  3249. tolerance = np.asarray(tolerance)
  3250. if target.size != tolerance.size and tolerance.size > 1:
  3251. raise ValueError("list-like tolerance size must match target index size")
  3252. elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number):
  3253. if tolerance.ndim > 0:
  3254. raise ValueError(
  3255. f"tolerance argument for {type(self).__name__} with dtype "
  3256. f"{self.dtype} must contain numeric elements if it is list type"
  3257. )
  3258. raise ValueError(
  3259. f"tolerance argument for {type(self).__name__} with dtype {self.dtype} "
  3260. f"must be numeric if it is a scalar: {repr(tolerance)}"
  3261. )
  3262. return tolerance
  3263. @final
  3264. def _get_fill_indexer(
  3265. self, target: Index, method: str_t, limit: int | None = None, tolerance=None
  3266. ) -> npt.NDArray[np.intp]:
  3267. if self._is_multi:
  3268. # TODO: get_indexer_with_fill docstring says values must be _sorted_
  3269. # but that doesn't appear to be enforced
  3270. # error: "IndexEngine" has no attribute "get_indexer_with_fill"
  3271. engine = self._engine
  3272. with warnings.catch_warnings():
  3273. # TODO: We need to fix this. Casting to int64 in cython
  3274. warnings.filterwarnings("ignore", category=RuntimeWarning)
  3275. return engine.get_indexer_with_fill( # type: ignore[union-attr]
  3276. target=target._values,
  3277. values=self._values,
  3278. method=method,
  3279. limit=limit,
  3280. )
  3281. if self.is_monotonic_increasing and target.is_monotonic_increasing:
  3282. target_values = target._get_engine_target()
  3283. own_values = self._get_engine_target()
  3284. if not isinstance(target_values, np.ndarray) or not isinstance(
  3285. own_values, np.ndarray
  3286. ):
  3287. raise NotImplementedError
  3288. if method == "pad":
  3289. indexer = libalgos.pad(own_values, target_values, limit=limit)
  3290. else:
  3291. # i.e. "backfill"
  3292. indexer = libalgos.backfill(own_values, target_values, limit=limit)
  3293. else:
  3294. indexer = self._get_fill_indexer_searchsorted(target, method, limit)
  3295. if tolerance is not None and len(self):
  3296. indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
  3297. return indexer
  3298. @final
  3299. def _get_fill_indexer_searchsorted(
  3300. self, target: Index, method: str_t, limit: int | None = None
  3301. ) -> npt.NDArray[np.intp]:
  3302. """
  3303. Fallback pad/backfill get_indexer that works for monotonic decreasing
  3304. indexes and non-monotonic targets.
  3305. """
  3306. if limit is not None:
  3307. raise ValueError(
  3308. f"limit argument for {repr(method)} method only well-defined "
  3309. "if index and target are monotonic"
  3310. )
  3311. side: Literal["left", "right"] = "left" if method == "pad" else "right"
  3312. # find exact matches first (this simplifies the algorithm)
  3313. indexer = self.get_indexer(target)
  3314. nonexact = indexer == -1
  3315. indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
  3316. if side == "left":
  3317. # searchsorted returns "indices into a sorted array such that,
  3318. # if the corresponding elements in v were inserted before the
  3319. # indices, the order of a would be preserved".
  3320. # Thus, we need to subtract 1 to find values to the left.
  3321. indexer[nonexact] -= 1
  3322. # This also mapped not found values (values of 0 from
  3323. # np.searchsorted) to -1, which conveniently is also our
  3324. # sentinel for missing values
  3325. else:
  3326. # Mark indices to the right of the largest value as not found
  3327. indexer[indexer == len(self)] = -1
  3328. return indexer
  3329. @final
  3330. def _get_nearest_indexer(
  3331. self, target: Index, limit: int | None, tolerance
  3332. ) -> npt.NDArray[np.intp]:
  3333. """
  3334. Get the indexer for the nearest index labels; requires an index with
  3335. values that can be subtracted from each other (e.g., not strings or
  3336. tuples).
  3337. """
  3338. if not len(self):
  3339. return self._get_fill_indexer(target, "pad")
  3340. left_indexer = self.get_indexer(target, "pad", limit=limit)
  3341. right_indexer = self.get_indexer(target, "backfill", limit=limit)
  3342. left_distances = self._difference_compat(target, left_indexer)
  3343. right_distances = self._difference_compat(target, right_indexer)
  3344. op = operator.lt if self.is_monotonic_increasing else operator.le
  3345. indexer = np.where(
  3346. # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
  3347. # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
  3348. # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
  3349. op(left_distances, right_distances) # type: ignore[arg-type]
  3350. | (right_indexer == -1),
  3351. left_indexer,
  3352. right_indexer,
  3353. )
  3354. if tolerance is not None:
  3355. indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
  3356. return indexer
  3357. @final
  3358. def _filter_indexer_tolerance(
  3359. self,
  3360. target: Index,
  3361. indexer: npt.NDArray[np.intp],
  3362. tolerance,
  3363. ) -> npt.NDArray[np.intp]:
  3364. distance = self._difference_compat(target, indexer)
  3365. return np.where(distance <= tolerance, indexer, -1)
  3366. @final
  3367. def _difference_compat(
  3368. self, target: Index, indexer: npt.NDArray[np.intp]
  3369. ) -> ArrayLike:
  3370. # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
  3371. # of DateOffset objects, which do not support __abs__ (and would be slow
  3372. # if they did)
  3373. if isinstance(self.dtype, PeriodDtype):
  3374. # Note: we only get here with matching dtypes
  3375. own_values = cast("PeriodArray", self._data)._ndarray
  3376. target_values = cast("PeriodArray", target._data)._ndarray
  3377. diff = own_values[indexer] - target_values
  3378. else:
  3379. # error: Unsupported left operand type for - ("ExtensionArray")
  3380. diff = self._values[indexer] - target._values # type: ignore[operator]
  3381. return abs(diff)
  3382. # --------------------------------------------------------------------
  3383. # Indexer Conversion Methods
  3384. @final
  3385. def _validate_positional_slice(self, key: slice) -> None:
  3386. """
  3387. For positional indexing, a slice must have either int or None
  3388. for each of start, stop, and step.
  3389. """
  3390. self._validate_indexer("positional", key.start, "iloc")
  3391. self._validate_indexer("positional", key.stop, "iloc")
  3392. self._validate_indexer("positional", key.step, "iloc")
  3393. def _convert_slice_indexer(self, key: slice, kind: str_t):
  3394. """
  3395. Convert a slice indexer.
  3396. By definition, these are labels unless 'iloc' is passed in.
  3397. Floats are not allowed as the start, step, or stop of the slice.
  3398. Parameters
  3399. ----------
3400. key : slice
  3401. kind : {'loc', 'getitem'}
  3402. """
  3403. assert kind in ["loc", "getitem"], kind
  3404. # potentially cast the bounds to integers
  3405. start, stop, step = key.start, key.stop, key.step
  3406. # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
  3407. # to simplify this.
  3408. if isinstance(self.dtype, np.dtype) and is_float_dtype(self.dtype):
  3409. # We always treat __getitem__ slicing as label-based
  3410. # translate to locations
  3411. return self.slice_indexer(start, stop, step)
  3412. # figure out if this is a positional indexer
  3413. def is_int(v):
  3414. return v is None or is_integer(v)
  3415. is_index_slice = is_int(start) and is_int(stop) and is_int(step)
  3416. # special case for interval_dtype bc we do not do partial-indexing
  3417. # on integer Intervals when slicing
  3418. # TODO: write this in terms of e.g. should_partial_index?
  3419. ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
  3420. self.dtype
  3421. )
  3422. is_positional = is_index_slice and ints_are_positional
  3423. if kind == "getitem":
  3424. # called from the getitem slicers, validate that we are in fact integers
  3425. if is_integer_dtype(self.dtype) or is_index_slice:
  3426. # Note: these checks are redundant if we know is_index_slice
  3427. self._validate_indexer("slice", key.start, "getitem")
  3428. self._validate_indexer("slice", key.stop, "getitem")
  3429. self._validate_indexer("slice", key.step, "getitem")
  3430. return key
  3431. # convert the slice to an indexer here
  3432. # if we are mixed and have integers
  3433. if is_positional:
  3434. try:
  3435. # Validate start & stop
  3436. if start is not None:
  3437. self.get_loc(start)
  3438. if stop is not None:
  3439. self.get_loc(stop)
  3440. is_positional = False
  3441. except KeyError:
  3442. pass
  3443. if com.is_null_slice(key):
  3444. # It doesn't matter if we are positional or label based
  3445. indexer = key
  3446. elif is_positional:
  3447. if kind == "loc":
  3448. # GH#16121, GH#24612, GH#31810
  3449. raise TypeError(
  3450. "Slicing a positional slice with .loc is not allowed, "
  3451. "Use .loc with labels or .iloc with positions instead.",
  3452. )
  3453. indexer = key
  3454. else:
  3455. indexer = self.slice_indexer(start, stop, step)
  3456. return indexer
  3457. @final
  3458. def _raise_invalid_indexer(
  3459. self,
  3460. form: str_t,
  3461. key,
  3462. reraise: lib.NoDefault | None | Exception = lib.no_default,
  3463. ) -> None:
  3464. """
  3465. Raise consistent invalid indexer message.
  3466. """
  3467. msg = (
  3468. f"cannot do {form} indexing on {type(self).__name__} with these "
  3469. f"indexers [{key}] of type {type(key).__name__}"
  3470. )
  3471. if reraise is not lib.no_default:
  3472. raise TypeError(msg) from reraise
  3473. raise TypeError(msg)
  3474. # --------------------------------------------------------------------
  3475. # Reindex Methods
  3476. @final
  3477. def _validate_can_reindex(self, indexer: np.ndarray) -> None:
  3478. """
  3479. Check if we are allowing reindexing with this particular indexer.
  3480. Parameters
  3481. ----------
  3482. indexer : an integer ndarray
  3483. Raises
  3484. ------
3485. ValueError if it's a duplicate axis
  3486. """
  3487. # trying to reindex on an axis with duplicates
  3488. if not self._index_as_unique and len(indexer):
  3489. raise ValueError("cannot reindex on an axis with duplicate labels")
  3490. def reindex(
  3491. self, target, method=None, level=None, limit=None, tolerance=None
  3492. ) -> tuple[Index, npt.NDArray[np.intp] | None]:
  3493. """
  3494. Create index with target's values.
  3495. Parameters
  3496. ----------
  3497. target : an iterable
  3498. method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
  3499. * default: exact matches only.
  3500. * pad / ffill: find the PREVIOUS index value if no exact match.
3501. * backfill / bfill: use NEXT index value if no exact match.
  3502. * nearest: use the NEAREST index value if no exact match. Tied
  3503. distances are broken by preferring the larger index value.
  3504. level : int, optional
  3505. Level of multiindex.
  3506. limit : int, optional
  3507. Maximum number of consecutive labels in ``target`` to match for
  3508. inexact matches.
  3509. tolerance : int or float, optional
  3510. Maximum distance between original and new labels for inexact
  3511. matches. The values of the index at the matching locations must
  3512. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3513. Tolerance may be a scalar value, which applies the same tolerance
  3514. to all values, or list-like, which applies variable tolerance per
3515. element. List-like includes list, tuple, array, and Series; it must be
3516. the same size as the index, and its dtype must exactly match the
3517. index's type.
  3518. Returns
  3519. -------
  3520. new_index : pd.Index
  3521. Resulting index.
  3522. indexer : np.ndarray[np.intp] or None
  3523. Indices of output values in original index.
  3524. Raises
  3525. ------
  3526. TypeError
  3527. If ``method`` passed along with ``level``.
  3528. ValueError
  3529. If non-unique multi-index
  3530. ValueError
  3531. If non-unique index and ``method`` or ``limit`` passed.
  3532. See Also
  3533. --------
  3534. Series.reindex : Conform Series to new index with optional filling logic.
  3535. DataFrame.reindex : Conform DataFrame to new index with optional filling logic.
  3536. Examples
  3537. --------
  3538. >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
  3539. >>> idx
  3540. Index(['car', 'bike', 'train', 'tractor'], dtype='object')
  3541. >>> idx.reindex(['car', 'bike'])
  3542. (Index(['car', 'bike'], dtype='object'), array([0, 1]))
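Labels absent from the index get -1 in the returned indexer (a small
illustration reusing ``idx`` from above):
>>> idx.reindex(['car', 'boat'])
(Index(['car', 'boat'], dtype='object'), array([ 0, -1]))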
  3543. """
  3544. # GH6552: preserve names when reindexing to non-named target
  3545. # (i.e. neither Index nor Series).
  3546. preserve_names = not hasattr(target, "name")
  3547. # GH7774: preserve dtype/tz if target is empty and not an Index.
  3548. target = ensure_has_len(target) # target may be an iterator
  3549. if not isinstance(target, Index) and len(target) == 0:
  3550. if level is not None and self._is_multi:
  3551. # "Index" has no attribute "levels"; maybe "nlevels"?
  3552. idx = self.levels[level] # type: ignore[attr-defined]
  3553. else:
  3554. idx = self
  3555. target = idx[:0]
  3556. else:
  3557. target = ensure_index(target)
  3558. if level is not None and (
  3559. isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
  3560. ):
  3561. if method is not None:
  3562. raise TypeError("Fill method not supported if level passed")
  3563. # TODO: tests where passing `keep_order=not self._is_multi`
  3564. # makes a difference for non-MultiIndex case
  3565. target, indexer, _ = self._join_level(
  3566. target, level, how="right", keep_order=not self._is_multi
  3567. )
  3568. else:
  3569. if self.equals(target):
  3570. indexer = None
  3571. else:
  3572. if self._index_as_unique:
  3573. indexer = self.get_indexer(
  3574. target, method=method, limit=limit, tolerance=tolerance
  3575. )
  3576. elif self._is_multi:
  3577. raise ValueError("cannot handle a non-unique multi-index!")
  3578. elif not self.is_unique:
  3579. # GH#42568
  3580. raise ValueError("cannot reindex on an axis with duplicate labels")
  3581. else:
  3582. indexer, _ = self.get_indexer_non_unique(target)
  3583. target = self._wrap_reindex_result(target, indexer, preserve_names)
  3584. return target, indexer
  3585. def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
  3586. target = self._maybe_preserve_names(target, preserve_names)
  3587. return target
  3588. def _maybe_preserve_names(self, target: Index, preserve_names: bool):
  3589. if preserve_names and target.nlevels == 1 and target.name != self.name:
  3590. target = target.copy(deep=False)
  3591. target.name = self.name
  3592. return target
  3593. @final
  3594. def _reindex_non_unique(
  3595. self, target: Index
  3596. ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
  3597. """
  3598. Create a new index with target's values (move/add/delete values as
3599. necessary); use with a non-unique Index and a possibly non-unique target.
  3600. Parameters
  3601. ----------
  3602. target : an iterable
  3603. Returns
  3604. -------
  3605. new_index : pd.Index
  3606. Resulting index.
  3607. indexer : np.ndarray[np.intp]
  3608. Indices of output values in original index.
  3609. new_indexer : np.ndarray[np.intp] or None
  3610. """
  3611. target = ensure_index(target)
  3612. if len(target) == 0:
  3613. # GH#13691
  3614. return self[:0], np.array([], dtype=np.intp), None
  3615. indexer, missing = self.get_indexer_non_unique(target)
  3616. check = indexer != -1
  3617. new_labels = self.take(indexer[check])
  3618. new_indexer = None
  3619. if len(missing):
  3620. length = np.arange(len(indexer), dtype=np.intp)
  3621. missing = ensure_platform_int(missing)
  3622. missing_labels = target.take(missing)
  3623. missing_indexer = length[~check]
  3624. cur_labels = self.take(indexer[check]).values
  3625. cur_indexer = length[check]
  3626. # Index constructor below will do inference
  3627. new_labels = np.empty((len(indexer),), dtype=object)
  3628. new_labels[cur_indexer] = cur_labels
  3629. new_labels[missing_indexer] = missing_labels
  3630. # GH#38906
  3631. if not len(self):
  3632. new_indexer = np.arange(0, dtype=np.intp)
  3633. # a unique indexer
  3634. elif target.is_unique:
  3635. # see GH5553, make sure we use the right indexer
  3636. new_indexer = np.arange(len(indexer), dtype=np.intp)
  3637. new_indexer[cur_indexer] = np.arange(len(cur_labels))
  3638. new_indexer[missing_indexer] = -1
  3639. # we have a non_unique selector, need to use the original
  3640. # indexer here
  3641. else:
  3642. # need to retake to have the same size as the indexer
  3643. indexer[~check] = -1
  3644. # reset the new indexer to account for the new size
  3645. new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
  3646. new_indexer[~check] = -1
  3647. if not isinstance(self, ABCMultiIndex):
  3648. new_index = Index(new_labels, name=self.name)
  3649. else:
  3650. new_index = type(self).from_tuples(new_labels, names=self.names)
  3651. return new_index, indexer, new_indexer
  3652. # --------------------------------------------------------------------
  3653. # Join Methods
  3654. @overload
  3655. def join(
  3656. self,
  3657. other: Index,
  3658. *,
  3659. how: JoinHow = ...,
  3660. level: Level = ...,
  3661. return_indexers: Literal[True],
  3662. sort: bool = ...,
  3663. ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
  3664. ...
  3665. @overload
  3666. def join(
  3667. self,
  3668. other: Index,
  3669. *,
  3670. how: JoinHow = ...,
  3671. level: Level = ...,
  3672. return_indexers: Literal[False] = ...,
  3673. sort: bool = ...,
  3674. ) -> Index:
  3675. ...
  3676. @overload
  3677. def join(
  3678. self,
  3679. other: Index,
  3680. *,
  3681. how: JoinHow = ...,
  3682. level: Level = ...,
  3683. return_indexers: bool = ...,
  3684. sort: bool = ...,
  3685. ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
  3686. ...
  3687. @final
  3688. @_maybe_return_indexers
  3689. def join(
  3690. self,
  3691. other: Index,
  3692. *,
  3693. how: JoinHow = "left",
  3694. level: Level = None,
  3695. return_indexers: bool = False,
  3696. sort: bool = False,
  3697. ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
  3698. """
  3699. Compute join_index and indexers to conform data structures to the new index.
  3700. Parameters
  3701. ----------
  3702. other : Index
  3703. how : {'left', 'right', 'inner', 'outer'}
  3704. level : int or level name, default None
  3705. return_indexers : bool, default False
  3706. sort : bool, default False
  3707. Sort the join keys lexicographically in the result Index. If False,
  3708. the order of the join keys depends on the join type (how keyword).
  3709. Returns
  3710. -------
  3711. join_index, (left_indexer, right_indexer)
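Examples
--------
An illustrative outer join of two disjoint integer indexes (result is sorted):
>>> idx1 = pd.Index([1, 2, 3])
>>> idx2 = pd.Index([4, 5, 6])
>>> idx1.join(idx2, how='outer')
Index([1, 2, 3, 4, 5, 6], dtype='int64')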
  3712. """
  3713. other = ensure_index(other)
  3714. if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
  3715. if (self.tz is None) ^ (other.tz is None):
  3716. # Raise instead of casting to object below.
  3717. raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
  3718. if not self._is_multi and not other._is_multi:
  3719. # We have specific handling for MultiIndex below
  3720. pself, pother = self._maybe_promote(other)
  3721. if pself is not self or pother is not other:
  3722. return pself.join(
  3723. pother, how=how, level=level, return_indexers=True, sort=sort
  3724. )
  3725. lindexer: np.ndarray | None
  3726. rindexer: np.ndarray | None
  3727. # try to figure out the join level
  3728. # GH3662
  3729. if level is None and (self._is_multi or other._is_multi):
  3730. # have the same levels/names so a simple join
  3731. if self.names == other.names:
  3732. pass
  3733. else:
  3734. return self._join_multi(other, how=how)
  3735. # join on the level
  3736. if level is not None and (self._is_multi or other._is_multi):
  3737. return self._join_level(other, level, how=how)
  3738. if len(other) == 0:
  3739. if how in ("left", "outer"):
  3740. join_index = self._view()
  3741. rindexer = np.broadcast_to(np.intp(-1), len(join_index))
  3742. return join_index, None, rindexer
  3743. elif how in ("right", "inner", "cross"):
  3744. join_index = other._view()
  3745. lindexer = np.array([])
  3746. return join_index, lindexer, None
  3747. if len(self) == 0:
  3748. if how in ("right", "outer"):
  3749. join_index = other._view()
  3750. lindexer = np.broadcast_to(np.intp(-1), len(join_index))
  3751. return join_index, lindexer, None
  3752. elif how in ("left", "inner", "cross"):
  3753. join_index = self._view()
  3754. rindexer = np.array([])
  3755. return join_index, None, rindexer
  3756. if self._join_precedence < other._join_precedence:
  3757. flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
  3758. how = flip.get(how, how)
  3759. join_index, lidx, ridx = other.join(
  3760. self, how=how, level=level, return_indexers=True
  3761. )
  3762. lidx, ridx = ridx, lidx
  3763. return join_index, lidx, ridx
  3764. if not is_dtype_equal(self.dtype, other.dtype):
  3765. dtype = self._find_common_type_compat(other)
  3766. this = self.astype(dtype, copy=False)
  3767. other = other.astype(dtype, copy=False)
  3768. return this.join(other, how=how, return_indexers=True)
  3769. _validate_join_method(how)
  3770. if not self.is_unique and not other.is_unique:
  3771. return self._join_non_unique(other, how=how)
  3772. elif not self.is_unique or not other.is_unique:
  3773. if self.is_monotonic_increasing and other.is_monotonic_increasing:
  3774. if not is_interval_dtype(self.dtype):
  3775. # otherwise we will fall through to _join_via_get_indexer
  3776. # GH#39133
# go through object dtype for EAs until the engine supports them properly
  3778. return self._join_monotonic(other, how=how)
  3779. else:
  3780. return self._join_non_unique(other, how=how)
  3781. elif (
  3782. # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
  3783. self.is_monotonic_increasing
  3784. and other.is_monotonic_increasing
  3785. and self._can_use_libjoin
  3786. and not isinstance(self, ABCMultiIndex)
  3787. and not is_categorical_dtype(self.dtype)
  3788. ):
# Categorical is monotonic if the data are ordered as categories, but join
# cannot handle this when the data are not lexicographically monotonic (GH#38502)
  3791. try:
  3792. return self._join_monotonic(other, how=how)
  3793. except TypeError:
  3794. # object dtype; non-comparable objects
  3795. pass
  3796. return self._join_via_get_indexer(other, how, sort)
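# Illustrative sketch (not part of the original source): how the public
# ``Index.join`` fast paths behave for two unique, monotonic indexes,
# assuming the default int64 dtype.
# >>> import pandas as pd
# >>> left = pd.Index([1, 2, 3, 4])
# >>> right = pd.Index([3, 4, 5, 6])
# >>> left.join(right, how="inner")
# Index([3, 4], dtype='int64')
# With ``return_indexers=True`` the positional indexers into ``left`` and
# ``right`` are returned as well (``None`` when an input is reused unchanged).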
  3797. @final
  3798. def _join_via_get_indexer(
  3799. self, other: Index, how: JoinHow, sort: bool
  3800. ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
  3801. # Fallback if we do not have any fastpaths available based on
  3802. # uniqueness/monotonicity
  3803. # Note: at this point we have checked matching dtypes
  3804. if how == "left":
  3805. join_index = self
  3806. elif how == "right":
  3807. join_index = other
  3808. elif how == "inner":
  3809. # TODO: sort=False here for backwards compat. It may
  3810. # be better to use the sort parameter passed into join
  3811. join_index = self.intersection(other, sort=False)
  3812. elif how == "outer":
  3813. # TODO: sort=True here for backwards compat. It may
  3814. # be better to use the sort parameter passed into join
  3815. join_index = self.union(other)
  3816. if sort:
  3817. join_index = join_index.sort_values()
  3818. if join_index is self:
  3819. lindexer = None
  3820. else:
  3821. lindexer = self.get_indexer_for(join_index)
  3822. if join_index is other:
  3823. rindexer = None
  3824. else:
  3825. rindexer = other.get_indexer_for(join_index)
  3826. return join_index, lindexer, rindexer
  3827. @final
  3828. def _join_multi(self, other: Index, how: JoinHow):
  3829. from pandas.core.indexes.multi import MultiIndex
  3830. from pandas.core.reshape.merge import restore_dropped_levels_multijoin
  3831. # figure out join names
  3832. self_names_list = list(com.not_none(*self.names))
  3833. other_names_list = list(com.not_none(*other.names))
  3834. self_names_order = self_names_list.index
  3835. other_names_order = other_names_list.index
  3836. self_names = set(self_names_list)
  3837. other_names = set(other_names_list)
  3838. overlap = self_names & other_names
  3839. # need at least 1 in common
  3840. if not overlap:
  3841. raise ValueError("cannot join with no overlapping index names")
  3842. if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
  3843. # Drop the non-matching levels from left and right respectively
  3844. ldrop_names = sorted(self_names - overlap, key=self_names_order)
  3845. rdrop_names = sorted(other_names - overlap, key=other_names_order)
  3846. # if only the order differs
  3847. if not len(ldrop_names + rdrop_names):
  3848. self_jnlevels = self
  3849. other_jnlevels = other.reorder_levels(self.names)
  3850. else:
  3851. self_jnlevels = self.droplevel(ldrop_names)
  3852. other_jnlevels = other.droplevel(rdrop_names)
  3853. # Join left and right
# Joining two MultiIndexes with the same levels is supported
  3855. join_idx, lidx, ridx = self_jnlevels.join(
  3856. other_jnlevels, how=how, return_indexers=True
  3857. )
  3858. # Restore the dropped levels
  3859. # Returned index level order is
  3860. # common levels, ldrop_names, rdrop_names
  3861. dropped_names = ldrop_names + rdrop_names
  3862. # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
  3863. # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
  3864. # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
  3865. levels, codes, names = restore_dropped_levels_multijoin(
  3866. self,
  3867. other,
  3868. dropped_names,
  3869. join_idx,
  3870. lidx, # type: ignore[arg-type]
  3871. ridx, # type: ignore[arg-type]
  3872. )
  3873. # Re-create the multi-index
  3874. multi_join_idx = MultiIndex(
  3875. levels=levels, codes=codes, names=names, verify_integrity=False
  3876. )
  3877. multi_join_idx = multi_join_idx.remove_unused_levels()
  3878. return multi_join_idx, lidx, ridx
  3879. jl = list(overlap)[0]
  3880. # Case where only one index is multi
  3881. # make the indices into mi's that match
  3882. flip_order = False
  3883. if isinstance(self, MultiIndex):
  3884. self, other = other, self
  3885. flip_order = True
  3886. # flip if join method is right or left
  3887. flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
  3888. how = flip.get(how, how)
  3889. level = other.names.index(jl)
  3890. result = self._join_level(other, level, how=how)
  3891. if flip_order:
  3892. return result[0], result[2], result[1]
  3893. return result
  3894. @final
  3895. def _join_non_unique(
  3896. self, other: Index, how: JoinHow = "left"
  3897. ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
  3898. from pandas.core.reshape.merge import get_join_indexers
  3899. # We only get here if dtypes match
  3900. assert self.dtype == other.dtype
  3901. left_idx, right_idx = get_join_indexers(
  3902. [self._values], [other._values], how=how, sort=True
  3903. )
  3904. mask = left_idx == -1
  3905. join_idx = self.take(left_idx)
  3906. right = other.take(right_idx)
  3907. join_index = join_idx.putmask(mask, right)
  3908. return join_index, left_idx, right_idx
  3909. @final
  3910. def _join_level(
  3911. self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
  3912. ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
  3913. """
  3914. The join method *only* affects the level of the resulting
  3915. MultiIndex. Otherwise it just exactly aligns the Index data to the
  3916. labels of the level in the MultiIndex.
If ``keep_order == True``, the order of the data indexed by the
  3918. MultiIndex will not be changed; otherwise, it will tie out
  3919. with `other`.
  3920. """
  3921. from pandas.core.indexes.multi import MultiIndex
  3922. def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
  3923. """
Returns a sorter for the innermost level while preserving the
  3925. order of higher levels.
  3926. Parameters
  3927. ----------
  3928. labels : list[np.ndarray]
  3929. Each ndarray has signed integer dtype, not necessarily identical.
  3930. Returns
  3931. -------
  3932. np.ndarray[np.intp]
  3933. """
  3934. if labels[0].size == 0:
  3935. return np.empty(0, dtype=np.intp)
  3936. if len(labels) == 1:
  3937. return get_group_index_sorter(ensure_platform_int(labels[0]))
  3938. # find indexers of beginning of each set of
  3939. # same-key labels w.r.t all but last level
  3940. tic = labels[0][:-1] != labels[0][1:]
  3941. for lab in labels[1:-1]:
  3942. tic |= lab[:-1] != lab[1:]
  3943. starts = np.hstack(([True], tic, [True])).nonzero()[0]
  3944. lab = ensure_int64(labels[-1])
  3945. return lib.get_level_sorter(lab, ensure_platform_int(starts))
  3946. if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
  3947. raise TypeError("Join on level between two MultiIndex objects is ambiguous")
  3948. left, right = self, other
  3949. flip_order = not isinstance(self, MultiIndex)
  3950. if flip_order:
  3951. left, right = right, left
  3952. flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
  3953. how = flip.get(how, how)
  3954. assert isinstance(left, MultiIndex)
  3955. level = left._get_level_number(level)
  3956. old_level = left.levels[level]
  3957. if not right.is_unique:
  3958. raise NotImplementedError(
  3959. "Index._join_level on non-unique index is not implemented"
  3960. )
  3961. new_level, left_lev_indexer, right_lev_indexer = old_level.join(
  3962. right, how=how, return_indexers=True
  3963. )
  3964. if left_lev_indexer is None:
  3965. if keep_order or len(left) == 0:
  3966. left_indexer = None
  3967. join_index = left
  3968. else: # sort the leaves
  3969. left_indexer = _get_leaf_sorter(left.codes[: level + 1])
  3970. join_index = left[left_indexer]
  3971. else:
  3972. left_lev_indexer = ensure_platform_int(left_lev_indexer)
  3973. rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
  3974. old_codes = left.codes[level]
  3975. taker = old_codes[old_codes != -1]
  3976. new_lev_codes = rev_indexer.take(taker)
  3977. new_codes = list(left.codes)
  3978. new_codes[level] = new_lev_codes
  3979. new_levels = list(left.levels)
  3980. new_levels[level] = new_level
  3981. if keep_order: # just drop missing values. o.w. keep order
  3982. left_indexer = np.arange(len(left), dtype=np.intp)
  3983. left_indexer = cast(np.ndarray, left_indexer)
  3984. mask = new_lev_codes != -1
  3985. if not mask.all():
  3986. new_codes = [lab[mask] for lab in new_codes]
  3987. left_indexer = left_indexer[mask]
  3988. else: # tie out the order with other
  3989. if level == 0: # outer most level, take the fast route
  3990. max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
  3991. ngroups = 1 + max_new_lev
  3992. left_indexer, counts = libalgos.groupsort_indexer(
  3993. new_lev_codes, ngroups
  3994. )
  3995. # missing values are placed first; drop them!
  3996. left_indexer = left_indexer[counts[0] :]
  3997. new_codes = [lab[left_indexer] for lab in new_codes]
  3998. else: # sort the leaves
  3999. mask = new_lev_codes != -1
  4000. mask_all = mask.all()
  4001. if not mask_all:
  4002. new_codes = [lab[mask] for lab in new_codes]
  4003. left_indexer = _get_leaf_sorter(new_codes[: level + 1])
  4004. new_codes = [lab[left_indexer] for lab in new_codes]
# left_indexer is w.r.t. the masked frame;
# map it back to the original frame
  4007. if not mask_all:
  4008. left_indexer = mask.nonzero()[0][left_indexer]
  4009. join_index = MultiIndex(
  4010. levels=new_levels,
  4011. codes=new_codes,
  4012. names=left.names,
  4013. verify_integrity=False,
  4014. )
  4015. if right_lev_indexer is not None:
  4016. right_indexer = right_lev_indexer.take(join_index.codes[level])
  4017. else:
  4018. right_indexer = join_index.codes[level]
  4019. if flip_order:
  4020. left_indexer, right_indexer = right_indexer, left_indexer
  4021. left_indexer = (
  4022. None if left_indexer is None else ensure_platform_int(left_indexer)
  4023. )
  4024. right_indexer = (
  4025. None if right_indexer is None else ensure_platform_int(right_indexer)
  4026. )
  4027. return join_index, left_indexer, right_indexer
  4028. @final
  4029. def _join_monotonic(
  4030. self, other: Index, how: JoinHow = "left"
  4031. ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
  4032. # We only get here with matching dtypes and both monotonic increasing
  4033. assert other.dtype == self.dtype
  4034. if self.equals(other):
  4035. # This is a convenient place for this check, but its correctness
  4036. # does not depend on monotonicity, so it could go earlier
  4037. # in the calling method.
  4038. ret_index = other if how == "right" else self
  4039. return ret_index, None, None
  4040. ridx: npt.NDArray[np.intp] | None
  4041. lidx: npt.NDArray[np.intp] | None
  4042. if self.is_unique and other.is_unique:
  4043. # We can perform much better than the general case
  4044. if how == "left":
  4045. join_index = self
  4046. lidx = None
  4047. ridx = self._left_indexer_unique(other)
  4048. elif how == "right":
  4049. join_index = other
  4050. lidx = other._left_indexer_unique(self)
  4051. ridx = None
  4052. elif how == "inner":
  4053. join_array, lidx, ridx = self._inner_indexer(other)
  4054. join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
  4055. elif how == "outer":
  4056. join_array, lidx, ridx = self._outer_indexer(other)
  4057. join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
  4058. else:
  4059. if how == "left":
  4060. join_array, lidx, ridx = self._left_indexer(other)
  4061. elif how == "right":
  4062. join_array, ridx, lidx = other._left_indexer(self)
  4063. elif how == "inner":
  4064. join_array, lidx, ridx = self._inner_indexer(other)
  4065. elif how == "outer":
  4066. join_array, lidx, ridx = self._outer_indexer(other)
  4067. assert lidx is not None
  4068. assert ridx is not None
  4069. join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
  4070. lidx = None if lidx is None else ensure_platform_int(lidx)
  4071. ridx = None if ridx is None else ensure_platform_int(ridx)
  4072. return join_index, lidx, ridx
  4073. def _wrap_joined_index(
  4074. self: _IndexT,
  4075. joined: ArrayLike,
  4076. other: _IndexT,
  4077. lidx: npt.NDArray[np.intp],
  4078. ridx: npt.NDArray[np.intp],
  4079. ) -> _IndexT:
  4080. assert other.dtype == self.dtype
  4081. if isinstance(self, ABCMultiIndex):
  4082. name = self.names if self.names == other.names else None
  4083. # error: Incompatible return value type (got "MultiIndex",
  4084. # expected "_IndexT")
  4085. mask = lidx == -1
  4086. join_idx = self.take(lidx)
  4087. right = other.take(ridx)
  4088. join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
  4089. return join_index.set_names(name) # type: ignore[return-value]
  4090. else:
  4091. name = get_op_result_name(self, other)
  4092. return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
  4093. @cache_readonly
  4094. def _can_use_libjoin(self) -> bool:
  4095. """
Whether we can use the fastpaths implemented in _libs.join
  4097. """
  4098. if type(self) is Index:
# excludes EAs but includes masked arrays; we only get here with
# monotonic values, meaning no NAs are present
  4101. return (
  4102. isinstance(self.dtype, np.dtype)
  4103. or isinstance(self.values, BaseMaskedArray)
  4104. or isinstance(self._values, ArrowExtensionArray)
  4105. )
  4106. return not is_interval_dtype(self.dtype)
  4107. # --------------------------------------------------------------------
  4108. # Uncategorized Methods
  4109. @property
  4110. def values(self) -> ArrayLike:
  4111. """
  4112. Return an array representing the data in the Index.
  4113. .. warning::
  4114. We recommend using :attr:`Index.array` or
  4115. :meth:`Index.to_numpy`, depending on whether you need
  4116. a reference to the underlying data or a NumPy array.
  4117. Returns
  4118. -------
array : numpy.ndarray or ExtensionArray
  4120. See Also
  4121. --------
  4122. Index.array : Reference to the underlying data.
  4123. Index.to_numpy : A NumPy array representing the underlying data.
  4124. """
  4125. return self._data
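# Illustrative sketch (not part of the original source): ``Index.values``
# returns the backing ndarray for NumPy-backed indexes and the
# ExtensionArray otherwise.
# >>> import pandas as pd
# >>> pd.Index([1, 2, 3]).values
# array([1, 2, 3])
# >>> pd.Index(pd.Categorical(["a", "b"])).values
# ['a', 'b']
# Categories (2, object): ['a', 'b']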
  4126. @cache_readonly
  4127. @doc(IndexOpsMixin.array)
  4128. def array(self) -> ExtensionArray:
  4129. array = self._data
  4130. if isinstance(array, np.ndarray):
  4131. from pandas.core.arrays.numpy_ import PandasArray
  4132. array = PandasArray(array)
  4133. return array
  4134. @property
  4135. def _values(self) -> ExtensionArray | np.ndarray:
  4136. """
  4137. The best array representation.
  4138. This is an ndarray or ExtensionArray.
  4139. ``_values`` are consistent between ``Series`` and ``Index``.
It may differ from the public ``.values`` attribute.
  4141. index | values | _values |
  4142. ----------------- | --------------- | ------------- |
  4143. Index | ndarray | ndarray |
  4144. CategoricalIndex | Categorical | Categorical |
  4145. DatetimeIndex | ndarray[M8ns] | DatetimeArray |
  4146. DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray |
  4147. PeriodIndex | ndarray[object] | PeriodArray |
  4148. IntervalIndex | IntervalArray | IntervalArray |
  4149. See Also
  4150. --------
  4151. values : Values
  4152. """
  4153. return self._data
  4154. def _get_engine_target(self) -> ArrayLike:
  4155. """
  4156. Get the ndarray or ExtensionArray that we can pass to the IndexEngine
  4157. constructor.
  4158. """
  4159. vals = self._values
  4160. if isinstance(vals, StringArray):
  4161. # GH#45652 much more performant than ExtensionEngine
  4162. return vals._ndarray
  4163. if (
  4164. type(self) is Index
  4165. and isinstance(self._values, ExtensionArray)
  4166. and not isinstance(self._values, BaseMaskedArray)
  4167. and not (
  4168. isinstance(self._values, ArrowExtensionArray)
  4169. and is_numeric_dtype(self.dtype)
  4170. # Exclude decimal
  4171. and self.dtype.kind != "O"
  4172. )
  4173. ):
  4174. # TODO(ExtensionIndex): remove special-case, just use self._values
  4175. return self._values.astype(object)
  4176. return vals
  4177. def _get_join_target(self) -> ArrayLike:
  4178. """
  4179. Get the ndarray or ExtensionArray that we can pass to the join
  4180. functions.
  4181. """
  4182. if isinstance(self._values, BaseMaskedArray):
  4183. # This is only used if our array is monotonic, so no NAs present
  4184. return self._values._data
  4185. elif isinstance(self._values, ArrowExtensionArray):
  4186. # This is only used if our array is monotonic, so no missing values
  4187. # present
  4188. return self._values.to_numpy()
  4189. return self._get_engine_target()
  4190. def _from_join_target(self, result: np.ndarray) -> ArrayLike:
  4191. """
  4192. Cast the ndarray returned from one of the libjoin.foo_indexer functions
  4193. back to type(self)._data.
  4194. """
  4195. if isinstance(self.values, BaseMaskedArray):
  4196. return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
  4197. elif isinstance(self.values, ArrowExtensionArray):
  4198. return type(self.values)._from_sequence(result)
  4199. return result
  4200. @doc(IndexOpsMixin._memory_usage)
  4201. def memory_usage(self, deep: bool = False) -> int:
  4202. result = self._memory_usage(deep=deep)
  4203. # include our engine hashtable
  4204. result += self._engine.sizeof(deep=deep)
  4205. return result
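# Illustrative sketch (not part of the original source): ``memory_usage``
# includes the engine hashtable, so the reported size can grow once the
# engine has been built by a lookup.
# >>> import pandas as pd
# >>> idx = pd.Index(list("abc"))
# >>> before = idx.memory_usage(deep=True)
# >>> _ = "a" in idx  # builds the engine hashtable
# >>> idx.memory_usage(deep=True) >= before
# True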
  4206. @final
  4207. def where(self, cond, other=None) -> Index:
  4208. """
  4209. Replace values where the condition is False.
  4210. The replacement is taken from other.
  4211. Parameters
  4212. ----------
  4213. cond : bool array-like with the same length as self
  4214. Condition to select the values on.
  4215. other : scalar, or array-like, default None
  4216. Replacement if the condition is False.
  4217. Returns
  4218. -------
  4219. pandas.Index
  4220. A copy of self with values replaced from other
  4221. where the condition is False.
  4222. See Also
  4223. --------
  4224. Series.where : Same method for Series.
  4225. DataFrame.where : Same method for DataFrame.
  4226. Examples
  4227. --------
  4228. >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
  4229. >>> idx
  4230. Index(['car', 'bike', 'train', 'tractor'], dtype='object')
  4231. >>> idx.where(idx.isin(['car', 'train']), 'other')
  4232. Index(['car', 'other', 'train', 'other'], dtype='object')
  4233. """
  4234. if isinstance(self, ABCMultiIndex):
  4235. raise NotImplementedError(
  4236. ".where is not supported for MultiIndex operations"
  4237. )
  4238. cond = np.asarray(cond, dtype=bool)
  4239. return self.putmask(~cond, other)
  4240. # construction helpers
  4241. @final
  4242. @classmethod
  4243. def _raise_scalar_data_error(cls, data):
  4244. # We return the TypeError so that we can raise it from the constructor
  4245. # in order to keep mypy happy
  4246. raise TypeError(
  4247. f"{cls.__name__}(...) must be called with a collection of some "
  4248. f"kind, {repr(data)} was passed"
  4249. )
  4250. def _validate_fill_value(self, value):
  4251. """
  4252. Check if the value can be inserted into our array without casting,
  4253. and convert it to an appropriate native type if necessary.
  4254. Raises
  4255. ------
  4256. TypeError
  4257. If the value cannot be inserted into an array of this dtype.
  4258. """
  4259. dtype = self.dtype
  4260. if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
  4261. # return np_can_hold_element(dtype, value)
  4262. try:
  4263. return np_can_hold_element(dtype, value)
  4264. except LossySetitemError as err:
  4265. # re-raise as TypeError for consistency
  4266. raise TypeError from err
  4267. elif not can_hold_element(self._values, value):
  4268. raise TypeError
  4269. return value
  4270. @final
  4271. def _require_scalar(self, value):
  4272. """
  4273. Check that this is a scalar value that we can use for setitem-like
  4274. operations without changing dtype.
  4275. """
  4276. if not is_scalar(value):
  4277. raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
  4278. return value
  4279. def _is_memory_usage_qualified(self) -> bool:
  4280. """
  4281. Return a boolean if we need a qualified .info display.
  4282. """
  4283. return is_object_dtype(self.dtype)
  4284. def __contains__(self, key: Any) -> bool:
  4285. """
  4286. Return a boolean indicating whether the provided key is in the index.
  4287. Parameters
  4288. ----------
  4289. key : label
  4290. The key to check if it is present in the index.
  4291. Returns
  4292. -------
  4293. bool
True if the key is in the index, False otherwise.
  4295. Raises
  4296. ------
  4297. TypeError
  4298. If the key is not hashable.
  4299. See Also
  4300. --------
  4301. Index.isin : Returns an ndarray of boolean dtype indicating whether the
  4302. list-like key is in the index.
  4303. Examples
  4304. --------
  4305. >>> idx = pd.Index([1, 2, 3, 4])
  4306. >>> idx
  4307. Index([1, 2, 3, 4], dtype='int64')
  4308. >>> 2 in idx
  4309. True
  4310. >>> 6 in idx
  4311. False
  4312. """
  4313. hash(key)
  4314. try:
  4315. return key in self._engine
  4316. except (OverflowError, TypeError, ValueError):
  4317. return False
  4318. # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
  4319. # Incompatible types in assignment (expression has type "None", base class
  4320. # "object" defined the type as "Callable[[object], int]")
  4321. __hash__: ClassVar[None] # type: ignore[assignment]
  4322. @final
  4323. def __setitem__(self, key, value):
  4324. raise TypeError("Index does not support mutable operations")
  4325. def __getitem__(self, key):
  4326. """
  4327. Override numpy.ndarray's __getitem__ method to work as desired.
  4328. This function adds lists and Series as valid boolean indexers
(a plain ndarray only supports another ndarray with dtype=bool).
  4330. If resulting ndim != 1, plain ndarray is returned instead of
  4331. corresponding `Index` subclass.
  4332. """
  4333. getitem = self._data.__getitem__
  4334. if is_integer(key) or is_float(key):
  4335. # GH#44051 exclude bool, which would return a 2d ndarray
  4336. key = com.cast_scalar_indexer(key)
  4337. return getitem(key)
  4338. if isinstance(key, slice):
# This case is separated from the conditional above to avoid the
# pessimization of com.is_bool_indexer and ndim checks.
  4341. result = getitem(key)
  4342. # Going through simple_new for performance.
  4343. return type(self)._simple_new(
  4344. result, name=self._name, refs=self._references
  4345. )
  4346. if com.is_bool_indexer(key):
  4347. # if we have list[bools, length=1e5] then doing this check+convert
  4348. # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
  4349. # time below from 3.8 ms to 496 µs
  4350. # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
  4351. if is_extension_array_dtype(getattr(key, "dtype", None)):
  4352. key = key.to_numpy(dtype=bool, na_value=False)
  4353. else:
  4354. key = np.asarray(key, dtype=bool)
  4355. result = getitem(key)
  4356. # Because we ruled out integer above, we always get an arraylike here
  4357. if result.ndim > 1:
  4358. disallow_ndim_indexing(result)
  4359. # NB: Using _constructor._simple_new would break if MultiIndex
  4360. # didn't override __getitem__
  4361. return self._constructor._simple_new(result, name=self._name)
  4362. def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
  4363. """
  4364. Fastpath for __getitem__ when we know we have a slice.
  4365. """
  4366. res = self._data[slobj]
  4367. return type(self)._simple_new(res, name=self._name, refs=self._references)
  4368. @final
  4369. def _can_hold_identifiers_and_holds_name(self, name) -> bool:
  4370. """
  4371. Faster check for ``name in self`` when we know `name` is a Python
  4372. identifier (e.g. in NDFrame.__getattr__, which hits this to support
  4373. . key lookup). For indexes that can't hold identifiers (everything
  4374. but object & categorical) we just return False.
  4375. https://github.com/pandas-dev/pandas/issues/19764
  4376. """
  4377. if (
  4378. is_object_dtype(self.dtype)
  4379. or is_string_dtype(self.dtype)
  4380. or is_categorical_dtype(self.dtype)
  4381. ):
  4382. return name in self
  4383. return False
  4384. def append(self, other: Index | Sequence[Index]) -> Index:
  4385. """
  4386. Append a collection of Index options together.
  4387. Parameters
  4388. ----------
  4389. other : Index or list/tuple of indices
  4390. Returns
  4391. -------
  4392. Index
  4393. """
  4394. to_concat = [self]
  4395. if isinstance(other, (list, tuple)):
  4396. to_concat += list(other)
  4397. else:
  4398. # error: Argument 1 to "append" of "list" has incompatible type
  4399. # "Union[Index, Sequence[Index]]"; expected "Index"
  4400. to_concat.append(other) # type: ignore[arg-type]
  4401. for obj in to_concat:
  4402. if not isinstance(obj, Index):
  4403. raise TypeError("all inputs must be Index")
  4404. names = {obj.name for obj in to_concat}
  4405. name = None if len(names) > 1 else self.name
  4406. return self._concat(to_concat, name)
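# Illustrative sketch (not part of the original source): ``append``
# concatenates index values; the name is kept only when all inputs agree.
# >>> import pandas as pd
# >>> pd.Index([1, 2], name="x").append(pd.Index([3, 4], name="x"))
# Index([1, 2, 3, 4], dtype='int64', name='x')
# >>> pd.Index([1, 2], name="x").append(pd.Index([3, 4], name="y"))
# Index([1, 2, 3, 4], dtype='int64')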
  4407. def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
  4408. """
  4409. Concatenate multiple Index objects.
  4410. """
  4411. to_concat_vals = [x._values for x in to_concat]
  4412. result = concat_compat(to_concat_vals)
  4413. return Index._with_infer(result, name=name)
  4414. def putmask(self, mask, value) -> Index:
  4415. """
  4416. Return a new Index of the values set with the mask.
  4417. Returns
  4418. -------
  4419. Index
  4420. See Also
  4421. --------
  4422. numpy.ndarray.putmask : Changes elements of an array
  4423. based on conditional and input values.
  4424. """
  4425. mask, noop = validate_putmask(self._values, mask)
  4426. if noop:
  4427. return self.copy()
  4428. if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
  4429. # e.g. None -> np.nan, see also Block._standardize_fill_value
  4430. value = self._na_value
  4431. try:
  4432. converted = self._validate_fill_value(value)
  4433. except (LossySetitemError, ValueError, TypeError) as err:
  4434. if is_object_dtype(self): # pragma: no cover
  4435. raise err
  4436. # See also: Block.coerce_to_target_dtype
  4437. dtype = self._find_common_type_compat(value)
  4438. return self.astype(dtype).putmask(mask, value)
  4439. values = self._values.copy()
  4440. if isinstance(values, np.ndarray):
  4441. converted = setitem_datetimelike_compat(values, mask.sum(), converted)
  4442. np.putmask(values, mask, converted)
  4443. else:
  4444. # Note: we use the original value here, not converted, as
  4445. # _validate_fill_value is not idempotent
  4446. values._putmask(mask, value)
  4447. return self._shallow_copy(values)
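# Illustrative sketch (not part of the original source): ``putmask`` replaces
# values where the mask is True, upcasting the dtype when the replacement
# cannot be held losslessly.
# >>> import pandas as pd
# >>> idx = pd.Index([1, 2, 3])
# >>> idx.putmask([True, False, False], 99)
# Index([99, 2, 3], dtype='int64')
# >>> idx.putmask([True, False, False], 1.5)
# Index([1.5, 2.0, 3.0], dtype='float64')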
  4448. def equals(self, other: Any) -> bool:
  4449. """
Determine if two Index objects are equal.
  4451. The things that are being compared are:
  4452. * The elements inside the Index object.
  4453. * The order of the elements inside the Index object.
  4454. Parameters
  4455. ----------
  4456. other : Any
  4457. The other object to compare against.
  4458. Returns
  4459. -------
  4460. bool
  4461. True if "other" is an Index and it has the same elements and order
  4462. as the calling index; False otherwise.
  4463. Examples
  4464. --------
  4465. >>> idx1 = pd.Index([1, 2, 3])
  4466. >>> idx1
  4467. Index([1, 2, 3], dtype='int64')
  4468. >>> idx1.equals(pd.Index([1, 2, 3]))
  4469. True
  4470. The elements inside are compared
  4471. >>> idx2 = pd.Index(["1", "2", "3"])
  4472. >>> idx2
  4473. Index(['1', '2', '3'], dtype='object')
  4474. >>> idx1.equals(idx2)
  4475. False
  4476. The order is compared
  4477. >>> ascending_idx = pd.Index([1, 2, 3])
  4478. >>> ascending_idx
  4479. Index([1, 2, 3], dtype='int64')
  4480. >>> descending_idx = pd.Index([3, 2, 1])
  4481. >>> descending_idx
  4482. Index([3, 2, 1], dtype='int64')
  4483. >>> ascending_idx.equals(descending_idx)
  4484. False
  4485. The dtype is *not* compared
  4486. >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
  4487. >>> int64_idx
  4488. Index([1, 2, 3], dtype='int64')
  4489. >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
  4490. >>> uint64_idx
  4491. Index([1, 2, 3], dtype='uint64')
  4492. >>> int64_idx.equals(uint64_idx)
  4493. True
  4494. """
  4495. if self.is_(other):
  4496. return True
  4497. if not isinstance(other, Index):
  4498. return False
  4499. if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
  4500. # if other is not object, use other's logic for coercion
  4501. return other.equals(self)
  4502. if isinstance(other, ABCMultiIndex):
  4503. # d-level MultiIndex can equal d-tuple Index
  4504. return other.equals(self)
  4505. if isinstance(self._values, ExtensionArray):
  4506. # Dispatch to the ExtensionArray's .equals method.
  4507. if not isinstance(other, type(self)):
  4508. return False
  4509. earr = cast(ExtensionArray, self._data)
  4510. return earr.equals(other._data)
  4511. if is_extension_array_dtype(other.dtype):
  4512. # All EA-backed Index subclasses override equals
  4513. return other.equals(self)
  4514. return array_equivalent(self._values, other._values)
  4515. @final
  4516. def identical(self, other) -> bool:
  4517. """
  4518. Similar to equals, but checks that object attributes and types are also equal.
  4519. Returns
  4520. -------
  4521. bool
True if the two Index objects have equal elements and the same type,
otherwise False.
  4524. """
  4525. return (
  4526. self.equals(other)
  4527. and all(
  4528. getattr(self, c, None) == getattr(other, c, None)
  4529. for c in self._comparables
  4530. )
  4531. and type(self) == type(other)
  4532. and self.dtype == other.dtype
  4533. )
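# Illustrative sketch (not part of the original source): ``identical`` is
# stricter than ``equals`` -- it also compares attributes such as name,
# dtype and the index type itself.
# >>> import pandas as pd
# >>> a = pd.Index([1, 2, 3], name="x")
# >>> b = pd.Index([1, 2, 3], name="y")
# >>> a.equals(b), a.identical(b)
# (True, False)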
  4534. @final
  4535. def asof(self, label):
  4536. """
  4537. Return the label from the index, or, if not present, the previous one.
  4538. Assuming that the index is sorted, return the passed index label if it
  4539. is in the index, or return the previous index label if the passed one
  4540. is not in the index.
  4541. Parameters
  4542. ----------
  4543. label : object
  4544. The label up to which the method returns the latest index label.
  4545. Returns
  4546. -------
  4547. object
  4548. The passed label if it is in the index. The previous label if the
  4549. passed label is not in the sorted index or `NaN` if there is no
  4550. such label.
  4551. See Also
  4552. --------
  4553. Series.asof : Return the latest value in a Series up to the
  4554. passed index.
  4555. merge_asof : Perform an asof merge (similar to left join but it
  4556. matches on nearest key rather than equal key).
  4557. Index.get_loc : An `asof` is a thin wrapper around `get_loc`
  4558. with method='pad'.
  4559. Examples
  4560. --------
  4561. `Index.asof` returns the latest index label up to the passed label.
  4562. >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
  4563. >>> idx.asof('2014-01-01')
  4564. '2013-12-31'
  4565. If the label is in the index, the method returns the passed label.
  4566. >>> idx.asof('2014-01-02')
  4567. '2014-01-02'
  4568. If all of the labels in the index are later than the passed label,
  4569. NaN is returned.
  4570. >>> idx.asof('1999-01-02')
  4571. nan
  4572. If the index is not sorted, an error is raised.
  4573. >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
  4574. ... '2014-01-03'])
  4575. >>> idx_not_sorted.asof('2013-12-31')
  4576. Traceback (most recent call last):
  4577. ValueError: index must be monotonic increasing or decreasing
  4578. """
  4579. self._searchsorted_monotonic(label) # validate sortedness
  4580. try:
  4581. loc = self.get_loc(label)
  4582. except (KeyError, TypeError):
  4583. # KeyError -> No exact match, try for padded
  4584. # TypeError -> passed e.g. non-hashable, fall through to get
  4585. # the tested exception message
  4586. indexer = self.get_indexer([label], method="pad")
  4587. if indexer.ndim > 1 or indexer.size > 1:
  4588. raise TypeError("asof requires scalar valued input")
  4589. loc = indexer.item()
  4590. if loc == -1:
  4591. return self._na_value
  4592. else:
  4593. if isinstance(loc, slice):
  4594. loc = loc.indices(len(self))[-1]
  4595. return self[loc]
  4596. def asof_locs(
  4597. self, where: Index, mask: npt.NDArray[np.bool_]
  4598. ) -> npt.NDArray[np.intp]:
  4599. """
  4600. Return the locations (indices) of labels in the index.
  4601. As in the `asof` function, if the label (a particular entry in
  4602. `where`) is not in the index, the latest index label up to the
  4603. passed label is chosen and its index returned.
  4604. If all of the labels in the index are later than a label in `where`,
  4605. -1 is returned.
  4606. `mask` is used to ignore NA values in the index during calculation.
  4607. Parameters
  4608. ----------
  4609. where : Index
  4610. An Index consisting of an array of timestamps.
  4611. mask : np.ndarray[bool]
  4612. Array of booleans denoting where values in the original
  4613. data are not NA.
  4614. Returns
  4615. -------
  4616. np.ndarray[np.intp]
  4617. An array of locations (indices) of the labels from the Index
  4618. which correspond to the return values of the `asof` function
  4619. for every element in `where`.
  4620. """
  4621. # error: No overload variant of "searchsorted" of "ndarray" matches argument
  4622. # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
  4623. # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
  4624. locs = self._values[mask].searchsorted(
  4625. where._values, side="right" # type: ignore[call-overload]
  4626. )
  4627. locs = np.where(locs > 0, locs - 1, 0)
  4628. result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
  4629. first_value = self._values[mask.argmax()]
  4630. result[(locs == 0) & (where._values < first_value)] = -1
  4631. return result
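# Illustrative sketch (not part of the original source): ``asof_locs`` is the
# vectorized counterpart of ``asof`` -- for each label in ``where`` it yields
# the position of the latest index label not exceeding it, or -1.
# >>> import numpy as np
# >>> import pandas as pd
# >>> idx = pd.Index([10, 20, 30])
# >>> idx.asof_locs(pd.Index([5, 20, 25]), np.ones(len(idx), dtype=bool))
# array([-1,  1,  1])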
  4632. def sort_values(
  4633. self,
  4634. return_indexer: bool = False,
  4635. ascending: bool = True,
  4636. na_position: str_t = "last",
  4637. key: Callable | None = None,
  4638. ):
  4639. """
  4640. Return a sorted copy of the index.
  4641. Return a sorted copy of the index, and optionally return the indices
  4642. that sorted the index itself.
  4643. Parameters
  4644. ----------
  4645. return_indexer : bool, default False
  4646. Should the indices that would sort the index be returned.
  4647. ascending : bool, default True
  4648. Should the index values be sorted in an ascending order.
  4649. na_position : {'first' or 'last'}, default 'last'
  4650. Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
  4651. the end.
  4652. .. versionadded:: 1.2.0
  4653. key : callable, optional
  4654. If not None, apply the key function to the index values
  4655. before sorting. This is similar to the `key` argument in the
  4656. builtin :meth:`sorted` function, with the notable difference that
  4657. this `key` function should be *vectorized*. It should expect an
  4658. ``Index`` and return an ``Index`` of the same shape.
  4659. .. versionadded:: 1.1.0
  4660. Returns
  4661. -------
  4662. sorted_index : pandas.Index
  4663. Sorted copy of the index.
  4664. indexer : numpy.ndarray, optional
  4665. The indices that the index itself was sorted by.
  4666. See Also
  4667. --------
  4668. Series.sort_values : Sort values of a Series.
  4669. DataFrame.sort_values : Sort values in a DataFrame.
  4670. Examples
  4671. --------
  4672. >>> idx = pd.Index([10, 100, 1, 1000])
  4673. >>> idx
  4674. Index([10, 100, 1, 1000], dtype='int64')
  4675. Sort values in ascending order (default behavior).
  4676. >>> idx.sort_values()
  4677. Index([1, 10, 100, 1000], dtype='int64')
  4678. Sort values in descending order, and also get the indices `idx` was
  4679. sorted by.
  4680. >>> idx.sort_values(ascending=False, return_indexer=True)
  4681. (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
  4682. """
  4683. idx = ensure_key_mapped(self, key)
  4684. # GH 35584. Sort missing values according to na_position kwarg
  4685. # ignore na_position for MultiIndex
  4686. if not isinstance(self, ABCMultiIndex):
  4687. _as = nargsort(
  4688. items=idx, ascending=ascending, na_position=na_position, key=key
  4689. )
  4690. else:
  4691. _as = idx.argsort()
  4692. if not ascending:
  4693. _as = _as[::-1]
  4694. sorted_index = self.take(_as)
  4695. if return_indexer:
  4696. return sorted_index, _as
  4697. else:
  4698. return sorted_index
  4699. @final
  4700. def sort(self, *args, **kwargs):
  4701. """
  4702. Use sort_values instead.
  4703. """
  4704. raise TypeError("cannot sort an Index object in-place, use sort_values instead")
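# Illustrative sketch (not part of the original source): in-place sorting is
# deliberately unsupported, matching the immutability of Index.
# >>> import pandas as pd
# >>> pd.Index([3, 1, 2]).sort()
# Traceback (most recent call last):
# ...
# TypeError: cannot sort an Index object in-place, use sort_values instead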
  4705. def shift(self, periods: int = 1, freq=None):
  4706. """
  4707. Shift index by desired number of time frequency increments.
  4708. This method is for shifting the values of datetime-like indexes
  4709. by a specified time increment a given number of times.
  4710. Parameters
  4711. ----------
  4712. periods : int, default 1
  4713. Number of periods (or increments) to shift by,
  4714. can be positive or negative.
  4715. freq : pandas.DateOffset, pandas.Timedelta or str, optional
  4716. Frequency increment to shift by.
  4717. If None, the index is shifted by its own `freq` attribute.
  4718. Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
  4719. Returns
  4720. -------
  4721. pandas.Index
  4722. Shifted index.
  4723. See Also
  4724. --------
  4725. Series.shift : Shift values of Series.
  4726. Notes
  4727. -----
  4728. This method is only implemented for datetime-like index classes,
  4729. i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
  4730. Examples
  4731. --------
  4732. Put the first 5 month starts of 2011 into an index.
  4733. >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
  4734. >>> month_starts
  4735. DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
  4736. '2011-05-01'],
  4737. dtype='datetime64[ns]', freq='MS')
  4738. Shift the index by 10 days.
  4739. >>> month_starts.shift(10, freq='D')
  4740. DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
  4741. '2011-05-11'],
  4742. dtype='datetime64[ns]', freq=None)
  4743. The default value of `freq` is the `freq` attribute of the index,
  4744. which is 'MS' (month start) in this example.
  4745. >>> month_starts.shift(10)
  4746. DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
  4747. '2012-03-01'],
  4748. dtype='datetime64[ns]', freq='MS')
  4749. """
  4750. raise NotImplementedError(
  4751. f"This method is only implemented for DatetimeIndex, PeriodIndex and "
  4752. f"TimedeltaIndex; Got type {type(self).__name__}"
  4753. )
  4754. def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
  4755. """
  4756. Return the integer indices that would sort the index.
  4757. Parameters
  4758. ----------
  4759. *args
  4760. Passed to `numpy.ndarray.argsort`.
  4761. **kwargs
  4762. Passed to `numpy.ndarray.argsort`.
  4763. Returns
  4764. -------
  4765. np.ndarray[np.intp]
  4766. Integer indices that would sort the index if used as
  4767. an indexer.
  4768. See Also
  4769. --------
  4770. numpy.argsort : Similar method for NumPy arrays.
  4771. Index.sort_values : Return sorted copy of Index.
  4772. Examples
  4773. --------
  4774. >>> idx = pd.Index(['b', 'a', 'd', 'c'])
  4775. >>> idx
  4776. Index(['b', 'a', 'd', 'c'], dtype='object')
  4777. >>> order = idx.argsort()
  4778. >>> order
  4779. array([1, 0, 3, 2])
  4780. >>> idx[order]
  4781. Index(['a', 'b', 'c', 'd'], dtype='object')
  4782. """
  4783. # This works for either ndarray or EA, is overridden
# by RangeIndex, MultiIndex
  4785. return self._data.argsort(*args, **kwargs)
  4786. def _check_indexing_error(self, key):
  4787. if not is_scalar(key):
  4788. # if key is not a scalar, directly raise an error (the code below
  4789. # would convert to numpy arrays and raise later any way) - GH29926
  4790. raise InvalidIndexError(key)
  4791. @cache_readonly
  4792. def _should_fallback_to_positional(self) -> bool:
  4793. """
  4794. Should an integer key be treated as positional?
  4795. """
  4796. return self.inferred_type not in {
  4797. "integer",
  4798. "mixed-integer",
  4799. "floating",
  4800. "complex",
  4801. }
  4802. _index_shared_docs[
  4803. "get_indexer_non_unique"
  4804. ] = """
  4805. Compute indexer and mask for new index given the current index.
  4806. The indexer should be then used as an input to ndarray.take to align the
  4807. current data to the new index.
  4808. Parameters
  4809. ----------
  4810. target : %(target_klass)s
  4811. Returns
  4812. -------
  4813. indexer : np.ndarray[np.intp]
  4814. Integers from 0 to n - 1 indicating that the index at these
  4815. positions matches the corresponding target values. Missing values
  4816. in the target are marked by -1.
  4817. missing : np.ndarray[np.intp]
  4818. An indexer into the target of the values not found.
  4819. These correspond to the -1 in the indexer array.
  4820. Examples
  4821. --------
  4822. >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
  4823. >>> index.get_indexer_non_unique(['b', 'b'])
  4824. (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))
  4825. In the example below there are no matched values.
  4826. >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
  4827. >>> index.get_indexer_non_unique(['q', 'r', 't'])
  4828. (array([-1, -1, -1]), array([0, 1, 2]))
  4829. For this reason, the returned ``indexer`` contains only integers equal to -1.
  4830. It demonstrates that there's no match between the index and the ``target``
  4831. values at these positions. The mask [0, 1, 2] in the return value shows that
  4832. the first, second, and third elements are missing.
Notice that the return value is a tuple containing two items. In the example
below, the first item is an array of locations in ``index``. The second
item is a mask that shows that the first and third elements are missing.
  4836. >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
  4837. >>> index.get_indexer_non_unique(['f', 'b', 's'])
  4838. (array([-1, 1, 3, 4, -1]), array([0, 2]))
  4839. """
  4840. @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
  4841. def get_indexer_non_unique(
  4842. self, target
  4843. ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
  4844. target = ensure_index(target)
  4845. target = self._maybe_cast_listlike_indexer(target)
  4846. if not self._should_compare(target) and not self._should_partial_index(target):
  4847. # _should_partial_index e.g. IntervalIndex with numeric scalars
  4848. # that can be matched to Interval scalars.
  4849. return self._get_indexer_non_comparable(target, method=None, unique=False)
  4850. pself, ptarget = self._maybe_promote(target)
  4851. if pself is not self or ptarget is not target:
  4852. return pself.get_indexer_non_unique(ptarget)
  4853. if not is_dtype_equal(self.dtype, target.dtype):
  4854. # TODO: if object, could use infer_dtype to preempt costly
  4855. # conversion if still non-comparable?
  4856. dtype = self._find_common_type_compat(target)
  4857. this = self.astype(dtype, copy=False)
  4858. that = target.astype(dtype, copy=False)
  4859. return this.get_indexer_non_unique(that)
  4860. # TODO: get_indexer has fastpaths for both Categorical-self and
  4861. # Categorical-target. Can we do something similar here?
  4862. # Note: _maybe_promote ensures we never get here with MultiIndex
  4863. # self and non-Multi target
  4864. tgt_values = target._get_engine_target()
  4865. if self._is_multi and target._is_multi:
  4866. engine = self._engine
  4867. # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
  4868. # no attribute "_extract_level_codes"
  4869. tgt_values = engine._extract_level_codes(target) # type: ignore[union-attr]
  4870. indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
  4871. return ensure_platform_int(indexer), ensure_platform_int(missing)
  4872. @final
  4873. def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
  4874. """
  4875. Guaranteed return of an indexer even when non-unique.
  4876. This dispatches to get_indexer or get_indexer_non_unique
  4877. as appropriate.
  4878. Returns
  4879. -------
  4880. np.ndarray[np.intp]
  4881. List of indices.
  4882. Examples
  4883. --------
  4884. >>> idx = pd.Index([np.nan, 'var1', np.nan])
  4885. >>> idx.get_indexer_for([np.nan])
  4886. array([0, 2])
  4887. """
  4888. if self._index_as_unique:
  4889. return self.get_indexer(target)
  4890. indexer, _ = self.get_indexer_non_unique(target)
  4891. return indexer
  4892. def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
  4893. """
  4894. Analogue to get_indexer that raises if any elements are missing.
  4895. """
  4896. keyarr = key
  4897. if not isinstance(keyarr, Index):
  4898. keyarr = com.asarray_tuplesafe(keyarr)
  4899. if self._index_as_unique:
  4900. indexer = self.get_indexer_for(keyarr)
  4901. keyarr = self.reindex(keyarr)[0]
  4902. else:
  4903. keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
  4904. self._raise_if_missing(keyarr, indexer, axis_name)
  4905. keyarr = self.take(indexer)
  4906. if isinstance(key, Index):
  4907. # GH 42790 - Preserve name from an Index
  4908. keyarr.name = key.name
  4909. if (
  4910. isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"]
  4911. ) or isinstance(keyarr.dtype, DatetimeTZDtype):
# DTI/TDI.take can infer a freq in some cases when we don't want one
  4913. if isinstance(key, list) or (
  4914. isinstance(key, type(self))
  4915. # "Index" has no attribute "freq"
  4916. and key.freq is None # type: ignore[attr-defined]
  4917. ):
  4918. keyarr = keyarr._with_freq(None)
  4919. return keyarr, indexer
  4920. def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
  4921. """
  4922. Check that indexer can be used to return a result.
  4923. e.g. at least one element was found,
  4924. unless the list of keys was actually empty.
  4925. Parameters
  4926. ----------
  4927. key : list-like
  4928. Targeted labels (only used to show correct error message).
indexer : array-like of ints
  4930. Indices corresponding to the key,
  4931. (with -1 indicating not found).
  4932. axis_name : str
  4933. Raises
  4934. ------
  4935. KeyError
  4936. If at least one key was requested but none was found.
  4937. """
  4938. if len(key) == 0:
  4939. return
  4940. # Count missing values
  4941. missing_mask = indexer < 0
  4942. nmissing = missing_mask.sum()
  4943. if nmissing:
  4944. # TODO: remove special-case; this is just to keep exception
  4945. # message tests from raising while debugging
  4946. use_interval_msg = is_interval_dtype(self.dtype) or (
  4947. is_categorical_dtype(self.dtype)
  4948. # "Index" has no attribute "categories" [attr-defined]
  4949. and is_interval_dtype(
  4950. self.categories.dtype # type: ignore[attr-defined]
  4951. )
  4952. )
  4953. if nmissing == len(indexer):
  4954. if use_interval_msg:
  4955. key = list(key)
  4956. raise KeyError(f"None of [{key}] are in the [{axis_name}]")
  4957. not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
  4958. raise KeyError(f"{not_found} not in index")
  4959. @overload
  4960. def _get_indexer_non_comparable(
  4961. self, target: Index, method, unique: Literal[True] = ...
  4962. ) -> npt.NDArray[np.intp]:
  4963. ...
  4964. @overload
  4965. def _get_indexer_non_comparable(
  4966. self, target: Index, method, unique: Literal[False]
  4967. ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
  4968. ...
  4969. @overload
  4970. def _get_indexer_non_comparable(
  4971. self, target: Index, method, unique: bool = True
  4972. ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
  4973. ...
  4974. @final
  4975. def _get_indexer_non_comparable(
  4976. self, target: Index, method, unique: bool = True
  4977. ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
  4978. """
  4979. Called from get_indexer or get_indexer_non_unique when the target
  4980. is of a non-comparable dtype.
  4981. For get_indexer lookups with method=None, get_indexer is an _equality_
  4982. check, so non-comparable dtypes mean we will always have no matches.
  4983. For get_indexer lookups with a method, get_indexer is an _inequality_
  4984. check, so non-comparable dtypes mean we will always raise TypeError.
  4985. Parameters
  4986. ----------
  4987. target : Index
  4988. method : str or None
  4989. unique : bool, default True
  4990. * True if called from get_indexer.
  4991. * False if called from get_indexer_non_unique.
  4992. Raises
  4993. ------
  4994. TypeError
  4995. If doing an inequality check, i.e. method is not None.
  4996. """
  4997. if method is not None:
  4998. other = _unpack_nested_dtype(target)
  4999. raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
  5000. no_matches = -1 * np.ones(target.shape, dtype=np.intp)
  5001. if unique:
  5002. # This is for get_indexer
  5003. return no_matches
  5004. else:
  5005. # This is for get_indexer_non_unique
  5006. missing = np.arange(len(target), dtype=np.intp)
  5007. return no_matches, missing
  5008. @property
  5009. def _index_as_unique(self) -> bool:
  5010. """
  5011. Whether we should treat this as unique for the sake of
  5012. get_indexer vs get_indexer_non_unique.
  5013. For IntervalIndex compat.
  5014. """
  5015. return self.is_unique
  5016. _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
  5017. @final
  5018. def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
  5019. """
  5020. When dealing with an object-dtype Index and a non-object Index, see
  5021. if we can upcast the object-dtype one to improve performance.
  5022. """
  5023. if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
  5024. if (
  5025. self.tz is not None
  5026. and other.tz is not None
  5027. and not tz_compare(self.tz, other.tz)
  5028. ):
  5029. # standardize on UTC
  5030. return self.tz_convert("UTC"), other.tz_convert("UTC")
  5031. elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
  5032. try:
  5033. return type(other)(self), other
  5034. except OutOfBoundsDatetime:
  5035. return self, other
  5036. elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
# TODO: we don't have tests that get here
  5038. return type(other)(self), other
  5039. elif self.dtype.kind == "u" and other.dtype.kind == "i":
  5040. # GH#41873
  5041. if other.min() >= 0:
  5042. # lookup min as it may be cached
  5043. # TODO: may need itemsize check if we have non-64-bit Indexes
  5044. return self, other.astype(self.dtype)
  5045. elif self._is_multi and not other._is_multi:
  5046. try:
  5047. # "Type[Index]" has no attribute "from_tuples"
  5048. other = type(self).from_tuples(other) # type: ignore[attr-defined]
  5049. except (TypeError, ValueError):
  5050. # let's instead try with a straight Index
  5051. self = Index(self._values)
  5052. if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
  5053. # Reverse op so we dont need to re-implement on the subclasses
  5054. other, self = other._maybe_promote(self)
  5055. return self, other
  5056. @final
  5057. def _find_common_type_compat(self, target) -> DtypeObj:
  5058. """
  5059. Implementation of find_common_type that adjusts for Index-specific
  5060. special cases.
  5061. """
  5062. target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
  5063. # special case: if one dtype is uint64 and the other a signed int, return object
  5064. # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
  5065. # Now it's:
  5066. # * float | [u]int -> float
  5067. # * uint64 | signed int -> object
  5068. # We may change union(float | [u]int) to go to object.
  5069. if self.dtype == "uint64" or target_dtype == "uint64":
  5070. if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
  5071. target_dtype
  5072. ):
  5073. return _dtype_obj
  5074. dtype = find_result_type(self._values, target)
  5075. dtype = common_dtype_categorical_compat([self, target], dtype)
  5076. return dtype
  5077. @final
  5078. def _should_compare(self, other: Index) -> bool:
  5079. """
  5080. Check if `self == other` can ever have non-False entries.
  5081. """
  5082. if (is_bool_dtype(other) and is_any_real_numeric_dtype(self)) or (
  5083. is_bool_dtype(self) and is_any_real_numeric_dtype(other)
  5084. ):
  5085. # GH#16877 Treat boolean labels passed to a numeric index as not
  5086. # found. Without this fix False and True would be treated as 0 and 1
  5087. # respectively.
  5088. return False
  5089. other = _unpack_nested_dtype(other)
  5090. dtype = other.dtype
  5091. return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
  5092. def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
  5093. """
  5094. Can we compare values of the given dtype to our own?
  5095. """
  5096. if self.dtype.kind == "b":
  5097. return dtype.kind == "b"
  5098. elif is_numeric_dtype(self.dtype):
  5099. return is_numeric_dtype(dtype)
  5100. # TODO: this was written assuming we only get here with object-dtype,
# which is no longer correct. Can we specialize for EA?
  5102. return True

    @final
    def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
        """
        Group the index labels by a given array of values.

        Parameters
        ----------
        values : array
            Values used to determine the groups.

        Returns
        -------
        dict
            {group name -> group labels}
        """
        # TODO: if we are a MultiIndex, we can do better
        #  than converting to tuples
        if isinstance(values, ABCMultiIndex):
            values = values._values
        values = Categorical(values)
        result = values._reverse_indexer()

        # map to the label
        result = {k: self.take(v) for k, v in result.items()}

        return PrettyDict(result)
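
    # Illustrative usage sketch (hypothetical values), assuming ``import pandas as pd``
    # and ``import numpy as np``: each distinct grouping value maps to the index
    # labels at the matching positions, approximately:
    #
    #   >>> pd.Index(["a", "b", "c"]).groupby(np.array([1, 2, 1]))
    #   {1: Index(['a', 'c'], dtype='object'), 2: Index(['b'], dtype='object')}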

    def map(self, mapper, na_action=None):
        """
        Map values using an input mapping or function.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        Union[Index, MultiIndex]
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        from pandas.core.indexes.multi import MultiIndex

        new_values = self._map_values(mapper, na_action=na_action)

        # we can return a MultiIndex
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif self.name:
                names = [self.name] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        dtype = None
        if not new_values.size:
            # empty
            dtype = self.dtype

        # e.g. if we are floating and new_values is all ints, then we
        #  don't want to cast back to floating.  But if we are UInt64
        #  and new_values is all ints, we want to try.
        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
        if same_dtype:
            new_values = maybe_cast_pointwise_result(
                new_values, self.dtype, same_dtype=same_dtype
            )

        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
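
    # Illustrative usage sketch, assuming ``import pandas as pd``:
    #
    #   >>> pd.Index([1, 2, 3]).map(lambda x: x * 10)
    #   Index([10, 20, 30], dtype='int64')
    #
    # A mapper returning tuples (e.g. ``lambda x: (x, x * 2)``) produces a
    # MultiIndex instead, as described in the docstring above.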

    # TODO: De-duplicate with map, xref GH#32349
    @final
    def _transform_index(self, func, *, level=None) -> Index:
        """
        Apply function to all values found in index.

        This includes transforming multiindex entries separately.
        Only apply function to one level of the MultiIndex if level is specified.
        """
        if isinstance(self, ABCMultiIndex):
            values = [
                self.get_level_values(i).map(func)
                if i == level or level is None
                else self.get_level_values(i)
                for i in range(self.nlevels)
            ]
            return type(self).from_arrays(values)
        else:
            items = [func(x) for x in self]
            return Index(items, name=self.name, tupleize_cols=False)

    def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
        """
        Return a boolean array where the index values are in `values`.

        Compute boolean array of whether each index value is found in the
        passed set of values. The length of the returned boolean array matches
        the length of the index.

        Parameters
        ----------
        values : set or list-like
            Sought values.
        level : str or int, optional
            Name or position of the index level to use (if the index is a
            `MultiIndex`).

        Returns
        -------
        np.ndarray[bool]
            NumPy array of boolean values.

        See Also
        --------
        Series.isin : Same for Series.
        DataFrame.isin : Same method for DataFrames.

        Notes
        -----
        In the case of `MultiIndex` you must either specify `values` as a
        list-like object containing tuples that are the same length as the
        number of levels, or specify `level`. Otherwise it will raise a
        ``ValueError``.

        If `level` is specified:

        - if it is the name of one *and only one* index level, use that level;
        - otherwise it should be a number indicating level position.

        Examples
        --------
        >>> idx = pd.Index([1,2,3])
        >>> idx
        Index([1, 2, 3], dtype='int64')

        Check whether each index value is in a list of values.

        >>> idx.isin([1, 4])
        array([ True, False, False])

        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
        ...                                   ['red', 'blue', 'green']],
        ...                                  names=('number', 'color'))
        >>> midx
        MultiIndex([(1, 'red'),
                    (2, 'blue'),
                    (3, 'green')],
                   names=['number', 'color'])

        Check whether the strings in the 'color' level of the MultiIndex
        are in a list of colors.

        >>> midx.isin(['red', 'orange', 'yellow'], level='color')
        array([ True, False, False])

        To check across the levels of a MultiIndex, pass a list of tuples:

        >>> midx.isin([(1, 'red'), (3, 'red')])
        array([ True, False, False])

        For a DatetimeIndex, string values in `values` are converted to
        Timestamps.

        >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
        >>> dti = pd.to_datetime(dates)
        >>> dti
        DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
        dtype='datetime64[ns]', freq=None)

        >>> dti.isin(['2000-03-11'])
        array([ True, False, False])
        """
        if level is not None:
            self._validate_index_level(level)
        return algos.isin(self._values, values)

    def _get_string_slice(self, key: str_t):
        # this is for partial string indexing,
        # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
        raise NotImplementedError

    def slice_indexer(
        self,
        start: Hashable | None = None,
        end: Hashable | None = None,
        step: int | None = None,
    ) -> slice:
        """
        Compute the slice indexer for input labels and step.

        Index needs to be ordered and unique.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, default None

        Returns
        -------
        slice

        Raises
        ------
        KeyError : If key does not exist, or key is not unique and index is
            not ordered.

        Notes
        -----
        This function assumes that the data is sorted, so use at your own peril.

        Examples
        --------
        This is a method on all index types. For example you can do:

        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_indexer(start='b', end='c')
        slice(1, 3, None)

        >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
        >>> idx.slice_indexer(start='b', end=('c', 'g'))
        slice(1, 3, None)
        """
        start_slice, end_slice = self.slice_locs(start, end, step=step)

        # return a slice
        if not is_scalar(start_slice):
            raise AssertionError("Start slice bound is non-scalar")
        if not is_scalar(end_slice):
            raise AssertionError("End slice bound is non-scalar")

        return slice(start_slice, end_slice, step)

    def _maybe_cast_indexer(self, key):
        """
        If we have a float key and are not a floating index, then try to cast
        to an int if equivalent.
        """
        return key

    def _maybe_cast_listlike_indexer(self, target) -> Index:
        """
        Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
        """
        return ensure_index(target)

    @final
    def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
        """
        If we are a positional indexer, validate that we have an appropriately
        typed bound (it must be an integer).
        """
        assert kind in ["getitem", "iloc"]

        if key is not None and not is_integer(key):
            self._raise_invalid_indexer(form, key)

    def _maybe_cast_slice_bound(self, label, side: str_t):
        """
        This function should be overloaded in subclasses that allow non-trivial
        casting on label-slice bounds, e.g. datetime-like indices allowing
        strings containing formatted datetimes.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """

        # We are a plain index here (sub-classes override this method if they
        #  wish to have special treatment for floats/ints, e.g. datetimelike Indexes

        if is_numeric_dtype(self.dtype):
            return self._maybe_cast_indexer(label)

        # reject them, if index does not contain label
        if (is_float(label) or is_integer(label)) and label not in self:
            self._raise_invalid_indexer("slice", label)

        return label

    def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
        if self.is_monotonic_increasing:
            return self.searchsorted(label, side=side)
        elif self.is_monotonic_decreasing:
            # np.searchsorted expects ascending sort order, have to reverse
            # everything for it to work (element ordering, search side and
            # resulting value).
            pos = self[::-1].searchsorted(
                label, side="right" if side == "left" else "left"
            )
            return len(self) - pos

        raise ValueError("index must be monotonic increasing or decreasing")
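
    # Illustrative sketch of the monotonic-decreasing branch above (private helper,
    # hypothetical values), assuming ``import pandas as pd``:
    #
    #   >>> pd.Index([3, 2, 1])._searchsorted_monotonic(2, side="left")
    #   1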

    def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.
        """

        if side not in ("left", "right"):
            raise ValueError(
                "Invalid value for side kwarg, must be either "
                f"'left' or 'right': {side}"
            )

        original_label = label

        # For datetime indices label may be a string that has to be converted
        # to datetime boundary according to its resolution.
        label = self._maybe_cast_slice_bound(label, side)

        # we need to look up the label
        try:
            slc = self.get_loc(label)
        except KeyError as err:
            try:
                return self._searchsorted_monotonic(label, side)
            except ValueError:
                # raise the original KeyError
                raise err

        if isinstance(slc, np.ndarray):
            # get_loc may return a boolean array, which
            # is OK as long as they are representable by a slice.
            assert is_bool_dtype(slc.dtype)
            slc = lib.maybe_booleans_to_slice(slc.view("u1"))
            if isinstance(slc, np.ndarray):
                raise KeyError(
                    f"Cannot get {side} slice bound for non-unique "
                    f"label: {repr(original_label)}"
                )

        if isinstance(slc, slice):
            if side == "left":
                return slc.start
            else:
                return slc.stop
        else:
            if side == "right":
                return slc + 1
            else:
                return slc
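
    # Illustrative usage sketch, assuming ``import pandas as pd``; with repeated
    # labels the bound is the edge of the matching block:
    #
    #   >>> pd.Index(list("abbd")).get_slice_bound("b", side="left")
    #   1
    #   >>> pd.Index(list("abbd")).get_slice_bound("b", side="right")
    #   3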

    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.

        Returns
        -------
        tuple[int, int]

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left")
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right")
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
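
    # Illustrative sketch of the reverse-step branch above, assuming
    # ``import pandas as pd``; with ``step=-1`` the swapped bounds are shifted so
    # that ``self[start:stop:-1]`` selects 'd', 'c', 'b'. Expected roughly:
    #
    #   >>> pd.Index(list("abcd")).slice_locs(start="d", end="b", step=-1)
    #   (3, 0)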

    def delete(self: _IndexT, loc) -> _IndexT:
        """
        Make new Index with passed location(-s) deleted.

        Parameters
        ----------
        loc : int or list of int
            Location of item(-s) which will be deleted.
            Use a list of locations to delete more than one value at the same time.

        Returns
        -------
        Index
            Will be same type as self, except for RangeIndex.

        See Also
        --------
        numpy.delete : Delete any rows and columns from NumPy array (ndarray).

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.delete(1)
        Index(['a', 'c'], dtype='object')

        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.delete([0, 2])
        Index(['b'], dtype='object')
        """
        values = self._values
        res_values: ArrayLike
        if isinstance(values, np.ndarray):
            # TODO(__array_function__): special casing will be unnecessary
            res_values = np.delete(values, loc)
        else:
            res_values = values.delete(loc)

        # _constructor so RangeIndex -> Index with an int64 dtype
        return self._constructor._simple_new(res_values, name=self.name)

    def insert(self, loc: int, item) -> Index:
        """
        Make new Index inserting new item at location.

        Follows ``numpy.insert`` semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        Index
        """
        item = lib.item_from_zerodim(item)
        if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
            item = self._na_value

        arr = self._values

        try:
            if isinstance(arr, ExtensionArray):
                res_values = arr.insert(loc, item)
                return type(self)._simple_new(res_values, name=self.name)
            else:
                item = self._validate_fill_value(item)
        except (TypeError, ValueError, LossySetitemError):
            # e.g. trying to insert an integer into a DatetimeIndex
            # We cannot keep the same dtype, so cast to the (often object)
            #  minimal shared dtype before doing the insert.
            dtype = self._find_common_type_compat(item)
            return self.astype(dtype).insert(loc, item)

        if arr.dtype != object or not isinstance(
            item, (tuple, np.datetime64, np.timedelta64)
        ):
            # with object-dtype we need to worry about numpy incorrectly casting
            # dt64/td64 to integer, also about treating tuples as sequences
            # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
            casted = arr.dtype.type(item)
            new_values = np.insert(arr, loc, casted)
        else:
            # error: No overload variant of "insert" matches argument types
            # "ndarray[Any, Any]", "int", "None"
            new_values = np.insert(arr, loc, None)  # type: ignore[call-overload]
            loc = loc if loc >= 0 else loc - 1
            new_values[loc] = item

        return Index._with_infer(new_values, name=self.name)
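
    # Illustrative usage sketch, assuming ``import pandas as pd``:
    #
    #   >>> pd.Index([1, 2, 3]).insert(1, 10)
    #   Index([1, 10, 2, 3], dtype='int64')
    #
    # Inserting an item that does not fit the current dtype (e.g. a string into an
    # integer index) goes through the except-branch above and casts to a common
    # (typically object) dtype before inserting.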

    def drop(
        self,
        labels: Index | np.ndarray | Iterable[Hashable],
        errors: IgnoreRaise = "raise",
    ) -> Index:
        """
        Make new Index with passed list of labels deleted.

        Parameters
        ----------
        labels : array-like or scalar
        errors : {'ignore', 'raise'}, default 'raise'
            If 'ignore', suppress error and existing labels are dropped.

        Returns
        -------
        Index
            Will be same type as self, except for RangeIndex.

        Raises
        ------
        KeyError
            If not all of the labels are found in the selected axis
        """
        if not isinstance(labels, Index):
            # avoid materializing e.g. RangeIndex
            arr_dtype = "object" if self.dtype == "object" else None
            labels = com.index_labels_to_array(labels, dtype=arr_dtype)

        indexer = self.get_indexer_for(labels)
        mask = indexer == -1
        if mask.any():
            if errors != "ignore":
                raise KeyError(f"{list(labels[mask])} not found in axis")
            indexer = indexer[~mask]
        return self.delete(indexer)
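
    # Illustrative usage sketch, assuming ``import pandas as pd``:
    #
    #   >>> pd.Index(["a", "b", "c"]).drop(["a"])
    #   Index(['b', 'c'], dtype='object')
    #
    # With ``errors="ignore"`` labels that are not present are silently skipped
    # instead of raising the KeyError raised above.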

    def infer_objects(self, copy: bool = True) -> Index:
        """
        If we have an object dtype, try to infer a non-object dtype.

        Parameters
        ----------
        copy : bool, default True
            Whether to make a copy in cases where no inference occurs.
        """
        if self._is_multi:
            raise NotImplementedError(
                "infer_objects is not implemented for MultiIndex. "
                "Use index.to_frame().infer_objects() instead."
            )
        if self.dtype != object:
            return self.copy() if copy else self

        values = self._values
        values = cast("npt.NDArray[np.object_]", values)
        res_values = lib.maybe_convert_objects(
            values,
            convert_datetime=True,
            convert_timedelta=True,
            convert_period=True,
            convert_interval=True,
        )
        if copy and res_values is values:
            return self.copy()
        result = Index(res_values, name=self.name)
        if not copy and res_values is values and self._references is not None:
            result._references = self._references
            result._references.add_index_reference(result)
        return result
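
    # Illustrative usage sketch, assuming ``import pandas as pd``:
    #
    #   >>> pd.Index([1, 2, 3], dtype=object).infer_objects()
    #   Index([1, 2, 3], dtype='int64')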

    # --------------------------------------------------------------------
    # Generated Arithmetic, Comparison, and Unary Methods

    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.
        """
        if self.is_(other):
            # fastpath
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    arr[self.isna()] = False
                return arr
            elif op is operator.ne:
                arr = np.zeros(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    arr[self.isna()] = True
                return arr

        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            other = extract_array(other, extract_numpy=True)
        else:
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            with np.errstate(all="ignore"):
                result = op(self._values, other)

        elif isinstance(self._values, ExtensionArray):
            result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        else:
            with np.errstate(all="ignore"):
                result = ops.comparison_op(self._values, other, op)

        return result
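
    # Illustrative sketch of the dispatch above, assuming ``import pandas as pd``
    # and ``import numpy as np``; element-wise comparisons return a boolean ndarray,
    # roughly:
    #
    #   >>> pd.Index([1, 2, 3]) == np.array([1, 5, 3])
    #   array([ True, False,  True])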

    @final
    def _logical_method(self, other, op):
        res_name = ops.get_op_result_name(self, other)

        lvalues = self._values
        rvalues = extract_array(other, extract_numpy=True, extract_range=True)

        res_values = ops.logical_op(lvalues, rvalues, op)
        return self._construct_result(res_values, name=res_name)

    @final
    def _construct_result(self, result, name):
        if isinstance(result, tuple):
            return (
                Index(result[0], name=name, dtype=result[0].dtype),
                Index(result[1], name=name, dtype=result[1].dtype),
            )
        return Index(result, name=name, dtype=result.dtype)

    def _arith_method(self, other, op):
        if (
            isinstance(other, Index)
            and is_object_dtype(other.dtype)
            and type(other) is not Index
        ):
            # We return NotImplemented for object-dtype index *subclasses* so they have
            # a chance to implement ops before we unwrap them.
            # See https://github.com/pandas-dev/pandas/issues/31109
            return NotImplemented

        return super()._arith_method(other, op)

    @final
    def _unary_method(self, op):
        result = op(self._values)
        return Index(result, name=self.name)

    def __abs__(self) -> Index:
        return self._unary_method(operator.abs)

    def __neg__(self) -> Index:
        return self._unary_method(operator.neg)

    def __pos__(self) -> Index:
        return self._unary_method(operator.pos)

    def __invert__(self) -> Index:
        # GH#8875
        return self._unary_method(operator.inv)

    # --------------------------------------------------------------------
    # Reductions

    def any(self, *args, **kwargs):
        """
        Return whether any element is Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        nv.validate_any(args, kwargs)
        self._maybe_disable_logical_methods("any")
        # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.any(self.values)  # type: ignore[arg-type]

    def all(self, *args, **kwargs):
        """
        Return whether all elements are Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False
        """
        nv.validate_all(args, kwargs)
        self._maybe_disable_logical_methods("all")
        # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.all(self.values)  # type: ignore[arg-type]

    @final
    def _maybe_disable_logical_methods(self, opname: str_t) -> None:
        """
        raise if this Index subclass does not support any or all.
        """
        if (
            isinstance(self, ABCMultiIndex)
            or needs_i8_conversion(self.dtype)
            or is_interval_dtype(self.dtype)
            or is_categorical_dtype(self.dtype)
            or is_float_dtype(self.dtype)
        ):
            # This call will raise
            make_invalid_op(opname)(self)

    @Appender(IndexOpsMixin.argmin.__doc__)
    def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return -1
        return super().argmin(skipna=skipna)

    @Appender(IndexOpsMixin.argmax.__doc__)
    def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return -1
        return super().argmax(skipna=skipna)
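
    # Illustrative sketch of the NaN handling above, assuming ``import pandas as pd``
    # and ``import numpy as np``:
    #
    #   >>> pd.Index([3.0, np.nan, 1.0]).argmin()
    #   2
    #   >>> pd.Index([3.0, np.nan, 1.0]).argmin(skipna=False)
    #   -1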

    @doc(IndexOpsMixin.min)
    def min(self, axis=None, skipna: bool = True, *args, **kwargs):
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check
            first = self[0]
            if not isna(first):
                return first

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="min", skipna=skipna)

        return super().min(skipna=skipna)

    @doc(IndexOpsMixin.max)
    def max(self, axis=None, skipna: bool = True, *args, **kwargs):
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check
            last = self[-1]
            if not isna(last):
                return last

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="max", skipna=skipna)

        return super().max(skipna=skipna)

    # --------------------------------------------------------------------

    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        return (len(self),)


def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Multiple sequences return a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) == 1:
        if names is not None:
            names = names[0]
        return Index(sequences[0], name=names)
    else:
        return MultiIndex.from_arrays(sequences, names=names)


def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like

    if isinstance(index_like, ABCSeries):
        name = index_like.name
        return Index(index_like, name=name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if isinstance(index_like, list):
        if type(index_like) is not list:
            # must check for exactly list here because of strict type
            # check in clean_index_list
            index_like = list(index_like)

        if len(index_like) and lib.is_all_arraylike(index_like):
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(index_like)
        else:
            return Index(index_like, copy=copy, tupleize_cols=False)
    else:
        return Index(index_like, copy=copy)


def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        return list(seq)
    else:
        return seq


def trim_front(strings: list[str]) -> list[str]:
    """
    Trims leading spaces that are shared by all strings.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    while all(strings) and all(x[0] == " " for x in strings):
        strings = [x[1:] for x in strings]
    return strings


def _validate_join_method(method: str) -> None:
    if method not in ["left", "right", "inner", "outer"]:
        raise ValueError(f"do not recognize join method {method}")


def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Note we don't just check for "name" attribute since that would
        # pick up e.g. dtype.name
        name = obj.name

    # GH#29069
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name


def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    list
        A list representing the unanimous 'names' found.
    """
    name_tups = [tuple(i.names) for i in indexes]
    name_sets = [{*ns} for ns in zip_longest(*name_tups)]
    names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
    return names
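
# Illustrative usage sketch, assuming ``import pandas as pd``; a name survives only
# if every index agrees on it, level by level:
#
#   >>> get_unanimous_names(pd.Index([1], name="x"), pd.Index([2], name="x"))
#   ('x',)
#   >>> get_unanimous_names(pd.Index([1], name="x"), pd.Index([2], name="y"))
#   (None,)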


def _unpack_nested_dtype(other: Index) -> Index:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    from pandas.core.arrays.arrow import ArrowDtype

    dtype = other.dtype
    if isinstance(dtype, CategoricalDtype):
        # If there is ever a SparseIndex, this could get dispatched
        #  here too.
        return dtype.categories
    elif isinstance(dtype, ArrowDtype):
        # GH 53617
        import pyarrow as pa

        if pa.types.is_dictionary(dtype.pyarrow_dtype):
            other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
    return other


def _maybe_try_sort(result, sort):
    if sort is not False:
        try:
            result = algos.safe_sort(result)
        except TypeError as err:
            if sort is True:
                raise
            warnings.warn(
                f"{err}, sort order is undefined for incomparable objects.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
    return result