12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243 |
- from __future__ import annotations
- from datetime import datetime
- import functools
- from itertools import zip_longest
- import operator
- from typing import (
- TYPE_CHECKING,
- Any,
- Callable,
- ClassVar,
- Hashable,
- Iterable,
- Literal,
- NoReturn,
- Sequence,
- TypeVar,
- cast,
- final,
- overload,
- )
- import warnings
- import numpy as np
- from pandas._config import get_option
- from pandas._libs import (
- NaT,
- algos as libalgos,
- index as libindex,
- lib,
- )
- from pandas._libs.internals import BlockValuesRefs
- import pandas._libs.join as libjoin
- from pandas._libs.lib import (
- is_datetime_array,
- no_default,
- )
- from pandas._libs.missing import is_float_nan
- from pandas._libs.tslibs import (
- IncompatibleFrequency,
- OutOfBoundsDatetime,
- Timestamp,
- tz_compare,
- )
- from pandas._typing import (
- AnyAll,
- ArrayLike,
- Axes,
- Axis,
- DropKeep,
- DtypeObj,
- F,
- IgnoreRaise,
- IndexLabel,
- JoinHow,
- Level,
- Shape,
- npt,
- )
- from pandas.compat.numpy import function as nv
- from pandas.errors import (
- DuplicateLabelError,
- InvalidIndexError,
- )
- from pandas.util._decorators import (
- Appender,
- cache_readonly,
- doc,
- )
- from pandas.util._exceptions import (
- find_stack_level,
- rewrite_exception,
- )
- from pandas.core.dtypes.astype import (
- astype_array,
- astype_is_view,
- )
- from pandas.core.dtypes.cast import (
- LossySetitemError,
- can_hold_element,
- common_dtype_categorical_compat,
- find_result_type,
- infer_dtype_from,
- maybe_cast_pointwise_result,
- np_can_hold_element,
- )
- from pandas.core.dtypes.common import (
- ensure_int64,
- ensure_object,
- ensure_platform_int,
- is_any_real_numeric_dtype,
- is_bool_dtype,
- is_categorical_dtype,
- is_dtype_equal,
- is_ea_or_datetimelike_dtype,
- is_extension_array_dtype,
- is_float,
- is_float_dtype,
- is_hashable,
- is_integer,
- is_integer_dtype,
- is_interval_dtype,
- is_iterator,
- is_list_like,
- is_numeric_dtype,
- is_object_dtype,
- is_scalar,
- is_signed_integer_dtype,
- is_string_dtype,
- needs_i8_conversion,
- pandas_dtype,
- validate_all_hashable,
- )
- from pandas.core.dtypes.concat import concat_compat
- from pandas.core.dtypes.dtypes import (
- CategoricalDtype,
- DatetimeTZDtype,
- ExtensionDtype,
- IntervalDtype,
- PeriodDtype,
- )
- from pandas.core.dtypes.generic import (
- ABCDataFrame,
- ABCDatetimeIndex,
- ABCMultiIndex,
- ABCPeriodIndex,
- ABCSeries,
- ABCTimedeltaIndex,
- )
- from pandas.core.dtypes.inference import is_dict_like
- from pandas.core.dtypes.missing import (
- array_equivalent,
- is_valid_na_for_dtype,
- isna,
- )
- from pandas.core import (
- arraylike,
- ops,
- )
- from pandas.core.accessor import CachedAccessor
- import pandas.core.algorithms as algos
- from pandas.core.array_algos.putmask import (
- setitem_datetimelike_compat,
- validate_putmask,
- )
- from pandas.core.arrays import (
- ArrowExtensionArray,
- BaseMaskedArray,
- Categorical,
- ExtensionArray,
- )
- from pandas.core.arrays.string_ import StringArray
- from pandas.core.base import (
- IndexOpsMixin,
- PandasObject,
- )
- import pandas.core.common as com
- from pandas.core.construction import (
- ensure_wrapped_if_datetimelike,
- extract_array,
- sanitize_array,
- )
- from pandas.core.indexers import disallow_ndim_indexing
- from pandas.core.indexes.frozen import FrozenList
- from pandas.core.missing import clean_reindex_fill_method
- from pandas.core.ops import get_op_result_name
- from pandas.core.ops.invalid import make_invalid_op
- from pandas.core.sorting import (
- ensure_key_mapped,
- get_group_index_sorter,
- nargsort,
- )
- from pandas.core.strings.accessor import StringMethods
- from pandas.io.formats.printing import (
- PrettyDict,
- default_pprint,
- format_object_summary,
- pprint_thing,
- )
- if TYPE_CHECKING:
- from pandas import (
- CategoricalIndex,
- DataFrame,
- MultiIndex,
- Series,
- )
- from pandas.core.arrays import PeriodArray
__all__ = ["Index"]

# Inferred-type labels; presumably the ones whose values cannot be ordered
# against each other -- TODO confirm against the call sites.
_unsortable_types = frozenset({"mixed", "mixed-integer"})

# Substitution values for the shared docstring templates of this module.
_index_doc_kwargs: dict[str, str] = dict(
    klass="Index",
    inplace="",
    target_klass="Index",
    raises_section="",
    unique="Index",
    duplicated="np.ndarray",
)
_index_shared_docs: dict[str, str] = {}

# Alias for the builtin ``str``; presumably needed where the name ``str`` is
# shadowed (e.g. by an accessor attribute) -- verify against usages.
str_t = str

# The numpy ``object`` dtype instance.
_dtype_obj = np.dtype("object")

# Lookup from a dtype name to the ``libindex`` masked engine class used for it.
# The pyarrow-suffixed names reuse the same engine classes as the
# correspondingly sized entries in the first group.
_masked_engines = {
    # complex / float
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    # unsigned integers
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    # signed integers
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    # pyarrow floats
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    # pyarrow unsigned integers
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    # pyarrow signed integers
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}
- def _maybe_return_indexers(meth: F) -> F:
- """
- Decorator to simplify 'return_indexers' checks in Index.join.
- """
- @functools.wraps(meth)
- def join(
- self,
- other: Index,
- *,
- how: JoinHow = "left",
- level=None,
- return_indexers: bool = False,
- sort: bool = False,
- ):
- join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
- if not return_indexers:
- return join_index
- if lidx is not None:
- lidx = ensure_platform_int(lidx)
- if ridx is not None:
- ridx = ensure_platform_int(ridx)
- return join_index, lidx, ridx
- return cast(F, join)
- def _new_Index(cls, d):
- """
- This is called upon unpickling, rather than the default which doesn't
- have arguments and breaks __new__.
- """
- # required for backward compat, because PI can't be instantiated with
- # ordinals through __new__ GH #13277
- if issubclass(cls, ABCPeriodIndex):
- from pandas.core.indexes.period import _new_PeriodIndex
- return _new_PeriodIndex(cls, **d)
- if issubclass(cls, ABCMultiIndex):
- if "labels" in d and "codes" not in d:
- # GH#23752 "labels" kwarg has been replaced with "codes"
- d["codes"] = d.pop("labels")
- # Since this was a valid MultiIndex at pickle-time, we don't need to
- # check validty at un-pickle time.
- d["verify_integrity"] = False
- elif "dtype" not in d and "data" in d:
- # Prevent Index.__new__ from conducting inference;
- # "data" key not in RangeIndex
- d["dtype"] = d["data"].dtype
- return cls.__new__(cls, **d)
# Type variable bound to ``Index``; methods annotated with it are typed as
# returning the same (sub)class they receive as ``self``.
_IndexT = TypeVar("_IndexT", bound="Index")
- class Index(IndexOpsMixin, PandasObject):
- """
- Immutable sequence used for indexing and alignment.
- The basic object storing axis labels for all pandas objects.
- .. versionchanged:: 2.0.0
- Index can hold all numpy numeric dtypes (except float16). Previously only
- int64/uint64/float64 dtypes were accepted.
- Parameters
- ----------
- data : array-like (1-dimensional)
- dtype : NumPy dtype (default: object)
- If dtype is None, we find the dtype that best fits the data.
- If an actual dtype is provided, we coerce to that dtype if it's safe.
- Otherwise, an error will be raised.
- copy : bool
- Make a copy of input ndarray.
- name : object
- Name to be stored in the index.
- tupleize_cols : bool (default: True)
- When True, attempt to create a MultiIndex if possible.
- See Also
- --------
- RangeIndex : Index implementing a monotonic integer range.
- CategoricalIndex : Index of :class:`Categorical` s.
- MultiIndex : A multi-level, or hierarchical Index.
- IntervalIndex : An Index of :class:`Interval` s.
- DatetimeIndex : Index of datetime64 data.
- TimedeltaIndex : Index of timedelta64 data.
- PeriodIndex : Index of Period data.
- Notes
- -----
- An Index instance can **only** contain hashable objects.
- An Index instance *can not* hold numpy float16 dtype.
- Examples
- --------
- >>> pd.Index([1, 2, 3])
- Index([1, 2, 3], dtype='int64')
- >>> pd.Index(list('abc'))
- Index(['a', 'b', 'c'], dtype='object')
- >>> pd.Index([1, 2, 3], dtype="uint8")
- Index([1, 2, 3], dtype='uint8')
- """
# To hand over control to subclasses
# NOTE(review): the consumer of this value is outside this part of the file;
# presumably join logic compares it between the two operands -- confirm.
_join_precedence = 1
- # Cython methods; see github.com/cython/cython/issues/2647
- # for why we need to wrap these instead of making them class attributes
- # Moreover, cython will choose the appropriate-dtyped sub-function
- # given the dtypes of the passed arguments
- @final
- def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
- # Caller is responsible for ensuring other.dtype == self.dtype
- sv = self._get_join_target()
- ov = other._get_join_target()
- # can_use_libjoin assures sv and ov are ndarrays
- sv = cast(np.ndarray, sv)
- ov = cast(np.ndarray, ov)
- # similar but not identical to ov.searchsorted(sv)
- return libjoin.left_join_indexer_unique(sv, ov)
- @final
- def _left_indexer(
- self: _IndexT, other: _IndexT
- ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
- # Caller is responsible for ensuring other.dtype == self.dtype
- sv = self._get_join_target()
- ov = other._get_join_target()
- # can_use_libjoin assures sv and ov are ndarrays
- sv = cast(np.ndarray, sv)
- ov = cast(np.ndarray, ov)
- joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
- joined = self._from_join_target(joined_ndarray)
- return joined, lidx, ridx
- @final
- def _inner_indexer(
- self: _IndexT, other: _IndexT
- ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
- # Caller is responsible for ensuring other.dtype == self.dtype
- sv = self._get_join_target()
- ov = other._get_join_target()
- # can_use_libjoin assures sv and ov are ndarrays
- sv = cast(np.ndarray, sv)
- ov = cast(np.ndarray, ov)
- joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
- joined = self._from_join_target(joined_ndarray)
- return joined, lidx, ridx
- @final
- def _outer_indexer(
- self: _IndexT, other: _IndexT
- ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
- # Caller is responsible for ensuring other.dtype == self.dtype
- sv = self._get_join_target()
- ov = other._get_join_target()
- # can_use_libjoin assures sv and ov are ndarrays
- sv = cast(np.ndarray, sv)
- ov = cast(np.ndarray, ov)
- joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
- joined = self._from_join_target(joined_ndarray)
- return joined, lidx, ridx
# Short type tag for this class.
_typ: str = "index"
# The array backing the index; assigned in ``_simple_new``.
_data: ExtensionArray | np.ndarray
# Array type(s) accepted by ``_simple_new``, which asserts
# ``isinstance(values, cls._data_cls)``.
_data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
    np.ndarray,
    ExtensionArray,
)
# Identity token compared by ``is_``; refreshed by ``_reset_identity``.
_id: object | None = None
# Storage for the index name; assigned in ``_simple_new``.
_name: Hashable = None
# MultiIndex.levels previously allowed setting the index name. We
# don't allow this anymore, and raise if it happens rather than
# failing silently.
_no_setting_name: bool = False
# NOTE(review): both lists name the ``name`` attribute; their consumers are
# outside this part of the file -- confirm how each is used.
_comparables: list[str] = ["name"]
_attributes: list[str] = ["name"]
- @cache_readonly
- def _can_hold_strings(self) -> bool:
- return not is_numeric_dtype(self)
# Maps a numpy dtype to the libindex engine class used for it.  Dtypes that
# are not listed here make ``_engine_type`` fall back to
# ``libindex.ObjectEngine``.
_engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
    np.dtype(np.int8): libindex.Int8Engine,
    np.dtype(np.int16): libindex.Int16Engine,
    np.dtype(np.int32): libindex.Int32Engine,
    np.dtype(np.int64): libindex.Int64Engine,
    np.dtype(np.uint8): libindex.UInt8Engine,
    np.dtype(np.uint16): libindex.UInt16Engine,
    np.dtype(np.uint32): libindex.UInt32Engine,
    np.dtype(np.uint64): libindex.UInt64Engine,
    np.dtype(np.float32): libindex.Float32Engine,
    np.dtype(np.float64): libindex.Float64Engine,
    np.dtype(np.complex64): libindex.Complex64Engine,
    np.dtype(np.complex128): libindex.Complex128Engine,
}
- @property
- def _engine_type(
- self,
- ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
- return self._engine_types.get(self.dtype, libindex.ObjectEngine)
# whether we support partial string indexing. Overridden
# in DatetimeIndex and PeriodIndex
_supports_partial_string_indexing = False

# Accessor names; the only one registered on this class is ``str``.
_accessors = {"str"}
# NB: this rebinds ``str`` inside the class body, which is why annotations in
# this class spell the builtin as ``str_t``.
str = CachedAccessor("str", StringMethods)

# Class-level default; ``_simple_new`` assigns a per-instance value.
_references = None
- # --------------------------------------------------------------------
- # Constructors
def __new__(
    cls,
    data=None,
    dtype=None,
    copy: bool = False,
    name=None,
    tupleize_cols: bool = True,
) -> Index:
    """
    Construct an Index, dispatching to the subclass that matches the data.

    The input is first normalised according to its kind (range, array-like,
    object exposing ``__array__``, or generic iterable), then run through
    ``sanitize_array``.  The resulting array's dtype selects the concrete
    class via ``_dtype_to_subclass``, and the instance is built with that
    class's ``_simple_new``.  See the class docstring for the parameters.

    Raises
    ------
    TypeError or ValueError
        Whatever ``_raise_scalar_data_error`` produces for scalar or
        otherwise non-list-like ``data``; ``ValueError`` when the data is
        not 1-dimensional.
    """
    from pandas.core.indexes.range import RangeIndex

    name = maybe_extract_name(name, data, cls)

    if dtype is not None:
        dtype = pandas_dtype(dtype)

    data_dtype = getattr(data, "dtype", None)

    # Only carry the reference tracker over when we are not copying and the
    # input is itself a Series/Index.
    refs = None
    if not copy and isinstance(data, (ABCSeries, Index)):
        refs = data._references

    # range
    if isinstance(data, (range, RangeIndex)):
        result = RangeIndex(start=data, copy=copy, name=name)
        if dtype is not None:
            return result.astype(dtype, copy=False)
        return result

    elif is_ea_or_datetimelike_dtype(dtype):
        # non-EA dtype indexes have special casting logic, so we punt here
        pass

    elif is_ea_or_datetimelike_dtype(data_dtype):
        # Same as above, but decided by the dtype the data already carries.
        pass

    elif isinstance(data, (np.ndarray, Index, ABCSeries)):
        if isinstance(data, ABCMultiIndex):
            data = data._values

        if data.dtype.kind not in ["i", "u", "f", "b", "c", "m", "M"]:
            # GH#11836 we need to avoid having numpy coerce
            # things that look like ints/floats to ints unless
            # they are actually ints, e.g. '0' and 0.0
            # should not be coerced
            data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

    elif is_scalar(data):
        raise cls._raise_scalar_data_error(data)
    elif hasattr(data, "__array__"):
        # Convert through the array protocol and re-enter the constructor.
        return Index(np.asarray(data), dtype=dtype, copy=copy, name=name)
    elif not is_list_like(data) and not isinstance(data, memoryview):
        # 2022-11-16 the memoryview check is only necessary on some CI
        #  builds, not clear why
        raise cls._raise_scalar_data_error(data)

    else:
        if tupleize_cols:
            # GH21470: convert iterable to list before determining if empty
            if is_iterator(data):
                data = list(data)

            if data and all(isinstance(e, tuple) for e in data):
                # we must be all tuples, otherwise don't construct
                # 10697
                from pandas.core.indexes.multi import MultiIndex

                return MultiIndex.from_tuples(data, names=name)
        # other iterable of some kind

        if not isinstance(data, (list, tuple)):
            # we allow set/frozenset, which Series/sanitize_array does not, so
            #  cast to list here
            data = list(data)
        if len(data) == 0:
            # unlike Series, we default to object dtype:
            data = np.array(data, dtype=object)

        if len(data) and isinstance(data[0], tuple):
            # Ensure we get 1-D array of tuples instead of 2D array.
            data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

    try:
        arr = sanitize_array(data, None, dtype=dtype, copy=copy)
    except ValueError as err:
        # Translate the messages raised by sanitize_array into Index-specific
        # errors; anything else propagates unchanged.
        if "index must be specified when data is not list-like" in str(err):
            raise cls._raise_scalar_data_error(data) from err
        if "Data must be 1-dimensional" in str(err):
            raise ValueError("Index data must be 1-dimensional") from err
        raise
    arr = ensure_wrapped_if_datetimelike(arr)

    # The dtype of the sanitized array decides which class gets built.
    klass = cls._dtype_to_subclass(arr.dtype)

    # copy=False: any requested copy was already requested from sanitize_array.
    arr = klass._ensure_array(arr, arr.dtype, copy=False)
    return klass._simple_new(arr, name, refs=refs)
- @classmethod
- def _ensure_array(cls, data, dtype, copy: bool):
- """
- Ensure we have a valid array to pass to _simple_new.
- """
- if data.ndim > 1:
- # GH#13601, GH#20285, GH#27125
- raise ValueError("Index data must be 1-dimensional")
- elif dtype == np.float16:
- # float16 not supported (no indexing engine)
- raise NotImplementedError("float16 indexes are not supported")
- if copy:
- # asarray_tuplesafe does not always copy underlying data,
- # so need to make sure that this happens
- data = data.copy()
- return data
- @final
- @classmethod
- def _dtype_to_subclass(cls, dtype: DtypeObj):
- # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
- if isinstance(dtype, ExtensionDtype):
- if isinstance(dtype, DatetimeTZDtype):
- from pandas import DatetimeIndex
- return DatetimeIndex
- elif isinstance(dtype, CategoricalDtype):
- from pandas import CategoricalIndex
- return CategoricalIndex
- elif isinstance(dtype, IntervalDtype):
- from pandas import IntervalIndex
- return IntervalIndex
- elif isinstance(dtype, PeriodDtype):
- from pandas import PeriodIndex
- return PeriodIndex
- return Index
- if dtype.kind == "M":
- from pandas import DatetimeIndex
- return DatetimeIndex
- elif dtype.kind == "m":
- from pandas import TimedeltaIndex
- return TimedeltaIndex
- elif dtype.kind == "O":
- # NB: assuming away MultiIndex
- return Index
- elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
- return Index
- raise NotImplementedError(dtype)
- # NOTE for new Index creation:
- # - _simple_new: It returns a new Index of the same type as the caller.
- # All metadata (such as name) must be provided by the caller.
- # Using _shallow_copy is recommended because it fills in this metadata
- # unless otherwise specified.
- # - _shallow_copy: It returns a new Index of the same type (using
- # _simple_new), but fills in the caller's metadata unless otherwise
- # specified. Passed kwargs will overwrite the corresponding metadata.
- # See each method's docstring.
- @classmethod
- def _simple_new(
- cls: type[_IndexT], values: ArrayLike, name: Hashable = None, refs=None
- ) -> _IndexT:
- """
- We require that we have a dtype compat for the values. If we are passed
- a non-dtype compat, then coerce using the constructor.
- Must be careful not to recurse.
- """
- assert isinstance(values, cls._data_cls), type(values)
- result = object.__new__(cls)
- result._data = values
- result._name = name
- result._cache = {}
- result._reset_identity()
- if refs is not None:
- result._references = refs
- else:
- result._references = BlockValuesRefs()
- result._references.add_index_reference(result)
- return result
@classmethod
def _with_infer(cls, *args, **kwargs):
    """
    Construct via ``cls(*args, **kwargs)``, then apply the 1.0.x-style
    inference of numeric dtypes for ndarray[object] inputs.

    If the constructed index has object dtype (and is not a MultiIndex) and
    its values convert to an integer, unsigned, float or bool array, an
    ``Index`` over the converted values is returned instead.
    """
    result = cls(*args, **kwargs)

    eligible = result.dtype == _dtype_obj and not result._is_multi
    if not eligible:
        return result

    # error: Argument 1 to "maybe_convert_objects" has incompatible type
    # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
    # "ndarray[Any, Any]"
    converted = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
    if converted.dtype.kind in ("i", "u", "f", "b"):
        return Index(converted, name=result.name)
    return result
- @cache_readonly
- def _constructor(self: _IndexT) -> type[_IndexT]:
- return type(self)
- @final
- def _maybe_check_unique(self) -> None:
- """
- Check that an Index has no duplicates.
- This is typically only called via
- `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
- True (duplicates aren't allowed).
- Raises
- ------
- DuplicateLabelError
- When the index is not unique.
- """
- if not self.is_unique:
- msg = """Index has duplicates."""
- duplicates = self._format_duplicate_message()
- msg += f"\n{duplicates}"
- raise DuplicateLabelError(msg)
- @final
- def _format_duplicate_message(self) -> DataFrame:
- """
- Construct the DataFrame for a DuplicateLabelError.
- This returns a DataFrame indicating the labels and positions
- of duplicates in an index. This should only be called when it's
- already known that duplicates are present.
- Examples
- --------
- >>> idx = pd.Index(['a', 'b', 'a'])
- >>> idx._format_duplicate_message()
- positions
- label
- a [0, 2]
- """
- from pandas import Series
- duplicates = self[self.duplicated(keep="first")].unique()
- assert len(duplicates)
- out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
- if self._is_multi:
- # test_format_duplicate_labels_message_multi
- # error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
- out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined]
- if self.nlevels == 1:
- out = out.rename_axis("label")
- return out.to_frame(name="positions")
- # --------------------------------------------------------------------
- # Index Internals Methods
- def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
- """
- Create a new Index with the same class as the caller, don't copy the
- data, use the same object attributes with passed in attributes taking
- precedence.
- *this is an internal non-public method*
- Parameters
- ----------
- values : the values to create the new Index, optional
- name : Label, defaults to self.name
- """
- name = self._name if name is no_default else name
- return self._simple_new(values, name=name, refs=self._references)
- def _view(self: _IndexT) -> _IndexT:
- """
- fastpath to make a shallow copy, i.e. new object with same data.
- """
- result = self._simple_new(self._values, name=self._name, refs=self._references)
- result._cache = self._cache
- return result
- @final
- def _rename(self: _IndexT, name: Hashable) -> _IndexT:
- """
- fastpath for rename if new name is already validated.
- """
- result = self._view()
- result._name = name
- return result
@final
def is_(self, other) -> bool:
    """
    More flexible, faster check like ``is`` but that works through views.

    Note: this is *not* the same as ``Index.identical()``, which checks
    that metadata is also the same.

    Parameters
    ----------
    other : object
        Other object to compare against.

    Returns
    -------
    bool
        True if both have same underlying data, False otherwise.

    See Also
    --------
    Index.identical : Works like ``Index.is_`` but also checks metadata.
    """
    if self is other:
        return True
    if not hasattr(other, "_id"):
        return False

    # Two objects match only when both carry the very same identity token.
    if self._id is None or other._id is None:
        return False
    return self._id is other._id
- @final
- def _reset_identity(self) -> None:
- """
- Initializes or resets ``_id`` attribute with new object.
- """
- self._id = object()
- @final
- def _cleanup(self) -> None:
- self._engine.clear_mapping()
@cache_readonly
def _engine(
    self,
) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
    """
    Build (once, via ``cache_readonly``) the lookup engine for this index.

    The engine is chosen from the engine target returned by
    ``_get_engine_target``: a masked engine for masked/arrow arrays whose
    dtype name is registered in ``_masked_engines``, an ``ExtensionEngine``
    for other extension arrays when ``_engine_type`` is ``ObjectEngine``,
    dedicated engines for bool and complex ndarrays, and otherwise
    ``self._engine_type``.
    """
    # For base class (object dtype) we get ObjectEngine
    target_values = self._get_engine_target()
    if isinstance(target_values, ExtensionArray):
        if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
            try:
                return _masked_engines[target_values.dtype.name](target_values)
            except KeyError:
                # Not supported yet e.g. decimal
                pass
        elif self._engine_type is libindex.ObjectEngine:
            return libindex.ExtensionEngine(target_values)

    target_values = cast(np.ndarray, target_values)
    # From here on the engine is picked from the dtype of the engine target.
    if target_values.dtype == bool:
        return libindex.BoolEngine(target_values)
    elif target_values.dtype == np.complex64:
        return libindex.Complex64Engine(target_values)
    elif target_values.dtype == np.complex128:
        return libindex.Complex128Engine(target_values)
    elif needs_i8_conversion(self.dtype):
        # We need to keep M8/m8 dtype when initializing the Engine,
        #  but don't want to change _get_engine_target bc it is used
        #  elsewhere
        # error: Item "ExtensionArray" of "Union[ExtensionArray,
        # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
        target_values = self._data._ndarray  # type: ignore[union-attr]

    # error: Argument 1 to "ExtensionEngine" has incompatible type
    # "ndarray[Any, Any]"; expected "ExtensionArray"
    return self._engine_type(target_values)  # type: ignore[arg-type]
- @final
- @cache_readonly
- def _dir_additions_for_owner(self) -> set[str_t]:
- """
- Add the string-like labels to the owner dataframe/series dir output.
- If this is a MultiIndex, it's first level values are used.
- """
- return {
- c
- for c in self.unique(level=0)[: get_option("display.max_dir_items")]
- if isinstance(c, str) and c.isidentifier()
- }
- # --------------------------------------------------------------------
- # Array-Like Methods
- # ndarray compat
- def __len__(self) -> int:
- """
- Return the length of the Index.
- """
- return len(self._data)
- def __array__(self, dtype=None) -> np.ndarray:
- """
- The array interface, return my values.
- """
- return np.asarray(self._data, dtype=dtype)
- def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
- if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
- return NotImplemented
- result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
- self, ufunc, method, *inputs, **kwargs
- )
- if result is not NotImplemented:
- return result
- if "out" in kwargs:
- # e.g. test_dti_isub_tdi
- return arraylike.dispatch_ufunc_with_out(
- self, ufunc, method, *inputs, **kwargs
- )
- if method == "reduce":
- result = arraylike.dispatch_reduction_ufunc(
- self, ufunc, method, *inputs, **kwargs
- )
- if result is not NotImplemented:
- return result
- new_inputs = [x if x is not self else x._values for x in inputs]
- result = getattr(ufunc, method)(*new_inputs, **kwargs)
- if ufunc.nout == 2:
- # i.e. np.divmod, np.modf, np.frexp
- return tuple(self.__array_wrap__(x) for x in result)
- if result.dtype == np.float16:
- result = result.astype(np.float32)
- return self.__array_wrap__(result)
- def __array_wrap__(self, result, context=None):
- """
- Gets called after a ufunc and other functions e.g. np.split.
- """
- result = lib.item_from_zerodim(result)
- if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
- return result
- return Index(result, name=self.name)
- @cache_readonly
- def dtype(self) -> DtypeObj:
- """
- Return the dtype object of the underlying data.
- """
- return self._data.dtype
@final
def ravel(self, order: str_t = "C") -> Index:
    """
    Return a view on self.

    ``order`` is accepted but not used.

    Returns
    -------
    Index

    See Also
    --------
    numpy.ndarray.ravel : Return a flattened array.
    """
    everything = slice(None)
    return self[everything]
def view(self, cls=None):
    """
    Return a view of this Index, optionally reinterpreted as ``cls``.

    Parameters
    ----------
    cls : optional
        When None, or an object that has a ``_typ`` attribute, the result
        is a shallow copy made by ``_view``.  Anything else is treated as a
        dtype (strings are resolved with ``pandas_dtype``) and used to
        reinterpret the underlying data.

    Returns
    -------
    Index or array
        An Index from the ``_view`` path and for datetime-like dtypes (other
        than non-nanosecond timedelta64); otherwise whatever
        ``self._data.view(cls)`` returns.  Index results share this index's
        ``_id`` token.
    """
    # we need to see if we are subclassing an
    # index type here
    if cls is not None and not hasattr(cls, "_typ"):
        dtype = cls
        if isinstance(cls, str):
            dtype = pandas_dtype(cls)

        if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
            dtype
        ):
            if dtype.kind == "m" and dtype != "m8[ns]":
                # e.g. m8[s]
                return self._data.view(cls)

            idx_cls = self._dtype_to_subclass(dtype)
            # NB: we only get here for subclasses that override
            #  _data_cls such that it is a type and not a tuple
            #  of types.
            arr_cls = idx_cls._data_cls
            # Reinterpret the data as int64 and wrap it in the array class
            # that belongs to the target dtype.
            arr = arr_cls(self._data.view("i8"), dtype=dtype)
            return idx_cls._simple_new(arr, name=self.name, refs=self._references)

        result = self._data.view(cls)
    else:
        result = self._view()
    if isinstance(result, Index):
        # Keep the identity token so ``is_`` treats the view as the same data.
        result._id = self._id
    return result
def astype(self, dtype, copy: bool = True):
    """
    Create an Index with values cast to dtypes.

    The class of a new Index is determined by dtype. When conversion is
    impossible, a TypeError exception is raised.

    Parameters
    ----------
    dtype : numpy dtype or pandas type
        The dtype to cast to.  The resulting Index is built with the dtype
        of the cast values.
    copy : bool, default True
        By default, astype always returns a newly allocated object.
        If copy is set to False and internal requirements on dtype are
        satisfied, the original data is used to create a new Index
        or the original Index is returned.

    Returns
    -------
    Index
        Index with values cast to specified dtype.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if is_dtype_equal(self.dtype, dtype):
        # Ensure that self.astype(self.dtype) is self
        return self.copy() if copy else self

    values = self._data
    if isinstance(values, ExtensionArray):
        # Errors raised by the array are re-raised with the array class name
        # replaced by this Index class name.
        with rewrite_exception(type(values).__name__, type(self).__name__):
            new_values = values.astype(dtype, copy=copy)

    elif isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        # Note: for RangeIndex and CategoricalDtype self vs self._values
        #  behaves differently here.
        new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

    else:
        # GH#13149 specifically use astype_array instead of astype
        new_values = astype_array(values, dtype=dtype, copy=copy)

    # pass copy=False because any copying will be done in the astype above
    result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
    if (
        not copy
        and self._references is not None
        and astype_is_view(self.dtype, dtype)
    ):
        # The cast produced a view on our data, so the result must share
        # (and be registered with) our reference tracker.
        result._references = self._references
        result._references.add_index_reference(result)
    return result
# Docstring template for ``take``.  ``%(klass)s`` is filled in from
# ``_index_doc_kwargs`` and the result is attached to the method by the
# ``Appender`` decorator on ``Index.take``.
_index_shared_docs[
    "take"
] = """
Return a new %(klass)s of the values selected by the indices.
For internal compatibility with numpy arrays.
Parameters
----------
indices : array-like
Indices to be taken.
axis : int, optional
The axis over which to select values, always 0.
allow_fill : bool, default True
fill_value : scalar, default None
If allow_fill=True and fill_value is not None, indices specified by
-1 are regarded as NA. If Index doesn't hold NA, raise ValueError.
Returns
-------
Index
An index formed of elements at the given indices. Will be the same
type as self, except for RangeIndex.
See Also
--------
numpy.ndarray.take: Return an array formed from the
elements of a at the given indices.
"""
@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(
    self,
    indices,
    axis: Axis = 0,
    allow_fill: bool = True,
    fill_value=None,
    **kwargs,
):
    # The docstring is supplied by the ``Appender`` decorator from
    # ``_index_shared_docs["take"]``.
    if kwargs:
        nv.validate_take((), kwargs)
    if is_scalar(indices):
        raise TypeError("Expected indices to be array-like")

    positions = ensure_platform_int(indices)
    use_fill = self._maybe_disallow_fill(allow_fill, fill_value, positions)

    # Note: we discard fill_value and use self._na_value, only relevant
    #  in the case where allow_fill is True and fill_value is not None
    source = self._values
    if isinstance(source, np.ndarray):
        taken = algos.take(
            source, positions, allow_fill=use_fill, fill_value=self._na_value
        )
    else:
        # algos.take passes 'axis' keyword which not all EAs accept
        taken = source.take(
            positions, allow_fill=use_fill, fill_value=self._na_value
        )

    # _constructor so RangeIndex-> Index with an int64 dtype
    return self._constructor._simple_new(taken, name=self.name)
- @final
- def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
- """
- We only use pandas-style take when allow_fill is True _and_
- fill_value is not None.
- """
- if allow_fill and fill_value is not None:
- # only fill if we are passing a non-None fill_value
- if self._can_hold_na:
- if (indices < -1).any():
- raise ValueError(
- "When allow_fill=True and fill_value is not None, "
- "all indices must be >= -1"
- )
- else:
- cls_name = type(self).__name__
- raise ValueError(
- f"Unable to fill values because {cls_name} cannot contain NA"
- )
- else:
- allow_fill = False
- return allow_fill
# Docstring template for ``repeat``.  ``%(klass)s`` is filled in from
# ``_index_doc_kwargs`` and the result is attached to the method by the
# ``Appender`` decorator on ``Index.repeat``.
_index_shared_docs[
    "repeat"
] = """
Repeat elements of a %(klass)s.
Returns a new %(klass)s where each element of the current %(klass)s
is repeated consecutively a given number of times.
Parameters
----------
repeats : int or array of ints
The number of repetitions for each element. This should be a
non-negative integer. Repeating 0 times will return an empty
%(klass)s.
axis : None
Must be ``None``. Has no effect but is accepted for compatibility
with numpy.
Returns
-------
%(klass)s
Newly created %(klass)s with repeated elements.
See Also
--------
Series.repeat : Equivalent function for Series.
numpy.repeat : Similar method for :class:`numpy.ndarray`.
Examples
--------
>>> idx = pd.Index(['a', 'b', 'c'])
>>> idx
Index(['a', 'b', 'c'], dtype='object')
>>> idx.repeat(2)
Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
>>> idx.repeat([1, 2, 3])
Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
"""
@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
def repeat(self, repeats, axis=None):
    # The docstring is supplied by the ``Appender`` decorator from
    # ``_index_shared_docs["repeat"]``.
    counts = ensure_platform_int(repeats)
    nv.validate_repeat((), {"axis": axis})
    repeated = self._values.repeat(counts)

    # _constructor so RangeIndex-> Index with an int64 dtype
    return self._constructor._simple_new(repeated, name=self.name)
- # --------------------------------------------------------------------
- # Copying Methods
def copy(
    self: _IndexT,
    name: Hashable | None = None,
    deep: bool = False,
) -> _IndexT:
    """
    Make a copy of this object.

    Name is set on the new object.

    Parameters
    ----------
    name : Label, optional
        Set name for new object.
    deep : bool, default False
        When True the underlying data is copied as well; otherwise the
        result is a renamed view of this object.

    Returns
    -------
    Index
        Index refer to new object which is a copy of this object.

    Notes
    -----
    In most cases, there should be no functional difference from using
    ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
    """
    validated_name = self._validate_names(name=name, deep=deep)[0]
    if not deep:
        return self._rename(name=validated_name)

    copied_values = self._data.copy()
    return type(self)._simple_new(copied_values, name=validated_name)
- @final
- def __copy__(self: _IndexT, **kwargs) -> _IndexT:
- return self.copy(**kwargs)
- @final
- def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
- """
- Parameters
- ----------
- memo, default None
- Standard signature. Unused
- """
- return self.copy(deep=True)
- # --------------------------------------------------------------------
- # Rendering Methods
- @final
- def __repr__(self) -> str_t:
- """
- Return a string representation for this object.
- """
- klass_name = type(self).__name__
- data = self._format_data()
- attrs = self._format_attrs()
- space = self._format_space()
- attrs_str = [f"{k}={v}" for k, v in attrs]
- prepr = f",{space}".join(attrs_str)
- # no data provided, just attributes
- if data is None:
- data = ""
- return f"{klass_name}({data}{prepr})"
- def _format_space(self) -> str_t:
- # using space here controls if the attributes
- # are line separated or not (the default)
- # max_seq_items = get_option('display.max_seq_items')
- # if len(self) > max_seq_items:
- # space = "\n%s" % (' ' * (len(klass) + 1))
- return " "
- @property
- def _formatter_func(self):
- """
- Return the formatter function.
- """
- return default_pprint
- def _format_data(self, name=None) -> str_t:
- """
- Return the formatted data as a unicode string.
- """
- # do we want to justify (only do so for non-objects)
- is_justify = True
- if self.inferred_type == "string":
- is_justify = False
- elif self.inferred_type == "categorical":
- self = cast("CategoricalIndex", self)
- if is_object_dtype(self.categories):
- is_justify = False
- return format_object_summary(
- self,
- self._formatter_func,
- is_justify=is_justify,
- name=name,
- line_break_each_value=self._is_multi,
- )
- def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
- """
- Return a list of tuples of the (attr,formatted_value).
- """
- attrs: list[tuple[str_t, str_t | int | bool | None]] = []
- if not self._is_multi:
- attrs.append(("dtype", f"'{self.dtype}'"))
- if self.name is not None:
- attrs.append(("name", default_pprint(self.name)))
- elif self._is_multi and any(x is not None for x in self.names):
- attrs.append(("names", default_pprint(self.names)))
- max_seq_items = get_option("display.max_seq_items") or len(self)
- if len(self) > max_seq_items:
- attrs.append(("length", len(self)))
- return attrs
- @final
- def _get_level_names(self) -> Hashable | Sequence[Hashable]:
- """
- Return a name or list of names with None replaced by the level number.
- """
- if self._is_multi:
- return [
- level if name is None else name for level, name in enumerate(self.names)
- ]
- else:
- return 0 if self.name is None else self.name
- @final
- def _mpl_repr(self) -> np.ndarray:
- # how to represent ourselves to matplotlib
- if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
- return cast(np.ndarray, self.values)
- return self.astype(object, copy=False)._values
def format(
    self,
    name: bool = False,
    formatter: Callable | None = None,
    na_rep: str_t = "NaN",
) -> list[str_t]:
    """
    Render a string representation of the Index.

    Parameters
    ----------
    name : bool, default False
        Whether to prepend the index name (an empty string when the index
        is unnamed) as the first entry.
    formatter : callable, optional
        When given, it is mapped over the index and the mapped values are
        returned after the header.
    na_rep : str, default "NaN"
        Passed on to ``_format_with_header`` when no formatter is given.

    Returns
    -------
    list of str
    """
    header: list[str_t] = []
    if name:
        if self.name is None:
            title = ""
        else:
            title = pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
        header.append(title)

    if formatter is None:
        return self._format_with_header(header, na_rep=na_rep)
    return header + list(self.map(formatter))
def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
    """
    Format the values as strings and prepend ``header``.

    Parameters
    ----------
    header : list of str
        Entries placed in front of the formatted values.
    na_rep : str
        Replacement text for float-NaN entries of object-dtype values.

    Returns
    -------
    list of str
    """
    from pandas.io.formats.format import format_array

    values = self._values
    if not is_object_dtype(values.dtype):
        return header + trim_front(format_array(values, None, justify="left"))

    converted = lib.maybe_convert_objects(cast(np.ndarray, values), safe=True)
    rendered = [
        pprint_thing(item, escape_chars=("\t", "\r", "\n")) for item in converted
    ]

    # could have nans
    nan_mask = is_float_nan(converted)
    if nan_mask.any():
        as_array = np.array(rendered)
        as_array[nan_mask] = na_rep
        rendered = as_array.tolist()
    return header + rendered
def _format_native_types(
    self,
    *,
    na_rep: str_t = "",
    decimal: str_t = ".",
    float_format=None,
    date_format=None,
    quoting=None,
) -> npt.NDArray[np.object_]:
    """
    Format the index values according to their dtype.

    Non-extension float dtypes are delegated to ``FloatArrayFormatter``.
    Otherwise the values are cast to ``str`` (non-object dtype with no
    quoting) or copied into an object array, and missing entries are
    overwritten with ``na_rep``. ``date_format`` is accepted but not used
    in this implementation.
    """
    from pandas.io.formats.format import FloatArrayFormatter

    numpy_float = is_float_dtype(self.dtype) and not is_extension_array_dtype(
        self.dtype
    )
    if numpy_float:
        return FloatArrayFormatter(
            self._values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        ).get_result_as_array()

    missing = isna(self)
    cast_to_str = not is_object_dtype(self) and not quoting
    if cast_to_str:
        out = np.asarray(self).astype(str)
    else:
        out = np.array(self, dtype=object, copy=True)

    out[missing] = na_rep
    return out
def _summary(self, name=None) -> str_t:
    """
    Return a summarized representation.

    Parameters
    ----------
    name : str
        Name to use in the summary representation; defaults to the class
        name.

    Returns
    -------
    str
        ``"<name>: <n> entries"``, followed by ``", <first> to <last>"``
        when the index is non-empty.
    """

    def _render(endpoint):
        # Non-string objects exposing ``format`` render themselves;
        # datetime-like values go through the formatter, unquoted.
        if hasattr(endpoint, "format") and not isinstance(endpoint, str):
            return endpoint.format()
        if needs_i8_conversion(self.dtype):
            # e.g. Timedelta, display as values, not quoted
            return self._formatter_func(endpoint).replace("'", "")
        return endpoint

    if len(self) > 0:
        first = _render(self[0])
        last = _render(self[-1])
        span = f", {first} to {last}"
    else:
        span = ""

    label = type(self).__name__ if name is None else name
    return f"{label}: {len(self)} entries{span}"
- # --------------------------------------------------------------------
- # Conversion Methods
def to_flat_index(self: _IndexT) -> _IndexT:
    """
    Identity method.

    Provided so that code chaining this call works the same way on a flat
    index as it does on subclasses that override it.

    Returns
    -------
    pd.Index
        Caller.

    See Also
    --------
    MultiIndex.to_flat_index : Subclass implementation.
    """
    caller = self
    return caller
@final
def to_series(self, index=None, name: Hashable = None) -> Series:
    """
    Create a Series with both index and values equal to the index keys.

    Useful with map for returning an indexer based on an index.

    Parameters
    ----------
    index : Index, optional
        Index of resulting Series. If None, defaults to original index.
    name : str, optional
        Name of resulting Series. If None, defaults to name of original
        index.

    Returns
    -------
    Series
        The dtype will be based on the type of the Index values.

    See Also
    --------
    Index.to_frame : Convert an Index to a DataFrame.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')

    By default, the original Index and original name is reused.

    >>> idx.to_series()
    animal
    Ant      Ant
    Bear    Bear
    Cow      Cow
    Name: animal, dtype: object

    To enforce a new Index, specify new labels to ``index``:

    >>> idx.to_series(index=[0, 1, 2])
    0     Ant
    1    Bear
    2     Cow
    Name: animal, dtype: object

    To override the name of the resulting column, specify `name`:

    >>> idx.to_series(name='zoo')
    animal
    Ant      Ant
    Bear    Bear
    Cow      Cow
    Name: zoo, dtype: object
    """
    from pandas import Series

    series_index = self._view() if index is None else index
    series_name = self.name if name is None else name
    # The values are copied; the default index is a view of the caller.
    return Series(self._values.copy(), index=series_index, name=series_name)
def to_frame(
    self, index: bool = True, name: Hashable = lib.no_default
) -> DataFrame:
    """
    Create a DataFrame with a column containing the Index.

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original Index.
    name : object, defaults to index.name
        The passed name should substitute for the index name (if it has
        one).

    Returns
    -------
    DataFrame
        DataFrame containing the original Index data.

    See Also
    --------
    Index.to_series : Convert an Index to a Series.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
    >>> idx.to_frame()
           animal
    animal
    Ant       Ant
    Bear     Bear
    Cow       Cow

    By default, the original Index is reused. To enforce a new Index:

    >>> idx.to_frame(index=False)
      animal
    0    Ant
    1   Bear
    2    Cow

    To override the name of the resulting column, specify `name`:

    >>> idx.to_frame(index=False, name='zoo')
        zoo
    0   Ant
    1  Bear
    2   Cow
    """
    from pandas import DataFrame

    # ``lib.no_default`` is the "not supplied" sentinel, so an explicit
    # ``name=None`` is honoured as a column label.
    column = self._get_level_names() if name is lib.no_default else name
    frame = DataFrame({column: self._values.copy()})
    if index:
        frame.index = self
    return frame
- # --------------------------------------------------------------------
- # Name-Centric Methods
@property
def name(self) -> Hashable:
    """
    Return Index or MultiIndex name.
    """
    current = self._name
    return current


@name.setter
def name(self, value: Hashable) -> None:
    if not self._no_setting_name:
        # Validate through maybe_extract_name before storing.
        maybe_extract_name(value, None, type(self))
        self._name = value
        return

    # Used in MultiIndex.levels to avoid silently ignoring name updates.
    raise RuntimeError(
        "Cannot set name on a level of a MultiIndex. Use "
        "'MultiIndex.set_names' instead."
    )
@final
def _validate_names(
    self, name=None, names=None, deep: bool = False
) -> list[Hashable]:
    """
    Resolve the singular ``name`` / plural ``names`` arguments into one list.

    Handles the quirk of having a singular ``name`` parameter for a general
    Index and a plural ``names`` parameter for MultiIndex.

    Parameters
    ----------
    name : label or list-like, optional
    names : list-like, optional
    deep : bool, default False
        When neither argument is given, deep-copy the current names
        instead of returning them directly.

    Returns
    -------
    list-like of Hashable

    Raises
    ------
    TypeError
        If both arguments are given, if ``names`` is not list-like, or if
        an entry is rejected by ``validate_all_hashable``.
    ValueError
        If the number of new names differs from ``len(self.names)``.
    """
    from copy import deepcopy

    if name is not None and names is not None:
        raise TypeError("Can only provide one of `names` and `name`")

    if names is not None:
        if not is_list_like(names):
            raise TypeError("Must pass list-like as `names`.")
        candidate = names
    elif name is None:
        candidate = deepcopy(self.names) if deep else self.names
    elif is_list_like(name):
        candidate = name
    else:
        candidate = [name]

    if len(candidate) != len(self.names):
        raise ValueError(
            f"Length of new names must be {len(self.names)}, got {len(candidate)}"
        )

    # All items in 'new_names' need to be hashable
    validate_all_hashable(*candidate, error_name=f"{type(self).__name__}.name")
    return candidate
def _get_default_index_names(
    self, names: Hashable | Sequence[Hashable] | None = None, default=None
) -> list[Hashable]:
    """
    Get names of index.

    Parameters
    ----------
    names : int, str or 1-dimensional list, default None
        Index names to set. A single int or str is wrapped in a list.
    default : str
        Default name of index, used for an unnamed flat index.

    Returns
    -------
    list of Hashable
        ``names`` when a non-empty list was supplied. Otherwise the
        current names: for a MultiIndex, ``self.names`` passed through
        ``com.fill_missing_names``; for a flat index, ``[self.name]``, or
        ``[default]`` when the index is unnamed.

    Raises
    ------
    ValueError
        If ``names`` is not None, an int, a str or a list.
    """
    from pandas.core.indexes.multi import MultiIndex

    if names is not None:
        if isinstance(names, (int, str)):
            names = [names]
        # Only genuine lists are accepted past this point (tuples and other
        # sequences are rejected).
        if not isinstance(names, list):
            raise ValueError("Index names must be str or 1-dimensional list")

    # None and an empty list both mean "use the existing names".
    if not names:
        if isinstance(self, MultiIndex):
            names = com.fill_missing_names(self.names)
        else:
            names = [default] if self.name is None else [self.name]

    return names
def _get_names(self) -> FrozenList:
    """
    Return the name of the index wrapped in a one-element FrozenList.
    """
    return FrozenList([self.name])


def _set_names(self, values, *, level=None) -> None:
    """
    Set new names on index. Each name has to be a hashable type.

    Parameters
    ----------
    values : str or sequence
        name(s) to set
    level : int, level name, or sequence of int/level names (default None)
        If the index is a MultiIndex (hierarchical), level(s) to set (None
        for all levels). Otherwise level must be None. Not used by this
        implementation.

    Raises
    ------
    ValueError
        If ``values`` is not list-like or does not hold exactly one name.
    TypeError
        If the name is not hashable.
    """
    if not is_list_like(values):
        raise ValueError("Names must be a list-like")

    count = len(values)
    if count != 1:
        raise ValueError(f"Length of new names must be 1, got {count}")

    # GH 20527
    # All items in 'name' need to be hashable:
    owner = type(self).__name__
    validate_all_hashable(*values, error_name=f"{owner}.name")

    self._name = values[0]


names = property(fset=_set_names, fget=_get_names)
@overload
def set_names(
    self: _IndexT, names, *, level=..., inplace: Literal[False] = ...
) -> _IndexT:
    ...


@overload
def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
    ...


@overload
def set_names(
    self: _IndexT, names, *, level=..., inplace: bool = ...
) -> _IndexT | None:
    ...


def set_names(
    self: _IndexT, names, *, level=None, inplace: bool = False
) -> _IndexT | None:
    """
    Set Index or MultiIndex name.

    Able to set new names partially and by level.

    Parameters
    ----------
    names : label or list of label or dict-like for MultiIndex
        Name(s) to set.

        .. versionchanged:: 1.3.0

    level : int, label or list of int or label, optional
        If the index is a MultiIndex and names is not dict-like, level(s) to set
        (None for all levels). Otherwise level must be None.

        .. versionchanged:: 1.3.0

    inplace : bool, default False
        Modifies the object directly, instead of creating a new Index or
        MultiIndex.

    Returns
    -------
    Index or None
        The same type as the caller or None if ``inplace=True``.

    See Also
    --------
    Index.rename : Able to set new names without level.

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx
    Index([1, 2, 3, 4], dtype='int64')
    >>> idx.set_names('quarter')
    Index([1, 2, 3, 4], dtype='int64', name='quarter')

    >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
    ...                                   [2018, 2019]])
    >>> idx
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               )
    >>> idx = idx.set_names(['kind', 'year'])
    >>> idx.set_names('species', level=0)
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['species', 'year'])

    When renaming levels with a dict, levels can not be passed.

    >>> idx.set_names({'kind': 'snake'})
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['snake', 'year'])
    """
    is_multi = isinstance(self, ABCMultiIndex)
    has_level = level is not None

    # --- argument validation (order matters: the first failure wins) ---
    if has_level and not is_multi:
        raise ValueError("Level must be None for non-MultiIndex")

    if has_level and not is_list_like(level) and is_list_like(names):
        raise TypeError("Names must be a string when a single level is provided.")

    if not is_list_like(names) and not has_level and self.nlevels > 1:
        raise TypeError("Must pass list-like as `names`.")

    names_is_mapping = is_dict_like(names)
    if names_is_mapping and not is_multi:
        raise TypeError("Can only pass dict-like as `names` for MultiIndex.")

    if names_is_mapping and has_level:
        raise TypeError("Can not pass level for dictlike `names`.")

    if names_is_mapping:
        # The checks above guarantee a MultiIndex with no explicit level.
        # Transform dict to list of new names and corresponding levels
        renames = [
            (position, names[old])
            for position, old in enumerate(self.names)
            if old in names.keys()
        ]
        level = [position for position, _ in renames]
        names = [new for _, new in renames]

    # --- normalise scalars to one-element lists ---
    if not is_list_like(names):
        names = [names]
    if level is not None and not is_list_like(level):
        level = [level]

    target = self if inplace else self._view()
    target._set_names(names, level=level)
    return None if inplace else target
def rename(self, name, inplace: bool = False):
    """
    Alter Index or MultiIndex name.

    Able to set new names without level. Defaults to returning new index.
    Length of names must match number of levels in MultiIndex.

    Parameters
    ----------
    name : label or list of labels
        Name(s) to set.
    inplace : bool, default False
        Modifies the object directly, instead of creating a new Index or
        MultiIndex.

    Returns
    -------
    Index or None
        The same type as the caller or None if ``inplace=True``.

    See Also
    --------
    Index.set_names : Able to set new names partially and by level.

    Examples
    --------
    >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
    >>> idx.rename('grade')
    Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

    >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
    ...                                   [2018, 2019]],
    ...                                   names=['kind', 'year'])
    >>> idx
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['kind', 'year'])
    >>> idx.rename(['species', 'year'])
    MultiIndex([('python', 2018),
                ('python', 2019),
                ( 'cobra', 2018),
                ( 'cobra', 2019)],
               names=['species', 'year'])
    >>> idx.rename('species')
    Traceback (most recent call last):
    TypeError: Must pass list-like as `names`.
    """
    # The given name is wrapped in a one-element list for ``set_names``.
    wrapped = [name]
    return self.set_names(wrapped, inplace=inplace)
- # --------------------------------------------------------------------
- # Level-Centric Methods
@property
def nlevels(self) -> int:
    """
    Number of levels.

    This implementation always reports a single level.
    """
    single_level = 1
    return single_level
def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
    """
    Compat with MultiIndex.

    Returns the caller itself, unchanged.
    """
    unchanged = self
    return unchanged
@final
def _validate_index_level(self, level) -> None:
    """
    Validate index level.

    For single-level Index getting level number is a no-op, but some
    verification must be done like in MultiIndex.

    Raises
    ------
    IndexError
        If ``level`` is an int other than ``0`` or ``-1``.
    KeyError
        If ``level`` is not an int and differs from the index name.
    """
    if not isinstance(level, int):
        # Non-integer levels are treated as a name to compare against ours.
        if level != self.name:
            raise KeyError(
                f"Requested level ({level}) does not match index name ({self.name})"
            )
        return

    if level < 0 and level != -1:
        raise IndexError(
            "Too many levels: Index has only 1 level, "
            f"{level} is not a valid level number"
        )
    if level > 0:
        raise IndexError(
            f"Too many levels: Index has only 1 level, not {level + 1}"
        )
def _get_level_number(self, level) -> int:
    """
    Validate ``level`` and return its position, which is always 0 here.

    Any error raised by ``_validate_index_level`` propagates.
    """
    self._validate_index_level(level)
    position = 0
    return position
def sortlevel(
    self, level=None, ascending: bool | list[bool] = True, sort_remaining=None
):
    """
    For internal compatibility with the Index API.

    Sort the Index. This is for compat with MultiIndex

    Parameters
    ----------
    ascending : bool, default True
        False to sort in descending order. A list holding exactly one
        bool is also accepted.

    level, sort_remaining are compat parameters

    Returns
    -------
    Index
        The result of ``self.sort_values(return_indexer=True, ...)``.

    Raises
    ------
    TypeError
        If ``ascending`` is neither a bool nor a one-element list
        containing a bool.
    """
    if not isinstance(ascending, (list, bool)):
        # Note the trailing space: the two literals are concatenated.
        raise TypeError(
            "ascending must be a single bool value or "
            "a list of bool values of length 1"
        )

    if isinstance(ascending, list):
        if len(ascending) != 1:
            raise TypeError("ascending must be a list of bool values of length 1")
        ascending = ascending[0]

    if not isinstance(ascending, bool):
        raise TypeError("ascending must be a bool value")

    return self.sort_values(return_indexer=True, ascending=ascending)
def _get_level_values(self, level) -> Index:
    """
    Return an Index of values for requested level.

    This is primarily useful to get an individual level of values from a
    MultiIndex, but is provided on Index as well for compatibility.

    Parameters
    ----------
    level : int or str
        It is either the integer position or the name of the level.

    Returns
    -------
    Index
        Calling object, as there is only one level in the Index.

    See Also
    --------
    MultiIndex.get_level_values : Get values for a level of a MultiIndex.

    Notes
    -----
    For Index, level should be 0, since there are no multiple levels.

    Examples
    --------
    >>> idx = pd.Index(list('abc'))
    >>> idx
    Index(['a', 'b', 'c'], dtype='object')

    Get level values by supplying `level` as integer:

    >>> idx.get_level_values(0)
    Index(['a', 'b', 'c'], dtype='object')
    """
    # Only validation is needed; the single level is the index itself.
    self._validate_index_level(level)
    only_level = self
    return only_level


get_level_values = _get_level_values
@final
def droplevel(self, level: IndexLabel = 0):
    """
    Return index with requested level(s) removed.

    If resulting index has only 1 level left, the result will be
    of Index type, not MultiIndex. The original index is not modified inplace.

    Parameters
    ----------
    level : int, str, or list-like, default 0
        If a string is given, must be the name of a level
        If list-like, elements must be names or indexes of levels.

    Returns
    -------
    Index or MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays(
    ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
    >>> mi
    MultiIndex([(1, 3, 5),
                (2, 4, 6)],
               names=['x', 'y', 'z'])

    >>> mi.droplevel()
    MultiIndex([(3, 5),
                (4, 6)],
               names=['y', 'z'])

    >>> mi.droplevel(2)
    MultiIndex([(1, 3),
                (2, 4)],
               names=['x', 'y'])

    >>> mi.droplevel('z')
    MultiIndex([(1, 3),
                (2, 4)],
               names=['x', 'y'])

    >>> mi.droplevel(['x', 'y'])
    Index([5, 6], dtype='int64', name='z')
    """
    requested = level if isinstance(level, (tuple, list)) else [level]

    # Highest level number first, so popping one level does not shift the
    # positions of the levels still to be removed.
    positions = sorted(
        (self._get_level_number(item) for item in requested), reverse=True
    )
    return self._drop_level_numbers(positions)
@final
def _drop_level_numbers(self, levnums: list[int]):
    """
    Drop MultiIndex levels by level _number_, not name.

    Parameters
    ----------
    levnums : list of int
        Positions of the levels to remove, popped in the order given.

    Returns
    -------
    Index or MultiIndex
        The caller itself when nothing is dropped from a non-MultiIndex; a
        flat Index when exactly one level remains; otherwise a MultiIndex.

    Raises
    ------
    ValueError
        If the request would leave no level behind.
    """
    if not levnums and not isinstance(self, ABCMultiIndex):
        return self
    if len(levnums) >= self.nlevels:
        raise ValueError(
            f"Cannot remove {len(levnums)} levels from an index with "
            f"{self.nlevels} levels: at least one level must be left."
        )
    # The two checks above guarantee that here self is a MultiIndex
    multi = cast("MultiIndex", self)

    kept_levels = list(multi.levels)
    kept_codes = list(multi.codes)
    kept_names = list(multi.names)

    for position in levnums:
        kept_levels.pop(position)
        kept_codes.pop(position)
        kept_names.pop(position)

    if len(kept_levels) != 1:
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex(
            levels=kept_levels,
            codes=kept_codes,
            names=kept_names,
            verify_integrity=False,
        )

    # Exactly one level left: collapse to a flat index.
    lev = kept_levels[0]
    codes = kept_codes[0]
    label = kept_names[0]

    if len(lev) != 0:
        # set nan if needed
        missing = codes == -1
        result = lev.take(codes)
        if missing.any():
            result = result.putmask(missing, np.nan)
        result._name = label
    elif len(codes) == 0:
        # If lev is empty, lev.take will fail GH#42055
        # GH#45230 preserve RangeIndex here
        # see test_reset_index_empty_rangeindex
        result = lev[:0]
    else:
        # If lev is empty, lev.take will fail GH#42055
        res_values = algos.take(lev._values, codes, allow_fill=True)
        # _constructor instead of type(lev) for RangeIndex compat GH#35230
        result = lev._constructor._simple_new(res_values, name=label)

    return result
- # --------------------------------------------------------------------
- # Introspection Methods
@cache_readonly
@final
def _can_hold_na(self) -> bool:
    """
    Whether the dtype of this index is considered able to hold NA values.

    Extension dtypes answer through their own ``_can_hold_na`` (interval
    dtypes are always reported as True); NumPy integer, unsigned and
    boolean kinds cannot hold NA, every other NumPy kind can.
    """
    dtype = self.dtype
    if isinstance(dtype, ExtensionDtype):
        if isinstance(dtype, IntervalDtype):
            # FIXME(GH#45720): this is inaccurate for integer-backed
            #  IntervalArray, but without it other.categories.take raises
            #  in IntervalArray._cmp_method
            return True
        return dtype._can_hold_na
    return dtype.kind not in ["i", "u", "b"]
@property
def is_monotonic_increasing(self) -> bool:
    """
    Return a boolean if the values are equal or increasing.

    Returns
    -------
    bool

    See Also
    --------
    Index.is_monotonic_decreasing : Check if the values are equal or decreasing.

    Examples
    --------
    >>> pd.Index([1, 2, 3]).is_monotonic_increasing
    True
    >>> pd.Index([1, 2, 2]).is_monotonic_increasing
    True
    >>> pd.Index([1, 3, 2]).is_monotonic_increasing
    False
    """
    # The answer is delegated to the index engine.
    engine = self._engine
    return engine.is_monotonic_increasing
@property
def is_monotonic_decreasing(self) -> bool:
    """
    Return a boolean if the values are equal or decreasing.

    Returns
    -------
    bool

    See Also
    --------
    Index.is_monotonic_increasing : Check if the values are equal or increasing.

    Examples
    --------
    >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
    True
    >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
    True
    >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
    False
    """
    # The answer is delegated to the index engine.
    engine = self._engine
    return engine.is_monotonic_decreasing
@final
@property
def _is_strictly_monotonic_increasing(self) -> bool:
    """
    Return if the index is strictly monotonic increasing
    (only increasing) values.

    Examples
    --------
    >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
    True
    >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
    False
    >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
    False
    """
    # Strict monotonicity = no duplicates + (non-strict) monotonicity.
    unique = self.is_unique
    if not unique:
        return unique
    return self.is_monotonic_increasing
@final
@property
def _is_strictly_monotonic_decreasing(self) -> bool:
    """
    Return if the index is strictly monotonic decreasing
    (only decreasing) values.

    Examples
    --------
    >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
    True
    >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
    False
    >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
    False
    """
    # Strict monotonicity = no duplicates + (non-strict) monotonicity.
    unique = self.is_unique
    if not unique:
        return unique
    return self.is_monotonic_decreasing
@cache_readonly
def is_unique(self) -> bool:
    """
    Return if the index has unique values.

    Returns
    -------
    bool

    See Also
    --------
    Index.has_duplicates : Inverse method that checks if it has duplicate values.

    Examples
    --------
    >>> idx = pd.Index([1, 5, 7, 7])
    >>> idx.is_unique
    False

    >>> idx = pd.Index([1, 5, 7])
    >>> idx.is_unique
    True

    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_unique
    False

    >>> idx = pd.Index(["Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_unique
    True
    """
    # The answer is delegated to the index engine.
    engine = self._engine
    return engine.is_unique
@final
@property
def has_duplicates(self) -> bool:
    """
    Check if the Index has duplicate values.

    Returns
    -------
    bool
        Whether or not the Index has duplicate values.

    See Also
    --------
    Index.is_unique : Inverse method that checks if it has unique values.

    Examples
    --------
    >>> idx = pd.Index([1, 5, 7, 7])
    >>> idx.has_duplicates
    True

    >>> idx = pd.Index([1, 5, 7])
    >>> idx.has_duplicates
    False

    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.has_duplicates
    True

    >>> idx = pd.Index(["Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.has_duplicates
    False
    """
    all_distinct = self.is_unique
    return not all_distinct
@final
def is_boolean(self) -> bool:
    """
    Check if the Index only consists of booleans.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_bool_dtype` instead.

    Emits a ``FutureWarning`` on every call.

    Returns
    -------
    bool
        Whether or not the Index only consists of booleans.

    See Also
    --------
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype (deprecated).
    is_categorical : Check if the Index holds categorical data.
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index([True, False, True])
    >>> idx.is_boolean()  # doctest: +SKIP
    True

    >>> idx = pd.Index(["True", "False", "True"])
    >>> idx.is_boolean()  # doctest: +SKIP
    False

    >>> idx = pd.Index([True, False, "True"])
    >>> idx.is_boolean()  # doctest: +SKIP
    False
    """
    # The replacement is ``is_bool_dtype``; there is no ``is_bool_type``
    # in pandas.api.types.
    warnings.warn(
        f"{type(self).__name__}.is_boolean is deprecated. "
        "Use pandas.api.types.is_bool_dtype instead.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self.inferred_type in ["boolean"]
@final
def is_integer(self) -> bool:
    """
    Check if the Index only consists of integers.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_integer_dtype` instead.

    Emits a ``FutureWarning`` on every call.

    Returns
    -------
    bool
        Whether or not the Index only consists of integers.

    See Also
    --------
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx.is_integer()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_integer()  # doctest: +SKIP
    False

    >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
    >>> idx.is_integer()  # doctest: +SKIP
    False
    """
    notice = (
        f"{type(self).__name__}.is_integer is deprecated. "
        "Use pandas.api.types.is_integer_dtype instead."
    )
    warnings.warn(notice, FutureWarning, stacklevel=find_stack_level())
    return self.inferred_type == "integer"
@final
def is_floating(self) -> bool:
    """
    Check if the Index is a floating type.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_float_dtype` instead.

    The Index may consist of only floats, NaNs, or a mix of floats,
    integers, or NaNs. Emits a ``FutureWarning`` on every call.

    Returns
    -------
    bool
        Whether or not the Index only consists of floats, NaNs, or
        a mix of floats, integers, or NaNs.

    See Also
    --------
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_floating()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
    >>> idx.is_floating()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4, np.nan])
    >>> idx.is_floating()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx.is_floating()  # doctest: +SKIP
    False
    """
    notice = (
        f"{type(self).__name__}.is_floating is deprecated. "
        "Use pandas.api.types.is_float_dtype instead."
    )
    warnings.warn(notice, FutureWarning, stacklevel=find_stack_level())
    floating_kinds = ("floating", "mixed-integer-float", "integer-na")
    return self.inferred_type in floating_kinds
@final
def is_numeric(self) -> bool:
    """
    Check if the Index only consists of numeric data.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_any_real_numeric_dtype` instead.

    Emits a ``FutureWarning`` on every call. The result is True only when
    the inferred type of the values is ``"integer"`` or ``"floating"``.

    Returns
    -------
    bool
        Whether or not the Index only consists of numeric data.

    See Also
    --------
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_numeric()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4.0])
    >>> idx.is_numeric()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx.is_numeric()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
    >>> idx.is_numeric()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
    >>> idx.is_numeric()  # doctest: +SKIP
    False
    """
    # Message text is left untouched; only the docstring was brought in
    # line with the replacement it names.
    warnings.warn(
        f"{type(self).__name__}.is_numeric is deprecated. "
        "Use pandas.api.types.is_any_real_numeric_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self.inferred_type in ["integer", "floating"]
@final
def is_object(self) -> bool:
    """
    Check if the Index is of the object dtype.

    .. deprecated:: 2.0.0
       Use `pandas.api.types.is_object_dtype` instead.

    Emits a ``FutureWarning`` on every call.

    Returns
    -------
    bool
        Whether or not the Index is of the object dtype.

    See Also
    --------
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
    >>> idx.is_object()  # doctest: +SKIP
    True

    >>> idx = pd.Index(["Apple", "Mango", 2.0])
    >>> idx.is_object()  # doctest: +SKIP
    True

    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_object()  # doctest: +SKIP
    False

    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_object()  # doctest: +SKIP
    False
    """
    # The trailing space keeps the two concatenated sentences apart.
    warnings.warn(
        f"{type(self).__name__}.is_object is deprecated. "
        "Use pandas.api.types.is_object_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return is_object_dtype(self.dtype)
@final
def is_categorical(self) -> bool:
    """
    Check if the Index holds categorical data.

    .. deprecated:: 2.0.0
          Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.

    Emits a ``FutureWarning`` on every call.

    Returns
    -------
    bool
        True if the Index is categorical.

    See Also
    --------
    CategoricalIndex : Index for categorical data.
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_categorical()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 3, 5, 7])
    >>> idx.is_categorical()  # doctest: +SKIP
    False

    >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
    >>> s
    0        Peter
    1       Victor
    2    Elisabeth
    3          Mar
    dtype: object
    >>> s.index.is_categorical()  # doctest: +SKIP
    False
    """
    # The trailing space keeps the two concatenated sentences apart.
    warnings.warn(
        f"{type(self).__name__}.is_categorical is deprecated. "
        "Use pandas.api.types.is_categorical_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self.inferred_type in ["categorical"]
@final
def is_interval(self) -> bool:
    """
    Check if the Index holds Interval objects.

    .. deprecated:: 2.0.0
        Use `isinstance(index.dtype, pd.IntervalDtype)` instead.

    Emits a ``FutureWarning`` on every call.

    Returns
    -------
    bool
        Whether or not the Index holds Interval objects.

    See Also
    --------
    IntervalIndex : Index for Interval objects.
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).

    Examples
    --------
    >>> idx = pd.Index([pd.Interval(left=0, right=5),
    ...                 pd.Interval(left=5, right=10)])
    >>> idx.is_interval()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 3, 5, 7])
    >>> idx.is_interval()  # doctest: +SKIP
    False
    """
    # The trailing space keeps the two concatenated sentences apart.
    warnings.warn(
        f"{type(self).__name__}.is_interval is deprecated. "
        "Use pandas.api.types.is_interval_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self.inferred_type in ["interval"]
@final
def _holds_integer(self) -> bool:
    """
    Report whether ``inferred_type`` is ``"integer"`` or ``"mixed-integer"``.
    """
    kind = self.inferred_type
    return kind == "integer" or kind == "mixed-integer"
@final
def holds_integer(self) -> bool:
    """
    Whether the type is an integer type.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.infer_dtype` instead

    Returns
    -------
    bool
        Whatever the private ``_holds_integer`` check returns.
    """
    message = (
        f"{type(self).__name__}.holds_integer is deprecated. "
        "Use pandas.api.types.infer_dtype instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
    return self._holds_integer()
@cache_readonly
def inferred_type(self) -> str_t:
    """
    Return a string naming the type inferred from the values.

    The inference is delegated to ``lib.infer_dtype`` with ``skipna=False``.
    """
    values = self._values
    return lib.infer_dtype(values, skipna=False)
@cache_readonly
@final
def _is_all_dates(self) -> bool:
    """
    Report whether the index values consist only of dates.
    """
    if needs_i8_conversion(self.dtype):
        return True
    if self.dtype != _dtype_obj or self._is_multi:
        # First half of the condition:
        # TODO(ExtensionIndex): 3rd party EA might override?
        # Note: this includes IntervalIndex, even when the left/right
        #  contain datetime-like objects.
        return False
    # Object dtype, not a MultiIndex: inspect the actual values.
    return is_datetime_array(ensure_object(self._values))
@final
@cache_readonly
def _is_multi(self) -> bool:
    """
    Cached equivalent of ``isinstance(self, MultiIndex)``.
    """
    is_multi = isinstance(self, ABCMultiIndex)
    return is_multi
- # --------------------------------------------------------------------
- # Pickle Methods
def __reduce__(self):
    """
    Pickle support: rebuild via ``_new_Index`` from the class, data and name.
    """
    state = {"data": self._data, "name": self.name}
    return (_new_Index, (type(self), state), None)
- # --------------------------------------------------------------------
- # Null Handling Methods
@cache_readonly
def _na_value(self):
    """The expected NA value to use with this index."""
    dtype = self.dtype
    if not isinstance(dtype, np.dtype):
        # Non-numpy dtypes carry their own NA sentinel.
        return dtype.na_value
    # numpy kinds "m"/"M" use NaT; every other numpy dtype uses NaN.
    return NaT if dtype.kind in ("m", "M") else np.nan
@cache_readonly
def _isnan(self) -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask that is True where a value is NaN.
    """
    if not self._can_hold_na:
        # shouldn't reach to this condition by checking hasnans beforehand
        return np.zeros(len(self), dtype=np.bool_)
    return isna(self)
@cache_readonly
def hasnans(self) -> bool:
    """
    Return True if there are any NaNs.

    Enables various performance speedups.

    Returns
    -------
    bool
        Always False when the index cannot hold NA values.
    """
    return bool(self._isnan.any()) if self._can_hold_na else False
@final
def isna(self) -> npt.NDArray[np.bool_]:
    """
    Detect missing values.

    Return a boolean same-sized object indicating if the values are NA.
    NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
    mapped to ``True`` values.
    Everything else get mapped to ``False`` values. Characters such as
    empty strings `''` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).

    Returns
    -------
    numpy.ndarray[bool]
        A boolean array of whether my values are NA.

    See Also
    --------
    Index.notna : Boolean inverse of isna.
    Index.dropna : Omit entries with missing values.
    isna : Top-level isna.
    Series.isna : Detect missing values in Series object.

    Examples
    --------
    Show which entries in a pandas.Index are NA. The result is an
    array.

    >>> idx = pd.Index([5.2, 6.0, np.nan])
    >>> idx
    Index([5.2, 6.0, nan], dtype='float64')
    >>> idx.isna()
    array([False, False,  True])

    Empty strings are not considered NA values. None is considered an NA
    value.

    >>> idx = pd.Index(['black', '', 'red', None])
    >>> idx
    Index(['black', '', 'red', None], dtype='object')
    >>> idx.isna()
    array([False, False, False,  True])

    For datetimes, `NaT` (Not a Time) is considered as an NA value.

    >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
    ...                         pd.Timestamp(''), None, pd.NaT])
    >>> idx
    DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
                  dtype='datetime64[ns]', freq=None)
    >>> idx.isna()
    array([False,  True,  True,  True])
    """
    # The mask is the cached ``_isnan`` attribute, returned as-is.
    na_mask = self._isnan
    return na_mask


isnull = isna
@final
def notna(self) -> npt.NDArray[np.bool_]:
    """
    Detect existing (non-missing) values.

    Return a boolean same-sized object indicating if the values are not NA.
    Non-missing values get mapped to ``True``. Characters such as empty
    strings ``''`` or :attr:`numpy.inf` are not considered NA values
    (unless you set ``pandas.options.mode.use_inf_as_na = True``).
    NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
    values.

    Returns
    -------
    numpy.ndarray[bool]
        Boolean array to indicate which entries are not NA.

    See Also
    --------
    Index.notnull : Alias of notna.
    Index.isna: Inverse of notna.
    notna : Top-level notna.

    Examples
    --------
    Show which entries in an Index are not NA. The result is an
    array.

    >>> idx = pd.Index([5.2, 6.0, np.nan])
    >>> idx
    Index([5.2, 6.0, nan], dtype='float64')
    >>> idx.notna()
    array([ True,  True, False])

    Empty strings are not considered NA values. None is considered a NA
    value.

    >>> idx = pd.Index(['black', '', 'red', None])
    >>> idx
    Index(['black', '', 'red', None], dtype='object')
    >>> idx.notna()
    array([ True,  True,  True, False])
    """
    # Element-wise inversion of the ``isna`` mask.
    missing = self.isna()
    return ~missing


notnull = notna
def fillna(self, value=None, downcast=None):
    """
    Fill NA/NaN values with the specified value.

    Parameters
    ----------
    value : scalar
        Scalar value to use to fill holes (e.g. 0).
        This value cannot be list-like.
    downcast : dict, default is None
        A dict of item->dtype of what to downcast if possible,
        or the string 'infer' which will try to downcast to an appropriate
        equal type (e.g. float64 to int64 if possible).

    Returns
    -------
    Index

    Raises
    ------
    NotImplementedError
        If the index has NaNs and ``downcast`` is anything other than None.

    See Also
    --------
    DataFrame.fillna : Fill NaN values of a DataFrame.
    Series.fillna : Fill NaN Values of a Series.
    """
    value = self._require_scalar(value)

    if not self.hasnans:
        # Nothing to fill: hand back a view.
        return self._view()

    filled = self.putmask(self._isnan, value)
    if downcast is not None:
        raise NotImplementedError(
            f"{type(self).__name__}.fillna does not support 'downcast' "
            "argument values other than 'None'."
        )
    # no need to care metadata other than name
    # because it can't have freq if it has NaTs
    # _with_infer needed for test_fillna_categorical
    return Index._with_infer(filled, name=self.name)
def dropna(self: _IndexT, how: AnyAll = "any") -> _IndexT:
    """
    Return Index without NA/NaN values.

    Parameters
    ----------
    how : {'any', 'all'}, default 'any'
        If the Index is a MultiIndex, drop the value when any or all levels
        are NaN.

    Returns
    -------
    Index

    Raises
    ------
    ValueError
        If ``how`` is neither 'any' nor 'all'.
    """
    if how not in ("any", "all"):
        raise ValueError(f"invalid how option: {how}")

    if not self.hasnans:
        return self._view()

    # Keep only the positions that are not flagged as NaN.
    kept = self._values[~self._isnan]
    return type(self)._simple_new(kept, name=self.name)
- # --------------------------------------------------------------------
- # Uniqueness Methods
def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
    """
    Return unique values in the index.

    Unique values are returned in order of appearance, this does NOT sort.

    Parameters
    ----------
    level : int or hashable, optional
        Only return values from specified level (for MultiIndex).
        If int, gets the level by integer position, else by level name.

    Returns
    -------
    Index

    See Also
    --------
    unique : Numpy array of unique values in that column.
    Series.unique : Return unique values of Series object.
    """
    if level is not None:
        self._validate_index_level(level)

    if not self.is_unique:
        deduped = super().unique()
        return self._shallow_copy(deduped)

    # Already unique: a view is enough.
    return self._view()
def drop_duplicates(self: _IndexT, *, keep: DropKeep = "first") -> _IndexT:
    """
    Return Index with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.

    Returns
    -------
    Index

    See Also
    --------
    Series.drop_duplicates : Equivalent method on Series.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Index.duplicated : Related method on Index, indicating duplicate
        Index values.

    Examples
    --------
    Generate a pandas.Index with duplicate values.

    >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])

    The `keep` parameter controls which duplicate values are removed.
    The value 'first' keeps the first occurrence for each
    set of duplicated entries. The default value of keep is 'first'.

    >>> idx.drop_duplicates(keep='first')
    Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')

    The value 'last' keeps the last occurrence for each set of duplicated
    entries.

    >>> idx.drop_duplicates(keep='last')
    Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')

    The value ``False`` discards all sets of duplicated entries.

    >>> idx.drop_duplicates(keep=False)
    Index(['cow', 'beetle', 'hippo'], dtype='object')
    """
    if not self.is_unique:
        return super().drop_duplicates(keep=keep)

    # No duplicates present: a view is enough.
    return self._view()
def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
    """
    Indicate duplicate index values.

    Duplicated values are indicated as ``True`` values in the resulting
    array. Either all duplicates, all except the first, or all except the
    last occurrence of duplicates can be indicated.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        The value or values in a set of duplicates to mark as missing.

        - 'first' : Mark duplicates as ``True`` except for the first
          occurrence.
        - 'last' : Mark duplicates as ``True`` except for the last
          occurrence.
        - ``False`` : Mark all duplicates as ``True``.

    Returns
    -------
    np.ndarray[bool]

    See Also
    --------
    Series.duplicated : Equivalent method on pandas.Series.
    DataFrame.duplicated : Equivalent method on pandas.DataFrame.
    Index.drop_duplicates : Remove duplicate values from Index.

    Examples
    --------
    By default, for each set of duplicated values, the first occurrence is
    set to False and all others to True:

    >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
    >>> idx.duplicated()
    array([False, False,  True, False,  True])

    which is equivalent to

    >>> idx.duplicated(keep='first')
    array([False, False,  True, False,  True])

    By using 'last', the last occurrence of each set of duplicated values
    is set on False and all others on True:

    >>> idx.duplicated(keep='last')
    array([ True, False,  True, False, False])

    By setting keep on ``False``, all duplicates are True:

    >>> idx.duplicated(keep=False)
    array([ True, False,  True, False,  True])
    """
    if not self.is_unique:
        return self._duplicated(keep=keep)

    # fastpath available bc we are immutable
    return np.zeros(len(self), dtype=bool)
- # --------------------------------------------------------------------
- # Arithmetic & Logical Methods
def __iadd__(self, other):
    """In-place add; evaluates ``self + other`` and returns that result."""
    # alias for __add__
    total = self + other
    return total
@final
def __nonzero__(self) -> NoReturn:
    """
    Refuse truth-value testing.

    Raises
    ------
    ValueError
        Always.
    """
    message = (
        f"The truth value of a {type(self).__name__} is ambiguous. "
        "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
    )
    raise ValueError(message)


__bool__ = __nonzero__
- # --------------------------------------------------------------------
- # Set Operation Methods
def _get_reconciled_name_object(self, other):
    """
    If the result of a set operation will be self,
    return self, unless the name changes, in which
    case make a shallow copy of self.
    """
    name = get_op_result_name(self, other)
    if self.name is name:
        # Same name object: self can be returned untouched.
        return self
    return self.rename(name)
@final
def _validate_sort_keyword(self, sort):
    """
    Check that ``sort`` is one of None, False or True.

    Raises
    ------
    ValueError
        If ``sort`` is not found in ``[None, False, True]``.
    """
    if sort in [None, False, True]:
        return
    raise ValueError(
        "The 'sort' keyword only takes the values of "
        f"None, True, or False; {sort} was passed."
    )
@final
def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
    """
    With mismatched timezones, cast both to UTC.

    Returns the pair unchanged unless both sides are tz-aware
    DatetimeIndex objects.
    """
    # Caller is responsible for checking
    #  `not is_dtype_equal(self.dtype, other.dtype)`
    both_tz_aware = (
        isinstance(self, ABCDatetimeIndex)
        and isinstance(other, ABCDatetimeIndex)
        and self.tz is not None
        and other.tz is not None
    )
    if not both_tz_aware:
        return self, other

    # GH#39328, GH#45357
    return self.tz_convert("UTC"), other.tz_convert("UTC")
@final
def union(self, other, sort=None):
    """
    Form the union of two Index objects.

    If the Index objects are incompatible, both Index objects will be
    cast to dtype('object') first.

    Parameters
    ----------
    other : Index or array-like
    sort : bool or None, default None
        Whether to sort the resulting Index.

        * None : Sort the result, except when

          1. `self` and `other` are equal.
          2. `self` or `other` has length 0.
          3. Some values in `self` or `other` cannot be compared.
             A RuntimeWarning is issued in this case.

        * False : do not sort the result.
        * True : Sort the result (which may raise TypeError).

    Returns
    -------
    Index

    Raises
    ------
    NotImplementedError
        If `self` is a MultiIndex and `other` is a non-empty Index whose
        dtype differs from `self` and is not object dtype.

    Examples
    --------
    Union matching dtypes

    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.union(idx2)
    Index([1, 2, 3, 4, 5, 6], dtype='int64')

    Union mismatched dtypes

    >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
    >>> idx2 = pd.Index([1, 2, 3, 4])
    >>> idx1.union(idx2)
    Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')

    MultiIndex case

    >>> idx1 = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
    ... )
    >>> idx1
    MultiIndex([(1,  'Red'),
        (1, 'Blue'),
        (2,  'Red'),
        (2, 'Blue')],
       )
    >>> idx2 = pd.MultiIndex.from_arrays(
    ...     [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
    ... )
    >>> idx2
    MultiIndex([(3,   'Red'),
        (3, 'Green'),
        (2,   'Red'),
        (2, 'Green')],
       )
    >>> idx1.union(idx2)
    MultiIndex([(1,  'Blue'),
        (1,   'Red'),
        (2,  'Blue'),
        (2, 'Green'),
        (2,   'Red'),
        (3, 'Green'),
        (3,   'Red')],
       )
    >>> idx1.union(idx2, sort=False)
    MultiIndex([(1,   'Red'),
        (1,  'Blue'),
        (2,   'Red'),
        (2,  'Blue'),
        (3,   'Red'),
        (3, 'Green'),
        (2, 'Green')],
       )
    """
    # Validate arguments and coerce ``other`` before any dtype logic.
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_name = self._convert_can_do_setop(other)

    if not is_dtype_equal(self.dtype, other.dtype):
        if (
            isinstance(self, ABCMultiIndex)
            and not is_object_dtype(_unpack_nested_dtype(other))
            and len(other) > 0
        ):
            raise NotImplementedError(
                "Can only union MultiIndex with MultiIndex or Index of tuples, "
                "try mi.to_flat_index().union(other) instead."
            )
        self, other = self._dti_setop_align_tzs(other, "union")

        # Dtypes differ: cast both sides to a common dtype and recurse, so
        # the remaining branches only ever see matching dtypes.
        dtype = self._find_common_type_compat(other)
        left = self.astype(dtype, copy=False)
        right = other.astype(dtype, copy=False)
        return left.union(right, sort=sort)

    elif not len(other) or self.equals(other):
        # NB: whether this (and the `if not len(self)` check below) come before
        #  or after the is_dtype_equal check above affects the returned dtype
        result = self._get_reconciled_name_object(other)
        if sort is True:
            return result.sort_values()
        return result

    elif not len(self):
        # ``self`` is empty, so the union is ``other`` (with reconciled name).
        result = other._get_reconciled_name_object(self)
        if sort is True:
            return result.sort_values()
        return result

    # General case: delegate to the overridable ``_union`` hook.
    result = self._union(other, sort=sort)

    return self._wrap_setop_result(other, result)
def _union(self, other: Index, sort: bool | None):
    """
    Specific union logic should go here. In subclasses, union behavior
    should be overwritten here rather than in `self.union`.

    Parameters
    ----------
    other : Index or array-like
    sort : bool or None
        Whether to sort the resulting index.

        * True : sort the result
        * False : do not sort the result.
        * None : sort the result, except when `self` and `other` are equal
          or when the values cannot be compared.

    Returns
    -------
    Index
        Depending on the path taken, the value returned may instead be an
        array of values or a MultiIndex (see the ``result`` annotation in
        the body).
    """
    lvals = self._values
    rvals = other._values

    if (
        sort in (None, True)
        and self.is_monotonic_increasing
        and other.is_monotonic_increasing
        and not (self.has_duplicates and other.has_duplicates)
        and self._can_use_libjoin
    ):
        # Both are monotonic and at least one is unique, so can use outer join
        #  (actually don't need either unique, but without this restriction
        #  test_union_same_value_duplicated_in_both fails)
        try:
            return self._outer_indexer(other)[0]
        except (TypeError, IncompatibleFrequency):
            # incomparable objects; should only be for object dtype
            value_list = list(lvals)

            # worth making this faster? a very unusual case
            value_set = set(lvals)
            # Append, in order, the values of ``other`` not present in ``self``.
            value_list.extend([x for x in rvals if x not in value_set])
            # If objects are unorderable, we must have object dtype.
            return np.array(value_list, dtype=object)

    elif not other.is_unique:
        # other has duplicates
        result_dups = algos.union_with_duplicates(self, other)
        return _maybe_try_sort(result_dups, sort)

    # The rest of this method is analogous to Index._intersection_via_get_indexer

    # Self may have duplicates; other already checked as unique
    # find indexes of things in "other" that are not in "self"
    if self._index_as_unique:
        indexer = self.get_indexer(other)
        # -1 marks entries of ``other`` that were not found in ``self``.
        missing = (indexer == -1).nonzero()[0]
    else:
        missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

    result: Index | MultiIndex | ArrayLike
    if self._is_multi:
        # Preserve MultiIndex to avoid losing dtypes
        result = self.append(other.take(missing))

    else:
        if len(missing) > 0:
            other_diff = rvals.take(missing)
            result = concat_compat((lvals, other_diff))
        else:
            # Nothing new in ``other``: the union is just our own values.
            result = lvals

    if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
        # if both are monotonic then result should already be sorted
        result = _maybe_try_sort(result, sort)

    return result
@final
def _wrap_setop_result(self, other: Index, result) -> Index:
    """
    Turn a raw set-operation result into an Index carrying the op name.

    An Index result is renamed only when its name differs; anything else
    is wrapped through ``_shallow_copy``.
    """
    name = get_op_result_name(self, other)
    if not isinstance(result, Index):
        return self._shallow_copy(result, name=name)
    if result.name != name:
        result = result.rename(name)
    return result
@final
def intersection(self, other, sort: bool = False):
    """
    Form the intersection of two Index objects.

    This returns a new Index with elements common to the index and `other`.

    Parameters
    ----------
    other : Index or array-like
    sort : True, False or None, default False
        Whether to sort the resulting index.

        * None : sort the result, except when `self` and `other` are equal
          or when the values cannot be compared.
        * False : do not sort the result.
        * True : Sort the result (which may raise TypeError).

    Returns
    -------
    Index

    Examples
    --------
    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.intersection(idx2)
    Index([3, 4], dtype='int64')
    """
    # Validate arguments and coerce ``other`` before any dtype logic.
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_name = self._convert_can_do_setop(other)

    if not is_dtype_equal(self.dtype, other.dtype):
        self, other = self._dti_setop_align_tzs(other, "intersection")

    if self.equals(other):
        # Equal inputs: the answer is ``self``, de-duplicated if needed.
        if self.has_duplicates:
            result = self.unique()._get_reconciled_name_object(other)
        else:
            result = self._get_reconciled_name_object(other)
        if sort is True:
            result = result.sort_values()
        return result

    if len(self) == 0 or len(other) == 0:
        # fastpath; we need to be careful about having commutativity

        if self._is_multi or other._is_multi:
            # _convert_can_do_setop ensures that we have both or neither
            # We retain self.levels
            return self[:0].rename(result_name)

        dtype = self._find_common_type_compat(other)
        if is_dtype_equal(self.dtype, dtype):
            # Slicing allows us to retain DTI/TDI.freq, RangeIndex

            # Note: self[:0] vs other[:0] affects
            #  1) which index's `freq` we get in DTI/TDI cases
            #     This may be a historical artifact, i.e. no documented
            #     reason for this choice.
            #  2) The `step` we get in RangeIndex cases
            if len(self) == 0:
                return self[:0].rename(result_name)
            else:
                return other[:0].rename(result_name)

        # Common dtype differs from ours: build an empty Index of that dtype.
        return Index([], dtype=dtype, name=result_name)

    elif not self._should_compare(other):
        # We can infer that the intersection is empty.
        if isinstance(self, ABCMultiIndex):
            return self[:0].rename(result_name)
        return Index([], name=result_name)

    elif not is_dtype_equal(self.dtype, other.dtype):
        # Cast both sides to a common dtype and recurse with matching dtypes.
        dtype = self._find_common_type_compat(other)
        this = self.astype(dtype, copy=False)
        other = other.astype(dtype, copy=False)
        return this.intersection(other, sort=sort)

    # General case: delegate to the overridable ``_intersection`` hook.
    result = self._intersection(other, sort=sort)
    return self._wrap_intersection_result(other, result)
def _intersection(self, other: Index, sort: bool = False):
    """
    intersection specialized to the case with matching dtypes.

    Tries ``_inner_indexer`` first when both indexes are monotonic
    increasing, ``_can_use_libjoin`` is set and ``self`` is not a
    MultiIndex; otherwise (or if that raises TypeError) falls back to
    ``_intersection_via_get_indexer`` followed by ``_maybe_try_sort``.
    """
    if (
        self.is_monotonic_increasing
        and other.is_monotonic_increasing
        and self._can_use_libjoin
        and not isinstance(self, ABCMultiIndex)
    ):
        try:
            res_indexer, indexer, _ = self._inner_indexer(other)
        except TypeError:
            # non-comparable; should only be for object dtype
            pass
        else:
            # TODO: algos.unique1d should preserve DTA/TDA
            if is_numeric_dtype(self):
                # This is faster, because Index.unique() checks for uniqueness
                # before calculating the unique values.
                res = algos.unique1d(res_indexer)
            else:
                result = self.take(indexer)
                res = result.drop_duplicates()
            return ensure_wrapped_if_datetimelike(res)

    # Fallback path, also reached when the join above raised TypeError.
    res_values = self._intersection_via_get_indexer(other, sort=sort)
    res_values = _maybe_try_sort(res_values, sort)
    return res_values
def _wrap_intersection_result(self, other, result):
    """Wrap an intersection result by delegating to ``_wrap_setop_result``."""
    # We will override for MultiIndex to handle empty results
    wrapped = self._wrap_setop_result(other, result)
    return wrapped
@final
def _intersection_via_get_indexer(
    self, other: Index | MultiIndex, sort
) -> ArrayLike | MultiIndex:
    """
    Find the intersection of two Indexes using get_indexer.

    Returns
    -------
    np.ndarray or ExtensionArray
        The returned array will be unique.
    """
    uniq_self = self.unique()
    uniq_other = other.unique()

    # even though we are unique, we need get_indexer_for for IntervalIndex
    positions = uniq_self.get_indexer_for(uniq_other)

    # Drop the -1 entries, i.e. values of ``other`` absent from ``self``.
    matched = positions[positions != -1]
    if sort is False:
        # sort bc we want the elements in the same order they are in self
        # unnecessary in the case with sort=None bc we will sort later
        matched = np.sort(matched)

    taken = uniq_self.take(matched)
    return taken if isinstance(uniq_self, ABCMultiIndex) else taken._values
@final
def difference(self, other, sort=None):
    """
    Return a new Index with elements of index not in `other`.

    This is the set difference of two Index objects.

    Parameters
    ----------
    other : Index or array-like
    sort : bool or None, default None
        Whether to sort the resulting index. By default, the
        values are attempted to be sorted, but any TypeError from
        incomparable elements is caught by pandas.

        * None : Attempt to sort the result, but catch any TypeErrors
          from comparing incomparable elements.
        * False : Do not sort the result.
        * True : Sort the result (which may raise TypeError).

    Returns
    -------
    Index

    Examples
    --------
    >>> idx1 = pd.Index([2, 1, 3, 4])
    >>> idx2 = pd.Index([3, 4, 5, 6])
    >>> idx1.difference(idx2)
    Index([1, 2], dtype='int64')
    >>> idx1.difference(idx2, sort=False)
    Index([2, 1], dtype='int64')
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_name = self._convert_can_do_setop(other)
    # Note: we do NOT call _dti_setop_align_tzs here, as there
    #  is no requirement that .difference be commutative, so it does
    #  not cast to object.

    if self.equals(other):
        # Note: we do not (yet) sort even if sort=None GH#24959
        return self[:0].rename(result_name)

    if len(other) == 0 or not self._should_compare(other):
        # Either ``other`` is empty or nothing can match, so the
        # difference is everything in ``self``.
        # Note: we do not (yet) sort even if sort=None GH#24959
        everything = self.rename(result_name)
        return everything.sort_values() if sort is True else everything

    diff_values = self._difference(other, sort=sort)
    return self._wrap_difference_result(other, diff_values)
def _difference(self, other, sort):
    """
    Compute the values of ``self`` (de-duplicated) that are not in ``other``.

    The result is passed through ``_maybe_try_sort`` with ``sort``.
    """
    # overridden by RangeIndex
    deduped = self.unique()

    # Positions in ``deduped`` that also occur in ``other``.
    hits = deduped.get_indexer_for(other)
    hits = hits[hits != -1]

    keep = np.setdiff1d(np.arange(deduped.size), hits, assume_unique=True)

    # A MultiIndex is taken from directly; otherwise use the raw values.
    source = deduped if isinstance(deduped, ABCMultiIndex) else deduped._values
    return _maybe_try_sort(source.take(keep), sort)
def _wrap_difference_result(self, other, result):
    """Wrap a difference result by delegating to ``_wrap_setop_result``."""
    # We will override for MultiIndex to handle empty results
    wrapped = self._wrap_setop_result(other, result)
    return wrapped
def symmetric_difference(self, other, result_name=None, sort=None):
    """
    Compute the symmetric difference of two Index objects.

    Parameters
    ----------
    other : Index or array-like
    result_name : str
        Name for the result. When None, the name derived from `self` and
        `other` is used.
    sort : bool or None, default None
        Whether to sort the resulting index. By default, the
        values are attempted to be sorted, but any TypeError from
        incomparable elements is caught by pandas.

        * None : Attempt to sort the result, but catch any TypeErrors
          from comparing incomparable elements.
        * False : Do not sort the result.
        * True : Sort the result (which may raise TypeError).

    Returns
    -------
    Index

    Notes
    -----
    ``symmetric_difference`` contains elements that appear in either
    ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
    ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
    dropped.

    Examples
    --------
    >>> idx1 = pd.Index([1, 2, 3, 4])
    >>> idx2 = pd.Index([2, 3, 4, 5])
    >>> idx1.symmetric_difference(idx2)
    Index([1, 5], dtype='int64')
    """
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, result_name_update = self._convert_can_do_setop(other)
    # An explicitly passed ``result_name`` wins over the derived one.
    if result_name is None:
        result_name = result_name_update

    if not is_dtype_equal(self.dtype, other.dtype):
        self, other = self._dti_setop_align_tzs(other, "symmetric_difference")

    if not self._should_compare(other):
        # Not comparable: the result is simply the union of both sides.
        return self.union(other, sort=sort).rename(result_name)

    elif not is_dtype_equal(self.dtype, other.dtype):
        # Cast both sides to a common dtype and recurse with matching dtypes.
        dtype = self._find_common_type_compat(other)
        this = self.astype(dtype, copy=False)
        that = other.astype(dtype, copy=False)
        return this.symmetric_difference(that, sort=sort).rename(result_name)

    this = self.unique()
    other = other.unique()
    indexer = this.get_indexer_for(other)

    # {this} minus {other}
    common_indexer = indexer.take((indexer != -1).nonzero()[0])
    left_indexer = np.setdiff1d(
        np.arange(this.size), common_indexer, assume_unique=True
    )
    left_diff = this.take(left_indexer)

    # {other} minus {this}
    right_indexer = (indexer == -1).nonzero()[0]
    right_diff = other.take(right_indexer)

    res_values = left_diff.append(right_diff)
    result = _maybe_try_sort(res_values, sort)

    if not self._is_multi:
        return Index(result, name=result_name, dtype=res_values.dtype)
    else:
        left_diff = cast("MultiIndex", left_diff)
        if len(result) == 0:
            # result might be an Index, if other was an Index
            return left_diff.remove_unused_levels().set_names(result_name)
        return result.set_names(result_name)
@final
def _assert_can_do_setop(self, other) -> bool:
    """
    Check that ``other`` is list-like.

    Returns
    -------
    bool
        Always True when no exception is raised.

    Raises
    ------
    TypeError
        If ``other`` is not list-like.
    """
    if is_list_like(other):
        return True
    raise TypeError("Input must be Index or array-like")
def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
    """
    Coerce ``other`` to an Index and work out the set-operation result name.

    Returns
    -------
    tuple[Index, Hashable]
        ``other`` as an Index, and the name for the result.
    """
    if isinstance(other, Index):
        return other, get_op_result_name(self, other)

    # Non-Index input is wrapped and takes on our own name.
    wrapped = Index(other, name=self.name)
    return wrapped, self.name
- # --------------------------------------------------------------------
- # Indexing Methods
def get_loc(self, key):
    """
    Get integer location, slice or boolean mask for requested label.

    Parameters
    ----------
    key : label

    Returns
    -------
    int if unique index, slice if monotonic index, else mask

    Raises
    ------
    KeyError
        If the engine lookup raises KeyError; re-raised with the original
        ``key`` and chained to the engine's exception.

    Examples
    --------
    >>> unique_index = pd.Index(list('abc'))
    >>> unique_index.get_loc('b')
    1

    >>> monotonic_index = pd.Index(list('abbc'))
    >>> monotonic_index.get_loc('b')
    slice(1, 3, None)

    >>> non_monotonic_index = pd.Index(list('abcb'))
    >>> non_monotonic_index.get_loc('b')
    array([False,  True, False,  True])
    """
    lookup_key = self._maybe_cast_indexer(key)
    try:
        return self._engine.get_loc(lookup_key)
    except KeyError as exc:
        # Report the key the caller passed, not the casted one.
        raise KeyError(key) from exc
    except TypeError:
        # If we have a listlike key, _check_indexing_error will raise
        #  InvalidIndexError. Otherwise we fall through and re-raise
        #  the TypeError.
        self._check_indexing_error(key)
        raise
# Shared docstring template for ``get_indexer``.  The ``%(target_klass)s``
# and ``%(raises_section)s`` placeholders are filled in by %-formatting the
# template with a mapping before it is attached to a method.
_index_shared_docs[
    "get_indexer"
] = """
    Compute indexer and mask for new index given the current index.

    The indexer should be then used as an input to ndarray.take to align the
    current data to the new index.

    Parameters
    ----------
    target : %(target_klass)s
    method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
        * default: exact matches only.
        * pad / ffill: find the PREVIOUS index value if no exact match.
        * backfill / bfill: use NEXT index value if no exact match
        * nearest: use the NEAREST index value if no exact match. Tied
          distances are broken by preferring the larger index value.
    limit : int, optional
        Maximum number of consecutive labels in ``target`` to match for
        inexact matches.
    tolerance : optional
        Maximum distance between original and new labels for inexact
        matches. The values of the index at the matching locations must
        satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

        Tolerance may be a scalar value, which applies the same tolerance
        to all values, or list-like, which applies variable tolerance per
        element. List-like includes list, tuple, array, Series, and must be
        the same size as the index and its dtype must exactly match the
        index's type.

    Returns
    -------
    np.ndarray[np.intp]
        Integers from 0 to n - 1 indicating that the index at these
        positions matches the corresponding target values. Missing values
        in the target are marked by -1.
    %(raises_section)s
    Notes
    -----
    Returns -1 for unmatched values, for further explanation see the
    example below.

    Examples
    --------
    >>> index = pd.Index(['c', 'a', 'b'])
    >>> index.get_indexer(['a', 'b', 'x'])
    array([ 1,  2, -1])

    Notice that the return value is an array of locations in ``index``
    and ``x`` is marked by -1, as it is not in ``index``.
    """
# NOTE: the docstring is supplied by ``Appender`` from the shared
# "get_indexer" template, so no docstring is written in the body.
@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
@final
def get_indexer(
    self,
    target,
    method: str_t | None = None,
    limit: int | None = None,
    tolerance=None,
) -> npt.NDArray[np.intp]:
    method = clean_reindex_fill_method(method)
    # Keep the caller's target: the categorical branch below needs to tell
    # real NaNs apart from NaNs introduced by the cast.
    orig_target = target
    target = self._maybe_cast_listlike_indexer(target)

    self._check_indexing_method(method, limit, tolerance)

    if not self._index_as_unique:
        raise InvalidIndexError(self._requires_unique_msg)

    if len(target) == 0:
        # Empty target: nothing to look up.
        return np.array([], dtype=np.intp)

    if not self._should_compare(target) and not self._should_partial_index(target):
        # IntervalIndex get special treatment bc numeric scalars can be
        #  matched to Interval scalars
        return self._get_indexer_non_comparable(target, method=method, unique=True)

    if is_categorical_dtype(self.dtype):
        # _maybe_cast_listlike_indexer ensures target has our dtype
        #  (could improve perf by doing _should_compare check earlier?)
        assert is_dtype_equal(self.dtype, target.dtype)

        indexer = self._engine.get_indexer(target.codes)
        if self.hasnans and target.hasnans:
            # After _maybe_cast_listlike_indexer, target elements which do not
            # belong to some category are changed to NaNs
            # Mask to track actual NaN values compared to inserted NaN values
            # GH#45361
            target_nans = isna(orig_target)
            loc = self.get_loc(np.nan)
            mask = target.isna()
            # Real NaNs map to our NaN position; cast-introduced NaNs are
            # reported as not found.
            indexer[target_nans] = loc
            indexer[mask & ~target_nans] = -1
        return indexer

    if is_categorical_dtype(target.dtype):
        # potential fastpath
        # get an indexer for unique categories then propagate to codes via take_nd
        # get_indexer instead of _get_indexer needed for MultiIndex cases
        #  e.g. test_append_different_columns_types
        categories_indexer = self.get_indexer(target.categories)

        indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

        if (not self._is_multi and self.hasnans) and target.hasnans:
            # Exclude MultiIndex because hasnans raises NotImplementedError
            # we should only get here if we are unique, so loc is an integer
            # GH#41934
            loc = self.get_loc(np.nan)
            mask = target.isna()
            indexer[mask] = loc

        return ensure_platform_int(indexer)

    pself, ptarget = self._maybe_promote(target)
    if pself is not self or ptarget is not target:
        # Something was promoted: restart the lookup on the promoted pair.
        return pself.get_indexer(
            ptarget, method=method, limit=limit, tolerance=tolerance
        )

    if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
        # Only call equals if we have same dtype to avoid inference/casting
        return np.arange(len(target), dtype=np.intp)

    if not is_dtype_equal(
        self.dtype, target.dtype
    ) and not self._should_partial_index(target):
        # _should_partial_index e.g. IntervalIndex with numeric scalars
        #  that can be matched to Interval scalars.
        dtype = self._find_common_type_compat(target)

        this = self.astype(dtype, copy=False)
        target = target.astype(dtype, copy=False)
        return this._get_indexer(
            target, method=method, limit=limit, tolerance=tolerance
        )

    return self._get_indexer(target, method, limit, tolerance)
- def _get_indexer(
- self,
- target: Index,
- method: str_t | None = None,
- limit: int | None = None,
- tolerance=None,
- ) -> npt.NDArray[np.intp]:
- if tolerance is not None:
- tolerance = self._convert_tolerance(tolerance, target)
- if method in ["pad", "backfill"]:
- indexer = self._get_fill_indexer(target, method, limit, tolerance)
- elif method == "nearest":
- indexer = self._get_nearest_indexer(target, limit, tolerance)
- else:
- if target._is_multi and self._is_multi:
- engine = self._engine
- # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
- # has no attribute "_extract_level_codes"
- tgt_values = engine._extract_level_codes( # type: ignore[union-attr]
- target
- )
- else:
- tgt_values = target._get_engine_target()
- indexer = self._engine.get_indexer(tgt_values)
- return ensure_platform_int(indexer)
@final
def _should_partial_index(self, target: Index) -> bool:
    """
    Should we attempt partial-matching indexing?

    True exactly when this index has an interval dtype and ``target`` does
    not.
    """
    # See https://github.com/pandas-dev/pandas/issues/47772: once that issue
    # is fixed, the "target is not interval" half of this answer can be
    # replaced by the check that is currently disabled:
    #   "Index" has no attribute "left"
    #   return self.left._should_compare(target)  # type: ignore[attr-defined]
    return is_interval_dtype(self.dtype) and not is_interval_dtype(target.dtype)
- @final
- def _check_indexing_method(
- self,
- method: str_t | None,
- limit: int | None = None,
- tolerance=None,
- ) -> None:
- """
- Raise if we have a get_indexer `method` that is not supported or valid.
- """
- if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
- # in practice the clean_reindex_fill_method call would raise
- # before we get here
- raise ValueError("Invalid fill method") # pragma: no cover
- if self._is_multi:
- if method == "nearest":
- raise NotImplementedError(
- "method='nearest' not implemented yet "
- "for MultiIndex; see GitHub issue 9365"
- )
- if method in ("pad", "backfill"):
- if tolerance is not None:
- raise NotImplementedError(
- "tolerance not implemented yet for MultiIndex"
- )
- if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
- # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
- if method is not None:
- raise NotImplementedError(
- f"method {method} not yet implemented for {type(self).__name__}"
- )
- if method is None:
- if tolerance is not None:
- raise ValueError(
- "tolerance argument only valid if doing pad, "
- "backfill or nearest reindexing"
- )
- if limit is not None:
- raise ValueError(
- "limit argument only valid if doing pad, "
- "backfill or nearest reindexing"
- )
- def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
- # override this method on subclasses
- tolerance = np.asarray(tolerance)
- if target.size != tolerance.size and tolerance.size > 1:
- raise ValueError("list-like tolerance size must match target index size")
- elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number):
- if tolerance.ndim > 0:
- raise ValueError(
- f"tolerance argument for {type(self).__name__} with dtype "
- f"{self.dtype} must contain numeric elements if it is list type"
- )
- raise ValueError(
- f"tolerance argument for {type(self).__name__} with dtype {self.dtype} "
- f"must be numeric if it is a scalar: {repr(tolerance)}"
- )
- return tolerance
- @final
- def _get_fill_indexer(
- self, target: Index, method: str_t, limit: int | None = None, tolerance=None
- ) -> npt.NDArray[np.intp]:
- if self._is_multi:
- # TODO: get_indexer_with_fill docstring says values must be _sorted_
- # but that doesn't appear to be enforced
- # error: "IndexEngine" has no attribute "get_indexer_with_fill"
- engine = self._engine
- with warnings.catch_warnings():
- # TODO: We need to fix this. Casting to int64 in cython
- warnings.filterwarnings("ignore", category=RuntimeWarning)
- return engine.get_indexer_with_fill( # type: ignore[union-attr]
- target=target._values,
- values=self._values,
- method=method,
- limit=limit,
- )
- if self.is_monotonic_increasing and target.is_monotonic_increasing:
- target_values = target._get_engine_target()
- own_values = self._get_engine_target()
- if not isinstance(target_values, np.ndarray) or not isinstance(
- own_values, np.ndarray
- ):
- raise NotImplementedError
- if method == "pad":
- indexer = libalgos.pad(own_values, target_values, limit=limit)
- else:
- # i.e. "backfill"
- indexer = libalgos.backfill(own_values, target_values, limit=limit)
- else:
- indexer = self._get_fill_indexer_searchsorted(target, method, limit)
- if tolerance is not None and len(self):
- indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
- return indexer
- @final
- def _get_fill_indexer_searchsorted(
- self, target: Index, method: str_t, limit: int | None = None
- ) -> npt.NDArray[np.intp]:
- """
- Fallback pad/backfill get_indexer that works for monotonic decreasing
- indexes and non-monotonic targets.
- """
- if limit is not None:
- raise ValueError(
- f"limit argument for {repr(method)} method only well-defined "
- "if index and target are monotonic"
- )
- side: Literal["left", "right"] = "left" if method == "pad" else "right"
- # find exact matches first (this simplifies the algorithm)
- indexer = self.get_indexer(target)
- nonexact = indexer == -1
- indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
- if side == "left":
- # searchsorted returns "indices into a sorted array such that,
- # if the corresponding elements in v were inserted before the
- # indices, the order of a would be preserved".
- # Thus, we need to subtract 1 to find values to the left.
- indexer[nonexact] -= 1
- # This also mapped not found values (values of 0 from
- # np.searchsorted) to -1, which conveniently is also our
- # sentinel for missing values
- else:
- # Mark indices to the right of the largest value as not found
- indexer[indexer == len(self)] = -1
- return indexer
- @final
- def _get_nearest_indexer(
- self, target: Index, limit: int | None, tolerance
- ) -> npt.NDArray[np.intp]:
- """
- Get the indexer for the nearest index labels; requires an index with
- values that can be subtracted from each other (e.g., not strings or
- tuples).
- """
- if not len(self):
- return self._get_fill_indexer(target, "pad")
- left_indexer = self.get_indexer(target, "pad", limit=limit)
- right_indexer = self.get_indexer(target, "backfill", limit=limit)
- left_distances = self._difference_compat(target, left_indexer)
- right_distances = self._difference_compat(target, right_indexer)
- op = operator.lt if self.is_monotonic_increasing else operator.le
- indexer = np.where(
- # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
- # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
- # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
- op(left_distances, right_distances) # type: ignore[arg-type]
- | (right_indexer == -1),
- left_indexer,
- right_indexer,
- )
- if tolerance is not None:
- indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
- return indexer
@final
def _filter_indexer_tolerance(
    self,
    target: Index,
    indexer: npt.NDArray[np.intp],
    tolerance,
) -> npt.NDArray[np.intp]:
    """
    Replace with -1 every position of ``indexer`` whose matched label lies
    further than ``tolerance`` from the corresponding ``target`` value.
    """
    within_tolerance = self._difference_compat(target, indexer) <= tolerance
    return np.where(within_tolerance, indexer, -1)
@final
def _difference_compat(
    self, target: Index, indexer: npt.NDArray[np.intp]
) -> ArrayLike:
    """
    Return ``abs(self[indexer] - target)`` computed on the underlying values.
    """
    if isinstance(self.dtype, PeriodDtype):
        # Compatibility for PeriodArray, for which __sub__ returns an
        # ndarray[object] of DateOffset objects, which do not support
        # __abs__ (and would be slow if they did).
        # Note: we only get here with matching dtypes
        own_ordinals = cast("PeriodArray", self._data)._ndarray
        target_ordinals = cast("PeriodArray", target._data)._ndarray
        return abs(own_ordinals[indexer] - target_ordinals)

    # error: Unsupported left operand type for - ("ExtensionArray")
    return abs(self._values[indexer] - target._values)  # type: ignore[operator]
- # --------------------------------------------------------------------
- # Indexer Conversion Methods
@final
def _validate_positional_slice(self, key: slice) -> None:
    """
    For positional indexing, a slice must have either int or None
    for each of start, stop, and step.

    Each bound is handed to ``self._validate_indexer`` in that order.
    """
    for bound in (key.start, key.stop, key.step):
        self._validate_indexer("positional", bound, "iloc")
def _convert_slice_indexer(self, key: slice, kind: str_t):
    """
    Convert a slice indexer.

    Depending on this index's dtype, the slice bounds and ``kind``, the
    slice is either returned unchanged or translated to positions via
    ``self.slice_indexer``.

    Parameters
    ----------
    key : label of the slice bound
    kind : {'loc', 'getitem'}

    Raises
    ------
    TypeError
        If a slice that must be treated as positional is used with ``.loc``.
    """
    assert kind in ["loc", "getitem"], kind

    # potentially cast the bounds to integers
    start, stop, step = key.start, key.stop, key.step

    # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
    #  to simplify this.
    if isinstance(self.dtype, np.dtype) and is_float_dtype(self.dtype):
        # We always treat __getitem__ slicing as label-based
        # translate to locations
        return self.slice_indexer(start, stop, step)

    # figure out if this is a positional indexer
    def _int_or_none(bound):
        return bound is None or is_integer(bound)

    is_index_slice = all(_int_or_none(bound) for bound in (start, stop, step))

    # special case for interval_dtype bc we do not do partial-indexing
    #  on integer Intervals when slicing
    # TODO: write this in terms of e.g. should_partial_index?
    ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
        self.dtype
    )
    is_positional = is_index_slice and ints_are_positional

    if kind == "getitem" and (is_integer_dtype(self.dtype) or is_index_slice):
        # called from the getitem slicers, validate that we are in fact
        # integers.
        # Note: these checks are redundant if we know is_index_slice
        for bound in (key.start, key.stop, key.step):
            self._validate_indexer("slice", bound, "getitem")
        return key

    # convert the slice to an indexer here

    # if we are mixed and have integers
    if is_positional:
        try:
            # Validate start & stop
            if start is not None:
                self.get_loc(start)
            if stop is not None:
                self.get_loc(stop)
        except KeyError:
            pass
        else:
            # Both bounds exist as labels, so treat the slice as label-based.
            is_positional = False

    if com.is_null_slice(key):
        # It doesn't matter if we are positional or label based
        return key

    if not is_positional:
        return self.slice_indexer(start, stop, step)

    if kind == "loc":
        # GH#16121, GH#24612, GH#31810
        raise TypeError(
            "Slicing a positional slice with .loc is not allowed, "
            "Use .loc with labels or .iloc with positions instead.",
        )
    return key
- @final
- def _raise_invalid_indexer(
- self,
- form: str_t,
- key,
- reraise: lib.NoDefault | None | Exception = lib.no_default,
- ) -> None:
- """
- Raise consistent invalid indexer message.
- """
- msg = (
- f"cannot do {form} indexing on {type(self).__name__} with these "
- f"indexers [{key}] of type {type(key).__name__}"
- )
- if reraise is not lib.no_default:
- raise TypeError(msg) from reraise
- raise TypeError(msg)
- # --------------------------------------------------------------------
- # Reindex Methods
@final
def _validate_can_reindex(self, indexer: np.ndarray) -> None:
    """
    Check if we are allowing reindexing with this particular indexer.

    Parameters
    ----------
    indexer : an integer ndarray

    Raises
    ------
    ValueError if its a duplicate axis
    """
    if self._index_as_unique:
        return
    # trying to reindex on an axis with duplicates
    if len(indexer):
        raise ValueError("cannot reindex on an axis with duplicate labels")
- def reindex(
- self, target, method=None, level=None, limit=None, tolerance=None
- ) -> tuple[Index, npt.NDArray[np.intp] | None]:
- """
- Create index with target's values.
- Parameters
- ----------
- target : an iterable
- method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
- * default: exact matches only.
- * pad / ffill: find the PREVIOUS index value if no exact match.
- * backfill / bfill: use NEXT index value if no exact match
- * nearest: use the NEAREST index value if no exact match. Tied
- distances are broken by preferring the larger index value.
- level : int, optional
- Level of multiindex.
- limit : int, optional
- Maximum number of consecutive labels in ``target`` to match for
- inexact matches.
- tolerance : int or float, optional
- Maximum distance between original and new labels for inexact
- matches. The values of the index at the matching locations must
- satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
- Tolerance may be a scalar value, which applies the same tolerance
- to all values, or list-like, which applies variable tolerance per
- element. List-like includes list, tuple, array, Series, and must be
- the same size as the index and its dtype must exactly match the
- index's type.
- Returns
- -------
- new_index : pd.Index
- Resulting index.
- indexer : np.ndarray[np.intp] or None
- Indices of output values in original index.
- Raises
- ------
- TypeError
- If ``method`` passed along with ``level``.
- ValueError
- If non-unique multi-index
- ValueError
- If non-unique index and ``method`` or ``limit`` passed.
- See Also
- --------
- Series.reindex : Conform Series to new index with optional filling logic.
- DataFrame.reindex : Conform DataFrame to new index with optional filling logic.
- Examples
- --------
- >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
- >>> idx
- Index(['car', 'bike', 'train', 'tractor'], dtype='object')
- >>> idx.reindex(['car', 'bike'])
- (Index(['car', 'bike'], dtype='object'), array([0, 1]))
- """
- # GH6552: preserve names when reindexing to non-named target
- # (i.e. neither Index nor Series).
- preserve_names = not hasattr(target, "name")
- # GH7774: preserve dtype/tz if target is empty and not an Index.
- target = ensure_has_len(target) # target may be an iterator
- if not isinstance(target, Index) and len(target) == 0:
- if level is not None and self._is_multi:
- # "Index" has no attribute "levels"; maybe "nlevels"?
- idx = self.levels[level] # type: ignore[attr-defined]
- else:
- idx = self
- target = idx[:0]
- else:
- target = ensure_index(target)
- if level is not None and (
- isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
- ):
- if method is not None:
- raise TypeError("Fill method not supported if level passed")
- # TODO: tests where passing `keep_order=not self._is_multi`
- # makes a difference for non-MultiIndex case
- target, indexer, _ = self._join_level(
- target, level, how="right", keep_order=not self._is_multi
- )
- else:
- if self.equals(target):
- indexer = None
- else:
- if self._index_as_unique:
- indexer = self.get_indexer(
- target, method=method, limit=limit, tolerance=tolerance
- )
- elif self._is_multi:
- raise ValueError("cannot handle a non-unique multi-index!")
- elif not self.is_unique:
- # GH#42568
- raise ValueError("cannot reindex on an axis with duplicate labels")
- else:
- indexer, _ = self.get_indexer_non_unique(target)
- target = self._wrap_reindex_result(target, indexer, preserve_names)
- return target, indexer
def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
    """
    Post-process the target produced by ``reindex``.

    Only name preservation is applied here; ``indexer`` is accepted but not
    used by this implementation.
    """
    return self._maybe_preserve_names(target, preserve_names)
def _maybe_preserve_names(self, target: Index, preserve_names: bool):
    """
    Return ``target`` carrying this index's name when requested.

    A shallow copy is renamed (the input is left untouched) only if
    ``preserve_names`` is true, ``target`` has a single level and its name
    differs from ours; otherwise ``target`` itself is returned.
    """
    needs_rename = (
        preserve_names and target.nlevels == 1 and target.name != self.name
    )
    if not needs_rename:
        return target

    renamed = target.copy(deep=False)
    renamed.name = self.name
    return renamed
- @final
- def _reindex_non_unique(
- self, target: Index
- ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
- """
- Create a new index with target's values (move/add/delete values as
- necessary) use with non-unique Index and a possibly non-unique target.
- Parameters
- ----------
- target : an iterable
- Returns
- -------
- new_index : pd.Index
- Resulting index.
- indexer : np.ndarray[np.intp]
- Indices of output values in original index.
- new_indexer : np.ndarray[np.intp] or None
- """
- target = ensure_index(target)
- if len(target) == 0:
- # GH#13691
- return self[:0], np.array([], dtype=np.intp), None
- indexer, missing = self.get_indexer_non_unique(target)
- check = indexer != -1
- new_labels = self.take(indexer[check])
- new_indexer = None
- if len(missing):
- length = np.arange(len(indexer), dtype=np.intp)
- missing = ensure_platform_int(missing)
- missing_labels = target.take(missing)
- missing_indexer = length[~check]
- cur_labels = self.take(indexer[check]).values
- cur_indexer = length[check]
- # Index constructor below will do inference
- new_labels = np.empty((len(indexer),), dtype=object)
- new_labels[cur_indexer] = cur_labels
- new_labels[missing_indexer] = missing_labels
- # GH#38906
- if not len(self):
- new_indexer = np.arange(0, dtype=np.intp)
- # a unique indexer
- elif target.is_unique:
- # see GH5553, make sure we use the right indexer
- new_indexer = np.arange(len(indexer), dtype=np.intp)
- new_indexer[cur_indexer] = np.arange(len(cur_labels))
- new_indexer[missing_indexer] = -1
- # we have a non_unique selector, need to use the original
- # indexer here
- else:
- # need to retake to have the same size as the indexer
- indexer[~check] = -1
- # reset the new indexer to account for the new size
- new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
- new_indexer[~check] = -1
- if not isinstance(self, ABCMultiIndex):
- new_index = Index(new_labels, name=self.name)
- else:
- new_index = type(self).from_tuples(new_labels, names=self.names)
- return new_index, indexer, new_indexer
- # --------------------------------------------------------------------
- # Join Methods
- @overload
- def join(
- self,
- other: Index,
- *,
- how: JoinHow = ...,
- level: Level = ...,
- return_indexers: Literal[True],
- sort: bool = ...,
- ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
- ...
- @overload
- def join(
- self,
- other: Index,
- *,
- how: JoinHow = ...,
- level: Level = ...,
- return_indexers: Literal[False] = ...,
- sort: bool = ...,
- ) -> Index:
- ...
- @overload
- def join(
- self,
- other: Index,
- *,
- how: JoinHow = ...,
- level: Level = ...,
- return_indexers: bool = ...,
- sort: bool = ...,
- ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
- ...
- @final
- @_maybe_return_indexers
- def join(
- self,
- other: Index,
- *,
- how: JoinHow = "left",
- level: Level = None,
- return_indexers: bool = False,
- sort: bool = False,
- ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
- """
- Compute join_index and indexers to conform data structures to the new index.
- Parameters
- ----------
- other : Index
- how : {'left', 'right', 'inner', 'outer'}
- level : int or level name, default None
- return_indexers : bool, default False
- sort : bool, default False
- Sort the join keys lexicographically in the result Index. If False,
- the order of the join keys depends on the join type (how keyword).
- Returns
- -------
- join_index, (left_indexer, right_indexer)
- """
- other = ensure_index(other)
- if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
- if (self.tz is None) ^ (other.tz is None):
- # Raise instead of casting to object below.
- raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
- if not self._is_multi and not other._is_multi:
- # We have specific handling for MultiIndex below
- pself, pother = self._maybe_promote(other)
- if pself is not self or pother is not other:
- return pself.join(
- pother, how=how, level=level, return_indexers=True, sort=sort
- )
- lindexer: np.ndarray | None
- rindexer: np.ndarray | None
- # try to figure out the join level
- # GH3662
- if level is None and (self._is_multi or other._is_multi):
- # have the same levels/names so a simple join
- if self.names == other.names:
- pass
- else:
- return self._join_multi(other, how=how)
- # join on the level
- if level is not None and (self._is_multi or other._is_multi):
- return self._join_level(other, level, how=how)
- if len(other) == 0:
- if how in ("left", "outer"):
- join_index = self._view()
- rindexer = np.broadcast_to(np.intp(-1), len(join_index))
- return join_index, None, rindexer
- elif how in ("right", "inner", "cross"):
- join_index = other._view()
- lindexer = np.array([])
- return join_index, lindexer, None
- if len(self) == 0:
- if how in ("right", "outer"):
- join_index = other._view()
- lindexer = np.broadcast_to(np.intp(-1), len(join_index))
- return join_index, lindexer, None
- elif how in ("left", "inner", "cross"):
- join_index = self._view()
- rindexer = np.array([])
- return join_index, None, rindexer
- if self._join_precedence < other._join_precedence:
- flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
- how = flip.get(how, how)
- join_index, lidx, ridx = other.join(
- self, how=how, level=level, return_indexers=True
- )
- lidx, ridx = ridx, lidx
- return join_index, lidx, ridx
- if not is_dtype_equal(self.dtype, other.dtype):
- dtype = self._find_common_type_compat(other)
- this = self.astype(dtype, copy=False)
- other = other.astype(dtype, copy=False)
- return this.join(other, how=how, return_indexers=True)
- _validate_join_method(how)
- if not self.is_unique and not other.is_unique:
- return self._join_non_unique(other, how=how)
- elif not self.is_unique or not other.is_unique:
- if self.is_monotonic_increasing and other.is_monotonic_increasing:
- if not is_interval_dtype(self.dtype):
- # otherwise we will fall through to _join_via_get_indexer
- # GH#39133
- # go through object dtype for ea till engine is supported properly
- return self._join_monotonic(other, how=how)
- else:
- return self._join_non_unique(other, how=how)
- elif (
- # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
- self.is_monotonic_increasing
- and other.is_monotonic_increasing
- and self._can_use_libjoin
- and not isinstance(self, ABCMultiIndex)
- and not is_categorical_dtype(self.dtype)
- ):
- # Categorical is monotonic if data are ordered as categories, but join can
- # not handle this in case of not lexicographically monotonic GH#38502
- try:
- return self._join_monotonic(other, how=how)
- except TypeError:
- # object dtype; non-comparable objects
- pass
- return self._join_via_get_indexer(other, how, sort)
- @final
- def _join_via_get_indexer(
- self, other: Index, how: JoinHow, sort: bool
- ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
- # Fallback if we do not have any fastpaths available based on
- # uniqueness/monotonicity
- # Note: at this point we have checked matching dtypes
- if how == "left":
- join_index = self
- elif how == "right":
- join_index = other
- elif how == "inner":
- # TODO: sort=False here for backwards compat. It may
- # be better to use the sort parameter passed into join
- join_index = self.intersection(other, sort=False)
- elif how == "outer":
- # TODO: sort=True here for backwards compat. It may
- # be better to use the sort parameter passed into join
- join_index = self.union(other)
- if sort:
- join_index = join_index.sort_values()
- if join_index is self:
- lindexer = None
- else:
- lindexer = self.get_indexer_for(join_index)
- if join_index is other:
- rindexer = None
- else:
- rindexer = other.get_indexer_for(join_index)
- return join_index, lindexer, rindexer
@final
def _join_multi(self, other: Index, how: JoinHow):
    """
    Join two indexes, at least one of them a MultiIndex, on the level names
    they share.

    Raises
    ------
    ValueError
        If the two indexes have no (non-None) level name in common.
    """
    from pandas.core.indexes.multi import MultiIndex
    from pandas.core.reshape.merge import restore_dropped_levels_multijoin

    # figure out join names
    self_names_list = list(com.not_none(*self.names))
    other_names_list = list(com.not_none(*other.names))
    self_names = set(self_names_list)
    other_names = set(other_names_list)
    overlap = self_names & other_names

    # need at least 1 in common
    if not overlap:
        raise ValueError("cannot join with no overlapping index names")

    if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
        # Drop the non-matching levels from left and right respectively,
        # keeping each side's original level order.
        ldrop_names = sorted(self_names - overlap, key=self_names_list.index)
        rdrop_names = sorted(other_names - overlap, key=other_names_list.index)
        dropped_names = ldrop_names + rdrop_names

        if dropped_names:
            self_jnlevels = self.droplevel(ldrop_names)
            other_jnlevels = other.droplevel(rdrop_names)
        else:
            # only the order differs
            self_jnlevels = self
            other_jnlevels = other.reorder_levels(self.names)

        # Join left and right
        # Join on same leveled multi-index frames is supported
        join_idx, lidx, ridx = self_jnlevels.join(
            other_jnlevels, how=how, return_indexers=True
        )

        # Restore the dropped levels
        # Returned index level order is
        # common levels, ldrop_names, rdrop_names

        # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
        # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
        # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
        levels, codes, names = restore_dropped_levels_multijoin(
            self,
            other,
            dropped_names,
            join_idx,
            lidx,  # type: ignore[arg-type]
            ridx,  # type: ignore[arg-type]
        )

        # Re-create the multi-index
        rebuilt = MultiIndex(
            levels=levels, codes=codes, names=names, verify_integrity=False
        )
        return rebuilt.remove_unused_levels(), lidx, ridx

    join_name = next(iter(overlap))

    # Case where only one index is multi
    # make the indices into mi's that match
    if isinstance(self, MultiIndex):
        # Swap the operands, and with them the meaning of left/right.
        caller, level_owner = other, self
        how = {"right": "left", "left": "right"}.get(how, how)
        swapped = True
    else:
        caller, level_owner = self, other
        swapped = False

    level = level_owner.names.index(join_name)
    result = caller._join_level(level_owner, level, how=how)

    if swapped:
        return result[0], result[2], result[1]
    return result
@final
def _join_non_unique(
    self, other: Index, how: JoinHow = "left"
) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    """
    Join through ``get_join_indexers`` (called with ``sort=True``) and build
    the result index from the two positional indexers.
    """
    from pandas.core.reshape.merge import get_join_indexers

    # We only get here if dtypes match
    assert self.dtype == other.dtype

    left_idx, right_idx = get_join_indexers(
        [self._values], [other._values], how=how, sort=True
    )

    # Where the left side has no match (-1), the label comes from the right.
    from_left = self.take(left_idx)
    from_right = other.take(right_idx)
    join_index = from_left.putmask(left_idx == -1, from_right)
    return join_index, left_idx, right_idx
- @final
- def _join_level(
- self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
- ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
- """
- The join method *only* affects the level of the resulting
- MultiIndex. Otherwise it just exactly aligns the Index data to the
- labels of the level in the MultiIndex.
- If ```keep_order == True```, the order of the data indexed by the
- MultiIndex will not be changed; otherwise, it will tie out
- with `other`.
- """
- from pandas.core.indexes.multi import MultiIndex
- def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
- """
- Returns sorter for the inner most level while preserving the
- order of higher levels.
- Parameters
- ----------
- labels : list[np.ndarray]
- Each ndarray has signed integer dtype, not necessarily identical.
- Returns
- -------
- np.ndarray[np.intp]
- """
- if labels[0].size == 0:
- return np.empty(0, dtype=np.intp)
- if len(labels) == 1:
- return get_group_index_sorter(ensure_platform_int(labels[0]))
- # find indexers of beginning of each set of
- # same-key labels w.r.t all but last level
- tic = labels[0][:-1] != labels[0][1:]
- for lab in labels[1:-1]:
- tic |= lab[:-1] != lab[1:]
- starts = np.hstack(([True], tic, [True])).nonzero()[0]
- lab = ensure_int64(labels[-1])
- return lib.get_level_sorter(lab, ensure_platform_int(starts))
- if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
- raise TypeError("Join on level between two MultiIndex objects is ambiguous")
- left, right = self, other
- flip_order = not isinstance(self, MultiIndex)
- if flip_order:
- left, right = right, left
- flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
- how = flip.get(how, how)
- assert isinstance(left, MultiIndex)
- level = left._get_level_number(level)
- old_level = left.levels[level]
- if not right.is_unique:
- raise NotImplementedError(
- "Index._join_level on non-unique index is not implemented"
- )
- new_level, left_lev_indexer, right_lev_indexer = old_level.join(
- right, how=how, return_indexers=True
- )
- if left_lev_indexer is None:
- if keep_order or len(left) == 0:
- left_indexer = None
- join_index = left
- else: # sort the leaves
- left_indexer = _get_leaf_sorter(left.codes[: level + 1])
- join_index = left[left_indexer]
- else:
- left_lev_indexer = ensure_platform_int(left_lev_indexer)
- rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
- old_codes = left.codes[level]
- taker = old_codes[old_codes != -1]
- new_lev_codes = rev_indexer.take(taker)
- new_codes = list(left.codes)
- new_codes[level] = new_lev_codes
- new_levels = list(left.levels)
- new_levels[level] = new_level
- if keep_order: # just drop missing values. o.w. keep order
- left_indexer = np.arange(len(left), dtype=np.intp)
- left_indexer = cast(np.ndarray, left_indexer)
- mask = new_lev_codes != -1
- if not mask.all():
- new_codes = [lab[mask] for lab in new_codes]
- left_indexer = left_indexer[mask]
- else: # tie out the order with other
- if level == 0: # outer most level, take the fast route
- max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
- ngroups = 1 + max_new_lev
- left_indexer, counts = libalgos.groupsort_indexer(
- new_lev_codes, ngroups
- )
- # missing values are placed first; drop them!
- left_indexer = left_indexer[counts[0] :]
- new_codes = [lab[left_indexer] for lab in new_codes]
- else: # sort the leaves
- mask = new_lev_codes != -1
- mask_all = mask.all()
- if not mask_all:
- new_codes = [lab[mask] for lab in new_codes]
- left_indexer = _get_leaf_sorter(new_codes[: level + 1])
- new_codes = [lab[left_indexer] for lab in new_codes]
- # left_indexers are w.r.t masked frame.
- # reverse to original frame!
- if not mask_all:
- left_indexer = mask.nonzero()[0][left_indexer]
- join_index = MultiIndex(
- levels=new_levels,
- codes=new_codes,
- names=left.names,
- verify_integrity=False,
- )
- if right_lev_indexer is not None:
- right_indexer = right_lev_indexer.take(join_index.codes[level])
- else:
- right_indexer = join_index.codes[level]
- if flip_order:
- left_indexer, right_indexer = right_indexer, left_indexer
- left_indexer = (
- None if left_indexer is None else ensure_platform_int(left_indexer)
- )
- right_indexer = (
- None if right_indexer is None else ensure_platform_int(right_indexer)
- )
- return join_index, left_indexer, right_indexer
@final
def _join_monotonic(
    self, other: Index, how: JoinHow = "left"
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
    """
    Join ``self`` with ``other`` using the monotonic join helpers.

    Callers only reach this with matching dtypes and with both indexes
    monotonic increasing.

    Parameters
    ----------
    other : Index
        Index to join with; must have the same dtype as ``self``.
    how : {"left", "right", "inner", "outer"}, default "left"

    Returns
    -------
    join_index : Index
    lidx : np.ndarray[np.intp] or None
        ``None`` when ``join_index`` is ``self`` itself (or the indexes
        are equal).
    ridx : np.ndarray[np.intp] or None
        ``None`` when ``join_index`` is ``other`` itself (or the indexes
        are equal).
    """
    assert other.dtype == self.dtype

    if self.equals(other):
        # Correctness of this shortcut does not rely on monotonicity, so it
        # could equally live earlier in the calling method.
        return (other if how == "right" else self), None, None

    lidx: npt.NDArray[np.intp] | None
    ridx: npt.NDArray[np.intp] | None

    both_unique = self.is_unique and other.is_unique
    if both_unique and how == "left":
        # Unique fast path: the result is ``self``; only ``other`` needs
        # an indexer.
        join_index, lidx = self, None
        ridx = self._left_indexer_unique(other)
    elif both_unique and how == "right":
        # Mirror image of the case above.
        join_index, ridx = other, None
        lidx = other._left_indexer_unique(self)
    else:
        if how == "left":
            joined, lidx, ridx = self._left_indexer(other)
        elif how == "right":
            # Roles are swapped, hence the swapped unpacking order.
            joined, ridx, lidx = other._left_indexer(self)
        elif how == "inner":
            joined, lidx, ridx = self._inner_indexer(other)
        elif how == "outer":
            joined, lidx, ridx = self._outer_indexer(other)

        assert lidx is not None
        assert ridx is not None
        join_index = self._wrap_joined_index(joined, other, lidx, ridx)

    if lidx is not None:
        lidx = ensure_platform_int(lidx)
    if ridx is not None:
        ridx = ensure_platform_int(ridx)
    return join_index, lidx, ridx
def _wrap_joined_index(
    self: _IndexT,
    joined: ArrayLike,
    other: _IndexT,
    lidx: npt.NDArray[np.intp],
    ridx: npt.NDArray[np.intp],
) -> _IndexT:
    """
    Build the Index object for the result of a join.

    Parameters
    ----------
    joined : ArrayLike
        Joined values; only used when ``self`` is not a MultiIndex.
    other : Index
        The other side of the join; must share ``self``'s dtype.
    lidx, ridx : np.ndarray[np.intp]
        Indexers into ``self`` and ``other``; only used for MultiIndex.

    Returns
    -------
    Index
        Same type as ``self``.
    """
    assert other.dtype == self.dtype

    if not isinstance(self, ABCMultiIndex):
        result_name = get_op_result_name(self, other)
        return self._constructor._with_infer(
            joined, name=result_name, dtype=self.dtype
        )

    # Names survive only when both sides agree on them.
    result_names = self.names if self.names == other.names else None

    # Rows flagged -1 in ``lidx`` are filled from ``other``'s rows.
    absent_on_left = lidx == -1
    taken_left = self.take(lidx)
    taken_right = other.take(ridx)
    merged = taken_left.putmask(absent_on_left, taken_right)
    merged = merged._sort_levels_monotonic()
    # error: Incompatible return value type (got "MultiIndex",
    # expected "_IndexT")
    return merged.set_names(result_names)  # type: ignore[return-value]
@cache_readonly
def _can_use_libjoin(self) -> bool:
    """
    Whether we can use the fastpaths implemented in _libs.join.
    """
    if type(self) is not Index:
        return not is_interval_dtype(self.dtype)

    # Plain Index: excludes EAs, but includes masked arrays. We get here
    # with monotonic values only, meaning no NA.
    if isinstance(self.dtype, np.dtype):
        return True
    if isinstance(self.values, BaseMaskedArray):
        return True
    return isinstance(self._values, ArrowExtensionArray)
- # --------------------------------------------------------------------
- # Uncategorized Methods
@property
def values(self) -> ArrayLike:
    """
    Return an array representing the data in the Index.

    .. warning::

       We recommend using :attr:`Index.array` or
       :meth:`Index.to_numpy`, depending on whether you need
       a reference to the underlying data or a NumPy array.

    Returns
    -------
    array: numpy.ndarray or ExtensionArray

    See Also
    --------
    Index.array : Reference to the underlying data.
    Index.to_numpy : A NumPy array representing the underlying data.
    """
    underlying = self._data
    return underlying
@cache_readonly
@doc(IndexOpsMixin.array)
def array(self) -> ExtensionArray:
    backing = self._data
    if not isinstance(backing, np.ndarray):
        # Already an ExtensionArray: hand it back untouched.
        return backing

    # Raw ndarrays are wrapped so the caller always gets an ExtensionArray.
    from pandas.core.arrays.numpy_ import PandasArray

    return PandasArray(backing)
- @property
- def _values(self) -> ExtensionArray | np.ndarray:
- """
- The best array representation.
- This is an ndarray or ExtensionArray.
- ``_values`` are consistent between ``Series`` and ``Index``.
- It may differ from the public '.values' method.
- index | values | _values |
- ----------------- | --------------- | ------------- |
- Index | ndarray | ndarray |
- CategoricalIndex | Categorical | Categorical |
- DatetimeIndex | ndarray[M8ns] | DatetimeArray |
- DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray |
- PeriodIndex | ndarray[object] | PeriodArray |
- IntervalIndex | IntervalArray | IntervalArray |
- See Also
- --------
- values : Values
- """
- return self._data
def _get_engine_target(self) -> ArrayLike:
    """
    Get the ndarray or ExtensionArray that we can pass to the IndexEngine
    constructor.
    """
    vals = self._values

    if isinstance(vals, StringArray):
        # GH#45652 much more performant than ExtensionEngine
        return vals._ndarray

    # Only a plain ``Index`` backed by an ExtensionArray may need the
    # object-dtype fallback below.
    if type(self) is not Index or not isinstance(vals, ExtensionArray):
        return vals
    if isinstance(vals, BaseMaskedArray):
        return vals
    if (
        isinstance(vals, ArrowExtensionArray)
        and is_numeric_dtype(self.dtype)
        # Exclude decimal
        and self.dtype.kind != "O"
    ):
        return vals

    # TODO(ExtensionIndex): remove special-case, just use self._values
    return vals.astype(object)
def _get_join_target(self) -> ArrayLike:
    """
    Get the ndarray or ExtensionArray that we can pass to the join
    functions.
    """
    vals = self._values

    # Both special cases are only used when the array is monotonic, so
    # there are no missing values to worry about.
    if isinstance(vals, BaseMaskedArray):
        return vals._data
    if isinstance(vals, ArrowExtensionArray):
        return vals.to_numpy()

    return self._get_engine_target()
def _from_join_target(self, result: np.ndarray) -> ArrayLike:
    """
    Cast the ndarray returned from one of the libjoin.foo_indexer functions
    back to type(self)._data.
    """
    vals = self.values

    if isinstance(vals, BaseMaskedArray):
        # Rebuild the masked array with an all-False mask.
        all_valid = np.zeros(result.shape, dtype=np.bool_)
        return type(vals)(result, all_valid)

    if isinstance(vals, ArrowExtensionArray):
        return type(vals)._from_sequence(result)

    return result
@doc(IndexOpsMixin._memory_usage)
def memory_usage(self, deep: bool = False) -> int:
    own_bytes = self._memory_usage(deep=deep)
    # The engine's hashtable counts towards our footprint as well.
    engine_bytes = self._engine.sizeof(deep=deep)
    return own_bytes + engine_bytes
@final
def where(self, cond, other=None) -> Index:
    """
    Replace values where the condition is False.

    The replacement is taken from other.

    Parameters
    ----------
    cond : bool array-like with the same length as self
        Condition to select the values on.
    other : scalar, or array-like, default None
        Replacement if the condition is False.

    Returns
    -------
    pandas.Index
        A copy of self with values replaced from other
        where the condition is False.

    Raises
    ------
    NotImplementedError
        If called on a MultiIndex.

    See Also
    --------
    Series.where : Same method for Series.
    DataFrame.where : Same method for DataFrame.

    Examples
    --------
    >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
    >>> idx
    Index(['car', 'bike', 'train', 'tractor'], dtype='object')
    >>> idx.where(idx.isin(['car', 'train']), 'other')
    Index(['car', 'other', 'train', 'other'], dtype='object')
    """
    if isinstance(self, ABCMultiIndex):
        raise NotImplementedError(
            ".where is not supported for MultiIndex operations"
        )

    keep = np.asarray(cond, dtype=bool)
    # putmask writes where its mask is True, so pass the inverted condition.
    replace_mask = np.invert(keep)
    return self.putmask(replace_mask, other)
- # construction helpers
- @final
- @classmethod
- def _raise_scalar_data_error(cls, data):
- # We return the TypeError so that we can raise it from the constructor
- # in order to keep mypy happy
- raise TypeError(
- f"{cls.__name__}(...) must be called with a collection of some "
- f"kind, {repr(data)} was passed"
- )
- def _validate_fill_value(self, value):
- """
- Check if the value can be inserted into our array without casting,
- and convert it to an appropriate native type if necessary.
- Raises
- ------
- TypeError
- If the value cannot be inserted into an array of this dtype.
- """
- dtype = self.dtype
- if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
- # return np_can_hold_element(dtype, value)
- try:
- return np_can_hold_element(dtype, value)
- except LossySetitemError as err:
- # re-raise as TypeError for consistency
- raise TypeError from err
- elif not can_hold_element(self._values, value):
- raise TypeError
- return value
- @final
- def _require_scalar(self, value):
- """
- Check that this is a scalar value that we can use for setitem-like
- operations without changing dtype.
- """
- if not is_scalar(value):
- raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
- return value
- def _is_memory_usage_qualified(self) -> bool:
- """
- Return a boolean if we need a qualified .info display.
- """
- return is_object_dtype(self.dtype)
- def __contains__(self, key: Any) -> bool:
- """
- Return a boolean indicating whether the provided key is in the index.
- Parameters
- ----------
- key : label
- The key to check if it is present in the index.
- Returns
- -------
- bool
- Whether the key search is in the index.
- Raises
- ------
- TypeError
- If the key is not hashable.
- See Also
- --------
- Index.isin : Returns an ndarray of boolean dtype indicating whether the
- list-like key is in the index.
- Examples
- --------
- >>> idx = pd.Index([1, 2, 3, 4])
- >>> idx
- Index([1, 2, 3, 4], dtype='int64')
- >>> 2 in idx
- True
- >>> 6 in idx
- False
- """
- hash(key)
- try:
- return key in self._engine
- except (OverflowError, TypeError, ValueError):
- return False
- # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
- # Incompatible types in assignment (expression has type "None", base class
- # "object" defined the type as "Callable[[object], int]")
- # Annotation only (no value is assigned on this line): it declares
- # ``__hash__`` as a class-level attribute typed ``None``; the ignore
- # silences the mypy error quoted above.
- __hash__: ClassVar[None] # type: ignore[assignment]
- @final
- def __setitem__(self, key, value):
- raise TypeError("Index does not support mutable operations")
- def __getitem__(self, key):
- """
- Override numpy.ndarray's __getitem__ method to work as desired.
- This function adds lists and Series as valid boolean indexers
- (ndarrays only supports ndarray with dtype=bool).
- If resulting ndim != 1, plain ndarray is returned instead of
- corresponding `Index` subclass.
- """
- getitem = self._data.__getitem__
- if is_integer(key) or is_float(key):
- # GH#44051 exclude bool, which would return a 2d ndarray
- key = com.cast_scalar_indexer(key)
- return getitem(key)
- if isinstance(key, slice):
- # This case is separated from the conditional above to avoid
- # pessimization com.is_bool_indexer and ndim checks.
- result = getitem(key)
- # Going through simple_new for performance.
- return type(self)._simple_new(
- result, name=self._name, refs=self._references
- )
- if com.is_bool_indexer(key):
- # if we have list[bools, length=1e5] then doing this check+convert
- # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
- # time below from 3.8 ms to 496 µs
- # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
- if is_extension_array_dtype(getattr(key, "dtype", None)):
- key = key.to_numpy(dtype=bool, na_value=False)
- else:
- key = np.asarray(key, dtype=bool)
- result = getitem(key)
- # Because we ruled out integer above, we always get an arraylike here
- if result.ndim > 1:
- disallow_ndim_indexing(result)
- # NB: Using _constructor._simple_new would break if MultiIndex
- # didn't override __getitem__
- return self._constructor._simple_new(result, name=self._name)
def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
    """
    Fastpath for __getitem__ when we know we have a slice.
    """
    sliced = self._data[slobj]
    cls = type(self)
    return cls._simple_new(sliced, name=self._name, refs=self._references)
- @final
- def _can_hold_identifiers_and_holds_name(self, name) -> bool:
- """
- Faster check for ``name in self`` when we know `name` is a Python
- identifier (e.g. in NDFrame.__getattr__, which hits this to support
- . key lookup). For indexes that can't hold identifiers (everything
- but object & categorical) we just return False.
- https://github.com/pandas-dev/pandas/issues/19764
- """
- if (
- is_object_dtype(self.dtype)
- or is_string_dtype(self.dtype)
- or is_categorical_dtype(self.dtype)
- ):
- return name in self
- return False
- def append(self, other: Index | Sequence[Index]) -> Index:
- """
- Append a collection of Index options together.
- Parameters
- ----------
- other : Index or list/tuple of indices
- Returns
- -------
- Index
- """
- to_concat = [self]
- if isinstance(other, (list, tuple)):
- to_concat += list(other)
- else:
- # error: Argument 1 to "append" of "list" has incompatible type
- # "Union[Index, Sequence[Index]]"; expected "Index"
- to_concat.append(other) # type: ignore[arg-type]
- for obj in to_concat:
- if not isinstance(obj, Index):
- raise TypeError("all inputs must be Index")
- names = {obj.name for obj in to_concat}
- name = None if len(names) > 1 else self.name
- return self._concat(to_concat, name)
- def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
- """
- Concatenate multiple Index objects.
- """
- to_concat_vals = [x._values for x in to_concat]
- result = concat_compat(to_concat_vals)
- return Index._with_infer(result, name=name)
def putmask(self, mask, value) -> Index:
    """
    Return a new Index of the values set with the mask.

    Parameters
    ----------
    mask : array-like of bool
        Positions to overwrite.
    value : scalar or array-like
        Replacement value(s).

    Returns
    -------
    Index

    See Also
    --------
    numpy.ndarray.putmask : Changes elements of an array
        based on conditional and input values.
    """
    mask, noop = validate_putmask(self._values, mask)
    if noop:
        return self.copy()

    if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
        # e.g. None -> np.nan, see also Block._standardize_fill_value
        value = self._na_value

    try:
        native_value = self._validate_fill_value(value)
    except (LossySetitemError, ValueError, TypeError) as err:
        if is_object_dtype(self):  # pragma: no cover
            raise err

        # The value does not fit our dtype: upcast to a common dtype and
        # retry there. See also: Block.coerce_to_target_dtype
        common_dtype = self._find_common_type_compat(value)
        return self.astype(common_dtype).putmask(mask, value)

    new_values = self._values.copy()

    if not isinstance(new_values, np.ndarray):
        # Note: we use the original value here, not the validated one, as
        # _validate_fill_value is not idempotent
        new_values._putmask(mask, value)
    else:
        native_value = setitem_datetimelike_compat(
            new_values, mask.sum(), native_value
        )
        np.putmask(new_values, mask, native_value)

    return self._shallow_copy(new_values)
def equals(self, other: Any) -> bool:
    """
    Determine if two Index object are equal.

    The things that are being compared are:

    * The elements inside the Index object.
    * The order of the elements inside the Index object.

    Parameters
    ----------
    other : Any
        The other object to compare against.

    Returns
    -------
    bool
        True if "other" is an Index and it has the same elements and order
        as the calling index; False otherwise.

    Examples
    --------
    >>> idx1 = pd.Index([1, 2, 3])
    >>> idx1
    Index([1, 2, 3], dtype='int64')
    >>> idx1.equals(pd.Index([1, 2, 3]))
    True

    The elements inside are compared

    >>> idx2 = pd.Index(["1", "2", "3"])
    >>> idx2
    Index(['1', '2', '3'], dtype='object')

    >>> idx1.equals(idx2)
    False

    The order is compared

    >>> ascending_idx = pd.Index([1, 2, 3])
    >>> ascending_idx
    Index([1, 2, 3], dtype='int64')
    >>> descending_idx = pd.Index([3, 2, 1])
    >>> descending_idx
    Index([3, 2, 1], dtype='int64')
    >>> ascending_idx.equals(descending_idx)
    False

    The dtype is *not* compared

    >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
    >>> int64_idx
    Index([1, 2, 3], dtype='int64')
    >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
    >>> uint64_idx
    Index([1, 2, 3], dtype='uint64')
    >>> int64_idx.equals(uint64_idx)
    True
    """
    if self.is_(other):
        return True
    if not isinstance(other, Index):
        return False

    # Two situations where ``other`` owns the comparison:
    #  * we are object dtype and it is not -> use its coercion logic;
    #  * it is a MultiIndex -> a d-level MultiIndex can equal a d-tuple
    #    Index.
    defer_to_other = (
        is_object_dtype(self.dtype) and not is_object_dtype(other.dtype)
    ) or isinstance(other, ABCMultiIndex)
    if defer_to_other:
        return other.equals(self)

    own_values = self._values
    if isinstance(own_values, ExtensionArray):
        # Dispatch to the ExtensionArray's .equals method.
        if not isinstance(other, type(self)):
            return False
        own_array = cast(ExtensionArray, self._data)
        return own_array.equals(other._data)

    if is_extension_array_dtype(other.dtype):
        # All EA-backed Index subclasses override equals
        return other.equals(self)

    return array_equivalent(own_values, other._values)
@final
def identical(self, other) -> bool:
    """
    Similar to equals, but checks that object attributes and types are also equal.

    Returns
    -------
    bool
        If two Index objects have equal elements and same type True,
        otherwise False.
    """
    same_elements = self.equals(other)
    if not same_elements:
        return same_elements

    # Every attribute listed in ``_comparables`` has to agree.
    for attr in self._comparables:
        if not getattr(self, attr, None) == getattr(other, attr, None):
            return False

    if not type(self) == type(other):
        return False

    return self.dtype == other.dtype
@final
def asof(self, label):
    """
    Return the label from the index, or, if not present, the previous one.

    Assuming that the index is sorted, return the passed index label if it
    is in the index, or return the previous index label if the passed one
    is not in the index.

    Parameters
    ----------
    label : object
        The label up to which the method returns the latest index label.

    Returns
    -------
    object
        The passed label if it is in the index. The previous label if the
        passed label is not in the sorted index or `NaN` if there is no
        such label.

    Raises
    ------
    ValueError
        If the index is not monotonic increasing or decreasing.
    TypeError
        If ``label`` is not scalar valued.

    See Also
    --------
    Series.asof : Return the latest value in a Series up to the
        passed index.
    merge_asof : Perform an asof merge (similar to left join but it
        matches on nearest key rather than equal key).
    Index.get_loc : An `asof` is a thin wrapper around `get_loc`
        with method='pad'.

    Examples
    --------
    `Index.asof` returns the latest index label up to the passed label.

    >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
    >>> idx.asof('2014-01-01')
    '2013-12-31'

    If the label is in the index, the method returns the passed label.

    >>> idx.asof('2014-01-02')
    '2014-01-02'

    If all of the labels in the index are later than the passed label,
    NaN is returned.

    >>> idx.asof('1999-01-02')
    nan

    If the index is not sorted, an error is raised.

    >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
    ...                            '2014-01-03'])
    >>> idx_not_sorted.asof('2013-12-31')
    Traceback (most recent call last):
    ValueError: index must be monotonic increasing or decreasing
    """
    self._searchsorted_monotonic(label)  # validate sortedness
    try:
        loc = self.get_loc(label)
    except (KeyError, TypeError):
        # KeyError -> No exact match, try for padded
        # TypeError -> passed e.g. non-hashable, fall through to get
        #  the tested exception message
        indexer = self.get_indexer([label], method="pad")
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("asof requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            return self._na_value
    else:
        if isinstance(loc, slice):
            # A duplicated label comes back as a slice. Take the last
            # element *of that slice*; ``loc.indices(len(self))[-1]``
            # would be the slice step, not a position inside it.
            return self[loc][-1]

    return self[loc]
def asof_locs(
    self, where: Index, mask: npt.NDArray[np.bool_]
) -> npt.NDArray[np.intp]:
    """
    Return the locations (indices) of labels in the index.

    As in the `asof` function, if the label (a particular entry in
    `where`) is not in the index, the latest index label up to the
    passed label is chosen and its index returned.

    If all of the labels in the index are later than a label in `where`,
    -1 is returned.

    `mask` is used to ignore NA values in the index during calculation.

    Parameters
    ----------
    where : Index
        An Index consisting of an array of timestamps.
    mask : np.ndarray[bool]
        Array of booleans denoting where values in the original
        data are not NA.

    Returns
    -------
    np.ndarray[np.intp]
        An array of locations (indices) of the labels from the Index
        which correspond to the return values of the `asof` function
        for every element in `where`.
    """
    wanted = where._values
    kept_values = self._values[mask]

    # error: No overload variant of "searchsorted" of "ndarray" matches argument
    # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
    # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
    insert_at = kept_values.searchsorted(
        wanted, side="right"  # type: ignore[call-overload]
    )
    # Step back one slot to land on the last kept value <= the label;
    # insertion point 0 is clamped and fixed up below.
    pad_pos = np.where(insert_at > 0, insert_at - 1, 0)

    # Translate positions among the kept values back to positions in self.
    kept_positions = np.arange(len(self), dtype=np.intp)[mask]
    result = kept_positions.take(pad_pos)

    # Labels strictly before the first kept value have nothing to pad from.
    first_kept = self._values[mask.argmax()]
    before_first = (pad_pos == 0) & (wanted < first_kept)
    result[before_first] = -1

    return result
- def sort_values(
- self,
- return_indexer: bool = False,
- ascending: bool = True,
- na_position: str_t = "last",
- key: Callable | None = None,
- ):
- """
- Return a sorted copy of the index.
- Return a sorted copy of the index, and optionally return the indices
- that sorted the index itself.
- Parameters
- ----------
- return_indexer : bool, default False
- Should the indices that would sort the index be returned.
- ascending : bool, default True
- Should the index values be sorted in an ascending order.
- na_position : {'first' or 'last'}, default 'last'
- Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
- the end.
- .. versionadded:: 1.2.0
- key : callable, optional
- If not None, apply the key function to the index values
- before sorting. This is similar to the `key` argument in the
- builtin :meth:`sorted` function, with the notable difference that
- this `key` function should be *vectorized*. It should expect an
- ``Index`` and return an ``Index`` of the same shape.
- .. versionadded:: 1.1.0
- Returns
- -------
- sorted_index : pandas.Index
- Sorted copy of the index.
- indexer : numpy.ndarray, optional
- The indices that the index itself was sorted by.
- See Also
- --------
- Series.sort_values : Sort values of a Series.
- DataFrame.sort_values : Sort values in a DataFrame.
- Examples
- --------
- >>> idx = pd.Index([10, 100, 1, 1000])
- >>> idx
- Index([10, 100, 1, 1000], dtype='int64')
- Sort values in ascending order (default behavior).
- >>> idx.sort_values()
- Index([1, 10, 100, 1000], dtype='int64')
- Sort values in descending order, and also get the indices `idx` was
- sorted by.
- >>> idx.sort_values(ascending=False, return_indexer=True)
- (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
- """
- idx = ensure_key_mapped(self, key)
- # GH 35584. Sort missing values according to na_position kwarg
- # ignore na_position for MultiIndex
- if not isinstance(self, ABCMultiIndex):
- _as = nargsort(
- items=idx, ascending=ascending, na_position=na_position, key=key
- )
- else:
- _as = idx.argsort()
- if not ascending:
- _as = _as[::-1]
- sorted_index = self.take(_as)
- if return_indexer:
- return sorted_index, _as
- else:
- return sorted_index
@final
def sort(self, *args, **kwargs):
    """
    Use sort_values instead.

    Raises
    ------
    TypeError
        Always; in-place sorting is not supported.
    """
    message = "cannot sort an Index object in-place, use sort_values instead"
    raise TypeError(message)
def shift(self, periods: int = 1, freq=None):
    """
    Shift index by desired number of time frequency increments.

    This method is for shifting the values of datetime-like indexes
    by a specified time increment a given number of times.

    Parameters
    ----------
    periods : int, default 1
        Number of periods (or increments) to shift by,
        can be positive or negative.
    freq : pandas.DateOffset, pandas.Timedelta or str, optional
        Frequency increment to shift by.
        If None, the index is shifted by its own `freq` attribute.
        Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

    Returns
    -------
    pandas.Index
        Shifted index.

    See Also
    --------
    Series.shift : Shift values of Series.

    Notes
    -----
    This method is only implemented for datetime-like index classes,
    i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.

    Examples
    --------
    Put the first 5 month starts of 2011 into an index.

    >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
    >>> month_starts
    DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
                   '2011-05-01'],
                  dtype='datetime64[ns]', freq='MS')

    Shift the index by 10 days.

    >>> month_starts.shift(10, freq='D')
    DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
                   '2011-05-11'],
                  dtype='datetime64[ns]', freq=None)

    The default value of `freq` is the `freq` attribute of the index,
    which is 'MS' (month start) in this example.

    >>> month_starts.shift(10)
    DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
                   '2012-03-01'],
                  dtype='datetime64[ns]', freq='MS')
    """
    # This implementation is a stub that always raises.
    own_type_name = type(self).__name__
    raise NotImplementedError(
        f"This method is only implemented for DatetimeIndex, PeriodIndex and "
        f"TimedeltaIndex; Got type {own_type_name}"
    )
def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
    """
    Return the integer indices that would sort the index.

    Parameters
    ----------
    *args
        Passed to `numpy.ndarray.argsort`.
    **kwargs
        Passed to `numpy.ndarray.argsort`.

    Returns
    -------
    np.ndarray[np.intp]
        Integer indices that would sort the index if used as
        an indexer.

    See Also
    --------
    numpy.argsort : Similar method for NumPy arrays.
    Index.sort_values : Return sorted copy of Index.

    Examples
    --------
    >>> idx = pd.Index(['b', 'a', 'd', 'c'])
    >>> idx
    Index(['b', 'a', 'd', 'c'], dtype='object')

    >>> order = idx.argsort()
    >>> order
    array([1, 0, 3, 2])

    >>> idx[order]
    Index(['a', 'b', 'c', 'd'], dtype='object')
    """
    # Delegates to the backing array, which may be an ndarray or an EA;
    # overridden by RangeIndex, MultiIndex
    backing = self._data
    return backing.argsort(*args, **kwargs)
- def _check_indexing_error(self, key):
- if not is_scalar(key):
- # if key is not a scalar, directly raise an error (the code below
- # would convert to numpy arrays and raise later any way) - GH29926
- raise InvalidIndexError(key)
@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    """
    Should an integer key be treated as positional?

    True unless ``inferred_type`` is one of the numeric-like kinds listed
    below.
    """
    never_positional = {"integer", "mixed-integer", "floating", "complex"}
    return self.inferred_type not in never_positional
- # Shared docstring template for ``get_indexer_non_unique``. The
- # ``%(target_klass)s`` placeholder is filled in with ``_index_doc_kwargs``
- # where the template is attached to the method via ``Appender``.
- _index_shared_docs[
- "get_indexer_non_unique"
- ] = """
- Compute indexer and mask for new index given the current index.
- The indexer should be then used as an input to ndarray.take to align the
- current data to the new index.
- Parameters
- ----------
- target : %(target_klass)s
- Returns
- -------
- indexer : np.ndarray[np.intp]
- Integers from 0 to n - 1 indicating that the index at these
- positions matches the corresponding target values. Missing values
- in the target are marked by -1.
- missing : np.ndarray[np.intp]
- An indexer into the target of the values not found.
- These correspond to the -1 in the indexer array.
- Examples
- --------
- >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
- >>> index.get_indexer_non_unique(['b', 'b'])
- (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))
- In the example below there are no matched values.
- >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
- >>> index.get_indexer_non_unique(['q', 'r', 't'])
- (array([-1, -1, -1]), array([0, 1, 2]))
- For this reason, the returned ``indexer`` contains only integers equal to -1.
- It demonstrates that there's no match between the index and the ``target``
- values at these positions. The mask [0, 1, 2] in the return value shows that
- the first, second, and third elements are missing.
- Notice that the return value is a tuple contains two items. In the example
- below the first item is an array of locations in ``index``. The second
- item is a mask shows that the first and third elements are missing.
- >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
- >>> index.get_indexer_non_unique(['f', 'b', 's'])
- (array([-1, 1, 3, 4, -1]), array([0, 2]))
- """
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(
    self, target
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    target = self._maybe_cast_listlike_indexer(ensure_index(target))

    if not (self._should_compare(target) or self._should_partial_index(target)):
        # _should_partial_index e.g. IntervalIndex with numeric scalars
        #  that can be matched to Interval scalars.
        return self._get_indexer_non_comparable(target, method=None, unique=False)

    promoted_self, promoted_target = self._maybe_promote(target)
    if promoted_self is not self or promoted_target is not target:
        # Something was promoted: restart the lookup on the promoted pair.
        return promoted_self.get_indexer_non_unique(promoted_target)

    if not is_dtype_equal(self.dtype, target.dtype):
        # TODO: if object, could use infer_dtype to preempt costly
        #  conversion if still non-comparable?
        common = self._find_common_type_compat(target)
        cast_self = self.astype(common, copy=False)
        cast_target = target.astype(common, copy=False)
        return cast_self.get_indexer_non_unique(cast_target)

    # TODO: get_indexer has fastpaths for both Categorical-self and
    #  Categorical-target. Can we do something similar here?

    # Note: _maybe_promote ensures we never get here with MultiIndex
    #  self and non-Multi target
    lookup_values = target._get_engine_target()
    if self._is_multi and target._is_multi:
        multi_engine = self._engine
        # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
        # no attribute "_extract_level_codes"
        lookup_values = multi_engine._extract_level_codes(  # type: ignore[union-attr]
            target
        )

    found, not_found = self._engine.get_indexer_non_unique(lookup_values)
    return ensure_platform_int(found), ensure_platform_int(not_found)
@final
def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
    """
    Guaranteed return of an indexer even when non-unique.

    This dispatches to get_indexer or get_indexer_non_unique
    as appropriate.

    Returns
    -------
    np.ndarray[np.intp]
        List of indices.

    Examples
    --------
    >>> idx = pd.Index([np.nan, 'var1', np.nan])
    >>> idx.get_indexer_for([np.nan])
    array([0, 2])
    """
    if not self._index_as_unique:
        # Only the positions are wanted; the "missing" array is dropped.
        positions, _missing = self.get_indexer_non_unique(target)
        return positions
    return self.get_indexer(target)
- def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
- """
- Analogue to get_indexer that raises if any elements are missing.
- """
- keyarr = key
- if not isinstance(keyarr, Index):
- keyarr = com.asarray_tuplesafe(keyarr)
- if self._index_as_unique:
- indexer = self.get_indexer_for(keyarr)
- keyarr = self.reindex(keyarr)[0]
- else:
- keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
- self._raise_if_missing(keyarr, indexer, axis_name)
- keyarr = self.take(indexer)
- if isinstance(key, Index):
- # GH 42790 - Preserve name from an Index
- keyarr.name = key.name
- if (
- isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"]
- ) or isinstance(keyarr.dtype, DatetimeTZDtype):
- # DTI/TDI.take can infer a freq in some cases when we dont want one
- if isinstance(key, list) or (
- isinstance(key, type(self))
- # "Index" has no attribute "freq"
- and key.freq is None # type: ignore[attr-defined]
- ):
- keyarr = keyarr._with_freq(None)
- return keyarr, indexer
def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
    """
    Raise KeyError when ``indexer`` marks any requested label as not found.

    Nothing is raised when ``key`` is empty.

    Parameters
    ----------
    key : list-like
        Targeted labels (only used to build the error message).
    indexer : array-like of int
        Positions corresponding to ``key``; negative entries mark labels
        that were not found.
    axis_name : str
        Interpolated into the message used when no label was found.

    Raises
    ------
    KeyError
        "None of [...] are in the [...]" if every label is missing,
        "[...] not in index" if only some of them are.
    """
    if len(key) == 0:
        return

    # Count missing values
    missing_mask = indexer < 0
    nmissing = missing_mask.sum()
    if not nmissing:
        return

    # TODO: remove special-case; this is just to keep exception
    #  message tests from raising while debugging
    use_interval_msg = is_interval_dtype(self.dtype) or (
        is_categorical_dtype(self.dtype)
        # "Index" has no attribute "categories"  [attr-defined]
        and is_interval_dtype(
            self.categories.dtype  # type: ignore[attr-defined]
        )
    )

    if nmissing == len(indexer):
        if use_interval_msg:
            key = list(key)
        raise KeyError(f"None of [{key}] are in the [{axis_name}]")

    missing_positions = missing_mask.nonzero()[0]
    not_found = list(ensure_index(key)[missing_positions].unique())
    raise KeyError(f"{not_found} not in index")
@overload
def _get_indexer_non_comparable(
    self, target: Index, method, unique: Literal[True] = ...
) -> npt.NDArray[np.intp]:
    ...


@overload
def _get_indexer_non_comparable(
    self, target: Index, method, unique: Literal[False]
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    ...


@overload
def _get_indexer_non_comparable(
    self, target: Index, method, unique: bool = True
) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    ...


@final
def _get_indexer_non_comparable(
    self, target: Index, method, unique: bool = True
) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    """
    Build the "nothing matched" result for a target of non-comparable dtype.

    Used by get_indexer / get_indexer_non_unique. With ``method=None`` the
    lookup is an equality check, so a non-comparable dtype simply never
    matches. With a method the lookup is an inequality check, which cannot
    be evaluated across non-comparable dtypes and therefore raises.

    Parameters
    ----------
    target : Index
    method : str or None
    unique : bool, default True
        * True if called from get_indexer.
        * False if called from get_indexer_non_unique.

    Returns
    -------
    np.ndarray[np.intp] or tuple of two such arrays
        An all ``-1`` indexer shaped like ``target``; when ``unique`` is
        False it is paired with ``arange(len(target))`` as the missing
        positions.

    Raises
    ------
    TypeError
        If doing an inequality check, i.e. method is not None.
    """
    if method is not None:
        other = _unpack_nested_dtype(target)
        raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")

    no_matches = np.full(target.shape, -1, dtype=np.intp)
    if not unique:
        # This is for get_indexer_non_unique
        missing = np.arange(len(target), dtype=np.intp)
        return no_matches, missing

    # This is for get_indexer
    return no_matches
@property
def _index_as_unique(self) -> bool:
    """
    Flag deciding between get_indexer and get_indexer_non_unique.

    This base implementation simply reports ``self.is_unique``; it exists
    as a separate hook for IntervalIndex compat.
    """
    treat_as_unique = self.is_unique
    return treat_as_unique
# Shared error text for operations that need unique labels. The raise sites
# are outside this chunk (presumably reindex-style paths -- TODO confirm).
_requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
@final
def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
    """
    Try to bring an object-dtype Index and a non-object Index to a common,
    faster dtype.

    Returns
    -------
    tuple[Index, Index]
        The (possibly converted) counterparts of ``self`` and ``other``,
        in that order.
    """
    left, right = self, other

    if isinstance(left, ABCDatetimeIndex) and isinstance(right, ABCDatetimeIndex):
        tz_mismatch = (
            left.tz is not None
            and right.tz is not None
            and not tz_compare(left.tz, right.tz)
        )
        if tz_mismatch:
            # standardize on UTC
            return left.tz_convert("UTC"), right.tz_convert("UTC")

    elif left.inferred_type == "date" and isinstance(right, ABCDatetimeIndex):
        try:
            return type(right)(left), right
        except OutOfBoundsDatetime:
            return left, right

    elif left.inferred_type == "timedelta" and isinstance(right, ABCTimedeltaIndex):
        # TODO: we dont have tests that get here
        return type(right)(left), right

    elif left.dtype.kind == "u" and right.dtype.kind == "i":
        # GH#41873
        if right.min() >= 0:
            # lookup min as it may be cached
            # TODO: may need itemsize check if we have non-64-bit Indexes
            return left, right.astype(left.dtype)

    elif left._is_multi and not right._is_multi:
        try:
            # "Type[Index]" has no attribute "from_tuples"
            right = type(left).from_tuples(right)  # type: ignore[attr-defined]
        except (TypeError, ValueError):
            # let's instead try with a straight Index
            left = Index(left._values)

    if not is_object_dtype(left.dtype) and is_object_dtype(right.dtype):
        # Reverse op so we dont need to re-implement on the subclasses
        right, left = right._maybe_promote(left)

    return left, right
@final
def _find_common_type_compat(self, target) -> DtypeObj:
    """
    find_common_type with Index-specific special cases applied.

    Returns
    -------
    DtypeObj
        ``_dtype_obj`` when uint64 meets a signed integer dtype, otherwise
        the result of ``find_result_type`` passed through
        ``common_dtype_categorical_compat``.
    """
    target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)

    # special case: if one dtype is uint64 and the other a signed int, return object
    # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
    # Now it's:
    # * float | [u]int -> float
    # * uint64 | signed int -> object
    # We may change union(float | [u]int) to go to object.
    involves_uint64 = self.dtype == "uint64" or target_dtype == "uint64"
    if involves_uint64 and (
        is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(target_dtype)
    ):
        return _dtype_obj

    result_dtype = find_result_type(self._values, target)
    return common_dtype_categorical_compat([self, target], result_dtype)
@final
def _should_compare(self, other: Index) -> bool:
    """
    Report whether ``self == other`` can ever have non-False entries.
    """
    bool_vs_numeric = (is_bool_dtype(other) and is_any_real_numeric_dtype(self)) or (
        is_bool_dtype(self) and is_any_real_numeric_dtype(other)
    )
    if bool_vs_numeric:
        # GH#16877 Treat boolean labels passed to a numeric index as not
        #  found. Without this fix False and True would be treated as 0 and 1
        #  respectively.
        return False

    other_dtype = _unpack_nested_dtype(other).dtype
    return self._is_comparable_dtype(other_dtype) or is_object_dtype(other_dtype)
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
    """
    Report whether values of ``dtype`` can be compared with our own values.

    A boolean index only compares with boolean dtypes, a numeric index only
    with numeric dtypes; any other index reports True.
    """
    own_dtype = self.dtype
    if own_dtype.kind == "b":
        return dtype.kind == "b"
    if is_numeric_dtype(own_dtype):
        return is_numeric_dtype(dtype)
    # TODO: this was written assuming we only get here with object-dtype,
    #  which is no longer correct. Can we specialize for EA?
    return True
@final
def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
    """
    Group the index labels by a given array of values.

    Parameters
    ----------
    values : array
        Values used to determine the groups.

    Returns
    -------
    dict
        {group name -> group labels}
    """
    # TODO: if we are a MultiIndex, we can do better
    # that converting to tuples
    if isinstance(values, ABCMultiIndex):
        values = values._values

    positions_by_group = Categorical(values)._reverse_indexer()

    # map to the label
    labels_by_group = {
        group: self.take(positions) for group, positions in positions_by_group.items()
    }
    return PrettyDict(labels_by_group)
def map(self, mapper, na_action=None):
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.

    Returns
    -------
    Union[Index, MultiIndex]
        The output of the mapping function applied to the index.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.
    """
    from pandas.core.indexes.multi import MultiIndex

    new_values = self._map_values(mapper, na_action=na_action)

    # we can return a MultiIndex
    if new_values.size and isinstance(new_values[0], tuple):
        if isinstance(self, MultiIndex):
            names = self.names
        else:
            # repeat our own name once per tuple element, if we have one
            names = [self.name] * len(new_values[0]) if self.name else None
        return MultiIndex.from_tuples(new_values, names=names)

    # an empty result keeps our own dtype; otherwise let it be inferred
    dtype = None if new_values.size else self.dtype

    # e.g. if we are floating and new_values is all ints, then we
    #  don't want to cast back to floating.  But if we are UInt64
    #  and new_values is all ints, we want to try.
    if lib.infer_dtype(new_values, skipna=False) == self.inferred_type:
        new_values = maybe_cast_pointwise_result(
            new_values, self.dtype, same_dtype=True
        )

    return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
# TODO: De-duplicate with map, xref GH#32349
@final
def _transform_index(self, func, *, level=None) -> Index:
    """
    Apply ``func`` to every value found in the index.

    For a MultiIndex each level is transformed separately; when ``level``
    is given only that level is transformed and the others are passed
    through unchanged.
    """
    if not isinstance(self, ABCMultiIndex):
        items = [func(x) for x in self]
        return Index(items, name=self.name, tupleize_cols=False)

    values = []
    for i in range(self.nlevels):
        level_values = self.get_level_values(i)
        if i == level or level is None:
            level_values = level_values.map(func)
        values.append(level_values)
    return type(self).from_arrays(values)
def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
    """
    Return a boolean array where the index values are in `values`.

    Each position of the result tells whether the index value at that
    position is found in the passed set of values. The result has the same
    length as the index.

    Parameters
    ----------
    values : set or list-like
        Sought values.
    level : str or int, optional
        Name or position of the index level to use (if the index is a
        `MultiIndex`).

    Returns
    -------
    np.ndarray[bool]
        NumPy array of boolean values.

    See Also
    --------
    Series.isin : Same for Series.
    DataFrame.isin : Same method for DataFrames.

    Notes
    -----
    In the case of `MultiIndex` you must either specify `values` as a
    list-like object containing tuples that are the same length as the
    number of levels, or specify `level`. Otherwise it will raise a
    ``ValueError``.

    If `level` is specified:

    - if it is the name of one *and only one* index level, use that level;
    - otherwise it should be a number indicating level position.

    Examples
    --------
    >>> idx = pd.Index([1,2,3])
    >>> idx
    Index([1, 2, 3], dtype='int64')

    Check whether each index value is in a list of values.

    >>> idx.isin([1, 4])
    array([ True, False, False])

    >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
    ...                                  ['red', 'blue', 'green']],
    ...                                  names=('number', 'color'))
    >>> midx
    MultiIndex([(1,   'red'),
                (2,  'blue'),
                (3, 'green')],
               names=['number', 'color'])

    Check whether the strings in the 'color' level of the MultiIndex
    are in a list of colors.

    >>> midx.isin(['red', 'orange', 'yellow'], level='color')
    array([ True, False, False])

    To check across the levels of a MultiIndex, pass a list of tuples:

    >>> midx.isin([(1, 'red'), (3, 'red')])
    array([ True, False, False])

    For a DatetimeIndex, string values in `values` are converted to
    Timestamps.

    >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
    >>> dti = pd.to_datetime(dates)
    >>> dti
    DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
    dtype='datetime64[ns]', freq=None)

    >>> dti.isin(['2000-03-11'])
    array([ True, False, False])
    """
    if level is not None:
        # only validated here; the level is not otherwise used by this base method
        self._validate_index_level(level)
    own_values = self._values
    return algos.isin(own_values, values)
def _get_string_slice(self, key: str_t):
    """
    Hook for partial string indexing; not implemented on the base class.

    Overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex.
    """
    raise NotImplementedError
def slice_indexer(
    self,
    start: Hashable | None = None,
    end: Hashable | None = None,
    step: int | None = None,
) -> slice:
    """
    Compute the slice indexer for input labels and step.

    Index needs to be ordered and unique.

    Parameters
    ----------
    start : label, default None
        If None, defaults to the beginning.
    end : label, default None
        If None, defaults to the end.
    step : int, default None

    Returns
    -------
    slice

    Raises
    ------
    KeyError : If key does not exist, or key is not unique and index is
        not ordered.

    Notes
    -----
    This function assumes that the data is sorted, so use at your own peril

    Examples
    --------
    This is a method on all index types. For example you can do:

    >>> idx = pd.Index(list('abcd'))
    >>> idx.slice_indexer(start='b', end='c')
    slice(1, 3, None)

    >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
    >>> idx.slice_indexer(start='b', end=('c', 'g'))
    slice(1, 3, None)
    """
    bounds = self.slice_locs(start, end, step=step)
    start_slice, end_slice = bounds

    # both positional bounds must be scalars before building the slice
    if not is_scalar(start_slice):
        raise AssertionError("Start slice bound is non-scalar")
    if not is_scalar(end_slice):
        raise AssertionError("End slice bound is non-scalar")

    return slice(start_slice, end_slice, step)
def _maybe_cast_indexer(self, key):
    """
    Casting hook for scalar lookup keys.

    The documented purpose is to turn a float key into an equivalent int
    when we are not a floating index; this base implementation performs no
    cast and hands ``key`` back unchanged.
    """
    return key
def _maybe_cast_listlike_indexer(self, target) -> Index:
    """
    List-like counterpart of _maybe_cast_indexer, used by get_indexer
    rather than get_loc. Here ``target`` is only wrapped via ensure_index.
    """
    as_index = ensure_index(target)
    return as_index
@final
def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
    """
    Validate a positional indexer bound: it must be None or an integer.

    Anything else is reported through ``_raise_invalid_indexer``.
    """
    assert kind in ["getitem", "iloc"]

    if key is None or is_integer(key):
        return
    self._raise_invalid_indexer(form, key)
def _maybe_cast_slice_bound(self, label, side: str_t):
    """
    Cast a label-slice bound before it is looked up.

    Subclasses that allow non-trivial casting of slice bounds (e.g.
    datetime-like indices accepting strings containing formatted
    datetimes) should override this.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}

    Returns
    -------
    label : object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    # We are a plain index here (sub-class override this method if they
    # wish to have special treatment for floats/ints, e.g. datetimelike Indexes
    if is_numeric_dtype(self.dtype):
        return self._maybe_cast_indexer(label)

    # reject them, if index does not contain label
    label_is_number = is_float(label) or is_integer(label)
    if label_is_number and label not in self:
        self._raise_invalid_indexer("slice", label)

    return label
def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
    """
    searchsorted that also works on a monotonic decreasing index.

    Raises
    ------
    ValueError
        If the index is neither monotonic increasing nor decreasing.
    """
    if self.is_monotonic_increasing:
        return self.searchsorted(label, side=side)

    if not self.is_monotonic_decreasing:
        raise ValueError("index must be monotonic increasing or decreasing")

    # np.searchsorted expects ascending sort order, have to reverse
    # everything for it to work (element ordering, search side and
    # resulting value).
    mirrored_side = "right" if side == "left" else "left"
    pos = self[::-1].searchsorted(label, side=mirrored_side)
    return len(self) - pos
def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
    """
    Calculate slice bound that corresponds to given label.

    Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
    of given label.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}

    Returns
    -------
    int
        Index of label.

    Raises
    ------
    ValueError
        If ``side`` is neither 'left' nor 'right'.
    KeyError
        If the label is not found and the index is not monotonic, or if the
        label is non-unique and its positions cannot be expressed as a slice.
    """
    if side not in ("left", "right"):
        raise ValueError(
            "Invalid value for side kwarg, must be either "
            f"'left' or 'right': {side}"
        )

    original_label = label

    # For datetime indices label may be a string that has to be converted
    # to datetime boundary according to its resolution.
    label = self._maybe_cast_slice_bound(label, side)

    # we need to look up the label
    try:
        slc = self.get_loc(label)
    except KeyError as err:
        try:
            return self._searchsorted_monotonic(label, side)
        except ValueError:
            # raise the original KeyError
            raise err

    if isinstance(slc, np.ndarray):
        # get_loc may return a boolean array, which
        # is OK as long as they are representable by a slice.
        assert is_bool_dtype(slc.dtype)
        slc = lib.maybe_booleans_to_slice(slc.view("u1"))
        if isinstance(slc, np.ndarray):
            raise KeyError(
                f"Cannot get {side} slice bound for non-unique "
                f"label: {repr(original_label)}"
            )

    if isinstance(slc, slice):
        return slc.start if side == "left" else slc.stop

    # scalar position: the right bound is one past the match
    return slc + 1 if side == "right" else slc
def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
    """
    Compute slice locations for input labels.

    Parameters
    ----------
    start : label, default None
        If None, defaults to the beginning.
    end : label, default None
        If None, defaults to the end.
    step : int, defaults None
        If None, defaults to 1.

    Returns
    -------
    tuple[int, int]

    Raises
    ------
    ValueError
        If both bounds parse as timestamps whose UTC offsets differ.

    See Also
    --------
    Index.get_loc : Get location for a single label.

    Notes
    -----
    This method only works if the index is monotonic or unique.

    Examples
    --------
    >>> idx = pd.Index(list('abcd'))
    >>> idx.slice_locs(start='b', end='c')
    (1, 3)
    """
    forward = step is None or step >= 0

    if not forward:
        # If it's a reverse slice, temporarily swap bounds.
        start, end = end, start

    # GH 16785: If start and end happen to be date strings with UTC offsets
    # attempt to parse and check that the offsets are the same
    datelike = (str, datetime)
    if isinstance(start, datelike) and isinstance(end, datelike):
        try:
            ts_start = Timestamp(start)
            ts_end = Timestamp(end)
        except (ValueError, TypeError):
            pass
        else:
            if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                raise ValueError("Both dates must have the same UTC offset")

    start_slice = self.get_slice_bound(start, "left") if start is not None else None
    if start_slice is None:
        start_slice = 0

    end_slice = self.get_slice_bound(end, "right") if end is not None else None
    if end_slice is None:
        end_slice = len(self)

    if forward:
        return start_slice, end_slice

    # Bounds at this moment are swapped, swap them back and shift by 1.
    #
    # slice_locs('B', 'A', step=-1): s='B', e='A'
    #
    #              s='A'                 e='B'
    # AFTER SWAP:    |                     |
    #                v ------------------> V
    #           -----------------------------------
    #           | | |A|A|A|A| | | | | |B|B| | | | |
    #           -----------------------------------
    #              ^ <------------------ ^
    # SHOULD BE:   |                     |
    #           end=s-1              start=e-1
    #
    end_slice, start_slice = start_slice - 1, end_slice - 1

    # i == -1 triggers ``len(self) + i`` selection that points to the
    # last element, not before-the-first one, subtracting len(self)
    # compensates that.
    if end_slice == -1:
        end_slice -= len(self)
    if start_slice == -1:
        start_slice -= len(self)

    return start_slice, end_slice
def delete(self: _IndexT, loc) -> _IndexT:
    """
    Make new Index with passed location(-s) deleted.

    Parameters
    ----------
    loc : int or list of int
        Location of item(-s) which will be deleted.
        Use a list of locations to delete more than one value at the same time.

    Returns
    -------
    Index
        Will be same type as self, except for RangeIndex.

    See Also
    --------
    numpy.delete : Delete any rows and column from NumPy array (ndarray).

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx.delete(1)
    Index(['a', 'c'], dtype='object')

    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx.delete([0, 2])
    Index(['b'], dtype='object')
    """
    values = self._values
    if isinstance(values, np.ndarray):
        # TODO(__array_function__): special casing will be unnecessary
        res_values: ArrayLike = np.delete(values, loc)
    else:
        res_values = values.delete(loc)

    # _constructor so RangeIndex-> Index with an int64 dtype
    constructor = self._constructor
    return constructor._simple_new(res_values, name=self.name)
def insert(self, loc: int, item) -> Index:
    """
    Make new Index inserting new item at location.

    Follows Python numpy.insert semantics for negative values.

    Parameters
    ----------
    loc : int
    item : object

    Returns
    -------
    Index
    """
    item = lib.item_from_zerodim(item)
    if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
        item = self._na_value

    arr = self._values

    try:
        if isinstance(arr, ExtensionArray):
            res_values = arr.insert(loc, item)
            return type(self)._simple_new(res_values, name=self.name)
        item = self._validate_fill_value(item)
    except (TypeError, ValueError, LossySetitemError):
        # e.g. trying to insert an integer into a DatetimeIndex
        #  We cannot keep the same dtype, so cast to the (often object)
        #  minimal shared dtype before doing the insert.
        dtype = self._find_common_type_compat(item)
        return self.astype(dtype).insert(loc, item)

    # with object-dtype we need to worry about numpy incorrectly casting
    # dt64/td64 to integer, also about treating tuples as sequences
    # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
    needs_placeholder = arr.dtype == object and isinstance(
        item, (tuple, np.datetime64, np.timedelta64)
    )
    if needs_placeholder:
        # insert a None slot first, then assign the real item into it
        # error: No overload variant of "insert" matches argument types
        # "ndarray[Any, Any]", "int", "None"
        new_values = np.insert(arr, loc, None)  # type: ignore[call-overload]
        if loc < 0:
            loc -= 1
        new_values[loc] = item
    else:
        casted = arr.dtype.type(item)
        new_values = np.insert(arr, loc, casted)

    return Index._with_infer(new_values, name=self.name)
def drop(
    self,
    labels: Index | np.ndarray | Iterable[Hashable],
    errors: IgnoreRaise = "raise",
) -> Index:
    """
    Make new Index with passed list of labels deleted.

    Parameters
    ----------
    labels : array-like or scalar
    errors : {'ignore', 'raise'}, default 'raise'
        If 'ignore', suppress error and existing labels are dropped.

    Returns
    -------
    Index
        Will be same type as self, except for RangeIndex.

    Raises
    ------
    KeyError
        If not all of the labels are found in the selected axis
    """
    if not isinstance(labels, Index):
        # avoid materializing e.g. RangeIndex
        arr_dtype = "object" if self.dtype == "object" else None
        labels = com.index_labels_to_array(labels, dtype=arr_dtype)

    indexer = self.get_indexer_for(labels)
    not_found = indexer == -1
    if not_found.any():
        if errors != "ignore":
            raise KeyError(f"{list(labels[not_found])} not found in axis")
        # keep only the positions that were actually located
        indexer = indexer[~not_found]
    return self.delete(indexer)
def infer_objects(self, copy: bool = True) -> Index:
    """
    If we have an object dtype, try to infer a non-object dtype.

    Parameters
    ----------
    copy : bool, default True
        Whether to make a copy in cases where no inference occurs.

    Raises
    ------
    NotImplementedError
        If called on a MultiIndex.
    """
    if self._is_multi:
        raise NotImplementedError(
            "infer_objects is not implemented for MultiIndex. "
            "Use index.to_frame().infer_objects() instead."
        )

    if self.dtype != object:
        # nothing to infer
        return self.copy() if copy else self

    values = cast("npt.NDArray[np.object_]", self._values)
    res_values = lib.maybe_convert_objects(
        values,
        convert_datetime=True,
        convert_timedelta=True,
        convert_period=True,
        convert_interval=True,
    )

    unchanged = res_values is values
    if copy and unchanged:
        return self.copy()

    result = Index(res_values, name=self.name)
    if not copy and unchanged and self._references is not None:
        # the new Index wraps the same values, so it joins our reference tracker
        result._references = self._references
        result._references.add_index_reference(result)
    return result
- # --------------------------------------------------------------------
- # Generated Arithmetic, Comparison, and Unary Methods
def _cmp_method(self, other, op):
    """
    Wrapper used to dispatch comparison operations.

    Raises
    ------
    ValueError
        If ``other`` is array-like and its length differs from ours.
    """
    if self.is_(other):
        # fastpath: comparing with ourselves
        reflexive = op in {operator.eq, operator.le, operator.ge}
        if reflexive or op is operator.ne:
            arr = np.full(len(self), reflexive, dtype=bool)
            if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                # TODO: should set MultiIndex._can_hold_na = False?
                # NA positions take the opposite of the fill value
                arr[self.isna()] = not reflexive
            return arr

    if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
        self
    ) != len(other):
        raise ValueError("Lengths must match to compare")

    if isinstance(other, ABCMultiIndex):
        other = np.asarray(other)
    else:
        other = extract_array(other, extract_numpy=True)

    own_values = self._values
    if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
        # e.g. PeriodArray, Categorical
        with np.errstate(all="ignore"):
            result = op(own_values, other)

    elif isinstance(own_values, ExtensionArray):
        result = op(own_values, other)

    elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
        # don't pass MultiIndex
        with np.errstate(all="ignore"):
            result = ops.comp_method_OBJECT_ARRAY(op, own_values, other)

    else:
        with np.errstate(all="ignore"):
            result = ops.comparison_op(own_values, other, op)

    return result
@final
def _logical_method(self, other, op):
    """
    Apply the logical operator ``op`` to our values and ``other`` and wrap
    the outcome via ``_construct_result``.
    """
    rvalues = extract_array(other, extract_numpy=True, extract_range=True)
    res_values = ops.logical_op(self._values, rvalues, op)
    return self._construct_result(
        res_values, name=ops.get_op_result_name(self, other)
    )
@final
def _construct_result(self, result, name):
    """
    Wrap an operation result in an Index named ``name``.

    A tuple result yields a pair of Index objects built from its first two
    elements; each Index keeps the dtype of the array it wraps.
    """
    if not isinstance(result, tuple):
        return Index(result, name=name, dtype=result.dtype)

    first, second = result[0], result[1]
    return (
        Index(first, name=name, dtype=first.dtype),
        Index(second, name=name, dtype=second.dtype),
    )
def _arith_method(self, other, op):
    """
    Dispatch an arithmetic operation, deferring to object-dtype Index
    subclasses by returning NotImplemented for them.
    """
    other_is_object_subclass = (
        isinstance(other, Index)
        and is_object_dtype(other.dtype)
        and type(other) is not Index
    )
    if other_is_object_subclass:
        # We return NotImplemented for object-dtype index *subclasses* so they have
        # a chance to implement ops before we unwrap them.
        # See https://github.com/pandas-dev/pandas/issues/31109
        return NotImplemented

    return super()._arith_method(other, op)
@final
def _unary_method(self, op):
    """
    Apply the unary operator ``op`` to our values and wrap the result in an
    Index carrying our name.
    """
    transformed = op(self._values)
    return Index(transformed, name=self.name)
def __abs__(self) -> Index:
    """Implement ``abs(index)`` through ``_unary_method``."""
    op = operator.abs
    return self._unary_method(op)
def __neg__(self) -> Index:
    """Implement ``-index`` through ``_unary_method``."""
    op = operator.neg
    return self._unary_method(op)
def __pos__(self) -> Index:
    """Implement ``+index`` through ``_unary_method``."""
    op = operator.pos
    return self._unary_method(op)
def __invert__(self) -> Index:
    """Implement ``~index`` through ``_unary_method``."""
    # GH#8875
    op = operator.inv
    return self._unary_method(op)
- # --------------------------------------------------------------------
- # Reductions
def any(self, *args, **kwargs):
    """
    Return whether any element is Truthy.

    Parameters
    ----------
    *args
        Required for compatibility with numpy.
    **kwargs
        Required for compatibility with numpy.

    Returns
    -------
    bool or array-like (if axis is specified)
        A single element array-like may be converted to bool.

    See Also
    --------
    Index.all : Return whether all elements are True.
    Series.all : Return whether all elements are True.

    Notes
    -----
    Not a Number (NaN), positive infinity and negative infinity
    evaluate to True because these are not equal to zero.

    Examples
    --------
    >>> index = pd.Index([0, 1, 2])
    >>> index.any()
    True

    >>> index = pd.Index([0, 0, 0])
    >>> index.any()
    False
    """
    nv.validate_any(args, kwargs)
    # raises for index types on which the reduction is disabled
    self._maybe_disable_logical_methods("any")
    values = self.values
    # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected
    # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
    # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
    # _SupportsArray]"
    return np.any(values)  # type: ignore[arg-type]
def all(self, *args, **kwargs):
    """
    Return whether all elements are Truthy.

    Parameters
    ----------
    *args
        Required for compatibility with numpy.
    **kwargs
        Required for compatibility with numpy.

    Returns
    -------
    bool or array-like (if axis is specified)
        A single element array-like may be converted to bool.

    See Also
    --------
    Index.any : Return whether any element in an Index is True.
    Series.any : Return whether any element in a Series is True.
    Series.all : Return whether all elements in a Series are True.

    Notes
    -----
    Not a Number (NaN), positive infinity and negative infinity
    evaluate to True because these are not equal to zero.

    Examples
    --------
    True, because nonzero integers are considered True.

    >>> pd.Index([1, 2, 3]).all()
    True

    False, because ``0`` is considered False.

    >>> pd.Index([0, 1, 2]).all()
    False
    """
    nv.validate_all(args, kwargs)
    # raises for index types on which the reduction is disabled
    self._maybe_disable_logical_methods("all")
    values = self.values
    # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected
    # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
    # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
    # _SupportsArray]"
    return np.all(values)  # type: ignore[arg-type]
@final
def _maybe_disable_logical_methods(self, opname: str_t) -> None:
    """
    Raise if this Index does not support the logical reduction ``opname``.

    The reduction is rejected for MultiIndex and for dtypes that need i8
    conversion, interval, categorical and float dtypes.
    """
    unsupported = (
        isinstance(self, ABCMultiIndex)
        or needs_i8_conversion(self.dtype)
        or is_interval_dtype(self.dtype)
        or is_categorical_dtype(self.dtype)
        or is_float_dtype(self.dtype)
    )
    if unsupported:
        # This call will raise
        make_invalid_op(opname)(self)
@Appender(IndexOpsMixin.argmin.__doc__)
def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
    # No docstring on purpose: it is supplied by the Appender decorator.
    nv.validate_argmin(args, kwargs)
    nv.validate_minmax_axis(axis)

    if self._is_multi or not self.hasnans:
        return super().argmin(skipna=skipna)

    # Take advantage of cache
    nan_mask = self._isnan
    if not skipna or nan_mask.all():
        # NAs present and either not skipped or nothing else is left
        return -1
    return super().argmin(skipna=skipna)
@Appender(IndexOpsMixin.argmax.__doc__)
def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
    # No docstring on purpose: it is supplied by the Appender decorator.
    nv.validate_argmax(args, kwargs)
    nv.validate_minmax_axis(axis)

    if self._is_multi or not self.hasnans:
        return super().argmax(skipna=skipna)

    # Take advantage of cache
    nan_mask = self._isnan
    if not skipna or nan_mask.all():
        # NAs present and either not skipped or nothing else is left
        return -1
    return super().argmax(skipna=skipna)
@doc(IndexOpsMixin.min)
def min(self, axis=None, skipna: bool = True, *args, **kwargs):
    # No docstring on purpose: it is supplied by the doc decorator.
    nv.validate_min(args, kwargs)
    nv.validate_minmax_axis(axis)

    if not len(self):
        return self._na_value

    if self.is_monotonic_increasing:
        # quick check: the first element of a sorted index is the minimum
        first = self[0]
        if not isna(first):
            return first

    if not self._is_multi:
        if self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="min", skipna=skipna)

    return super().min(skipna=skipna)
@doc(IndexOpsMixin.max)
def max(self, axis=None, skipna: bool = True, *args, **kwargs):
    # No docstring on purpose: it is supplied by the doc decorator.
    nv.validate_max(args, kwargs)
    nv.validate_minmax_axis(axis)

    if not len(self):
        return self._na_value

    if self.is_monotonic_increasing:
        # quick check: the last element of a sorted index is the maximum
        last = self[-1]
        if not isna(last):
            return last

    if not self._is_multi:
        if self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="max", skipna=skipna)

    return super().max(skipna=skipna)
- # --------------------------------------------------------------------
@final
@property
def shape(self) -> Shape:
    """
    Return a tuple of the shape of the underlying data.

    It is always the one-element tuple ``(len(self),)``.
    """
    # See GH#27775, GH#27384 for history/reasoning in how this is defined.
    length = len(self)
    return (length,)
def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # a lone sequence takes the first entry of ``names`` as its name
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'),  ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    if isinstance(index_like, ABCSeries):
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if not isinstance(index_like, list):
        return Index(index_like, copy=copy)

    if type(index_like) is not list:
        # must check for exactly list here because of strict type
        # check in clean_index_list
        index_like = list(index_like)

    if len(index_like) and lib.is_all_arraylike(index_like):
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex.from_arrays(index_like)

    return Index(index_like, copy=copy, tupleize_cols=False)
def ensure_has_len(seq):
    """
    Return an object that supports ``len``.

    ``seq`` is returned unchanged when ``len(seq)`` succeeds. If that call
    raises TypeError (e.g. for an iterator), the values are collected
    into a list instead.
    """
    try:
        len(seq)
    except TypeError:
        # No usable length: materialise the values.
        return list(seq)
    return seq
def trim_front(strings: list[str]) -> list[str]:
    """
    Trim leading spaces evenly among all strings.

    The same number of leading space characters is removed from every
    string: as many as the string with the fewest leading spaces has.
    Only ``" "`` is stripped; other whitespace is left alone.

    Parameters
    ----------
    strings : list of str

    Returns
    -------
    list of str
        The input list itself when there is nothing to trim, otherwise a
        new list of the trimmed strings.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings

    # Leading spaces shared by every string. An empty string contributes 0,
    # and an all-space string contributes its full length.
    n_common = min(len(s) - len(s.lstrip(" ")) for s in strings)
    if n_common == 0:
        return strings

    # Slice once rather than peeling one character per pass.
    return [s[n_common:] for s in strings]
- def _validate_join_method(method: str) -> None:
- if method not in ["left", "right", "inner", "outer"]:
- raise ValueError(f"do not recognize join method {method}")
def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    Resolve the name to use, falling back to ``obj.name`` when none is given.

    The fallback only applies when ``name`` is None and ``obj`` is an Index
    or a Series. The resulting name must be hashable.

    Raises
    ------
    TypeError
        If the resulting name is not hashable; the message mentions
        ``cls.__name__``.
    """
    if name is None:
        # Note we don't just check for "name" attribute since that would
        #  pick up e.g. dtype.name
        if isinstance(obj, (Index, ABCSeries)):
            name = obj.name

    # GH#29069
    if is_hashable(name):
        return name
    raise TypeError(f"{cls.__name__}.name must be a hashable type")
def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    tuple
        One entry per level: the shared name when every index has the same
        name at that level, otherwise None. Indexes with fewer levels are
        padded with None for the comparison.
    """
    per_index_names = (tuple(idx.names) for idx in indexes)

    unanimous = []
    for level_names in zip_longest(*per_index_names):
        distinct = set(level_names)
        # Exactly one distinct value means every index agrees at this level.
        unanimous.append(distinct.pop() if len(distinct) == 1 else None)
    return tuple(unanimous)
- def _unpack_nested_dtype(other: Index) -> Index:
- """
- When checking if our dtype is comparable with another, we need
- to unpack CategoricalDtype to look at its categories.dtype.
- Parameters
- ----------
- other : Index
- Returns
- -------
- Index
- """
- from pandas.core.arrays.arrow import ArrowDtype
- dtype = other.dtype
- if isinstance(dtype, CategoricalDtype):
- # If there is ever a SparseIndex, this could get dispatched
- # here too.
- return dtype.categories
- elif isinstance(dtype, ArrowDtype):
- # GH 53617
- import pyarrow as pa
- if pa.types.is_dictionary(dtype.pyarrow_dtype):
- other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
- return other
- def _maybe_try_sort(result, sort):
- if sort is not False:
- try:
- result = algos.safe_sort(result)
- except TypeError as err:
- if sort is True:
- raise
- warnings.warn(
- f"{err}, sort order is undefined for incomparable objects.",
- RuntimeWarning,
- stacklevel=find_stack_level(),
- )
- return result
|