generic.py 409 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
222212223122241222512226122271222812229122301223112232122331223412235122361223712238122391224012241122421224312244122451224612247122481224912250122511225212253122541225512256122571225812259122601226112262122631226412265122661226712268122691227012271122721227312274122751227612277122781227912280122811228212283122841228512286122871228812289122901229112292122931229412295122961229712298122991230012301123021230312304123051230612307123081230912310123111231212313123141231512316123171231812319123201232112322123231232412325123261232712328123291233012331123321233312334123351233612337123381233912340123411234212343123441234512346123471234812349123501235112352123531235412355123561235712358123591236012361123621236312364123651236612367123681236912370123711237212373123741237512376123771237812379123801238112382123831238412385123861238712388123891239012391123921239312394123951239612397123981239912400124011240212403124041240512406124071240812409124101241112412124131241412415124161241712418124191242012421124221242312424124251242612427124281242912430124311243212433124341243512436124371243812439124401244112442124431244412445124461244712448124491245012451124521245312454124551245612457124581245912460124611246212463124641246512466124671246812469124701247112472124731247412475124761247712478124791248012481124821248312484124851248612487124881248912490124911249212493124941249512496124971249812499125001250112502125031250412505125061250712508125091251012511125121251312514125151251612517125181251912520125211252212523125241252512526125271252812529125301253112532125331253412535125361253712538125391254012541125421254312544125451254612547125481254912550125511255212553125541255512556125571255812559125601256112562125631256412565125661256712568125691257012571125721257312574125751257612577125781257912580125811258212583125841258512586125871258812589125901259112592125931259412595125961259712598125991260012601126021260312604
  1. # pyright: reportPropertyTypeMismatch=false
  2. from __future__ import annotations
  3. import collections
  4. import datetime as dt
  5. from functools import partial
  6. import gc
  7. from json import loads
  8. import operator
  9. import pickle
  10. import re
  11. from typing import (
  12. TYPE_CHECKING,
  13. Any,
  14. Callable,
  15. ClassVar,
  16. Hashable,
  17. Iterator,
  18. Literal,
  19. Mapping,
  20. NoReturn,
  21. Sequence,
  22. Type,
  23. cast,
  24. final,
  25. overload,
  26. )
  27. import warnings
  28. import weakref
  29. import numpy as np
  30. from pandas._config import (
  31. config,
  32. using_copy_on_write,
  33. )
  34. from pandas._libs import lib
  35. from pandas._libs.lib import is_range_indexer
  36. from pandas._libs.tslibs import (
  37. Period,
  38. Tick,
  39. Timestamp,
  40. to_offset,
  41. )
  42. from pandas._typing import (
  43. AlignJoin,
  44. AnyArrayLike,
  45. ArrayLike,
  46. Axis,
  47. AxisInt,
  48. CompressionOptions,
  49. Dtype,
  50. DtypeArg,
  51. DtypeBackend,
  52. DtypeObj,
  53. FilePath,
  54. FillnaOptions,
  55. FloatFormatType,
  56. FormattersType,
  57. Frequency,
  58. IgnoreRaise,
  59. IndexKeyFunc,
  60. IndexLabel,
  61. IntervalClosedType,
  62. JSONSerializable,
  63. Level,
  64. Manager,
  65. NaPosition,
  66. NDFrameT,
  67. RandomState,
  68. Renamer,
  69. Scalar,
  70. SortKind,
  71. StorageOptions,
  72. Suffixes,
  73. T,
  74. TimeAmbiguous,
  75. TimedeltaConvertibleTypes,
  76. TimeNonexistent,
  77. TimestampConvertibleTypes,
  78. ValueKeyFunc,
  79. WriteBuffer,
  80. npt,
  81. )
  82. from pandas.compat._optional import import_optional_dependency
  83. from pandas.compat.numpy import function as nv
  84. from pandas.errors import (
  85. AbstractMethodError,
  86. InvalidIndexError,
  87. SettingWithCopyError,
  88. SettingWithCopyWarning,
  89. )
  90. from pandas.util._decorators import doc
  91. from pandas.util._exceptions import find_stack_level
  92. from pandas.util._validators import (
  93. check_dtype_backend,
  94. validate_ascending,
  95. validate_bool_kwarg,
  96. validate_fillna_kwargs,
  97. validate_inclusive,
  98. )
  99. from pandas.core.dtypes.astype import astype_is_view
  100. from pandas.core.dtypes.common import (
  101. ensure_object,
  102. ensure_platform_int,
  103. ensure_str,
  104. is_bool,
  105. is_bool_dtype,
  106. is_datetime64_any_dtype,
  107. is_datetime64tz_dtype,
  108. is_dict_like,
  109. is_dtype_equal,
  110. is_extension_array_dtype,
  111. is_float,
  112. is_list_like,
  113. is_number,
  114. is_numeric_dtype,
  115. is_re_compilable,
  116. is_scalar,
  117. is_timedelta64_dtype,
  118. pandas_dtype,
  119. )
  120. from pandas.core.dtypes.generic import (
  121. ABCDataFrame,
  122. ABCSeries,
  123. )
  124. from pandas.core.dtypes.inference import (
  125. is_hashable,
  126. is_nested_list_like,
  127. )
  128. from pandas.core.dtypes.missing import (
  129. isna,
  130. notna,
  131. )
  132. from pandas.core import (
  133. algorithms as algos,
  134. arraylike,
  135. common,
  136. indexing,
  137. nanops,
  138. sample,
  139. )
  140. from pandas.core.array_algos.replace import should_use_regex
  141. from pandas.core.arrays import ExtensionArray
  142. from pandas.core.base import PandasObject
  143. from pandas.core.construction import extract_array
  144. from pandas.core.flags import Flags
  145. from pandas.core.indexes.api import (
  146. DatetimeIndex,
  147. Index,
  148. MultiIndex,
  149. PeriodIndex,
  150. RangeIndex,
  151. default_index,
  152. ensure_index,
  153. )
  154. from pandas.core.internals import (
  155. ArrayManager,
  156. BlockManager,
  157. SingleArrayManager,
  158. )
  159. from pandas.core.internals.construction import (
  160. mgr_to_mgr,
  161. ndarray_to_mgr,
  162. )
  163. from pandas.core.methods.describe import describe_ndframe
  164. from pandas.core.missing import (
  165. clean_fill_method,
  166. clean_reindex_fill_method,
  167. find_valid_index,
  168. )
  169. from pandas.core.ops import align_method_FRAME
  170. from pandas.core.reshape.concat import concat
  171. from pandas.core.shared_docs import _shared_docs
  172. from pandas.core.sorting import get_indexer_indexer
  173. from pandas.core.window import (
  174. Expanding,
  175. ExponentialMovingWindow,
  176. Rolling,
  177. Window,
  178. )
  179. from pandas.io.formats.format import (
  180. DataFrameFormatter,
  181. DataFrameRenderer,
  182. )
  183. from pandas.io.formats.printing import pprint_thing
  184. if TYPE_CHECKING:
  185. from pandas._libs.tslibs import BaseOffset
  186. from pandas.core.frame import DataFrame
  187. from pandas.core.indexers.objects import BaseIndexer
  188. from pandas.core.resample import Resampler
  189. from pandas.core.series import Series
  190. from pandas.io.pytables import HDFStore
# The goal is to be able to define the docs close to the function they
# describe, while still being able to share them.
# ``{**_shared_docs}`` builds a new dict, so changes made to ``_shared_docs``
# in this module do not mutate the dict imported from
# ``pandas.core.shared_docs``.
_shared_docs = {**_shared_docs}
# Named text fragments; presumably substituted into shared docstring
# templates -- confirm against the users of this dict.
# NOTE(review): the leading whitespace inside the triple-quoted values below
# is part of the runtime text; confirm it against the canonical file before
# relying on it.
_shared_doc_kwargs = {
    "axes": "keywords for axes",
    "klass": "Series/DataFrame",
    "axes_single_arg": "int or labels for object",
    "args_transpose": "axes to permute (int or label for object)",
    "inplace": """
    inplace : bool, default False
        If True, performs operation inplace and returns None.""",
    "optional_by": """
        by : str or list of str
            Name or list of names to sort by""",
    "replace_iloc": """
    This differs from updating with ``.loc`` or ``.iloc``, which require
    you to specify a location to update with some value.""",
}


bool_t = bool  # Need alias because NDFrame has def bool:
  210. class NDFrame(PandasObject, indexing.IndexingMixin):
  211. """
  212. N-dimensional analogue of DataFrame. Store multi-dimensional in a
  213. size-mutable, labeled data structure
  214. Parameters
  215. ----------
  216. data : BlockManager
  217. axes : list
  218. copy : bool, default False
  219. """
  220. _internal_names: list[str] = [
  221. "_mgr",
  222. "_cacher",
  223. "_item_cache",
  224. "_cache",
  225. "_is_copy",
  226. "_subtyp",
  227. "_name",
  228. "_default_kind",
  229. "_default_fill_value",
  230. "_metadata",
  231. "__array_struct__",
  232. "__array_interface__",
  233. "_flags",
  234. ]
  235. _internal_names_set: set[str] = set(_internal_names)
  236. _accessors: set[str] = set()
  237. _hidden_attrs: frozenset[str] = frozenset([])
  238. _metadata: list[str] = []
  239. _is_copy: weakref.ReferenceType[NDFrame] | None = None
  240. _mgr: Manager
  241. _attrs: dict[Hashable, Any]
  242. _typ: str
  243. # ----------------------------------------------------------------------
  244. # Constructors
  245. def __init__(
  246. self,
  247. data: Manager,
  248. copy: bool_t = False,
  249. attrs: Mapping[Hashable, Any] | None = None,
  250. ) -> None:
  251. # copy kwarg is retained for mypy compat, is not used
  252. object.__setattr__(self, "_is_copy", None)
  253. object.__setattr__(self, "_mgr", data)
  254. object.__setattr__(self, "_item_cache", {})
  255. if attrs is None:
  256. attrs = {}
  257. else:
  258. attrs = dict(attrs)
  259. object.__setattr__(self, "_attrs", attrs)
  260. object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True))
  261. @classmethod
  262. def _init_mgr(
  263. cls,
  264. mgr: Manager,
  265. axes,
  266. dtype: Dtype | None = None,
  267. copy: bool_t = False,
  268. ) -> Manager:
  269. """passed a manager and a axes dict"""
  270. for a, axe in axes.items():
  271. if axe is not None:
  272. axe = ensure_index(axe)
  273. bm_axis = cls._get_block_manager_axis(a)
  274. mgr = mgr.reindex_axis(axe, axis=bm_axis)
  275. # make a copy if explicitly requested
  276. if copy:
  277. mgr = mgr.copy()
  278. if dtype is not None:
  279. # avoid further copies if we can
  280. if (
  281. isinstance(mgr, BlockManager)
  282. and len(mgr.blocks) == 1
  283. and is_dtype_equal(mgr.blocks[0].values.dtype, dtype)
  284. ):
  285. pass
  286. else:
  287. mgr = mgr.astype(dtype=dtype)
  288. return mgr
  289. def _as_manager(self: NDFrameT, typ: str, copy: bool_t = True) -> NDFrameT:
  290. """
  291. Private helper function to create a DataFrame with specific manager.
  292. Parameters
  293. ----------
  294. typ : {"block", "array"}
  295. copy : bool, default True
  296. Only controls whether the conversion from Block->ArrayManager
  297. copies the 1D arrays (to ensure proper/contiguous memory layout).
  298. Returns
  299. -------
  300. DataFrame
  301. New DataFrame using specified manager type. Is not guaranteed
  302. to be a copy or not.
  303. """
  304. new_mgr: Manager
  305. new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy)
  306. # fastpath of passing a manager doesn't check the option/manager class
  307. return self._constructor(new_mgr).__finalize__(self)
  308. # ----------------------------------------------------------------------
  309. # attrs and flags
  310. @property
  311. def attrs(self) -> dict[Hashable, Any]:
  312. """
  313. Dictionary of global attributes of this dataset.
  314. .. warning::
  315. attrs is experimental and may change without warning.
  316. See Also
  317. --------
  318. DataFrame.flags : Global flags applying to this object.
  319. """
  320. if self._attrs is None:
  321. self._attrs = {}
  322. return self._attrs
  323. @attrs.setter
  324. def attrs(self, value: Mapping[Hashable, Any]) -> None:
  325. self._attrs = dict(value)
  326. @final
  327. @property
  328. def flags(self) -> Flags:
  329. """
  330. Get the properties associated with this pandas object.
  331. The available flags are
  332. * :attr:`Flags.allows_duplicate_labels`
  333. See Also
  334. --------
  335. Flags : Flags that apply to pandas objects.
  336. DataFrame.attrs : Global metadata applying to this dataset.
  337. Notes
  338. -----
  339. "Flags" differ from "metadata". Flags reflect properties of the
  340. pandas object (the Series or DataFrame). Metadata refer to properties
  341. of the dataset, and should be stored in :attr:`DataFrame.attrs`.
  342. Examples
  343. --------
  344. >>> df = pd.DataFrame({"A": [1, 2]})
  345. >>> df.flags
  346. <Flags(allows_duplicate_labels=True)>
  347. Flags can be get or set using ``.``
  348. >>> df.flags.allows_duplicate_labels
  349. True
  350. >>> df.flags.allows_duplicate_labels = False
  351. Or by slicing with a key
  352. >>> df.flags["allows_duplicate_labels"]
  353. False
  354. >>> df.flags["allows_duplicate_labels"] = True
  355. """
  356. return self._flags
  @final
  def set_flags(
      self: NDFrameT,
      *,
      copy: bool_t = False,
      allows_duplicate_labels: bool_t | None = None,
  ) -> NDFrameT:
      """
      Build a new object carrying updated flags.

      Parameters
      ----------
      copy : bool, default False
          Whether the returned object should be a copy of the caller.
      allows_duplicate_labels : bool, optional
          New value for the ``allows_duplicate_labels`` flag of the result.
          When omitted, the flag is left as it is.

      Returns
      -------
      Series or DataFrame
          An object of the caller's type.

      See Also
      --------
      DataFrame.attrs : Global metadata applying to this dataset.
      DataFrame.flags : Global flags applying to this object.

      Notes
      -----
      The result is a new object that is a view on the caller's data, so
      changes to the values of one are visible through the other.

      The method is meant for use inside method chains.

      "Flags" are not "metadata": flags describe the pandas object itself
      (the Series or DataFrame), whereas metadata describe the dataset and
      belong in :attr:`DataFrame.attrs`.

      Examples
      --------
      >>> df = pd.DataFrame({"A": [1, 2]})
      >>> df.flags.allows_duplicate_labels
      True
      >>> df2 = df.set_flags(allows_duplicate_labels=False)
      >>> df2.flags.allows_duplicate_labels
      False
      """
      # A deep copy is only requested when copy-on-write is not active.
      want_deep = copy and not using_copy_on_write()
      flagged = self.copy(deep=want_deep)
      if allows_duplicate_labels is None:
          return flagged
      flagged.flags["allows_duplicate_labels"] = allows_duplicate_labels
      return flagged
  402. @final
  403. @classmethod
  404. def _validate_dtype(cls, dtype) -> DtypeObj | None:
  405. """validate the passed dtype"""
  406. if dtype is not None:
  407. dtype = pandas_dtype(dtype)
  408. # a compound dtype
  409. if dtype.kind == "V":
  410. raise NotImplementedError(
  411. "compound dtypes are not implemented "
  412. f"in the {cls.__name__} constructor"
  413. )
  414. return dtype
  415. # ----------------------------------------------------------------------
  416. # Construction
  417. @property
  418. def _constructor(self: NDFrameT) -> Callable[..., NDFrameT]:
  419. """
  420. Used when a manipulation result has the same dimensions as the
  421. original.
  422. """
  423. raise AbstractMethodError(self)
  424. # ----------------------------------------------------------------------
  425. # Internals
  426. @final
  427. @property
  428. def _data(self):
  429. # GH#33054 retained because some downstream packages uses this,
  430. # e.g. fastparquet
  431. return self._mgr
  432. # ----------------------------------------------------------------------
  433. # Axis
  # Number and attribute name of the "stat" axis; ``_stat_axis`` looks the
  # name up with ``getattr``.
  _stat_axis_number = 0
  _stat_axis_name = "index"
  # Axis names, indexed by axis number (see ``_get_axis_name``).
  _AXIS_ORDERS: list[Literal["index", "columns"]]
  # Accepted axis aliases mapped to their axis number (see ``_get_axis_number``).
  _AXIS_TO_AXIS_NUMBER: dict[Axis, AxisInt] = {0: 0, "index": 0, "rows": 0}
  # Number and attribute name of the "info" axis; ``_info_axis`` looks the
  # name up with ``getattr``.
  _info_axis_number: int
  _info_axis_name: Literal["index", "columns"]
  # Number of axes; ``_get_block_manager_axis`` treats 2 as the DataFrame case.
  _AXIS_LEN: int
  441. @final
  442. def _construct_axes_dict(self, axes: Sequence[Axis] | None = None, **kwargs):
  443. """Return an axes dictionary for myself."""
  444. d = {a: self._get_axis(a) for a in (axes or self._AXIS_ORDERS)}
  445. # error: Argument 1 to "update" of "MutableMapping" has incompatible type
  446. # "Dict[str, Any]"; expected "SupportsKeysAndGetItem[Union[int, str], Any]"
  447. d.update(kwargs) # type: ignore[arg-type]
  448. return d
  449. @final
  450. @classmethod
  451. def _get_axis_number(cls, axis: Axis) -> AxisInt:
  452. try:
  453. return cls._AXIS_TO_AXIS_NUMBER[axis]
  454. except KeyError:
  455. raise ValueError(f"No axis named {axis} for object type {cls.__name__}")
  456. @final
  457. @classmethod
  458. def _get_axis_name(cls, axis: Axis) -> Literal["index", "columns"]:
  459. axis_number = cls._get_axis_number(axis)
  460. return cls._AXIS_ORDERS[axis_number]
  461. @final
  462. def _get_axis(self, axis: Axis) -> Index:
  463. axis_number = self._get_axis_number(axis)
  464. assert axis_number in {0, 1}
  465. return self.index if axis_number == 0 else self.columns
  466. @final
  467. @classmethod
  468. def _get_block_manager_axis(cls, axis: Axis) -> AxisInt:
  469. """Map the axis to the block_manager axis."""
  470. axis = cls._get_axis_number(axis)
  471. ndim = cls._AXIS_LEN
  472. if ndim == 2:
  473. # i.e. DataFrame
  474. return 1 - axis
  475. return axis
  476. @final
  477. def _get_axis_resolvers(self, axis: str) -> dict[str, Series | MultiIndex]:
  478. # index or columns
  479. axis_index = getattr(self, axis)
  480. d = {}
  481. prefix = axis[0]
  482. for i, name in enumerate(axis_index.names):
  483. if name is not None:
  484. key = level = name
  485. else:
  486. # prefix with 'i' or 'c' depending on the input axis
  487. # e.g., you must do ilevel_0 for the 0th level of an unnamed
  488. # multiiindex
  489. key = f"{prefix}level_{i}"
  490. level = i
  491. level_values = axis_index.get_level_values(level)
  492. s = level_values.to_series()
  493. s.index = axis_index
  494. d[key] = s
  495. # put the index/columns itself in the dict
  496. if isinstance(axis_index, MultiIndex):
  497. dindex = axis_index
  498. else:
  499. dindex = axis_index.to_series()
  500. d[axis] = dindex
  501. return d
  502. @final
  503. def _get_index_resolvers(self) -> dict[Hashable, Series | MultiIndex]:
  504. from pandas.core.computation.parsing import clean_column_name
  505. d: dict[str, Series | MultiIndex] = {}
  506. for axis_name in self._AXIS_ORDERS:
  507. d.update(self._get_axis_resolvers(axis_name))
  508. return {clean_column_name(k): v for k, v in d.items() if not isinstance(k, int)}
  509. @final
  510. def _get_cleaned_column_resolvers(self) -> dict[Hashable, Series]:
  511. """
  512. Return the special character free column resolvers of a dataframe.
  513. Column names with special characters are 'cleaned up' so that they can
  514. be referred to by backtick quoting.
  515. Used in :meth:`DataFrame.eval`.
  516. """
  517. from pandas.core.computation.parsing import clean_column_name
  518. if isinstance(self, ABCSeries):
  519. return {clean_column_name(self.name): self}
  520. return {
  521. clean_column_name(k): v for k, v in self.items() if not isinstance(k, int)
  522. }
  523. @property
  524. def _info_axis(self) -> Index:
  525. return getattr(self, self._info_axis_name)
  526. @property
  527. def _stat_axis(self) -> Index:
  528. return getattr(self, self._stat_axis_name)
  @property
  def shape(self) -> tuple[int, ...]:
      """
      Return a tuple holding the length of each axis, in ``_AXIS_ORDERS`` order.
      """
      lengths = [len(self._get_axis(name)) for name in self._AXIS_ORDERS]
      return tuple(lengths)
  @property
  def axes(self) -> list[Index]:
      """
      Return the index label(s) of the internal NDFrame as a list.
      """
      # Built from _AXIS_ORDERS rather than read off the block manager,
      # because the manager shows reversed axes reversed.
      return list(map(self._get_axis, self._AXIS_ORDERS))
  @property
  def ndim(self) -> int:
      """
      Return the number of axes / array dimensions as an int.

      The value is 1 for a Series and 2 for a DataFrame.

      See Also
      --------
      ndarray.ndim : Number of array dimensions.

      Examples
      --------
      >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
      >>> s.ndim
      1

      >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
      >>> df.ndim
      2
      """
      manager = self._mgr
      return manager.ndim
  @property
  def size(self) -> int:
      """
      Return an int representing the number of elements in this object.

      Return the number of rows if Series. Otherwise return the number of
      rows times number of columns if DataFrame.

      Returns
      -------
      int
          Product of the entries of :attr:`shape`, as a built-in ``int``.

      See Also
      --------
      ndarray.size : Number of elements in the array.

      Examples
      --------
      >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3})
      >>> s.size
      3

      >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
      >>> df.size
      4
      """
      # np.prod yields a NumPy integer scalar; convert it so the value
      # matches the annotated return type.
      return int(np.prod(self.shape))
  def set_axis(
      self: NDFrameT,
      labels,
      *,
      axis: Axis = 0,
      copy: bool_t | None = None,
  ) -> NDFrameT:
      """
      Assign desired index to given axis.

      Indexes for%(extended_summary_sub)s row labels can be changed by assigning
      a list-like or Index.

      Parameters
      ----------
      labels : list-like, Index
          The values for the new index.

      axis : %(axes_single_arg)s, default 0
          The axis to update. The value 0 identifies the rows. For `Series`
          this parameter is unused and defaults to 0.

      copy : bool, default True
          Whether to make a copy of the underlying data.

          .. versionadded:: 1.5.0

      Returns
      -------
      %(klass)s
          An object of type %(klass)s.

      See Also
      --------
      %(klass)s.rename_axis : Alter the name of the index%(see_also_sub)s.
      """
      # NOTE(review): the docstring carries %(...)s placeholders, so it is
      # presumably filled in elsewhere by %-substitution -- keep them intact.
      # inplace is fixed to False here, so the helper works on a new object
      # and returns it.
      return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy)
  @final
  def _set_axis_nocheck(
      self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None
  ):
      """
      Assign ``labels`` to the given axis.

      With ``inplace`` the labels are set on ``self`` and ``None`` is
      returned; otherwise they are set on a copy of ``self``, which is
      returned.
      """
      if inplace:
          setattr(self, self._get_axis_name(axis), labels)
          return None

      # A new object is always created; its data are only deep-copied when
      # ``copy`` is truthy and copy-on-write is off.
      want_deep = copy and not using_copy_on_write()
      relabeled = self.copy(deep=want_deep)
      setattr(relabeled, relabeled._get_axis_name(axis), labels)
      return relabeled
  @final
  def _set_axis(self, axis: AxisInt, labels: AnyArrayLike | list) -> None:
      """
      Install ``labels`` as the labels of ``axis`` on the manager.

      Called from the cython code when the `index` attribute is set
      directly, e.g. `series.index = [1, 2, 3]`.
      """
      new_index = ensure_index(labels)
      self._mgr.set_axis(axis, new_index)
      self._clear_item_cache()
  @final
  def swapaxes(
      self: NDFrameT, axis1: Axis, axis2: Axis, copy: bool_t | None = None
  ) -> NDFrameT:
      """
      Interchange axes and swap values axes appropriately.

      Parameters
      ----------
      axis1, axis2 : axis alias
          The two axes to exchange; each is resolved with
          ``_get_axis_number``.
      copy : bool, optional
          Controls whether the swapped values are copied (see the inline
          comments for the exact conditions).

      Returns
      -------
      same as input
      """
      i = self._get_axis_number(axis1)
      j = self._get_axis_number(axis2)

      # Swapping an axis with itself: nothing to exchange, hand back a copy.
      if i == j:
          return self.copy(deep=copy and not using_copy_on_write())

      mapping = {i: j, j: i}

      # Axis k of the result takes its labels from the axis it is swapped with.
      new_axes = [self._get_axis(mapping.get(k, k)) for k in range(self._AXIS_LEN)]
      new_values = self._values.swapaxes(i, j)  # type: ignore[union-attr]
      if (
          using_copy_on_write()
          and self._mgr.is_single_block
          and isinstance(self._mgr, BlockManager)
      ):
          # This should only get hit in case of having a single block, otherwise a
          # copy is made, we don't have to set up references.
          new_mgr = ndarray_to_mgr(
              new_values,
              new_axes[0],
              new_axes[1],
              dtype=None,
              copy=False,
              typ="block",
          )
          assert isinstance(new_mgr, BlockManager)
          assert isinstance(self._mgr, BlockManager)
          # The new block reuses the reference tracker of the original first
          # block and is then registered with it.
          new_mgr.blocks[0].refs = self._mgr.blocks[0].refs
          new_mgr.blocks[0].refs.add_reference(
              new_mgr.blocks[0]  # type: ignore[arg-type]
          )
          return self._constructor(new_mgr).__finalize__(self, method="swapaxes")

      elif (copy or copy is None) and self._mgr.is_single_block:
          # copy=None is treated like copy=True for a single block.
          new_values = new_values.copy()

      return self._constructor(
          new_values,
          *new_axes,
          # The no-copy case for CoW is handled above
          copy=False,
      ).__finalize__(self, method="swapaxes")
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT:
      """
      Return {klass} with requested index / column level(s) removed.

      Parameters
      ----------
      level : int, str, or list-like
          If a string is given, must be the name of a level
          If list-like, elements must be names or positional indexes
          of levels.

      axis : {{0 or 'index', 1 or 'columns'}}, default 0
          Axis whose labels lose the level(s):

          * 0 or 'index': remove level(s) from the index (row labels).
          * 1 or 'columns': remove level(s) from the columns.

          For `Series` this parameter is unused and defaults to 0.

      Returns
      -------
      {klass}
          {klass} with requested index / column level(s) removed.

      Examples
      --------
      >>> df = pd.DataFrame([
      ...     [1, 2, 3, 4],
      ...     [5, 6, 7, 8],
      ...     [9, 10, 11, 12]
      ... ]).set_index([0, 1]).rename_axis(['a', 'b'])

      >>> df.columns = pd.MultiIndex.from_tuples([
      ...     ('c', 'e'), ('d', 'f')
      ... ], names=['level_1', 'level_2'])

      >>> df
      level_1   c   d
      level_2   e   f
      a b
      1 2      3   4
      5 6      7   8
      9 10    11  12

      >>> df.droplevel('a')
      level_1   c   d
      level_2   e   f
      b
      2        3   4
      6        7   8
      10      11  12

      >>> df.droplevel('level_2', axis=1)
      level_1   c   d
      a b
      1 2      3   4
      5 6      7   8
      9 10    11  12
      """
      # Drop the level(s) on the labels of the chosen axis, then install the
      # shortened labels on a new object through set_axis.
      labels = self._get_axis(axis)
      new_labels = labels.droplevel(level)
      return self.set_axis(new_labels, axis=axis, copy=None)
  def pop(self, item: Hashable) -> Series | Any:
      """
      Remove ``item`` from the object and return what was stored under it.

      The value is read with ``self[item]`` and the entry is then deleted
      with ``del self[item]``.
      """
      popped = self[item]
      del self[item]
      return popped
  @final
  def squeeze(self, axis: Axis | None = None):
      """
      Squeeze 1 dimensional axis objects into scalars.

      A Series or DataFrame holding a single element is squeezed to a
      scalar. A DataFrame with a single column or a single row is squeezed
      to a Series. Anything else is returned unchanged.

      This is most useful when you do not know whether you hold a Series or
      a DataFrame, but do know it has just one column: calling `squeeze`
      then guarantees a Series.

      Parameters
      ----------
      axis : {0 or 'index', 1 or 'columns', None}, default None
          A specific axis to squeeze. By default, all length-1 axes are
          squeezed. For `Series` this parameter is unused and defaults to `None`.

      Returns
      -------
      DataFrame, Series, or scalar
          The projection after squeezing `axis` or all the axes.

      See Also
      --------
      Series.iloc : Integer-location based indexing for selecting scalars.
      DataFrame.iloc : Integer-location based indexing for selecting Series.
      Series.to_frame : Inverse of DataFrame.squeeze for a
          single-column DataFrame.

      Examples
      --------
      >>> primes = pd.Series([2, 3, 5, 7])

      A slice may leave a Series with a single value, which squeezes to a
      scalar:

      >>> even_primes = primes[primes % 2 == 0]
      >>> even_primes
      0    2
      dtype: int64

      >>> even_primes.squeeze()
      2

      Objects with more than one value along every axis are left alone:

      >>> odd_primes = primes[primes % 2 == 1]
      >>> odd_primes.squeeze()
      1    3
      2    5
      3    7
      dtype: int64

      A single-column DataFrame squeezes down to a Series:

      >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
      >>> df_a = df[['a']]
      >>> df_a
         a
      0  1
      1  3

      >>> df_a.squeeze('columns')
      0    1
      1    3
      Name: a, dtype: int64

      A one-row, one-column DataFrame can be squeezed along one axis or
      along all of them:

      >>> df_0a = df.loc[df.index < 1, ['a']]
      >>> df_0a
         a
      0  1

      >>> df_0a.squeeze('rows')
      a    1
      Name: 0, dtype: int64

      >>> df_0a.squeeze()
      1
      """
      if axis is None:
          candidates = range(self._AXIS_LEN)
      else:
          candidates = (self._get_axis_number(axis),)

      # Position 0 collapses a length-1 candidate axis; every other axis is
      # kept whole with a full slice.
      indexer = []
      for position, labels in enumerate(self.axes):
          if position in candidates and len(labels) == 1:
              indexer.append(0)
          else:
              indexer.append(slice(None))
      return self.iloc[tuple(indexer)]
  825. # ----------------------------------------------------------------------
  826. # Rename
  def _rename(
      self: NDFrameT,
      mapper: Renamer | None = None,
      *,
      index: Renamer | None = None,
      columns: Renamer | None = None,
      axis: Axis | None = None,
      copy: bool_t | None = None,
      inplace: bool_t = False,
      level: Level | None = None,
      errors: str = "ignore",
  ) -> NDFrameT | None:
      """
      Shared implementation behind ``Series.rename`` and ``DataFrame.rename``.

      Parameters
      ----------
      mapper : Renamer, optional
          Replacements for the axis selected by ``axis``. Mutually exclusive
          with ``index`` / ``columns``.
      index, columns : Renamer, optional
          Replacements for the index and the columns respectively.
      axis : Axis, optional
          Axis that ``mapper`` targets; it goes to the columns only when it
          resolves to axis number 1, otherwise to the index.
      copy : bool, optional
          Forwarded to ``self.copy`` when not operating in place.
      inplace : bool, default False
          Rename on ``self`` and return ``None``.
      level : Level, optional
          Level to rename on; resolved with the axis' ``_get_level_number``.
      errors : str, default "ignore"
          With ``"raise"``, labels in a non-callable replacement that are not
          found in the axis raise ``KeyError``.

      Returns
      -------
      Same type as caller, or None when ``inplace`` is true.

      Raises
      ------
      TypeError
          If none of ``mapper``, ``index``, ``columns`` is given, or if
          ``index``/``columns`` are combined with ``axis`` or ``mapper``.
      KeyError
          If ``errors == "raise"`` and some labels are missing from the axis.
      """
      # called by Series.rename and DataFrame.rename

      if mapper is None and index is None and columns is None:
          raise TypeError("must pass an index to rename")

      if index is not None or columns is not None:
          if axis is not None:
              raise TypeError(
                  "Cannot specify both 'axis' and any of 'index' or 'columns'"
              )
          if mapper is not None:
              raise TypeError(
                  "Cannot specify both 'mapper' and any of 'index' or 'columns'"
              )
      else:
          # use the mapper argument
          if axis and self._get_axis_number(axis) == 1:
              columns = mapper
          else:
              index = mapper

      self._check_inplace_and_allows_duplicate_labels(inplace)
      result = self if inplace else self.copy(deep=copy and not using_copy_on_write())

      # Axis 0 takes ``index`` and axis 1 takes ``columns``.
      for axis_no, replacements in enumerate((index, columns)):
          if replacements is None:
              continue

          ax = self._get_axis(axis_no)
          f = common.get_rename_function(replacements)

          # NOTE(review): ``level`` is rebound to the resolved level number,
          # so when both axes are renamed the second axis sees the number
          # resolved against the first one -- confirm this is intended.
          if level is not None:
              level = ax._get_level_number(level)

          # GH 13473
          if not callable(replacements):
              if ax._is_multi and level is not None:
                  indexer = ax.get_level_values(level).get_indexer_for(replacements)
              else:
                  indexer = ax.get_indexer_for(replacements)

              # -1 in the indexer marks a replacement key absent from the axis.
              if errors == "raise" and len(indexer[indexer == -1]):
                  missing_labels = [
                      label
                      for index, label in enumerate(replacements)
                      if indexer[index] == -1
                  ]
                  raise KeyError(f"{missing_labels} not found in axis")

          new_index = ax._transform_index(f, level=level)
          result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False)
          result._clear_item_cache()

      if inplace:
          self._update_inplace(result)
          return None
      else:
          return result.__finalize__(self, method="rename")
  # Typing overloads for ``rename_axis``: the return type depends on ``inplace``.

  # inplace=False (or omitted): a new object of the caller's type is returned.
  @overload
  def rename_axis(
      self: NDFrameT,
      mapper: IndexLabel | lib.NoDefault = ...,
      *,
      index=...,
      columns=...,
      axis: Axis = ...,
      copy: bool_t | None = ...,
      inplace: Literal[False] = ...,
  ) -> NDFrameT:
      ...

  # inplace=True: nothing is returned.
  @overload
  def rename_axis(
      self,
      mapper: IndexLabel | lib.NoDefault = ...,
      *,
      index=...,
      columns=...,
      axis: Axis = ...,
      copy: bool_t | None = ...,
      inplace: Literal[True],
  ) -> None:
      ...

  # inplace given as a plain bool: either outcome is possible.
  @overload
  def rename_axis(
      self: NDFrameT,
      mapper: IndexLabel | lib.NoDefault = ...,
      *,
      index=...,
      columns=...,
      axis: Axis = ...,
      copy: bool_t | None = ...,
      inplace: bool_t = ...,
  ) -> NDFrameT | None:
      ...
  def rename_axis(
      self: NDFrameT,
      mapper: IndexLabel | lib.NoDefault = lib.no_default,
      *,
      index=lib.no_default,
      columns=lib.no_default,
      axis: Axis = 0,
      copy: bool_t | None = None,
      inplace: bool_t = False,
  ) -> NDFrameT | None:
      """
      Set the name of the axis for the index or columns.

      Parameters
      ----------
      mapper : scalar, list-like, optional
          Value to set the axis name attribute.
      index, columns : scalar, list-like, dict-like or function, optional
          A scalar, list-like, dict-like or functions transformations to
          apply to that axis' values.
          Note that the ``columns`` parameter is not allowed if the
          object is a Series. This parameter only apply for DataFrame
          type objects.

          Use either ``mapper`` and ``axis`` to
          specify the axis to target with ``mapper``, or ``index``
          and/or ``columns``.
      axis : {0 or 'index', 1 or 'columns'}, default 0
          The axis to rename. For `Series` this parameter is unused and defaults to 0.
      copy : bool, default None
          Also copy underlying data.
      inplace : bool, default False
          Modifies the object directly, instead of creating a new Series
          or DataFrame.

      Returns
      -------
      Series, DataFrame, or None
          The same type as the caller or None if ``inplace=True``.

      Raises
      ------
      ValueError
          If ``mapper`` is given but is neither a scalar nor a non-dict
          list-like.

      See Also
      --------
      Series.rename : Alter Series index labels or name.
      DataFrame.rename : Alter DataFrame index labels or name.
      Index.rename : Set new names on index.

      Notes
      -----
      ``DataFrame.rename_axis`` supports two calling conventions

      * ``(index=index_mapper, columns=columns_mapper, ...)``
      * ``(mapper, axis={'index', 'columns'}, ...)``

      The first calling convention only modifies the names of the index
      and/or the names of the Index object that is the columns.

      The second calling convention sets the names of the targeted axis
      when ``mapper`` is a scalar or a list. A dict-like or callable
      ``mapper`` is rejected; use ``.rename`` to alter labels.

      We *highly* recommend using keyword arguments to clarify your
      intent.

      Examples
      --------
      **Series**

      >>> s = pd.Series(["dog", "cat", "monkey"])
      >>> s
      0       dog
      1       cat
      2    monkey
      dtype: object
      >>> s.rename_axis("animal")
      animal
      0    dog
      1    cat
      2    monkey
      dtype: object

      **DataFrame**

      >>> df = pd.DataFrame({"num_legs": [4, 4, 2],
      ...                    "num_arms": [0, 0, 2]},
      ...                   ["dog", "cat", "monkey"])
      >>> df
              num_legs  num_arms
      dog            4         0
      cat            4         0
      monkey         2         2
      >>> df = df.rename_axis("animal")
      >>> df
              num_legs  num_arms
      animal
      dog            4         0
      cat            4         0
      monkey         2         2
      >>> df = df.rename_axis("limbs", axis="columns")
      >>> df
      limbs   num_legs  num_arms
      animal
      dog            4         0
      cat            4         0
      monkey         2         2

      **MultiIndex**

      >>> df.index = pd.MultiIndex.from_product([['mammal'],
      ...                                        ['dog', 'cat', 'monkey']],
      ...                                       names=['type', 'name'])
      >>> df
      limbs          num_legs  num_arms
      type   name
      mammal dog            4         0
             cat            4         0
             monkey         2         2

      >>> df.rename_axis(index={'type': 'class'})
      limbs          num_legs  num_arms
      class  name
      mammal dog            4         0
             cat            4         0
             monkey         2         2

      >>> df.rename_axis(columns=str.upper)
      LIMBS          num_legs  num_arms
      type   name
      mammal dog            4         0
             cat            4         0
             monkey         2         2
      """

      def _is_plain_names(value):
          # Scalars and non-dict list-likes are taken as the new names
          # themselves rather than as a transformation of the old ones.
          return is_scalar(value) or (
              is_list_like(value) and not is_dict_like(value)
          )

      requested = {"index": index, "columns": columns}

      if axis is not None:
          axis = self._get_axis_number(axis)

      inplace = validate_bool_kwarg(inplace, "inplace")

      if copy and using_copy_on_write():
          copy = False

      if mapper is not lib.no_default:
          # Use v0.23 behavior if a scalar or list
          if not _is_plain_names(mapper):
              raise ValueError("Use `.rename` to alter labels with a mapper.")
          return self._set_axis_name(mapper, axis=axis, inplace=inplace, copy=copy)

      # Use new behavior. Means that index and/or columns is specified
      target = self if inplace else self.copy(deep=copy)

      for axis_no in range(self._AXIS_LEN):
          spec = requested.get(self._get_axis_name(axis_no))
          if spec is lib.no_default:
              continue
          if _is_plain_names(spec):
              new_names = spec
          else:
              transform = common.get_rename_function(spec)
              new_names = [transform(old) for old in self._get_axis(axis_no).names]
          target._set_axis_name(new_names, axis=axis_no, inplace=True, copy=copy)

      return None if inplace else target
  1075. @final
  1076. def _set_axis_name(
  1077. self, name, axis: Axis = 0, inplace: bool_t = False, copy: bool_t | None = True
  1078. ):
  1079. """
  1080. Set the name(s) of the axis.
  1081. Parameters
  1082. ----------
  1083. name : str or list of str
  1084. Name(s) to set.
  1085. axis : {0 or 'index', 1 or 'columns'}, default 0
  1086. The axis to set the label. The value 0 or 'index' specifies index,
  1087. and the value 1 or 'columns' specifies columns.
  1088. inplace : bool, default False
  1089. If `True`, do operation inplace and return None.
  1090. copy:
  1091. Whether to make a copy of the result.
  1092. Returns
  1093. -------
  1094. Series, DataFrame, or None
  1095. The same type as the caller or `None` if `inplace` is `True`.
  1096. See Also
  1097. --------
  1098. DataFrame.rename : Alter the axis labels of :class:`DataFrame`.
  1099. Series.rename : Alter the index labels or set the index name
  1100. of :class:`Series`.
  1101. Index.rename : Set the name of :class:`Index` or :class:`MultiIndex`.
  1102. Examples
  1103. --------
  1104. >>> df = pd.DataFrame({"num_legs": [4, 4, 2]},
  1105. ... ["dog", "cat", "monkey"])
  1106. >>> df
  1107. num_legs
  1108. dog 4
  1109. cat 4
  1110. monkey 2
  1111. >>> df._set_axis_name("animal")
  1112. num_legs
  1113. animal
  1114. dog 4
  1115. cat 4
  1116. monkey 2
  1117. >>> df.index = pd.MultiIndex.from_product(
  1118. ... [["mammal"], ['dog', 'cat', 'monkey']])
  1119. >>> df._set_axis_name(["type", "name"])
  1120. num_legs
  1121. type name
  1122. mammal dog 4
  1123. cat 4
  1124. monkey 2
  1125. """
  1126. axis = self._get_axis_number(axis)
  1127. idx = self._get_axis(axis).set_names(name)
  1128. inplace = validate_bool_kwarg(inplace, "inplace")
  1129. renamed = self if inplace else self.copy(deep=copy)
  1130. if axis == 0:
  1131. renamed.index = idx
  1132. else:
  1133. renamed.columns = idx
  1134. if not inplace:
  1135. return renamed
  1136. # ----------------------------------------------------------------------
  1137. # Comparison Methods
  1138. @final
  1139. def _indexed_same(self, other) -> bool_t:
  1140. return all(
  1141. self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS
  1142. )
  @final
  def equals(self, other: object) -> bool_t:
      """
      Test whether two objects contain the same elements.

      Two Series or DataFrames are compared to see whether they have the
      same shape and elements. NaNs in the same location are considered
      equal.

      The row/column index do not need to have the same type, as long
      as the values are considered equal. Corresponding columns must be of
      the same dtype.

      Parameters
      ----------
      other : Series or DataFrame
          The other Series or DataFrame to be compared with the first.

      Returns
      -------
      bool
          True if all elements are the same in both objects, False
          otherwise.

      See Also
      --------
      Series.eq : Compare two Series objects of the same length
          and return a Series where each element is True if the element
          in each Series is equal, False otherwise.
      DataFrame.eq : Compare two DataFrame objects of the same shape and
          return a DataFrame where each element is True if the respective
          element in each DataFrame is equal, False otherwise.
      testing.assert_series_equal : Raises an AssertionError if left and
          right are not equal. Provides an easy interface to ignore
          inequality in dtypes, indexes and precision among others.
      testing.assert_frame_equal : Like assert_series_equal, but targets
          DataFrames.
      numpy.array_equal : Return True if two arrays have the same shape
          and elements, False otherwise.

      Examples
      --------
      >>> df = pd.DataFrame({1: [10], 2: [20]})
      >>> df
          1   2
      0  10  20

      Same element types, values and column labels compare equal:

      >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]})
      >>> df.equals(exactly_equal)
      True

      Column labels of a different type but equal value still compare
      equal:

      >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]})
      >>> different_column_type
         1.0  2.0
      0   10   20
      >>> df.equals(different_column_type)
      True

      Equal values stored with a different element dtype do not:

      >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]})
      >>> different_data_type
            1     2
      0  10.0  20.0
      >>> df.equals(different_data_type)
      False
      """
      # The two objects must be related by type in one direction or the other.
      related = isinstance(other, type(self)) or isinstance(self, type(other))
      if not related:
          return False
      other = cast(NDFrame, other)
      return self._mgr.equals(other._mgr)
  1214. # -------------------------------------------------------------------------
  1215. # Unary Methods
  1216. @final
  1217. def __neg__(self: NDFrameT) -> NDFrameT:
  1218. def blk_func(values: ArrayLike):
  1219. if is_bool_dtype(values.dtype):
  1220. # error: Argument 1 to "inv" has incompatible type "Union
  1221. # [ExtensionArray, ndarray[Any, Any]]"; expected
  1222. # "_SupportsInversion[ndarray[Any, dtype[bool_]]]"
  1223. return operator.inv(values) # type: ignore[arg-type]
  1224. else:
  1225. # error: Argument 1 to "neg" has incompatible type "Union
  1226. # [ExtensionArray, ndarray[Any, Any]]"; expected
  1227. # "_SupportsNeg[ndarray[Any, dtype[Any]]]"
  1228. return operator.neg(values) # type: ignore[arg-type]
  1229. new_data = self._mgr.apply(blk_func)
  1230. res = self._constructor(new_data)
  1231. return res.__finalize__(self, method="__neg__")
  1232. @final
  1233. def __pos__(self: NDFrameT) -> NDFrameT:
  1234. def blk_func(values: ArrayLike):
  1235. if is_bool_dtype(values.dtype):
  1236. return values.copy()
  1237. else:
  1238. # error: Argument 1 to "pos" has incompatible type "Union
  1239. # [ExtensionArray, ndarray[Any, Any]]"; expected
  1240. # "_SupportsPos[ndarray[Any, dtype[Any]]]"
  1241. return operator.pos(values) # type: ignore[arg-type]
  1242. new_data = self._mgr.apply(blk_func)
  1243. res = self._constructor(new_data)
  1244. return res.__finalize__(self, method="__pos__")
  1245. @final
  1246. def __invert__(self: NDFrameT) -> NDFrameT:
  1247. if not self.size:
  1248. # inv fails with 0 len
  1249. return self.copy(deep=False)
  1250. new_data = self._mgr.apply(operator.invert)
  1251. return self._constructor(new_data).__finalize__(self, method="__invert__")
  1252. @final
  1253. def __nonzero__(self) -> NoReturn:
  1254. raise ValueError(
  1255. f"The truth value of a {type(self).__name__} is ambiguous. "
  1256. "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
  1257. )
  1258. __bool__ = __nonzero__
  @final
  def bool(self) -> bool_t:
      """
      Return the bool of a single element Series or DataFrame.

      The element must be a boolean scalar, either True or False. A
      ValueError is raised if the Series or DataFrame does not hold exactly
      1 element, or if that element is not boolean (integer values 0 and 1
      raise as well).

      Returns
      -------
      bool
          The value in the Series or DataFrame.

      See Also
      --------
      Series.astype : Change the data type of a Series, including to boolean.
      DataFrame.astype : Change the data type of a DataFrame, including to boolean.
      numpy.bool_ : NumPy boolean data type, used by pandas for boolean values.

      Examples
      --------
      The method only works for single element objects with a boolean value:

      >>> pd.Series([True]).bool()
      True
      >>> pd.Series([False]).bool()
      False

      >>> pd.DataFrame({'col': [True]}).bool()
      True
      >>> pd.DataFrame({'col': [False]}).bool()
      False
      """
      squeezed = self.squeeze()
      if isinstance(squeezed, (bool, np.bool_)):
          return bool(squeezed)

      if is_scalar(squeezed):
          raise ValueError(
              "bool cannot act on a non-boolean single element "
              f"{type(self).__name__}"
          )

      # More than one element: let __nonzero__ raise the ambiguity error.
      self.__nonzero__()
      # for mypy (__nonzero__ raises)
      return True
  1298. @final
  1299. def abs(self: NDFrameT) -> NDFrameT:
  1300. """
  1301. Return a Series/DataFrame with absolute numeric value of each element.
  1302. This function only applies to elements that are all numeric.
  1303. Returns
  1304. -------
  1305. abs
  1306. Series/DataFrame containing the absolute value of each element.
  1307. See Also
  1308. --------
  1309. numpy.absolute : Calculate the absolute value element-wise.
  1310. Notes
  1311. -----
  1312. For ``complex`` inputs, ``1.2 + 1j``, the absolute value is
  1313. :math:`\\sqrt{ a^2 + b^2 }`.
  1314. Examples
  1315. --------
  1316. Absolute numeric values in a Series.
  1317. >>> s = pd.Series([-1.10, 2, -3.33, 4])
  1318. >>> s.abs()
  1319. 0 1.10
  1320. 1 2.00
  1321. 2 3.33
  1322. 3 4.00
  1323. dtype: float64
  1324. Absolute numeric values in a Series with complex numbers.
  1325. >>> s = pd.Series([1.2 + 1j])
  1326. >>> s.abs()
  1327. 0 1.56205
  1328. dtype: float64
  1329. Absolute numeric values in a Series with a Timedelta element.
  1330. >>> s = pd.Series([pd.Timedelta('1 days')])
  1331. >>> s.abs()
  1332. 0 1 days
  1333. dtype: timedelta64[ns]
  1334. Select rows with data closest to certain value using argsort (from
  1335. `StackOverflow <https://stackoverflow.com/a/17758115>`__).
  1336. >>> df = pd.DataFrame({
  1337. ... 'a': [4, 5, 6, 7],
  1338. ... 'b': [10, 20, 30, 40],
  1339. ... 'c': [100, 50, -30, -50]
  1340. ... })
  1341. >>> df
  1342. a b c
  1343. 0 4 10 100
  1344. 1 5 20 50
  1345. 2 6 30 -30
  1346. 3 7 40 -50
  1347. >>> df.loc[(df.c - 43).abs().argsort()]
  1348. a b c
  1349. 1 5 20 50
  1350. 0 4 10 100
  1351. 2 6 30 -30
  1352. 3 7 40 -50
  1353. """
  1354. res_mgr = self._mgr.apply(np.abs)
  1355. return self._constructor(res_mgr).__finalize__(self, name="abs")
  1356. @final
  1357. def __abs__(self: NDFrameT) -> NDFrameT:
  1358. return self.abs()
  1359. @final
  1360. def __round__(self: NDFrameT, decimals: int = 0) -> NDFrameT:
  1361. return self.round(decimals).__finalize__(self, method="__round__")
  1362. # -------------------------------------------------------------------------
  1363. # Label or Level Combination Helpers
  1364. #
  1365. # A collection of helper methods for DataFrame/Series operations that
  1366. # accept a combination of column/index labels and levels. All such
  1367. # operations should utilize/extend these methods when possible so that we
  1368. # have consistent precedence and validation logic throughout the library.
  1369. @final
  1370. def _is_level_reference(self, key: Level, axis: Axis = 0) -> bool_t:
  1371. """
  1372. Test whether a key is a level reference for a given axis.
  1373. To be considered a level reference, `key` must be a string that:
  1374. - (axis=0): Matches the name of an index level and does NOT match
  1375. a column label.
  1376. - (axis=1): Matches the name of a column level and does NOT match
  1377. an index label.
  1378. Parameters
  1379. ----------
  1380. key : Hashable
  1381. Potential level name for the given axis
  1382. axis : int, default 0
  1383. Axis that levels are associated with (0 for index, 1 for columns)
  1384. Returns
  1385. -------
  1386. is_level : bool
  1387. """
  1388. axis_int = self._get_axis_number(axis)
  1389. return (
  1390. key is not None
  1391. and is_hashable(key)
  1392. and key in self.axes[axis_int].names
  1393. and not self._is_label_reference(key, axis=axis_int)
  1394. )
  1395. @final
  1396. def _is_label_reference(self, key: Level, axis: Axis = 0) -> bool_t:
  1397. """
  1398. Test whether a key is a label reference for a given axis.
  1399. To be considered a label reference, `key` must be a string that:
  1400. - (axis=0): Matches a column label
  1401. - (axis=1): Matches an index label
  1402. Parameters
  1403. ----------
  1404. key : Hashable
  1405. Potential label name, i.e. Index entry.
  1406. axis : int, default 0
  1407. Axis perpendicular to the axis that labels are associated with
  1408. (0 means search for column labels, 1 means search for index labels)
  1409. Returns
  1410. -------
  1411. is_label: bool
  1412. """
  1413. axis_int = self._get_axis_number(axis)
  1414. other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)
  1415. return (
  1416. key is not None
  1417. and is_hashable(key)
  1418. and any(key in self.axes[ax] for ax in other_axes)
  1419. )
  1420. @final
  1421. def _is_label_or_level_reference(self, key: Level, axis: AxisInt = 0) -> bool_t:
  1422. """
  1423. Test whether a key is a label or level reference for a given axis.
  1424. To be considered either a label or a level reference, `key` must be a
  1425. string that:
  1426. - (axis=0): Matches a column label or an index level
  1427. - (axis=1): Matches an index label or a column level
  1428. Parameters
  1429. ----------
  1430. key : Hashable
  1431. Potential label or level name
  1432. axis : int, default 0
  1433. Axis that levels are associated with (0 for index, 1 for columns)
  1434. Returns
  1435. -------
  1436. bool
  1437. """
  1438. return self._is_level_reference(key, axis=axis) or self._is_label_reference(
  1439. key, axis=axis
  1440. )
  1441. @final
  1442. def _check_label_or_level_ambiguity(self, key: Level, axis: Axis = 0) -> None:
  1443. """
  1444. Check whether `key` is ambiguous.
  1445. By ambiguous, we mean that it matches both a level of the input
  1446. `axis` and a label of the other axis.
  1447. Parameters
  1448. ----------
  1449. key : Hashable
  1450. Label or level name.
  1451. axis : int, default 0
  1452. Axis that levels are associated with (0 for index, 1 for columns).
  1453. Raises
  1454. ------
  1455. ValueError: `key` is ambiguous
  1456. """
  1457. axis_int = self._get_axis_number(axis)
  1458. other_axes = (ax for ax in range(self._AXIS_LEN) if ax != axis_int)
  1459. if (
  1460. key is not None
  1461. and is_hashable(key)
  1462. and key in self.axes[axis_int].names
  1463. and any(key in self.axes[ax] for ax in other_axes)
  1464. ):
  1465. # Build an informative and grammatical warning
  1466. level_article, level_type = (
  1467. ("an", "index") if axis_int == 0 else ("a", "column")
  1468. )
  1469. label_article, label_type = (
  1470. ("a", "column") if axis_int == 0 else ("an", "index")
  1471. )
  1472. msg = (
  1473. f"'{key}' is both {level_article} {level_type} level and "
  1474. f"{label_article} {label_type} label, which is ambiguous."
  1475. )
  1476. raise ValueError(msg)
  1477. @final
  1478. def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike:
  1479. """
  1480. Return a 1-D array of values associated with `key`, a label or level
  1481. from the given `axis`.
  1482. Retrieval logic:
  1483. - (axis=0): Return column values if `key` matches a column label.
  1484. Otherwise return index level values if `key` matches an index
  1485. level.
  1486. - (axis=1): Return row values if `key` matches an index label.
  1487. Otherwise return column level values if 'key' matches a column
  1488. level
  1489. Parameters
  1490. ----------
  1491. key : Hashable
  1492. Label or level name.
  1493. axis : int, default 0
  1494. Axis that levels are associated with (0 for index, 1 for columns)
  1495. Returns
  1496. -------
  1497. np.ndarray or ExtensionArray
  1498. Raises
  1499. ------
  1500. KeyError
  1501. if `key` matches neither a label nor a level
  1502. ValueError
  1503. if `key` matches multiple labels
  1504. """
  1505. axis = self._get_axis_number(axis)
  1506. other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
  1507. if self._is_label_reference(key, axis=axis):
  1508. self._check_label_or_level_ambiguity(key, axis=axis)
  1509. values = self.xs(key, axis=other_axes[0])._values
  1510. elif self._is_level_reference(key, axis=axis):
  1511. values = self.axes[axis].get_level_values(key)._values
  1512. else:
  1513. raise KeyError(key)
  1514. # Check for duplicates
  1515. if values.ndim > 1:
  1516. if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex):
  1517. multi_message = (
  1518. "\n"
  1519. "For a multi-index, the label must be a "
  1520. "tuple with elements corresponding to each level."
  1521. )
  1522. else:
  1523. multi_message = ""
  1524. label_axis_name = "column" if axis == 0 else "index"
  1525. raise ValueError(
  1526. f"The {label_axis_name} label '{key}' is not unique.{multi_message}"
  1527. )
  1528. return values
  1529. @final
  1530. def _drop_labels_or_levels(self, keys, axis: AxisInt = 0):
  1531. """
  1532. Drop labels and/or levels for the given `axis`.
  1533. For each key in `keys`:
  1534. - (axis=0): If key matches a column label then drop the column.
  1535. Otherwise if key matches an index level then drop the level.
  1536. - (axis=1): If key matches an index label then drop the row.
  1537. Otherwise if key matches a column level then drop the level.
  1538. Parameters
  1539. ----------
  1540. keys : str or list of str
  1541. labels or levels to drop
  1542. axis : int, default 0
  1543. Axis that levels are associated with (0 for index, 1 for columns)
  1544. Returns
  1545. -------
  1546. dropped: DataFrame
  1547. Raises
  1548. ------
  1549. ValueError
  1550. if any `keys` match neither a label nor a level
  1551. """
  1552. axis = self._get_axis_number(axis)
  1553. # Validate keys
  1554. keys = common.maybe_make_list(keys)
  1555. invalid_keys = [
  1556. k for k in keys if not self._is_label_or_level_reference(k, axis=axis)
  1557. ]
  1558. if invalid_keys:
  1559. raise ValueError(
  1560. "The following keys are not valid labels or "
  1561. f"levels for axis {axis}: {invalid_keys}"
  1562. )
  1563. # Compute levels and labels to drop
  1564. levels_to_drop = [k for k in keys if self._is_level_reference(k, axis=axis)]
  1565. labels_to_drop = [k for k in keys if not self._is_level_reference(k, axis=axis)]
  1566. # Perform copy upfront and then use inplace operations below.
  1567. # This ensures that we always perform exactly one copy.
  1568. # ``copy`` and/or ``inplace`` options could be added in the future.
  1569. dropped = self.copy(deep=False)
  1570. if axis == 0:
  1571. # Handle dropping index levels
  1572. if levels_to_drop:
  1573. dropped.reset_index(levels_to_drop, drop=True, inplace=True)
  1574. # Handle dropping columns labels
  1575. if labels_to_drop:
  1576. dropped.drop(labels_to_drop, axis=1, inplace=True)
  1577. else:
  1578. # Handle dropping column levels
  1579. if levels_to_drop:
  1580. if isinstance(dropped.columns, MultiIndex):
  1581. # Drop the specified levels from the MultiIndex
  1582. dropped.columns = dropped.columns.droplevel(levels_to_drop)
  1583. else:
  1584. # Drop the last level of Index by replacing with
  1585. # a RangeIndex
  1586. dropped.columns = RangeIndex(dropped.columns.size)
  1587. # Handle dropping index labels
  1588. if labels_to_drop:
  1589. dropped.drop(labels_to_drop, axis=0, inplace=True)
  1590. return dropped
  1591. # ----------------------------------------------------------------------
  1592. # Iteration
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    # Annotation only: this line declares ``__hash__`` as a ``None``-typed class
    # variable for type checkers and does not itself assign a value.
    __hash__: ClassVar[None]  # type: ignore[assignment]
  1597. def __iter__(self) -> Iterator:
  1598. """
  1599. Iterate over info axis.
  1600. Returns
  1601. -------
  1602. iterator
  1603. Info axis as iterator.
  1604. """
  1605. return iter(self._info_axis)
  1606. # can we get a better explanation of this?
  1607. def keys(self) -> Index:
  1608. """
  1609. Get the 'info axis' (see Indexing for more).
  1610. This is index for Series, columns for DataFrame.
  1611. Returns
  1612. -------
  1613. Index
  1614. Info axis.
  1615. """
  1616. return self._info_axis
  1617. def items(self):
  1618. """
  1619. Iterate over (label, values) on info axis
  1620. This is index for Series and columns for DataFrame.
  1621. Returns
  1622. -------
  1623. Generator
  1624. """
  1625. for h in self._info_axis:
  1626. yield h, self[h]
  1627. def __len__(self) -> int:
  1628. """Returns length of info axis"""
  1629. return len(self._info_axis)
  1630. @final
  1631. def __contains__(self, key) -> bool_t:
  1632. """True if the key is in the info axis"""
  1633. return key in self._info_axis
  1634. @property
  1635. def empty(self) -> bool_t:
  1636. """
  1637. Indicator whether Series/DataFrame is empty.
  1638. True if Series/DataFrame is entirely empty (no items), meaning any of the
  1639. axes are of length 0.
  1640. Returns
  1641. -------
  1642. bool
  1643. If Series/DataFrame is empty, return True, if not return False.
  1644. See Also
  1645. --------
  1646. Series.dropna : Return series without null values.
  1647. DataFrame.dropna : Return DataFrame with labels on given axis omitted
  1648. where (all or any) data are missing.
  1649. Notes
  1650. -----
  1651. If Series/DataFrame contains only NaNs, it is still not considered empty. See
  1652. the example below.
  1653. Examples
  1654. --------
  1655. An example of an actual empty DataFrame. Notice the index is empty:
  1656. >>> df_empty = pd.DataFrame({'A' : []})
  1657. >>> df_empty
  1658. Empty DataFrame
  1659. Columns: [A]
  1660. Index: []
  1661. >>> df_empty.empty
  1662. True
  1663. If we only have NaNs in our DataFrame, it is not considered empty! We
  1664. will need to drop the NaNs to make the DataFrame empty:
  1665. >>> df = pd.DataFrame({'A' : [np.nan]})
  1666. >>> df
  1667. A
  1668. 0 NaN
  1669. >>> df.empty
  1670. False
  1671. >>> df.dropna().empty
  1672. True
  1673. >>> ser_empty = pd.Series({'A' : []})
  1674. >>> ser_empty
  1675. A []
  1676. dtype: object
  1677. >>> ser_empty.empty
  1678. False
  1679. >>> ser_empty = pd.Series()
  1680. >>> ser_empty.empty
  1681. True
  1682. """
  1683. return any(len(self._get_axis(a)) == 0 for a in self._AXIS_ORDERS)
  1684. # ----------------------------------------------------------------------
  1685. # Array Interface
    # This is also set in IndexOpsMixin
    # GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
    # NOTE(review): presumably NumPy compares this value with its own default
    # priority to decide which operand handles a mixed binary op — confirm.
    __array_priority__: int = 1000
  1689. def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
  1690. values = self._values
  1691. arr = np.asarray(values, dtype=dtype)
  1692. if (
  1693. astype_is_view(values.dtype, arr.dtype)
  1694. and using_copy_on_write()
  1695. and self._mgr.is_single_block
  1696. ):
  1697. # Check if both conversions can be done without a copy
  1698. if astype_is_view(self.dtypes.iloc[0], values.dtype) and astype_is_view(
  1699. values.dtype, arr.dtype
  1700. ):
  1701. arr = arr.view()
  1702. arr.flags.writeable = False
  1703. return arr
  1704. @final
  1705. def __array_ufunc__(
  1706. self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any
  1707. ):
  1708. return arraylike.array_ufunc(self, ufunc, method, *inputs, **kwargs)
  1709. # ----------------------------------------------------------------------
  1710. # Picklability
  1711. @final
  1712. def __getstate__(self) -> dict[str, Any]:
  1713. meta = {k: getattr(self, k, None) for k in self._metadata}
  1714. return {
  1715. "_mgr": self._mgr,
  1716. "_typ": self._typ,
  1717. "_metadata": self._metadata,
  1718. "attrs": self.attrs,
  1719. "_flags": {k: self.flags[k] for k in self.flags._keys},
  1720. **meta,
  1721. }
    @final
    def __setstate__(self, state) -> None:
        """
        Restore the object from pickled ``state``.

        Parameters
        ----------
        state : BlockManager, dict, or other sized object
            A ``BlockManager`` is installed directly as ``_mgr``. A dict is
            unpacked attribute by attribute (see below). Any other object of
            length 2 is rejected as a legacy pickle.

        Raises
        ------
        NotImplementedError
            If ``state`` is a dict without a ``"_typ"`` entry (or with
            ``"_typ"`` set to None), or a non-dict, non-BlockManager object
            of length 2.
        """
        if isinstance(state, BlockManager):
            self._mgr = state
        elif isinstance(state, dict):
            if "_data" in state and "_mgr" not in state:
                # compat for older pickles
                state["_mgr"] = state.pop("_data")
            typ = state.get("_typ")
            if typ is not None:
                # NOTE(review): this reads "_attrs", while __getstate__ writes
                # the key "attrs"; that entry is not in ``meta`` below unless
                # "attrs" is listed there, so it is presumably applied by the
                # final loop — confirm.
                attrs = state.get("_attrs", {})
                object.__setattr__(self, "_attrs", attrs)
                flags = state.get("_flags", {"allows_duplicate_labels": True})
                object.__setattr__(self, "_flags", Flags(self, **flags))

                # set in the order of internal names
                # to avoid definitional recursion
                # e.g. say fill_value needing _mgr to be
                # defined
                # NOTE(review): ``meta`` is a set, so the loop below iterates
                # in set order rather than declaration order — confirm that
                # ordering does not matter here.
                meta = set(self._internal_names + self._metadata)
                for k in list(meta):
                    # "_flags" was already rebuilt as a Flags object above.
                    if k in state and k != "_flags":
                        v = state[k]
                        object.__setattr__(self, k, v)

                # Remaining entries that are neither internal names nor
                # metadata are set as plain attributes.
                for k, v in state.items():
                    if k not in meta:
                        object.__setattr__(self, k, v)

            else:
                raise NotImplementedError("Pre-0.12 pickles are no longer supported")
        elif len(state) == 2:
            raise NotImplementedError("Pre-0.12 pickles are no longer supported")

        # Always start with an empty item cache after unpickling.
        self._item_cache: dict[Hashable, Series] = {}
  1753. # ----------------------------------------------------------------------
  1754. # Rendering Methods
  1755. def __repr__(self) -> str:
  1756. # string representation based upon iterating over self
  1757. # (since, by definition, `PandasContainers` are iterable)
  1758. prepr = f"[{','.join(map(pprint_thing, self))}]"
  1759. return f"{type(self).__name__}({prepr})"
  1760. @final
  1761. def _repr_latex_(self):
  1762. """
  1763. Returns a LaTeX representation for a particular object.
  1764. Mainly for use with nbconvert (jupyter notebook conversion to pdf).
  1765. """
  1766. if config.get_option("styler.render.repr") == "latex":
  1767. return self.to_latex()
  1768. else:
  1769. return None
  1770. @final
  1771. def _repr_data_resource_(self):
  1772. """
  1773. Not a real Jupyter special repr method, but we use the same
  1774. naming convention.
  1775. """
  1776. if config.get_option("display.html.table_schema"):
  1777. data = self.head(config.get_option("display.max_rows"))
  1778. as_json = data.to_json(orient="table")
  1779. as_json = cast(str, as_json)
  1780. return loads(as_json, object_pairs_hook=collections.OrderedDict)
  1781. # ----------------------------------------------------------------------
  1782. # I/O Methods
  1783. @final
  1784. @doc(
  1785. klass="object",
  1786. storage_options=_shared_docs["storage_options"],
  1787. storage_options_versionadded="1.2.0",
  1788. )
  1789. def to_excel(
  1790. self,
  1791. excel_writer,
  1792. sheet_name: str = "Sheet1",
  1793. na_rep: str = "",
  1794. float_format: str | None = None,
  1795. columns: Sequence[Hashable] | None = None,
  1796. header: Sequence[Hashable] | bool_t = True,
  1797. index: bool_t = True,
  1798. index_label: IndexLabel = None,
  1799. startrow: int = 0,
  1800. startcol: int = 0,
  1801. engine: str | None = None,
  1802. merge_cells: bool_t = True,
  1803. inf_rep: str = "inf",
  1804. freeze_panes: tuple[int, int] | None = None,
  1805. storage_options: StorageOptions = None,
  1806. ) -> None:
  1807. """
  1808. Write {klass} to an Excel sheet.
  1809. To write a single {klass} to an Excel .xlsx file it is only necessary to
  1810. specify a target file name. To write to multiple sheets it is necessary to
  1811. create an `ExcelWriter` object with a target file name, and specify a sheet
  1812. in the file to write to.
  1813. Multiple sheets may be written to by specifying unique `sheet_name`.
  1814. With all data written to the file it is necessary to save the changes.
  1815. Note that creating an `ExcelWriter` object with a file name that already
  1816. exists will result in the contents of the existing file being erased.
  1817. Parameters
  1818. ----------
  1819. excel_writer : path-like, file-like, or ExcelWriter object
  1820. File path or existing ExcelWriter.
  1821. sheet_name : str, default 'Sheet1'
  1822. Name of sheet which will contain DataFrame.
  1823. na_rep : str, default ''
  1824. Missing data representation.
  1825. float_format : str, optional
  1826. Format string for floating point numbers. For example
  1827. ``float_format="%.2f"`` will format 0.1234 to 0.12.
  1828. columns : sequence or list of str, optional
  1829. Columns to write.
  1830. header : bool or list of str, default True
  1831. Write out the column names. If a list of string is given it is
  1832. assumed to be aliases for the column names.
  1833. index : bool, default True
  1834. Write row names (index).
  1835. index_label : str or sequence, optional
  1836. Column label for index column(s) if desired. If not specified, and
  1837. `header` and `index` are True, then the index names are used. A
  1838. sequence should be given if the DataFrame uses MultiIndex.
  1839. startrow : int, default 0
  1840. Upper left cell row to dump data frame.
  1841. startcol : int, default 0
  1842. Upper left cell column to dump data frame.
  1843. engine : str, optional
  1844. Write engine to use, 'openpyxl' or 'xlsxwriter'. You can also set this
  1845. via the options ``io.excel.xlsx.writer`` or
  1846. ``io.excel.xlsm.writer``.
  1847. merge_cells : bool, default True
  1848. Write MultiIndex and Hierarchical Rows as merged cells.
  1849. inf_rep : str, default 'inf'
  1850. Representation for infinity (there is no native representation for
  1851. infinity in Excel).
  1852. freeze_panes : tuple of int (length 2), optional
  1853. Specifies the one-based bottommost row and rightmost column that
  1854. is to be frozen.
  1855. {storage_options}
  1856. .. versionadded:: {storage_options_versionadded}
  1857. See Also
  1858. --------
  1859. to_csv : Write DataFrame to a comma-separated values (csv) file.
  1860. ExcelWriter : Class for writing DataFrame objects into excel sheets.
  1861. read_excel : Read an Excel file into a pandas DataFrame.
  1862. read_csv : Read a comma-separated values (csv) file into DataFrame.
  1863. io.formats.style.Styler.to_excel : Add styles to Excel sheet.
  1864. Notes
  1865. -----
  1866. For compatibility with :meth:`~DataFrame.to_csv`,
  1867. to_excel serializes lists and dicts to strings before writing.
  1868. Once a workbook has been saved it is not possible to write further
  1869. data without rewriting the whole workbook.
  1870. Examples
  1871. --------
  1872. Create, write to and save a workbook:
  1873. >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']],
  1874. ... index=['row 1', 'row 2'],
  1875. ... columns=['col 1', 'col 2'])
  1876. >>> df1.to_excel("output.xlsx") # doctest: +SKIP
  1877. To specify the sheet name:
  1878. >>> df1.to_excel("output.xlsx",
  1879. ... sheet_name='Sheet_name_1') # doctest: +SKIP
  1880. If you wish to write to more than one sheet in the workbook, it is
  1881. necessary to specify an ExcelWriter object:
  1882. >>> df2 = df1.copy()
  1883. >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP
  1884. ... df1.to_excel(writer, sheet_name='Sheet_name_1')
  1885. ... df2.to_excel(writer, sheet_name='Sheet_name_2')
  1886. ExcelWriter can also be used to append to an existing Excel file:
  1887. >>> with pd.ExcelWriter('output.xlsx',
  1888. ... mode='a') as writer: # doctest: +SKIP
  1889. ... df.to_excel(writer, sheet_name='Sheet_name_3')
  1890. To set the library that is used to write the Excel file,
  1891. you can pass the `engine` keyword (the default engine is
  1892. automatically chosen depending on the file extension):
  1893. >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP
  1894. """
  1895. df = self if isinstance(self, ABCDataFrame) else self.to_frame()
  1896. from pandas.io.formats.excel import ExcelFormatter
  1897. formatter = ExcelFormatter(
  1898. df,
  1899. na_rep=na_rep,
  1900. cols=columns,
  1901. header=header,
  1902. float_format=float_format,
  1903. index=index,
  1904. index_label=index_label,
  1905. merge_cells=merge_cells,
  1906. inf_rep=inf_rep,
  1907. )
  1908. formatter.write(
  1909. excel_writer,
  1910. sheet_name=sheet_name,
  1911. startrow=startrow,
  1912. startcol=startcol,
  1913. freeze_panes=freeze_panes,
  1914. engine=engine,
  1915. storage_options=storage_options,
  1916. )
  1917. @final
  1918. @doc(
  1919. storage_options=_shared_docs["storage_options"],
  1920. compression_options=_shared_docs["compression_options"] % "path_or_buf",
  1921. )
  1922. def to_json(
  1923. self,
  1924. path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
  1925. orient: str | None = None,
  1926. date_format: str | None = None,
  1927. double_precision: int = 10,
  1928. force_ascii: bool_t = True,
  1929. date_unit: str = "ms",
  1930. default_handler: Callable[[Any], JSONSerializable] | None = None,
  1931. lines: bool_t = False,
  1932. compression: CompressionOptions = "infer",
  1933. index: bool_t = True,
  1934. indent: int | None = None,
  1935. storage_options: StorageOptions = None,
  1936. mode: Literal["a", "w"] = "w",
  1937. ) -> str | None:
  1938. """
  1939. Convert the object to a JSON string.
  1940. Note NaN's and None will be converted to null and datetime objects
  1941. will be converted to UNIX timestamps.
  1942. Parameters
  1943. ----------
  1944. path_or_buf : str, path object, file-like object, or None, default None
  1945. String, path object (implementing os.PathLike[str]), or file-like
  1946. object implementing a write() function. If None, the result is
  1947. returned as a string.
  1948. orient : str
  1949. Indication of expected JSON string format.
  1950. * Series:
  1951. - default is 'index'
  1952. - allowed values are: {{'split', 'records', 'index', 'table'}}.
  1953. * DataFrame:
  1954. - default is 'columns'
  1955. - allowed values are: {{'split', 'records', 'index', 'columns',
  1956. 'values', 'table'}}.
  1957. * The format of the JSON string:
  1958. - 'split' : dict like {{'index' -> [index], 'columns' -> [columns],
  1959. 'data' -> [values]}}
  1960. - 'records' : list like [{{column -> value}}, ... , {{column -> value}}]
  1961. - 'index' : dict like {{index -> {{column -> value}}}}
  1962. - 'columns' : dict like {{column -> {{index -> value}}}}
  1963. - 'values' : just the values array
  1964. - 'table' : dict like {{'schema': {{schema}}, 'data': {{data}}}}
  1965. Describing the data, where data component is like ``orient='records'``.
  1966. date_format : {{None, 'epoch', 'iso'}}
  1967. Type of date conversion. 'epoch' = epoch milliseconds,
  1968. 'iso' = ISO8601. The default depends on the `orient`. For
  1969. ``orient='table'``, the default is 'iso'. For all other orients,
  1970. the default is 'epoch'.
  1971. double_precision : int, default 10
  1972. The number of decimal places to use when encoding
  1973. floating point values.
  1974. force_ascii : bool, default True
  1975. Force encoded string to be ASCII.
  1976. date_unit : str, default 'ms' (milliseconds)
  1977. The time unit to encode to, governs timestamp and ISO8601
  1978. precision. One of 's', 'ms', 'us', 'ns' for second, millisecond,
  1979. microsecond, and nanosecond respectively.
  1980. default_handler : callable, default None
  1981. Handler to call if object cannot otherwise be converted to a
  1982. suitable format for JSON. Should receive a single argument which is
  1983. the object to convert and return a serialisable object.
  1984. lines : bool, default False
  1985. If 'orient' is 'records' write out line-delimited json format. Will
  1986. throw ValueError if incorrect 'orient' since others are not
  1987. list-like.
  1988. {compression_options}
  1989. .. versionchanged:: 1.4.0 Zstandard support.
  1990. index : bool, default True
  1991. Whether to include the index values in the JSON string. Not
  1992. including the index (``index=False``) is only supported when
  1993. orient is 'split' or 'table'.
  1994. indent : int, optional
  1995. Length of whitespace used to indent each record.
  1996. {storage_options}
  1997. .. versionadded:: 1.2.0
  1998. mode : str, default 'w' (writing)
  1999. Specify the IO mode for output when supplying a path_or_buf.
  2000. Accepted args are 'w' (writing) and 'a' (append) only.
  2001. mode='a' is only supported when lines is True and orient is 'records'.
  2002. Returns
  2003. -------
  2004. None or str
  2005. If path_or_buf is None, returns the resulting json format as a
  2006. string. Otherwise returns None.
  2007. See Also
  2008. --------
  2009. read_json : Convert a JSON string to pandas object.
  2010. Notes
  2011. -----
  2012. The behavior of ``indent=0`` varies from the stdlib, which does not
  2013. indent the output but does insert newlines. Currently, ``indent=0``
  2014. and the default ``indent=None`` are equivalent in pandas, though this
  2015. may change in a future release.
  2016. ``orient='table'`` contains a 'pandas_version' field under 'schema'.
  2017. This stores the version of `pandas` used in the latest revision of the
  2018. schema.
  2019. Examples
  2020. --------
  2021. >>> from json import loads, dumps
  2022. >>> df = pd.DataFrame(
  2023. ... [["a", "b"], ["c", "d"]],
  2024. ... index=["row 1", "row 2"],
  2025. ... columns=["col 1", "col 2"],
  2026. ... )
  2027. >>> result = df.to_json(orient="split")
  2028. >>> parsed = loads(result)
  2029. >>> dumps(parsed, indent=4) # doctest: +SKIP
  2030. {{
  2031. "columns": [
  2032. "col 1",
  2033. "col 2"
  2034. ],
  2035. "index": [
  2036. "row 1",
  2037. "row 2"
  2038. ],
  2039. "data": [
  2040. [
  2041. "a",
  2042. "b"
  2043. ],
  2044. [
  2045. "c",
  2046. "d"
  2047. ]
  2048. ]
  2049. }}
  2050. Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
  2051. Note that index labels are not preserved with this encoding.
  2052. >>> result = df.to_json(orient="records")
  2053. >>> parsed = loads(result)
  2054. >>> dumps(parsed, indent=4) # doctest: +SKIP
  2055. [
  2056. {{
  2057. "col 1": "a",
  2058. "col 2": "b"
  2059. }},
  2060. {{
  2061. "col 1": "c",
  2062. "col 2": "d"
  2063. }}
  2064. ]
  2065. Encoding/decoding a Dataframe using ``'index'`` formatted JSON:
  2066. >>> result = df.to_json(orient="index")
  2067. >>> parsed = loads(result)
  2068. >>> dumps(parsed, indent=4) # doctest: +SKIP
  2069. {{
  2070. "row 1": {{
  2071. "col 1": "a",
  2072. "col 2": "b"
  2073. }},
  2074. "row 2": {{
  2075. "col 1": "c",
  2076. "col 2": "d"
  2077. }}
  2078. }}
  2079. Encoding/decoding a Dataframe using ``'columns'`` formatted JSON:
  2080. >>> result = df.to_json(orient="columns")
  2081. >>> parsed = loads(result)
  2082. >>> dumps(parsed, indent=4) # doctest: +SKIP
  2083. {{
  2084. "col 1": {{
  2085. "row 1": "a",
  2086. "row 2": "c"
  2087. }},
  2088. "col 2": {{
  2089. "row 1": "b",
  2090. "row 2": "d"
  2091. }}
  2092. }}
  2093. Encoding/decoding a Dataframe using ``'values'`` formatted JSON:
  2094. >>> result = df.to_json(orient="values")
  2095. >>> parsed = loads(result)
  2096. >>> dumps(parsed, indent=4) # doctest: +SKIP
  2097. [
  2098. [
  2099. "a",
  2100. "b"
  2101. ],
  2102. [
  2103. "c",
  2104. "d"
  2105. ]
  2106. ]
  2107. Encoding with Table Schema:
  2108. >>> result = df.to_json(orient="table")
  2109. >>> parsed = loads(result)
  2110. >>> dumps(parsed, indent=4) # doctest: +SKIP
  2111. {{
  2112. "schema": {{
  2113. "fields": [
  2114. {{
  2115. "name": "index",
  2116. "type": "string"
  2117. }},
  2118. {{
  2119. "name": "col 1",
  2120. "type": "string"
  2121. }},
  2122. {{
  2123. "name": "col 2",
  2124. "type": "string"
  2125. }}
  2126. ],
  2127. "primaryKey": [
  2128. "index"
  2129. ],
  2130. "pandas_version": "1.4.0"
  2131. }},
  2132. "data": [
  2133. {{
  2134. "index": "row 1",
  2135. "col 1": "a",
  2136. "col 2": "b"
  2137. }},
  2138. {{
  2139. "index": "row 2",
  2140. "col 1": "c",
  2141. "col 2": "d"
  2142. }}
  2143. ]
  2144. }}
  2145. """
  2146. from pandas.io import json
  2147. if date_format is None and orient == "table":
  2148. date_format = "iso"
  2149. elif date_format is None:
  2150. date_format = "epoch"
  2151. config.is_nonnegative_int(indent)
  2152. indent = indent or 0
  2153. return json.to_json(
  2154. path_or_buf=path_or_buf,
  2155. obj=self,
  2156. orient=orient,
  2157. date_format=date_format,
  2158. double_precision=double_precision,
  2159. force_ascii=force_ascii,
  2160. date_unit=date_unit,
  2161. default_handler=default_handler,
  2162. lines=lines,
  2163. compression=compression,
  2164. index=index,
  2165. indent=indent,
  2166. storage_options=storage_options,
  2167. mode=mode,
  2168. )
  @final
  def to_hdf(
      self,
      path_or_buf: FilePath | HDFStore,
      key: str,
      mode: str = "a",
      complevel: int | None = None,
      complib: str | None = None,
      append: bool_t = False,
      format: str | None = None,
      index: bool_t = True,
      min_itemsize: int | dict[str, int] | None = None,
      nan_rep=None,
      dropna: bool_t | None = None,
      data_columns: Literal[True] | list[str] | None = None,
      errors: str = "strict",
      encoding: str = "UTF-8",
  ) -> None:
      """
      Write the contained data to an HDF5 file using HDFStore.

      Hierarchical Data Format (HDF) is self-describing, allowing an
      application to interpret the structure and contents of a file with
      no outside information. One HDF file can hold a mix of related objects
      which can be accessed as a group or as individual objects.

      In order to add another DataFrame or Series to an existing HDF file
      please use append mode and a different key.

      .. warning::

         One can store a subclass of ``DataFrame`` or ``Series`` to HDF5,
         but the type of the subclass is lost upon storing.

      For more information see the :ref:`user guide <io.hdf5>`.

      Parameters
      ----------
      path_or_buf : str or pandas.HDFStore
          File path or HDFStore object.
      key : str
          Identifier for the group in the store.
      mode : {'a', 'w', 'r+'}, default 'a'
          Mode to open file:

          - 'w': write, a new file is created (an existing file with
            the same name would be deleted).
          - 'a': append, an existing file is opened for reading and
            writing, and if the file does not exist it is created.
          - 'r+': similar to 'a', but the file must already exist.
      complevel : {0-9}, default None
          Specifies a compression level for data.
          A value of 0 or None disables compression.
      complib : {'zlib', 'lzo', 'bzip2', 'blosc'}, default 'zlib'
          Specifies the compression library to be used.
          As of v0.20.2 these additional compressors for Blosc are supported
          (default if no compressor specified: 'blosc:blosclz'):
          {'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy',
          'blosc:zlib', 'blosc:zstd'}.
          Specifying a compression library which is not available issues
          a ValueError.
      append : bool, default False
          For Table formats, append the input data to the existing.
      format : {'fixed', 'table', None}, default 'fixed'
          Possible values:

          - 'fixed': Fixed format. Fast writing/reading. Not-appendable,
            nor searchable.
          - 'table': Table format. Write as a PyTables Table structure
            which may perform worse but allow more flexible operations
            like searching / selecting subsets of the data.
          - If None, pd.get_option('io.hdf.default_format') is checked,
            followed by fallback to "fixed".
      index : bool, default True
          Write DataFrame index as a column.
      min_itemsize : dict or int, optional
          Map column names to minimum string sizes for columns.
      nan_rep : Any, optional
          How to represent null values as str.
          Not allowed with append=True.
      dropna : bool, default False, optional
          Remove missing values.
      data_columns : list of columns or True, optional
          List of columns to create as indexed data columns for on-disk
          queries, or True to use all columns. By default only the axes
          of the object are indexed. See
          :ref:`Query via data columns<io.hdf5-query-data-columns>`. for
          more information.
          Applicable only to format='table'.
      errors : str, default 'strict'
          Specifies how encoding and decoding errors are to be handled.
          See the errors argument for :func:`open` for a full list
          of options.
      encoding : str, default "UTF-8"
          Encoding name handed through unchanged to the HDF writer.

      See Also
      --------
      read_hdf : Read from HDF file.
      DataFrame.to_orc : Write a DataFrame to the binary orc format.
      DataFrame.to_parquet : Write a DataFrame to the binary parquet format.
      DataFrame.to_sql : Write to a SQL table.
      DataFrame.to_feather : Write out feather-format for DataFrames.
      DataFrame.to_csv : Write out to a csv file.

      Examples
      --------
      >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
      ...                   index=['a', 'b', 'c'])  # doctest: +SKIP
      >>> df.to_hdf('data.h5', key='df', mode='w')  # doctest: +SKIP

      We can add another object to the same file:

      >>> s = pd.Series([1, 2, 3, 4])  # doctest: +SKIP
      >>> s.to_hdf('data.h5', key='s')  # doctest: +SKIP

      Reading from HDF file:

      >>> pd.read_hdf('data.h5', 'df')  # doctest: +SKIP
         A  B
      a  1  4
      b  2  5
      c  3  6
      >>> pd.read_hdf('data.h5', 's')  # doctest: +SKIP
      0    1
      1    2
      2    3
      3    4
      dtype: int64
      """
      from pandas.io import pytables

      # Every option is forwarded untouched; this method only adapts the
      # calling convention (object as third positional argument).
      writer_options = {
          "mode": mode,
          "complevel": complevel,
          "complib": complib,
          "append": append,
          "format": format,
          "index": index,
          "min_itemsize": min_itemsize,
          "nan_rep": nan_rep,
          "dropna": dropna,
          "data_columns": data_columns,
          "errors": errors,
          "encoding": encoding,
      }

      # Argument 3 to "to_hdf" has incompatible type "NDFrame"; expected
      # "Union[DataFrame, Series]"  [arg-type]
      pytables.to_hdf(
          path_or_buf,
          key,
          self,  # type: ignore[arg-type]
          **writer_options,
      )
  @final
  def to_sql(
      self,
      name: str,
      con,
      schema: str | None = None,
      if_exists: Literal["fail", "replace", "append"] = "fail",
      index: bool_t = True,
      index_label: IndexLabel = None,
      chunksize: int | None = None,
      dtype: DtypeArg | None = None,
      method: str | None = None,
  ) -> int | None:
      """
      Write records stored in a DataFrame to a SQL database.

      Databases supported by SQLAlchemy [1]_ are supported. Tables can be
      newly created, appended to, or overwritten.

      Parameters
      ----------
      name : str
          Name of SQL table.
      con : sqlalchemy.engine.(Engine or Connection) or sqlite3.Connection
          Using SQLAlchemy makes it possible to use any DB supported by that
          library. Legacy support is provided for sqlite3.Connection objects. The user
          is responsible for engine disposal and connection closure for the SQLAlchemy
          connectable. See `here \
              <https://docs.sqlalchemy.org/en/20/core/connections.html>`_.
          If passing a sqlalchemy.engine.Connection which is already in a transaction,
          the transaction will not be committed.  If passing a sqlite3.Connection,
          it will not be possible to roll back the record insertion.

      schema : str, optional
          Specify the schema (if database flavor supports this). If None, use
          default schema.
      if_exists : {'fail', 'replace', 'append'}, default 'fail'
          How to behave if the table already exists.

          * fail: Raise a ValueError.
          * replace: Drop the table before inserting new values.
          * append: Insert new values to the existing table.

      index : bool, default True
          Write DataFrame index as a column. Uses `index_label` as the column
          name in the table.
      index_label : str or sequence, default None
          Column label for index column(s). If None is given (default) and
          `index` is True, then the index names are used.
          A sequence should be given if the DataFrame uses MultiIndex.
      chunksize : int, optional
          Specify the number of rows in each batch to be written at a time.
          By default, all rows will be written at once.
      dtype : dict or scalar, optional
          Specifying the datatype for columns. If a dictionary is used, the
          keys should be the column names and the values should be the
          SQLAlchemy types or strings for the sqlite3 legacy mode. If a
          scalar is provided, it will be applied to all columns.
      method : {None, 'multi', callable}, optional
          Controls the SQL insertion clause used:

          * None : Uses standard SQL ``INSERT`` clause (one per row).
          * 'multi': Pass multiple values in a single ``INSERT`` clause.
          * callable with signature ``(pd_table, conn, keys, data_iter)``.

          Details and a sample callable implementation can be found in the
          section :ref:`insert method <io.sql.method>`.

      Returns
      -------
      None or int
          Number of rows affected by to_sql. None is returned if the callable
          passed into ``method`` does not return an integer number of rows.

          The number of returned rows affected is the sum of the ``rowcount``
          attribute of ``sqlite3.Cursor`` or SQLAlchemy connectable which may not
          reflect the exact number of written rows as stipulated in the
          `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
          `SQLAlchemy <https://docs.sqlalchemy.org/en/20/core/connections.html#sqlalchemy.engine.CursorResult.rowcount>`__.

          .. versionadded:: 1.4.0

      Raises
      ------
      ValueError
          When the table already exists and `if_exists` is 'fail' (the
          default).

      See Also
      --------
      read_sql : Read a DataFrame from a table.

      Notes
      -----
      Timezone aware datetime columns will be written as
      ``Timestamp with timezone`` type with SQLAlchemy if supported by the
      database. Otherwise, the datetimes will be stored as timezone unaware
      timestamps local to the original timezone.

      References
      ----------
      .. [1] https://docs.sqlalchemy.org
      .. [2] https://www.python.org/dev/peps/pep-0249/

      Examples
      --------
      Create an in-memory SQLite database.

      >>> from sqlalchemy import create_engine
      >>> engine = create_engine('sqlite://', echo=False)

      Create a table from scratch with 3 rows.

      >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
      >>> df
           name
      0  User 1
      1  User 2
      2  User 3

      >>> df.to_sql('users', con=engine)
      3
      >>> from sqlalchemy import text
      >>> with engine.connect() as conn:
      ...    conn.execute(text("SELECT * FROM users")).fetchall()
      [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]

      An `sqlalchemy.engine.Connection` can also be passed to `con`:

      >>> with engine.begin() as connection:
      ...     df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
      ...     df1.to_sql('users', con=connection, if_exists='append')
      2

      This is allowed to support operations that require that the same
      DBAPI connection is used for the entire operation.

      >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']})
      >>> df2.to_sql('users', con=engine, if_exists='append')
      2
      >>> with engine.connect() as conn:
      ...    conn.execute(text("SELECT * FROM users")).fetchall()
      [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
       (0, 'User 4'), (1, 'User 5'), (0, 'User 6'),
       (1, 'User 7')]

      Overwrite the table with just ``df2``.

      >>> df2.to_sql('users', con=engine, if_exists='replace',
      ...            index_label='id')
      2
      >>> with engine.connect() as conn:
      ...    conn.execute(text("SELECT * FROM users")).fetchall()
      [(0, 'User 6'), (1, 'User 7')]

      Specify the dtype (especially useful for integers with missing values).
      Notice that while pandas is forced to store the data as floating point,
      the database supports nullable integers. When fetching the data with
      Python, we get back integer scalars.

      >>> df = pd.DataFrame({"A": [1, None, 2]})
      >>> df
           A
      0  1.0
      1  NaN
      2  2.0

      >>> from sqlalchemy.types import Integer
      >>> df.to_sql('integers', con=engine, index=False,
      ...           dtype={"A": Integer()})
      3

      >>> with engine.connect() as conn:
      ...   conn.execute(text("SELECT * FROM integers")).fetchall()
      [(1,), (None,), (2,)]
      """  # noqa:E501
      from pandas.io import sql

      # All table-writing options are relayed by keyword, unmodified.
      insert_options = {
          "schema": schema,
          "if_exists": if_exists,
          "index": index,
          "index_label": index_label,
          "chunksize": chunksize,
          "dtype": dtype,
          "method": method,
      }
      rows_affected = sql.to_sql(self, name, con, **insert_options)
      return rows_affected
  @final
  @doc(
      storage_options=_shared_docs["storage_options"],
      compression_options=_shared_docs["compression_options"] % "path",
  )
  def to_pickle(
      self,
      path: FilePath | WriteBuffer[bytes],
      compression: CompressionOptions = "infer",
      protocol: int = pickle.HIGHEST_PROTOCOL,
      storage_options: StorageOptions = None,
  ) -> None:
      """
      Pickle (serialize) object to file.

      Parameters
      ----------
      path : str, path object, or file-like object
          String, path object (implementing ``os.PathLike[str]``), or file-like
          object implementing a binary ``write()`` function. File path where
          the pickled object will be stored.
      {compression_options}
      protocol : int
          Int which indicates which protocol should be used by the pickler,
          default HIGHEST_PROTOCOL (see [1]_ paragraph 12.1.2). The possible
          values are 0, 1, 2, 3, 4, 5. A negative value for the protocol
          parameter is equivalent to setting its value to HIGHEST_PROTOCOL.

          .. [1] https://docs.python.org/3/library/pickle.html.

      {storage_options}

          .. versionadded:: 1.2.0

      See Also
      --------
      read_pickle : Load pickled pandas object (or any object) from file.
      DataFrame.to_hdf : Write DataFrame to an HDF5 file.
      DataFrame.to_sql : Write DataFrame to a SQL database.
      DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

      Examples
      --------
      >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})  # doctest: +SKIP
      >>> original_df  # doctest: +SKIP
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9
      >>> original_df.to_pickle("./dummy.pkl")  # doctest: +SKIP

      >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
      >>> unpickled_df  # doctest: +SKIP
         foo  bar
      0    0    5
      1    1    6
      2    2    7
      3    3    8
      4    4    9
      """  # noqa: E501
      # Aliased so the stdlib ``pickle`` module name is not shadowed locally.
      from pandas.io import pickle as pandas_pickle

      pandas_pickle.to_pickle(
          self,
          path,
          compression=compression,
          protocol=protocol,
          storage_options=storage_options,
      )
  @final
  def to_clipboard(
      self, excel: bool_t = True, sep: str | None = None, **kwargs
  ) -> None:
      r"""
      Copy object to the system clipboard.

      Write a text representation of object to the system clipboard.
      This can be pasted into Excel, for example.

      Parameters
      ----------
      excel : bool, default True
          Produce output in a csv format for easy pasting into excel.

          - True, use the provided separator for csv pasting.
          - False, write a string representation of the object to the clipboard.

      sep : str, default ``'\t'``
          Field delimiter.
      **kwargs
          These parameters will be passed to DataFrame.to_csv.

      See Also
      --------
      DataFrame.to_csv : Write a DataFrame to a comma-separated values
          (csv) file.
      read_clipboard : Read text from clipboard and pass to read_csv.

      Notes
      -----
      Requirements for your platform.

        - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
        - Windows : none
        - macOS : none

      This method uses the processes developed for the package `pyperclip`. A
      solution to render any output string format is given in the examples.

      Examples
      --------
      Copy the contents of a DataFrame to the clipboard.

      >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])

      >>> df.to_clipboard(sep=',')  # doctest: +SKIP
      ... # Wrote the following to the system clipboard:
      ... # ,A,B,C
      ... # 0,1,2,3
      ... # 1,4,5,6

      We can omit the index by passing the keyword `index` and setting
      it to false.

      >>> df.to_clipboard(sep=',', index=False)  # doctest: +SKIP
      ... # Wrote the following to the system clipboard:
      ... # A,B,C
      ... # 1,2,3
      ... # 4,5,6

      Using the original `pyperclip` package for any string output format.

      .. code-block:: python

         import pyperclip
         html = df.style.to_html()
         pyperclip.copy(html)
      """
      from pandas.io import clipboards

      # ``excel`` and ``sep`` are named parameters, so they can never also
      # appear in ``kwargs``; merging the two groups is therefore safe.
      explicit_options = {"excel": excel, "sep": sep}
      clipboards.to_clipboard(self, **explicit_options, **kwargs)
  @final
  def to_xarray(self):
      """
      Return an xarray object from the pandas object.

      Returns
      -------
      xarray.DataArray or xarray.Dataset
          Data in the pandas structure converted to Dataset if the object is
          a DataFrame, or a DataArray if the object is a Series.

      See Also
      --------
      DataFrame.to_hdf : Write DataFrame to an HDF5 file.
      DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

      Notes
      -----
      See the `xarray docs <https://xarray.pydata.org/en/stable/>`__

      Examples
      --------
      >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2),
      ...                    ('parrot', 'bird', 24.0, 2),
      ...                    ('lion', 'mammal', 80.5, 4),
      ...                    ('monkey', 'mammal', np.nan, 4)],
      ...                   columns=['name', 'class', 'max_speed',
      ...                            'num_legs'])
      >>> df
           name   class  max_speed  num_legs
      0  falcon    bird      389.0         2
      1  parrot    bird       24.0         2
      2    lion  mammal       80.5         4
      3  monkey  mammal        NaN         4

      >>> df.to_xarray()
      <xarray.Dataset>
      Dimensions:    (index: 4)
      Coordinates:
        * index      (index) int64 0 1 2 3
      Data variables:
          name       (index) object 'falcon' 'parrot' 'lion' 'monkey'
          class      (index) object 'bird' 'bird' 'mammal' 'mammal'
          max_speed  (index) float64 389.0 24.0 80.5 nan
          num_legs   (index) int64 2 2 4 4

      >>> df['max_speed'].to_xarray()
      <xarray.DataArray 'max_speed' (index: 4)>
      array([389. ,  24. ,  80.5,   nan])
      Coordinates:
        * index    (index) int64 0 1 2 3

      >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01',
      ...                         '2018-01-02', '2018-01-02'])
      >>> df_multiindex = pd.DataFrame({'date': dates,
      ...                               'animal': ['falcon', 'parrot',
      ...                                          'falcon', 'parrot'],
      ...                               'speed': [350, 18, 361, 15]})
      >>> df_multiindex = df_multiindex.set_index(['date', 'animal'])

      >>> df_multiindex
                         speed
      date       animal
      2018-01-01 falcon    350
                 parrot     18
      2018-01-02 falcon    361
                 parrot     15

      >>> df_multiindex.to_xarray()
      <xarray.Dataset>
      Dimensions:  (date: 2, animal: 2)
      Coordinates:
        * date     (date) datetime64[ns] 2018-01-01 2018-01-02
        * animal   (animal) object 'falcon' 'parrot'
      Data variables:
          speed    (date, animal) int64 350 18 361 15
      """
      xarray = import_optional_dependency("xarray")

      # One-dimensional objects go through the DataArray constructor, anything
      # else through the Dataset constructor. The conditional expression only
      # resolves the attribute chain of the branch that is actually taken.
      convert = (
          xarray.DataArray.from_series
          if self.ndim == 1
          else xarray.Dataset.from_dataframe
      )
      return convert(self)
  # Typing-only signature: with ``buf`` left as None the annotated return
  # type is ``str``.
  @overload
  def to_latex(
      self,
      buf: None = ...,
      columns: Sequence[Hashable] | None = ...,
      header: bool_t | Sequence[str] = ...,
      index: bool_t = ...,
      na_rep: str = ...,
      formatters: FormattersType | None = ...,
      float_format: FloatFormatType | None = ...,
      sparsify: bool_t | None = ...,
      index_names: bool_t = ...,
      bold_rows: bool_t = ...,
      column_format: str | None = ...,
      longtable: bool_t | None = ...,
      escape: bool_t | None = ...,
      encoding: str | None = ...,
      decimal: str = ...,
      multicolumn: bool_t | None = ...,
      multicolumn_format: str | None = ...,
      multirow: bool_t | None = ...,
      caption: str | tuple[str, str] | None = ...,
      label: str | None = ...,
      position: str | None = ...,
  ) -> str: ...
  # Typing-only signature: with a path or writable text buffer supplied as
  # ``buf`` the annotated return type is ``None``.
  @overload
  def to_latex(
      self,
      buf: FilePath | WriteBuffer[str],
      columns: Sequence[Hashable] | None = ...,
      header: bool_t | Sequence[str] = ...,
      index: bool_t = ...,
      na_rep: str = ...,
      formatters: FormattersType | None = ...,
      float_format: FloatFormatType | None = ...,
      sparsify: bool_t | None = ...,
      index_names: bool_t = ...,
      bold_rows: bool_t = ...,
      column_format: str | None = ...,
      longtable: bool_t | None = ...,
      escape: bool_t | None = ...,
      encoding: str | None = ...,
      decimal: str = ...,
      multicolumn: bool_t | None = ...,
      multicolumn_format: str | None = ...,
      multirow: bool_t | None = ...,
      caption: str | tuple[str, str] | None = ...,
      label: str | None = ...,
      position: str | None = ...,
  ) -> None: ...
  2707. @final
  2708. def to_latex(
  2709. self,
  2710. buf: FilePath | WriteBuffer[str] | None = None,
  2711. columns: Sequence[Hashable] | None = None,
  2712. header: bool_t | Sequence[str] = True,
  2713. index: bool_t = True,
  2714. na_rep: str = "NaN",
  2715. formatters: FormattersType | None = None,
  2716. float_format: FloatFormatType | None = None,
  2717. sparsify: bool_t | None = None,
  2718. index_names: bool_t = True,
  2719. bold_rows: bool_t = False,
  2720. column_format: str | None = None,
  2721. longtable: bool_t | None = None,
  2722. escape: bool_t | None = None,
  2723. encoding: str | None = None,
  2724. decimal: str = ".",
  2725. multicolumn: bool_t | None = None,
  2726. multicolumn_format: str | None = None,
  2727. multirow: bool_t | None = None,
  2728. caption: str | tuple[str, str] | None = None,
  2729. label: str | None = None,
  2730. position: str | None = None,
  2731. ) -> str | None:
  2732. r"""
  2733. Render object to a LaTeX tabular, longtable, or nested table.
  2734. Requires ``\usepackage{{booktabs}}``. The output can be copy/pasted
  2735. into a main LaTeX document or read from an external file
  2736. with ``\input{{table.tex}}``.
  2737. .. versionchanged:: 1.2.0
  2738. Added position argument, changed meaning of caption argument.
  2739. .. versionchanged:: 2.0.0
  2740. Refactored to use the Styler implementation via jinja2 templating.
  2741. Parameters
  2742. ----------
  2743. buf : str, Path or StringIO-like, optional, default None
  2744. Buffer to write to. If None, the output is returned as a string.
  2745. columns : list of label, optional
  2746. The subset of columns to write. Writes all columns by default.
  2747. header : bool or list of str, default True
  2748. Write out the column names. If a list of strings is given,
  2749. it is assumed to be aliases for the column names.
  2750. index : bool, default True
  2751. Write row names (index).
  2752. na_rep : str, default 'NaN'
  2753. Missing data representation.
  2754. formatters : list of functions or dict of {{str: function}}, optional
  2755. Formatter functions to apply to columns' elements by position or
  2756. name. The result of each function must be a unicode string.
  2757. List must be of length equal to the number of columns.
  2758. float_format : one-parameter function or str, optional, default None
  2759. Formatter for floating point numbers. For example
  2760. ``float_format="%.2f"`` and ``float_format="{{:0.2f}}".format`` will
  2761. both result in 0.1234 being formatted as 0.12.
  2762. sparsify : bool, optional
  2763. Set to False for a DataFrame with a hierarchical index to print
  2764. every multiindex key at each row. By default, the value will be
  2765. read from the config module.
  2766. index_names : bool, default True
  2767. Prints the names of the indexes.
  2768. bold_rows : bool, default False
  2769. Make the row labels bold in the output.
  2770. column_format : str, optional
  2771. The columns format as specified in `LaTeX table format
  2772. <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3
  2773. columns. By default, 'l' will be used for all columns except
  2774. columns of numbers, which default to 'r'.
  2775. longtable : bool, optional
  2776. Use a longtable environment instead of tabular. Requires
  2777. adding a \usepackage{{longtable}} to your LaTeX preamble.
  2778. By default, the value will be read from the pandas config
  2779. module, and set to `True` if the option ``styler.latex.environment`` is
  2780. `"longtable"`.
  2781. .. versionchanged:: 2.0.0
  2782. The pandas option affecting this argument has changed.
  2783. escape : bool, optional
  2784. By default, the value will be read from the pandas config
  2785. module and set to `True` if the option ``styler.format.escape`` is
  2786. `"latex"`. When set to False prevents from escaping latex special
  2787. characters in column names.
  2788. .. versionchanged:: 2.0.0
  2789. The pandas option affecting this argument has changed, as has the
  2790. default value to `False`.
  2791. encoding : str, optional
  2792. A string representing the encoding to use in the output file,
  2793. defaults to 'utf-8'.
  2794. decimal : str, default '.'
  2795. Character recognized as decimal separator, e.g. ',' in Europe.
  2796. multicolumn : bool, default True
  2797. Use \multicolumn to enhance MultiIndex columns.
  2798. The default will be read from the config module, and is set
  2799. as the option ``styler.sparse.columns``.
  2800. .. versionchanged:: 2.0.0
  2801. The pandas option affecting this argument has changed.
  2802. multicolumn_format : str, default 'r'
  2803. The alignment for multicolumns, similar to `column_format`
  2804. The default will be read from the config module, and is set as the option
  2805. ``styler.latex.multicol_align``.
  2806. .. versionchanged:: 2.0.0
  2807. The pandas option affecting this argument has changed, as has the
  2808. default value to "r".
  2809. multirow : bool, default True
  2810. Use \multirow to enhance MultiIndex rows. Requires adding a
  2811. \usepackage{{multirow}} to your LaTeX preamble. Will print
  2812. centered labels (instead of top-aligned) across the contained
  2813. rows, separating groups via clines. The default will be read
  2814. from the pandas config module, and is set as the option
  2815. ``styler.sparse.index``.
  2816. .. versionchanged:: 2.0.0
  2817. The pandas option affecting this argument has changed, as has the
  2818. default value to `True`.
  2819. caption : str or tuple, optional
  2820. Tuple (full_caption, short_caption),
  2821. which results in ``\caption[short_caption]{{full_caption}}``;
  2822. if a single string is passed, no short caption will be set.
  2823. .. versionchanged:: 1.2.0
  2824. Optionally allow caption to be a tuple ``(full_caption, short_caption)``.
  2825. label : str, optional
  2826. The LaTeX label to be placed inside ``\label{{}}`` in the output.
  2827. This is used with ``\ref{{}}`` in the main ``.tex`` file.
  2828. position : str, optional
  2829. The LaTeX positional argument for tables, to be placed after
  2830. ``\begin{{}}`` in the output.
  2831. .. versionadded:: 1.2.0
  2832. Returns
  2833. -------
  2834. str or None
  2835. If buf is None, returns the result as a string. Otherwise returns None.
  2836. See Also
  2837. --------
  2838. io.formats.style.Styler.to_latex : Render a DataFrame to LaTeX
  2839. with conditional formatting.
  2840. DataFrame.to_string : Render a DataFrame to a console-friendly
  2841. tabular output.
  2842. DataFrame.to_html : Render a DataFrame as an HTML table.
  2843. Notes
  2844. -----
  2845. As of v2.0.0 this method has changed to use the Styler implementation as
  2846. part of :meth:`.Styler.to_latex` via ``jinja2`` templating. This means
  2847. that ``jinja2`` is a requirement, and needs to be installed, for this method
  2848. to function. It is advised that users switch to using Styler, since that
  2849. implementation is more frequently updated and contains much more
  2850. flexibility with the output.
  2851. Examples
  2852. --------
  2853. Convert a general DataFrame to LaTeX with formatting:
  2854. >>> df = pd.DataFrame(dict(name=['Raphael', 'Donatello'],
  2855. ... age=[26, 45],
  2856. ... height=[181.23, 177.65]))
  2857. >>> print(df.to_latex(index=False,
  2858. ... formatters={"name": str.upper},
  2859. ... float_format="{:.1f}".format,
  2860. ... )) # doctest: +SKIP
  2861. \begin{tabular}{lrr}
  2862. \toprule
  2863. name & age & height \\
  2864. \midrule
  2865. RAPHAEL & 26 & 181.2 \\
  2866. DONATELLO & 45 & 177.7 \\
  2867. \bottomrule
  2868. \end{tabular}
  2869. """
  2870. # Get defaults from the pandas config
  2871. if self.ndim == 1:
  2872. self = self.to_frame()
  2873. if longtable is None:
  2874. longtable = config.get_option("styler.latex.environment") == "longtable"
  2875. if escape is None:
  2876. escape = config.get_option("styler.format.escape") == "latex"
  2877. if multicolumn is None:
  2878. multicolumn = config.get_option("styler.sparse.columns")
  2879. if multicolumn_format is None:
  2880. multicolumn_format = config.get_option("styler.latex.multicol_align")
  2881. if multirow is None:
  2882. multirow = config.get_option("styler.sparse.index")
  2883. if column_format is not None and not isinstance(column_format, str):
  2884. raise ValueError("`column_format` must be str or unicode")
  2885. length = len(self.columns) if columns is None else len(columns)
  2886. if isinstance(header, (list, tuple)) and len(header) != length:
  2887. raise ValueError(f"Writing {length} cols but got {len(header)} aliases")
  2888. # Refactor formatters/float_format/decimal/na_rep/escape to Styler structure
  2889. base_format_ = {
  2890. "na_rep": na_rep,
  2891. "escape": "latex" if escape else None,
  2892. "decimal": decimal,
  2893. }
  2894. index_format_: dict[str, Any] = {"axis": 0, **base_format_}
  2895. column_format_: dict[str, Any] = {"axis": 1, **base_format_}
  2896. if isinstance(float_format, str):
  2897. float_format_: Callable | None = lambda x: float_format % x
  2898. else:
  2899. float_format_ = float_format
  2900. def _wrap(x, alt_format_):
  2901. if isinstance(x, (float, complex)) and float_format_ is not None:
  2902. return float_format_(x)
  2903. else:
  2904. return alt_format_(x)
  2905. formatters_: list | tuple | dict | Callable | None = None
  2906. if isinstance(formatters, list):
  2907. formatters_ = {
  2908. c: partial(_wrap, alt_format_=formatters[i])
  2909. for i, c in enumerate(self.columns)
  2910. }
  2911. elif isinstance(formatters, dict):
  2912. index_formatter = formatters.pop("__index__", None)
  2913. column_formatter = formatters.pop("__columns__", None)
  2914. if index_formatter is not None:
  2915. index_format_.update({"formatter": index_formatter})
  2916. if column_formatter is not None:
  2917. column_format_.update({"formatter": column_formatter})
  2918. formatters_ = formatters
  2919. float_columns = self.select_dtypes(include="float").columns
  2920. for col in float_columns:
  2921. if col not in formatters.keys():
  2922. formatters_.update({col: float_format_})
  2923. elif formatters is None and float_format is not None:
  2924. formatters_ = partial(_wrap, alt_format_=lambda v: v)
  2925. format_index_ = [index_format_, column_format_]
  2926. # Deal with hiding indexes and relabelling column names
  2927. hide_: list[dict] = []
  2928. relabel_index_: list[dict] = []
  2929. if columns:
  2930. hide_.append(
  2931. {
  2932. "subset": [c for c in self.columns if c not in columns],
  2933. "axis": "columns",
  2934. }
  2935. )
  2936. if header is False:
  2937. hide_.append({"axis": "columns"})
  2938. elif isinstance(header, (list, tuple)):
  2939. relabel_index_.append({"labels": header, "axis": "columns"})
  2940. format_index_ = [index_format_] # column_format is overwritten
  2941. if index is False:
  2942. hide_.append({"axis": "index"})
  2943. if index_names is False:
  2944. hide_.append({"names": True, "axis": "index"})
  2945. render_kwargs_ = {
  2946. "hrules": True,
  2947. "sparse_index": sparsify,
  2948. "sparse_columns": sparsify,
  2949. "environment": "longtable" if longtable else None,
  2950. "multicol_align": multicolumn_format
  2951. if multicolumn
  2952. else f"naive-{multicolumn_format}",
  2953. "multirow_align": "t" if multirow else "naive",
  2954. "encoding": encoding,
  2955. "caption": caption,
  2956. "label": label,
  2957. "position": position,
  2958. "column_format": column_format,
  2959. "clines": "skip-last;data"
  2960. if (multirow and isinstance(self.index, MultiIndex))
  2961. else None,
  2962. "bold_rows": bold_rows,
  2963. }
  2964. return self._to_latex_via_styler(
  2965. buf,
  2966. hide=hide_,
  2967. relabel_index=relabel_index_,
  2968. format={"formatter": formatters_, **base_format_},
  2969. format_index=format_index_,
  2970. render_kwargs=render_kwargs_,
  2971. )
  2972. def _to_latex_via_styler(
  2973. self,
  2974. buf=None,
  2975. *,
  2976. hide: dict | list[dict] | None = None,
  2977. relabel_index: dict | list[dict] | None = None,
  2978. format: dict | list[dict] | None = None,
  2979. format_index: dict | list[dict] | None = None,
  2980. render_kwargs: dict | None = None,
  2981. ):
  2982. """
  2983. Render object to a LaTeX tabular, longtable, or nested table.
  2984. Uses the ``Styler`` implementation with the following, ordered, method chaining:
  2985. .. code-block:: python
  2986. styler = Styler(DataFrame)
  2987. styler.hide(**hide)
  2988. styler.relabel_index(**relabel_index)
  2989. styler.format(**format)
  2990. styler.format_index(**format_index)
  2991. styler.to_latex(buf=buf, **render_kwargs)
  2992. Parameters
  2993. ----------
  2994. buf : str, Path or StringIO-like, optional, default None
  2995. Buffer to write to. If None, the output is returned as a string.
  2996. hide : dict, list of dict
  2997. Keyword args to pass to the method call of ``Styler.hide``. If a list will
  2998. call the method numerous times.
  2999. relabel_index : dict, list of dict
  3000. Keyword args to pass to the method of ``Styler.relabel_index``. If a list
  3001. will call the method numerous times.
  3002. format : dict, list of dict
  3003. Keyword args to pass to the method call of ``Styler.format``. If a list will
  3004. call the method numerous times.
  3005. format_index : dict, list of dict
  3006. Keyword args to pass to the method call of ``Styler.format_index``. If a
  3007. list will call the method numerous times.
  3008. render_kwargs : dict
  3009. Keyword args to pass to the method call of ``Styler.to_latex``.
  3010. Returns
  3011. -------
  3012. str or None
  3013. If buf is None, returns the result as a string. Otherwise returns None.
  3014. """
  3015. from pandas.io.formats.style import Styler
  3016. self = cast("DataFrame", self)
  3017. styler = Styler(self, uuid="")
  3018. for kw_name in ["hide", "relabel_index", "format", "format_index"]:
  3019. kw = vars()[kw_name]
  3020. if isinstance(kw, dict):
  3021. getattr(styler, kw_name)(**kw)
  3022. elif isinstance(kw, list):
  3023. for sub_kw in kw:
  3024. getattr(styler, kw_name)(**sub_kw)
  3025. # bold_rows is not a direct kwarg of Styler.to_latex
  3026. render_kwargs = {} if render_kwargs is None else render_kwargs
  3027. if render_kwargs.pop("bold_rows"):
  3028. styler.applymap_index(lambda v: "textbf:--rwrap;")
  3029. return styler.to_latex(buf=buf, **render_kwargs)
  # Typing overload: ``path_or_buf`` is omitted or None, so the return type is ``str``.
  @overload
  def to_csv(
      self,
      path_or_buf: None = ...,
      sep: str = ...,
      na_rep: str = ...,
      float_format: str | Callable | None = ...,
      columns: Sequence[Hashable] | None = ...,
      header: bool_t | list[str] = ...,
      index: bool_t = ...,
      index_label: IndexLabel | None = ...,
      mode: str = ...,
      encoding: str | None = ...,
      compression: CompressionOptions = ...,
      quoting: int | None = ...,
      quotechar: str = ...,
      lineterminator: str | None = ...,
      chunksize: int | None = ...,
      date_format: str | None = ...,
      doublequote: bool_t = ...,
      escapechar: str | None = ...,
      decimal: str = ...,
      errors: str = ...,
      storage_options: StorageOptions = ...,
  ) -> str:
      ...
  # Typing overload: a path or writable buffer is given, so the return type is ``None``.
  @overload
  def to_csv(
      self,
      path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str],
      sep: str = ...,
      na_rep: str = ...,
      float_format: str | Callable | None = ...,
      columns: Sequence[Hashable] | None = ...,
      header: bool_t | list[str] = ...,
      index: bool_t = ...,
      index_label: IndexLabel | None = ...,
      mode: str = ...,
      encoding: str | None = ...,
      compression: CompressionOptions = ...,
      quoting: int | None = ...,
      quotechar: str = ...,
      lineterminator: str | None = ...,
      chunksize: int | None = ...,
      date_format: str | None = ...,
      doublequote: bool_t = ...,
      escapechar: str | None = ...,
      decimal: str = ...,
      errors: str = ...,
      storage_options: StorageOptions = ...,
  ) -> None:
      ...
  @final
  @doc(
      storage_options=_shared_docs["storage_options"],
      compression_options=_shared_docs["compression_options"] % "path_or_buf",
  )
  def to_csv(
      self,
      path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
      sep: str = ",",
      na_rep: str = "",
      float_format: str | Callable | None = None,
      columns: Sequence[Hashable] | None = None,
      header: bool_t | list[str] = True,
      index: bool_t = True,
      index_label: IndexLabel | None = None,
      mode: str = "w",
      encoding: str | None = None,
      compression: CompressionOptions = "infer",
      quoting: int | None = None,
      quotechar: str = '"',
      lineterminator: str | None = None,
      chunksize: int | None = None,
      date_format: str | None = None,
      doublequote: bool_t = True,
      escapechar: str | None = None,
      decimal: str = ".",
      errors: str = "strict",
      storage_options: StorageOptions = None,
  ) -> str | None:
      r"""
      Write object to a comma-separated values (csv) file.

      Parameters
      ----------
      path_or_buf : str, path object, file-like object, or None, default None
          String, path object (implementing os.PathLike[str]), or file-like
          object implementing a write() function. If None, the result is
          returned as a string. If a non-binary file object is passed, it should
          be opened with `newline=''`, disabling universal newlines. If a binary
          file object is passed, `mode` might need to contain a `'b'`.

          .. versionchanged:: 1.2.0

             Support for binary file objects was introduced.

      sep : str, default ','
          String of length 1. Field delimiter for the output file.
      na_rep : str, default ''
          Missing data representation.
      float_format : str, Callable, default None
          Format string for floating point numbers. If a Callable is given, it takes
          precedence over other numeric formatting parameters, like decimal.
      columns : sequence, optional
          Columns to write.
      header : bool or list of str, default True
          Write out the column names. If a list of strings is given it is
          assumed to be aliases for the column names.
      index : bool, default True
          Write row names (index).
      index_label : str or sequence, or False, default None
          Column label for index column(s) if desired. If None is given, and
          `header` and `index` are True, then the index names are used. A
          sequence should be given if the object uses MultiIndex. If
          False do not print fields for index names. Use index_label=False
          for easier importing in R.
      mode : str, default 'w'
          Python write mode. The available write modes are the same as
          :py:func:`open`.
      encoding : str, optional
          A string representing the encoding to use in the output file,
          defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
          is a non-binary file object.
      {compression_options}

          .. versionchanged:: 1.0.0

             May now be a dict with key 'method' as compression mode
             and other entries as additional compression options if
             compression mode is 'zip'.

          .. versionchanged:: 1.1.0

             Passing compression options as keys in dict is
             supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.

          .. versionchanged:: 1.2.0

              Compression is supported for binary file objects.

          .. versionchanged:: 1.2.0

              Previous versions forwarded dict entries for 'gzip' to
              `gzip.open` instead of `gzip.GzipFile` which prevented
              setting `mtime`.

      quoting : optional constant from csv module
          Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
          then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
          will treat them as non-numeric.
      quotechar : str, default '\"'
          String of length 1. Character used to quote fields.
      lineterminator : str, optional
          The newline character or character sequence to use in the output
          file. Defaults to `os.linesep`, which depends on the OS in which
          this method is called (e.g. '\\n' for Linux, '\\r\\n' for Windows).

          .. versionchanged:: 1.5.0

              Previously was line_terminator, changed for consistency with
              read_csv and the standard library 'csv' module.

      chunksize : int or None
          Rows to write at a time.
      date_format : str, default None
          Format string for datetime objects.
      doublequote : bool, default True
          Control quoting of `quotechar` inside a field.
      escapechar : str, default None
          String of length 1. Character used to escape `sep` and `quotechar`
          when appropriate.
      decimal : str, default '.'
          Character recognized as decimal separator. E.g. use ',' for
          European data.
      errors : str, default 'strict'
          Specifies how encoding and decoding errors are to be handled.
          See the errors argument for :func:`open` for a full list
          of options.

          .. versionadded:: 1.1.0

      {storage_options}

          .. versionadded:: 1.2.0

      Returns
      -------
      None or str
          If path_or_buf is None, returns the resulting csv format as a
          string. Otherwise returns None.

      See Also
      --------
      read_csv : Load a CSV file into a DataFrame.
      to_excel : Write DataFrame to an Excel file.

      Examples
      --------
      >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'],
      ...                    'mask': ['red', 'purple'],
      ...                    'weapon': ['sai', 'bo staff']}})
      >>> df.to_csv(index=False)
      'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'

      Create 'out.zip' containing 'out.csv'

      >>> compression_opts = dict(method='zip',
      ...                         archive_name='out.csv')  # doctest: +SKIP
      >>> df.to_csv('out.zip', index=False,
      ...           compression=compression_opts)  # doctest: +SKIP

      To write a csv file to a new folder or nested folder you will first
      need to create it using either Pathlib or os:

      >>> from pathlib import Path  # doctest: +SKIP
      >>> filepath = Path('folder/subfolder/out.csv')  # doctest: +SKIP
      >>> filepath.parent.mkdir(parents=True, exist_ok=True)  # doctest: +SKIP
      >>> df.to_csv(filepath)  # doctest: +SKIP

      >>> import os  # doctest: +SKIP
      >>> os.makedirs('folder/subfolder', exist_ok=True)  # doctest: +SKIP
      >>> df.to_csv('folder/subfolder/out.csv')  # doctest: +SKIP
      """
      # Anything that is not already a DataFrame is converted with ``to_frame()``
      # so that a single rendering path serves every caller.
      df = self if isinstance(self, ABCDataFrame) else self.to_frame()

      # Value-formatting options are given to the formatter ...
      formatter = DataFrameFormatter(
          frame=df,
          header=header,
          index=index,
          na_rep=na_rep,
          float_format=float_format,
          decimal=decimal,
      )

      # ... while file, dialect and column-selection options go to the renderer.
      return DataFrameRenderer(formatter).to_csv(
          path_or_buf,
          lineterminator=lineterminator,
          sep=sep,
          encoding=encoding,
          errors=errors,
          compression=compression,
          quoting=quoting,
          columns=columns,
          index_label=index_label,
          mode=mode,
          chunksize=chunksize,
          quotechar=quotechar,
          date_format=date_format,
          doublequote=doublequote,
          escapechar=escapechar,
          storage_options=storage_options,
      )
  3254. # ----------------------------------------------------------------------
  3255. # Lookup Caching
  def _reset_cacher(self) -> None:
      """
      Reset the cacher.

      This implementation is a placeholder: it always raises
      ``AbstractMethodError``.
      """
      raise AbstractMethodError(self)
  def _maybe_update_cacher(
      self,
      clear: bool_t = False,
      verify_is_copy: bool_t = True,
      inplace: bool_t = False,
  ) -> None:
      """
      Optionally run the setitem-copy check and clear the item cache.

      Nothing is done when copy-on-write mode is active.

      Parameters
      ----------
      clear : bool, default False
          Clear the item cache.
      verify_is_copy : bool, default True
          Provide is_copy checks.
      inplace : bool, default False
          Not used by this implementation.
      """
      if not using_copy_on_write():
          if verify_is_copy:
              self._check_setitem_copy(t="referent")
          if clear:
              self._clear_item_cache()
  def _clear_item_cache(self) -> None:
      """
      Clear the item cache.

      This implementation is a placeholder: it always raises
      ``AbstractMethodError``.
      """
      raise AbstractMethodError(self)
  3285. # ----------------------------------------------------------------------
  3286. # Indexing Methods
  def take(self: NDFrameT, indices, axis: Axis = 0, **kwargs) -> NDFrameT:
      """
      Return the elements in the given *positional* indices along an axis.

      This means that we are not indexing according to actual values in
      the index attribute of the object. We are indexing according to the
      actual position of the element in the object.

      Parameters
      ----------
      indices : array-like
          An array of ints indicating which positions to take.
      axis : {0 or 'index', 1 or 'columns', None}, default 0
          The axis on which to select elements. ``0`` means that we are
          selecting rows, ``1`` means that we are selecting columns.
          For `Series` this parameter is unused and defaults to 0.
      **kwargs
          For compatibility with :meth:`numpy.take`. Has no effect on the
          output.

      Returns
      -------
      same type as caller
          An array-like containing the elements taken from the object.

      See Also
      --------
      DataFrame.loc : Select a subset of a DataFrame by labels.
      DataFrame.iloc : Select a subset of a DataFrame by positions.
      numpy.take : Take elements from an array along an axis.

      Examples
      --------
      >>> df = pd.DataFrame([('falcon', 'bird', 389.0),
      ...                    ('parrot', 'bird', 24.0),
      ...                    ('lion', 'mammal', 80.5),
      ...                    ('monkey', 'mammal', np.nan)],
      ...                   columns=['name', 'class', 'max_speed'],
      ...                   index=[0, 2, 3, 1])
      >>> df
           name   class  max_speed
      0  falcon    bird      389.0
      2  parrot    bird       24.0
      3    lion  mammal       80.5
      1  monkey  mammal        NaN

      Take elements at positions 0 and 3 along the axis 0 (default).

      Note how the actual indices selected (0 and 1) do not correspond to
      our selected indices 0 and 3. That's because we are selecting the 0th
      and 3rd rows, not rows whose indices equal 0 and 3.

      >>> df.take([0, 3])
           name   class  max_speed
      0  falcon    bird      389.0
      1  monkey  mammal        NaN

      Take elements at indices 1 and 2 along the axis 1 (column selection).

      >>> df.take([1, 2], axis=1)
          class  max_speed
      0    bird      389.0
      2    bird       24.0
      3  mammal       80.5
      1  mammal        NaN

      We may take elements using negative integers, which count positions
      from the end of the object, just like with Python lists.

      >>> df.take([-1, -2])
           name   class  max_speed
      1  monkey  mammal        NaN
      3    lion  mammal       80.5
      """
      # ``kwargs`` exist only for numpy compatibility; they are validated here and
      # are not forwarded to ``_take``.
      nv.validate_take((), kwargs)

      return self._take(indices, axis)
  def _take(
      self: NDFrameT,
      indices,
      axis: Axis = 0,
      convert_indices: bool_t = True,
  ) -> NDFrameT:
      """
      Positional take that exposes extra, internal-only arguments.

      The selection semantics are those of `take`; see its docstring for a full
      explanation of the parameters. ``convert_indices`` is forwarded unchanged
      to the manager's ``take``.
      """
      if not isinstance(indices, slice):
          indices = np.asarray(indices, dtype=np.intp)
          # Short-circuit: when all four conditions hold, return
          # ``self.copy(deep=None)`` instead of going through the manager.
          can_shortcut = (
              axis == 0
              and indices.ndim == 1
              and using_copy_on_write()
              and is_range_indexer(indices, len(self))
          )
          if can_shortcut:
              return self.copy(deep=None)

      taken_mgr = self._mgr.take(
          indices,
          axis=self._get_block_manager_axis(axis),
          verify=True,
          convert_indices=convert_indices,
      )
      result = self._constructor(taken_mgr)
      return result.__finalize__(self, method="take")
  def _take_with_is_copy(self: NDFrameT, indices, axis: Axis = 0) -> NDFrameT:
      """
      Variant of `take` that also maintains the ``_is_copy`` bookkeeping.

      The parent object is recorded on the result (used in indexing for the
      SettingWithCopyWarning) whenever the result's axis differs from the
      caller's axis.

      See the docstring of `take` for full explanation of the parameters.
      """
      taken = self._take(indices=indices, axis=axis)

      # Only mark the result as a copy when the take actually changed the axis.
      axis_unchanged = taken._get_axis(axis).equals(self._get_axis(axis))
      if not axis_unchanged:
          taken._set_is_copy(self)
      return taken
  @final
  def xs(
      self: NDFrameT,
      key: IndexLabel,
      axis: Axis = 0,
      level: IndexLabel = None,
      drop_level: bool_t = True,
  ) -> NDFrameT:
      """
      Return cross-section from the Series/DataFrame.

      This method takes a `key` argument to select data at a particular
      level of a MultiIndex.

      Parameters
      ----------
      key : label or tuple of label
          Label contained in the index, or partially in a MultiIndex.
      axis : {0 or 'index', 1 or 'columns'}, default 0
          Axis to retrieve cross-section on.
      level : object, defaults to first n levels (n=1 or len(key))
          In case of a key partially contained in a MultiIndex, indicate
          which levels are used. Levels can be referred by label or position.
      drop_level : bool, default True
          If False, returns object with same levels as self.

      Returns
      -------
      Series or DataFrame
          Cross-section from the original Series or DataFrame
          corresponding to the selected index levels.

      Raises
      ------
      TypeError
          If `key` is a list, or if `level` is given and the selected axis is
          not a MultiIndex.

      See Also
      --------
      DataFrame.loc : Access a group of rows and columns
          by label(s) or a boolean array.
      DataFrame.iloc : Purely integer-location based indexing
          for selection by position.

      Notes
      -----
      `xs` can not be used to set values.

      MultiIndex Slicers is a generic way to get/set values on
      any level or levels.
      It is a superset of `xs` functionality, see
      :ref:`MultiIndex Slicers <advanced.mi_slicers>`.

      Examples
      --------
      >>> d = {'num_legs': [4, 4, 2, 2],
      ...      'num_wings': [0, 0, 2, 2],
      ...      'class': ['mammal', 'mammal', 'mammal', 'bird'],
      ...      'animal': ['cat', 'dog', 'bat', 'penguin'],
      ...      'locomotion': ['walks', 'walks', 'flies', 'walks']}
      >>> df = pd.DataFrame(data=d)
      >>> df = df.set_index(['class', 'animal', 'locomotion'])
      >>> df
                                 num_legs  num_wings
      class  animal  locomotion
      mammal cat     walks              4          0
             dog     walks              4          0
             bat     flies              2          2
      bird   penguin walks              2          2

      Get values at specified index

      >>> df.xs('mammal')
                         num_legs  num_wings
      animal locomotion
      cat    walks              4          0
      dog    walks              4          0
      bat    flies              2          2

      Get values at several indexes

      >>> df.xs(('mammal', 'dog', 'walks'))
      num_legs     4
      num_wings    0
      Name: (mammal, dog, walks), dtype: int64

      Get values at specified index and level

      >>> df.xs('cat', level=1)
                         num_legs  num_wings
      class  locomotion
      mammal walks              4          0

      Get values at several indexes and levels

      >>> df.xs(('bird', 'walks'),
      ...       level=[0, 'locomotion'])
               num_legs  num_wings
      animal
      penguin         2          2

      Get values at specified column and axis

      >>> df.xs('num_wings', axis=1)
      class   animal   locomotion
      mammal  cat      walks         0
              dog      walks         0
              bat      flies         2
      bird    penguin  walks         2
      Name: num_wings, dtype: int64
      """
      axis = self._get_axis_number(axis)
      labels = self._get_axis(axis)

      if isinstance(key, list):
          raise TypeError("list keys are not supported in xs, pass a tuple instead")

      # Explicit ``level``: resolve the key against the MultiIndex, select
      # positionally along ``axis`` and install the axis returned by the lookup.
      if level is not None:
          if not isinstance(labels, MultiIndex):
              raise TypeError("Index must be a MultiIndex")
          loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level)

          # create the tuple of the indexer
          _indexer = [slice(None)] * self.ndim
          _indexer[axis] = loc
          indexer = tuple(_indexer)

          result = self.iloc[indexer]
          setattr(result, result._get_axis_name(axis), new_ax)
          return result

      if axis == 1:
          # Column cross-section that drops the level is plain item access.
          if drop_level:
              return self[key]
          index = self.columns
      else:
          index = self.index

      if isinstance(index, MultiIndex):
          loc, new_index = index._get_loc_level(key, level=0)
          if not drop_level:
              # Keep every level: rebuild the axis from the matched positions.
              if lib.is_integer(loc):
                  new_index = index[loc : loc + 1]
              else:
                  new_index = index[loc]
      else:
          loc = index.get_loc(key)

          # An ndarray result (boolean mask or array of positions) is handed to
          # the positional take helper.
          if isinstance(loc, np.ndarray):
              if loc.dtype == np.bool_:
                  (inds,) = loc.nonzero()
                  return self._take_with_is_copy(inds, axis=axis)
              else:
                  return self._take_with_is_copy(loc, axis=axis)

          if not is_scalar(loc):
              new_index = index[loc]

      if is_scalar(loc) and axis == 0:
          # In this case loc should be an integer
          if self.ndim == 1:
              # if we encounter an array-like and we only have 1 dim
              # that means that there are list/ndarrays inside the Series!
              # so just return them (GH 6394)
              return self._values[loc]

          new_mgr = self._mgr.fast_xs(loc)

          result = self._constructor_sliced(
              new_mgr, name=self.index[loc]
          ).__finalize__(self)
      elif is_scalar(loc):
          # Scalar position on axis 1: slice so the result keeps that axis.
          result = self.iloc[:, slice(loc, loc + 1)]
      elif axis == 1:
          result = self.iloc[:, loc]
      else:
          result = self.iloc[loc]
          result.index = new_index

      # this could be a view
      # but only in a single-dtyped view sliceable case
      result._set_is_copy(self, copy=not result._is_view)
      return result
  def __getitem__(self, item):
      """
      Item access placeholder; this implementation always raises
      ``AbstractMethodError``.
      """
      raise AbstractMethodError(self)
  def _slice(self: NDFrameT, slobj: slice, axis: Axis = 0) -> NDFrameT:
      """
      Build a new object from a slice of this container along ``axis``.

      Slicing with this method is *always* positional.
      """
      assert isinstance(slobj, slice), type(slobj)
      mgr_axis = self._get_block_manager_axis(axis)
      result = self._constructor(
          self._mgr.get_slice(slobj, axis=mgr_axis)
      ).__finalize__(self)

      # this could be a view
      # but only in a single-dtyped view sliceable case
      if mgr_axis != 0:
          is_copy = True
      else:
          is_copy = result._is_view
      result._set_is_copy(self, copy=is_copy)
      return result
  3554. @final
  3555. def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None:
  3556. if not copy:
  3557. self._is_copy = None
  3558. else:
  3559. assert ref is not None
  3560. self._is_copy = weakref.ref(ref)
  def _check_is_chained_assignment_possible(self) -> bool_t:
      """
      Run the setitem-copy check if this object is flagged as a copy.

      Should be called just near setting a value.

      When ``self._is_copy`` is truthy, ``_check_setitem_copy(t="referent")``
      is invoked. This implementation then always returns False.

      NOTE(review): the previous description spoke of checking for a cached,
      single-dtype view and returning True so that the cacher is updated after
      setting; nothing in this implementation does that -- presumably an
      override elsewhere provides it; confirm.
      """
      if self._is_copy:
          self._check_setitem_copy(t="referent")
      return False
  @final
  def _check_setitem_copy(self, t: str = "setting", force: bool_t = False):
      """
      Validate if we are doing a setitem on a chained copy.

      Depending on the ``mode.chained_assignment`` option this raises
      ``SettingWithCopyError``, emits ``SettingWithCopyWarning``, or does
      nothing. It is a no-op when copy-on-write mode is active.

      Parameters
      ----------
      t : str, the type of setting error
      force : bool, default False
          If True, then force showing an error.

      Notes
      -----
      It is technically possible to figure out that we are setting on
      a copy even WITH a multi-dtyped pandas object. In other words, some
      blocks may be views while other are not. Currently _is_view will ALWAYS
      return False for multi-blocks to avoid having to handle this case.

      df = DataFrame(np.arange(0,9), columns=['count'])
      df['group'] = 'b'

      # This technically need not raise SettingWithCopy if both are view
      # (which is not generally guaranteed but is usually True.  However,
      # this is in general not a good practice and we recommend using .loc.
      df.iloc[0:5]['group'] = 'a'
      """
      if using_copy_on_write():
          return

      # return early if the check is not needed
      if not (force or self._is_copy):
          return

      value = config.get_option("mode.chained_assignment")
      if value is None:
          return

      # see if the copy is not actually referred; if so, then dissolve
      # the copy weakref
      if self._is_copy is not None and not isinstance(self._is_copy, str):
          r = self._is_copy()
          if not gc.get_referents(r) or (r is not None and r.shape == self.shape):
              self._is_copy = None
              return

      # a custom message
      # (``_is_copy`` may itself hold the message text as a str)
      if isinstance(self._is_copy, str):
          t = self._is_copy

      elif t == "referent":
          t = (
              "\n"
              "A value is trying to be set on a copy of a slice from a "
              "DataFrame\n\n"
              "See the caveats in the documentation: "
              "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
              "indexing.html#returning-a-view-versus-a-copy"
          )

      else:
          t = (
              "\n"
              "A value is trying to be set on a copy of a slice from a "
              "DataFrame.\n"
              "Try using .loc[row_indexer,col_indexer] = value "
              "instead\n\nSee the caveats in the documentation: "
              "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
              "indexing.html#returning-a-view-versus-a-copy"
          )

      # Any option value other than "raise" or "warn" falls through silently.
      if value == "raise":
          raise SettingWithCopyError(t)
      if value == "warn":
          warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level())
  def __delitem__(self, key) -> None:
      """
      Delete item

      For a 2-dimensional object with MultiIndex columns, a key that is not
      found in the columns' engine is treated as a prefix: every column whose
      leading elements equal the key is deleted. If nothing was deleted that
      way, the key is looked up on the last axis (which raises if it is
      missing) and removed from the manager. The key is then dropped from the
      item cache if present.
      """
      deleted = False

      maybe_shortcut = False
      if self.ndim == 2 and isinstance(self.columns, MultiIndex):
          try:
              # By using engine's __contains__ we effectively
              # restrict to same-length tuples
              maybe_shortcut = key not in self.columns._engine
          except TypeError:
              # The membership test could not be evaluated for this key;
              # fall back to the regular lookup below.
              pass

      if maybe_shortcut:
          # Allow shorthand to delete all columns whose first len(key)
          # elements match key:
          if not isinstance(key, tuple):
              key = (key,)
          for col in self.columns:
              if isinstance(col, tuple) and col[: len(key)] == key:
                  del self[col]
                  deleted = True
      if not deleted:
          # If the above loop ran and didn't delete anything because
          # there was no match, this call should raise the appropriate
          # exception:
          loc = self.axes[-1].get_loc(key)
          self._mgr = self._mgr.idelete(loc)

      # delete from the caches
      try:
          del self._item_cache[key]
      except KeyError:
          pass
  3667. # ----------------------------------------------------------------------
  3668. # Unsorted
  3669. @final
  3670. def _check_inplace_and_allows_duplicate_labels(self, inplace):
  3671. if inplace and not self.flags.allows_duplicate_labels:
  3672. raise ValueError(
  3673. "Cannot specify 'inplace=True' when "
  3674. "'self.flags.allows_duplicate_labels' is False."
  3675. )
  @final
  def get(self, key, default=None):
      """
      Get item from object for given key (ex: DataFrame column).

      Returns default value if not found.

      Parameters
      ----------
      key : object
      default : object, default None
          Value returned when looking up ``key`` raises ``KeyError``,
          ``ValueError`` or ``IndexError``.

      Returns
      -------
      same type as items contained in object

      Examples
      --------
      >>> df = pd.DataFrame(
      ...     [
      ...         [24.3, 75.7, "high"],
      ...         [31, 87.8, "high"],
      ...         [22, 71.6, "medium"],
      ...         [35, 95, "medium"],
      ...     ],
      ...     columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
      ...     index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"),
      ... )

      >>> df
                  temp_celsius  temp_fahrenheit windspeed
      2014-02-12          24.3             75.7      high
      2014-02-13          31.0             87.8      high
      2014-02-14          22.0             71.6    medium
      2014-02-15          35.0             95.0    medium

      >>> df.get(["temp_celsius", "windspeed"])
                  temp_celsius windspeed
      2014-02-12          24.3      high
      2014-02-13          31.0      high
      2014-02-14          22.0    medium
      2014-02-15          35.0    medium

      >>> ser = df['windspeed']
      >>> ser.get('2014-02-13')
      'high'

      If the key isn't found, the default value will be used.

      >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value")
      'default_value'

      >>> ser.get('2014-02-10', '[unknown]')
      '[unknown]'
      """
      try:
          found = self[key]
      except (KeyError, ValueError, IndexError):
          return default
      return found
  3724. @final
  3725. @property
  3726. def _is_view(self) -> bool_t:
  3727. """Return boolean indicating if self is view of another array"""
  3728. return self._mgr.is_view
  3729. @final
  3730. def reindex_like(
  3731. self: NDFrameT,
  3732. other,
  3733. method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None,
  3734. copy: bool_t | None = None,
  3735. limit=None,
  3736. tolerance=None,
  3737. ) -> NDFrameT:
  3738. """
  3739. Return an object with matching indices as other object.
  3740. Conform the object to the same index on all axes. Optional
  3741. filling logic, placing NaN in locations having no value
  3742. in the previous index. A new object is produced unless the
  3743. new index is equivalent to the current one and copy=False.
  3744. Parameters
  3745. ----------
  3746. other : Object of the same data type
  3747. Its row and column indices are used to define the new indices
  3748. of this object.
  3749. method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}
  3750. Method to use for filling holes in reindexed DataFrame.
  3751. Please note: this is only applicable to DataFrames/Series with a
  3752. monotonically increasing/decreasing index.
  3753. * None (default): don't fill gaps
  3754. * pad / ffill: propagate last valid observation forward to next
  3755. valid
  3756. * backfill / bfill: use next valid observation to fill gap
  3757. * nearest: use nearest valid observations to fill gap.
  3758. copy : bool, default True
  3759. Return a new object, even if the passed indexes are the same.
  3760. limit : int, default None
  3761. Maximum number of consecutive labels to fill for inexact matches.
  3762. tolerance : optional
  3763. Maximum distance between original and new labels for inexact
  3764. matches. The values of the index at the matching locations must
  3765. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  3766. Tolerance may be a scalar value, which applies the same tolerance
  3767. to all values, or list-like, which applies variable tolerance per
  3768. element. List-like includes list, tuple, array, Series, and must be
  3769. the same size as the index and its dtype must exactly match the
  3770. index's type.
  3771. Returns
  3772. -------
  3773. Series or DataFrame
  3774. Same type as caller, but with changed indices on each axis.
  3775. See Also
  3776. --------
  3777. DataFrame.set_index : Set row labels.
  3778. DataFrame.reset_index : Remove row labels or move them to new columns.
  3779. DataFrame.reindex : Change to new indices or expand indices.
  3780. Notes
  3781. -----
  3782. Same as calling
  3783. ``.reindex(index=other.index, columns=other.columns,...)``.
  3784. Examples
  3785. --------
  3786. >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'],
  3787. ... [31, 87.8, 'high'],
  3788. ... [22, 71.6, 'medium'],
  3789. ... [35, 95, 'medium']],
  3790. ... columns=['temp_celsius', 'temp_fahrenheit',
  3791. ... 'windspeed'],
  3792. ... index=pd.date_range(start='2014-02-12',
  3793. ... end='2014-02-15', freq='D'))
  3794. >>> df1
  3795. temp_celsius temp_fahrenheit windspeed
  3796. 2014-02-12 24.3 75.7 high
  3797. 2014-02-13 31.0 87.8 high
  3798. 2014-02-14 22.0 71.6 medium
  3799. 2014-02-15 35.0 95.0 medium
  3800. >>> df2 = pd.DataFrame([[28, 'low'],
  3801. ... [30, 'low'],
  3802. ... [35.1, 'medium']],
  3803. ... columns=['temp_celsius', 'windspeed'],
  3804. ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13',
  3805. ... '2014-02-15']))
  3806. >>> df2
  3807. temp_celsius windspeed
  3808. 2014-02-12 28.0 low
  3809. 2014-02-13 30.0 low
  3810. 2014-02-15 35.1 medium
  3811. >>> df2.reindex_like(df1)
  3812. temp_celsius temp_fahrenheit windspeed
  3813. 2014-02-12 28.0 NaN low
  3814. 2014-02-13 30.0 NaN low
  3815. 2014-02-14 NaN NaN NaN
  3816. 2014-02-15 35.1 NaN medium
  3817. """
  3818. d = other._construct_axes_dict(
  3819. axes=self._AXIS_ORDERS,
  3820. method=method,
  3821. copy=copy,
  3822. limit=limit,
  3823. tolerance=tolerance,
  3824. )
  3825. return self.reindex(**d)
  3826. @overload
  3827. def drop(
  3828. self,
  3829. labels: IndexLabel = ...,
  3830. *,
  3831. axis: Axis = ...,
  3832. index: IndexLabel = ...,
  3833. columns: IndexLabel = ...,
  3834. level: Level | None = ...,
  3835. inplace: Literal[True],
  3836. errors: IgnoreRaise = ...,
  3837. ) -> None:
  3838. ...
  3839. @overload
  3840. def drop(
  3841. self: NDFrameT,
  3842. labels: IndexLabel = ...,
  3843. *,
  3844. axis: Axis = ...,
  3845. index: IndexLabel = ...,
  3846. columns: IndexLabel = ...,
  3847. level: Level | None = ...,
  3848. inplace: Literal[False] = ...,
  3849. errors: IgnoreRaise = ...,
  3850. ) -> NDFrameT:
  3851. ...
  3852. @overload
  3853. def drop(
  3854. self: NDFrameT,
  3855. labels: IndexLabel = ...,
  3856. *,
  3857. axis: Axis = ...,
  3858. index: IndexLabel = ...,
  3859. columns: IndexLabel = ...,
  3860. level: Level | None = ...,
  3861. inplace: bool_t = ...,
  3862. errors: IgnoreRaise = ...,
  3863. ) -> NDFrameT | None:
  3864. ...
  3865. def drop(
  3866. self: NDFrameT,
  3867. labels: IndexLabel = None,
  3868. *,
  3869. axis: Axis = 0,
  3870. index: IndexLabel = None,
  3871. columns: IndexLabel = None,
  3872. level: Level | None = None,
  3873. inplace: bool_t = False,
  3874. errors: IgnoreRaise = "raise",
  3875. ) -> NDFrameT | None:
  3876. inplace = validate_bool_kwarg(inplace, "inplace")
  3877. if labels is not None:
  3878. if index is not None or columns is not None:
  3879. raise ValueError("Cannot specify both 'labels' and 'index'/'columns'")
  3880. axis_name = self._get_axis_name(axis)
  3881. axes = {axis_name: labels}
  3882. elif index is not None or columns is not None:
  3883. axes = {"index": index}
  3884. if self.ndim == 2:
  3885. axes["columns"] = columns
  3886. else:
  3887. raise ValueError(
  3888. "Need to specify at least one of 'labels', 'index' or 'columns'"
  3889. )
  3890. obj = self
  3891. for axis, labels in axes.items():
  3892. if labels is not None:
  3893. obj = obj._drop_axis(labels, axis, level=level, errors=errors)
  3894. if inplace:
  3895. self._update_inplace(obj)
  3896. return None
  3897. else:
  3898. return obj
  3899. @final
  3900. def _drop_axis(
  3901. self: NDFrameT,
  3902. labels,
  3903. axis,
  3904. level=None,
  3905. errors: IgnoreRaise = "raise",
  3906. only_slice: bool_t = False,
  3907. ) -> NDFrameT:
  3908. """
  3909. Drop labels from specified axis. Used in the ``drop`` method
  3910. internally.
  3911. Parameters
  3912. ----------
  3913. labels : single label or list-like
  3914. axis : int or axis name
  3915. level : int or level name, default None
  3916. For MultiIndex
  3917. errors : {'ignore', 'raise'}, default 'raise'
  3918. If 'ignore', suppress error and existing labels are dropped.
  3919. only_slice : bool, default False
  3920. Whether indexing along columns should be view-only.
  3921. """
  3922. axis_num = self._get_axis_number(axis)
  3923. axis = self._get_axis(axis)
  3924. if axis.is_unique:
  3925. if level is not None:
  3926. if not isinstance(axis, MultiIndex):
  3927. raise AssertionError("axis must be a MultiIndex")
  3928. new_axis = axis.drop(labels, level=level, errors=errors)
  3929. else:
  3930. new_axis = axis.drop(labels, errors=errors)
  3931. indexer = axis.get_indexer(new_axis)
  3932. # Case for non-unique axis
  3933. else:
  3934. is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple)
  3935. labels = ensure_object(common.index_labels_to_array(labels))
  3936. if level is not None:
  3937. if not isinstance(axis, MultiIndex):
  3938. raise AssertionError("axis must be a MultiIndex")
  3939. mask = ~axis.get_level_values(level).isin(labels)
  3940. # GH 18561 MultiIndex.drop should raise if label is absent
  3941. if errors == "raise" and mask.all():
  3942. raise KeyError(f"{labels} not found in axis")
  3943. elif (
  3944. isinstance(axis, MultiIndex)
  3945. and labels.dtype == "object"
  3946. and not is_tuple_labels
  3947. ):
  3948. # Set level to zero in case of MultiIndex and label is string,
  3949. # because isin can't handle strings for MultiIndexes GH#36293
  3950. # In case of tuples we get dtype object but have to use isin GH#42771
  3951. mask = ~axis.get_level_values(0).isin(labels)
  3952. else:
  3953. mask = ~axis.isin(labels)
  3954. # Check if label doesn't exist along axis
  3955. labels_missing = (axis.get_indexer_for(labels) == -1).any()
  3956. if errors == "raise" and labels_missing:
  3957. raise KeyError(f"{labels} not found in axis")
  3958. if is_extension_array_dtype(mask.dtype):
  3959. # GH#45860
  3960. mask = mask.to_numpy(dtype=bool)
  3961. indexer = mask.nonzero()[0]
  3962. new_axis = axis.take(indexer)
  3963. bm_axis = self.ndim - axis_num - 1
  3964. new_mgr = self._mgr.reindex_indexer(
  3965. new_axis,
  3966. indexer,
  3967. axis=bm_axis,
  3968. allow_dups=True,
  3969. copy=None,
  3970. only_slice=only_slice,
  3971. )
  3972. result = self._constructor(new_mgr)
  3973. if self.ndim == 1:
  3974. result.name = self.name
  3975. return result.__finalize__(self)
@final
def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:
    """
    Replace self internals with result.

    Parameters
    ----------
    result : same type as self
        Object whose ``_mgr`` is adopted by ``self``.
    verify_is_copy : bool, default True
        Provide is_copy checks. Forwarded to ``_maybe_update_cacher``.
    """
    # NOTE: This does *not* call __finalize__ and that's an explicit
    # decision that we may revisit in the future.
    # The caches are cleared before the manager is swapped; presumably they
    # hold values derived from the old manager (confirm against the cache
    # helpers).
    self._reset_cache()
    self._clear_item_cache()
    self._mgr = result._mgr
    self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True)
  3992. @final
  3993. def add_prefix(self: NDFrameT, prefix: str, axis: Axis | None = None) -> NDFrameT:
  3994. """
  3995. Prefix labels with string `prefix`.
  3996. For Series, the row labels are prefixed.
  3997. For DataFrame, the column labels are prefixed.
  3998. Parameters
  3999. ----------
  4000. prefix : str
  4001. The string to add before each label.
  4002. axis : {{0 or 'index', 1 or 'columns', None}}, default None
  4003. Axis to add prefix on
  4004. .. versionadded:: 2.0.0
  4005. Returns
  4006. -------
  4007. Series or DataFrame
  4008. New Series or DataFrame with updated labels.
  4009. See Also
  4010. --------
  4011. Series.add_suffix: Suffix row labels with string `suffix`.
  4012. DataFrame.add_suffix: Suffix column labels with string `suffix`.
  4013. Examples
  4014. --------
  4015. >>> s = pd.Series([1, 2, 3, 4])
  4016. >>> s
  4017. 0 1
  4018. 1 2
  4019. 2 3
  4020. 3 4
  4021. dtype: int64
  4022. >>> s.add_prefix('item_')
  4023. item_0 1
  4024. item_1 2
  4025. item_2 3
  4026. item_3 4
  4027. dtype: int64
  4028. >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
  4029. >>> df
  4030. A B
  4031. 0 1 3
  4032. 1 2 4
  4033. 2 3 5
  4034. 3 4 6
  4035. >>> df.add_prefix('col_')
  4036. col_A col_B
  4037. 0 1 3
  4038. 1 2 4
  4039. 2 3 5
  4040. 3 4 6
  4041. """
  4042. f = lambda x: f"{prefix}{x}"
  4043. axis_name = self._info_axis_name
  4044. if axis is not None:
  4045. axis_name = self._get_axis_name(axis)
  4046. mapper = {axis_name: f}
  4047. # error: Incompatible return value type (got "Optional[NDFrameT]",
  4048. # expected "NDFrameT")
  4049. # error: Argument 1 to "rename" of "NDFrame" has incompatible type
  4050. # "**Dict[str, partial[str]]"; expected "Union[str, int, None]"
  4051. # error: Keywords must be strings
  4052. return self._rename(**mapper) # type: ignore[return-value, arg-type, misc]
  4053. @final
  4054. def add_suffix(self: NDFrameT, suffix: str, axis: Axis | None = None) -> NDFrameT:
  4055. """
  4056. Suffix labels with string `suffix`.
  4057. For Series, the row labels are suffixed.
  4058. For DataFrame, the column labels are suffixed.
  4059. Parameters
  4060. ----------
  4061. suffix : str
  4062. The string to add after each label.
  4063. axis : {{0 or 'index', 1 or 'columns', None}}, default None
  4064. Axis to add suffix on
  4065. .. versionadded:: 2.0.0
  4066. Returns
  4067. -------
  4068. Series or DataFrame
  4069. New Series or DataFrame with updated labels.
  4070. See Also
  4071. --------
  4072. Series.add_prefix: Prefix row labels with string `prefix`.
  4073. DataFrame.add_prefix: Prefix column labels with string `prefix`.
  4074. Examples
  4075. --------
  4076. >>> s = pd.Series([1, 2, 3, 4])
  4077. >>> s
  4078. 0 1
  4079. 1 2
  4080. 2 3
  4081. 3 4
  4082. dtype: int64
  4083. >>> s.add_suffix('_item')
  4084. 0_item 1
  4085. 1_item 2
  4086. 2_item 3
  4087. 3_item 4
  4088. dtype: int64
  4089. >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]})
  4090. >>> df
  4091. A B
  4092. 0 1 3
  4093. 1 2 4
  4094. 2 3 5
  4095. 3 4 6
  4096. >>> df.add_suffix('_col')
  4097. A_col B_col
  4098. 0 1 3
  4099. 1 2 4
  4100. 2 3 5
  4101. 3 4 6
  4102. """
  4103. f = lambda x: f"{x}{suffix}"
  4104. axis_name = self._info_axis_name
  4105. if axis is not None:
  4106. axis_name = self._get_axis_name(axis)
  4107. mapper = {axis_name: f}
  4108. # error: Incompatible return value type (got "Optional[NDFrameT]",
  4109. # expected "NDFrameT")
  4110. # error: Argument 1 to "rename" of "NDFrame" has incompatible type
  4111. # "**Dict[str, partial[str]]"; expected "Union[str, int, None]"
  4112. # error: Keywords must be strings
  4113. return self._rename(**mapper) # type: ignore[return-value, arg-type, misc]
@overload
def sort_values(
    self: NDFrameT,
    *,
    axis: Axis = ...,
    ascending: bool_t | Sequence[bool_t] = ...,
    inplace: Literal[False] = ...,
    kind: str = ...,
    na_position: str = ...,
    ignore_index: bool_t = ...,
    key: ValueKeyFunc = ...,
) -> NDFrameT:
    ...


@overload
def sort_values(
    self,
    *,
    axis: Axis = ...,
    ascending: bool_t | Sequence[bool_t] = ...,
    inplace: Literal[True],
    kind: str = ...,
    na_position: str = ...,
    ignore_index: bool_t = ...,
    key: ValueKeyFunc = ...,
) -> None:
    ...


@overload
def sort_values(
    self: NDFrameT,
    *,
    axis: Axis = ...,
    ascending: bool_t | Sequence[bool_t] = ...,
    inplace: bool_t = ...,
    kind: str = ...,
    na_position: str = ...,
    ignore_index: bool_t = ...,
    key: ValueKeyFunc = ...,
) -> NDFrameT | None:
    ...


def sort_values(
    self: NDFrameT,
    *,
    axis: Axis = 0,
    ascending: bool_t | Sequence[bool_t] = True,
    inplace: bool_t = False,
    kind: str = "quicksort",
    na_position: str = "last",
    ignore_index: bool_t = False,
    key: ValueKeyFunc = None,
) -> NDFrameT | None:
    """
    Sort by the values along either axis.

    Parameters
    ----------%(optional_by)s
    axis : %(axes_single_arg)s, default 0
         Axis to be sorted.
    ascending : bool or list of bool, default True
         Sort ascending vs. descending. Specify list for multiple sort
         orders.  If this is a list of bools, must match the length of
         the by.
    inplace : bool, default False
         If True, perform operation in-place.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
         Choice of sorting algorithm. See also :func:`numpy.sort` for more
         information. `mergesort` and `stable` are the only stable algorithms. For
         DataFrames, this option is only applied when sorting on a single
         column or label.
    na_position : {'first', 'last'}, default 'last'
         Puts NaNs at the beginning if `first`; `last` puts NaNs at the
         end.
    ignore_index : bool, default False
         If True, the resulting axis will be labeled 0, 1, …, n - 1.
    key : callable, optional
        Apply the key function to the values
        before sorting. This is similar to the `key` argument in the
        builtin :meth:`sorted` function, with the notable difference that
        this `key` function should be *vectorized*. It should expect a
        ``Series`` and return a Series with the same shape as the input.
        It will be applied to each column in `by` independently.

        .. versionadded:: 1.1.0

    Returns
    -------
    DataFrame or None
        DataFrame with sorted values or None if ``inplace=True``.

    See Also
    --------
    DataFrame.sort_index : Sort a DataFrame by the index.
    Series.sort_values : Similar method for a Series.

    Examples
    --------
    >>> df = pd.DataFrame({
    ...     'col1': ['A', 'A', 'B', np.nan, 'D', 'C'],
    ...     'col2': [2, 1, 9, 8, 7, 4],
    ...     'col3': [0, 1, 9, 4, 2, 3],
    ...     'col4': ['a', 'B', 'c', 'D', 'e', 'F']
    ... })
    >>> df
      col1  col2  col3 col4
    0    A     2     0    a
    1    A     1     1    B
    2    B     9     9    c
    3  NaN     8     4    D
    4    D     7     2    e
    5    C     4     3    F

    Sort by col1

    >>> df.sort_values(by=['col1'])
      col1  col2  col3 col4
    0    A     2     0    a
    1    A     1     1    B
    2    B     9     9    c
    5    C     4     3    F
    4    D     7     2    e
    3  NaN     8     4    D

    Sort by multiple columns

    >>> df.sort_values(by=['col1', 'col2'])
      col1  col2  col3 col4
    1    A     1     1    B
    0    A     2     0    a
    2    B     9     9    c
    5    C     4     3    F
    4    D     7     2    e
    3  NaN     8     4    D

    Sort Descending

    >>> df.sort_values(by='col1', ascending=False)
      col1  col2  col3 col4
    4    D     7     2    e
    5    C     4     3    F
    2    B     9     9    c
    0    A     2     0    a
    1    A     1     1    B
    3  NaN     8     4    D

    Putting NAs first

    >>> df.sort_values(by='col1', ascending=False, na_position='first')
      col1  col2  col3 col4
    3  NaN     8     4    D
    4    D     7     2    e
    5    C     4     3    F
    2    B     9     9    c
    0    A     2     0    a
    1    A     1     1    B

    Sorting with a key function

    >>> df.sort_values(by='col4', key=lambda col: col.str.lower())
       col1  col2  col3 col4
    0    A     2     0    a
    1    A     1     1    B
    2    B     9     9    c
    3  NaN     8     4    D
    4    D     7     2    e
    5    C     4     3    F

    Natural sort with the key argument,
    using the `natsort <https://github.com/SethMMorton/natsort>`_ package.

    >>> df = pd.DataFrame({
    ...    "time": ['0hr', '128hr', '72hr', '48hr', '96hr'],
    ...    "value": [10, 20, 30, 40, 50]
    ... })
    >>> df
        time  value
    0    0hr     10
    1  128hr     20
    2   72hr     30
    3   48hr     40
    4   96hr     50
    >>> from natsort import index_natsorted
    >>> df.sort_values(
    ...     by="time",
    ...     key=lambda x: np.argsort(index_natsorted(df["time"]))
    ... )
        time  value
    0    0hr     10
    3   48hr     40
    2   72hr     30
    4   96hr     50
    1  128hr     20
    """
    # No implementation at this level: calling it here always raises. The
    # docstring carries %-style placeholders (``optional_by``,
    # ``axes_single_arg``), presumably filled in where it is reused.
    raise AbstractMethodError(self)
  4289. @overload
  4290. def sort_index(
  4291. self,
  4292. *,
  4293. axis: Axis = ...,
  4294. level: IndexLabel = ...,
  4295. ascending: bool_t | Sequence[bool_t] = ...,
  4296. inplace: Literal[True],
  4297. kind: SortKind = ...,
  4298. na_position: NaPosition = ...,
  4299. sort_remaining: bool_t = ...,
  4300. ignore_index: bool_t = ...,
  4301. key: IndexKeyFunc = ...,
  4302. ) -> None:
  4303. ...
  4304. @overload
  4305. def sort_index(
  4306. self: NDFrameT,
  4307. *,
  4308. axis: Axis = ...,
  4309. level: IndexLabel = ...,
  4310. ascending: bool_t | Sequence[bool_t] = ...,
  4311. inplace: Literal[False] = ...,
  4312. kind: SortKind = ...,
  4313. na_position: NaPosition = ...,
  4314. sort_remaining: bool_t = ...,
  4315. ignore_index: bool_t = ...,
  4316. key: IndexKeyFunc = ...,
  4317. ) -> NDFrameT:
  4318. ...
  4319. @overload
  4320. def sort_index(
  4321. self: NDFrameT,
  4322. *,
  4323. axis: Axis = ...,
  4324. level: IndexLabel = ...,
  4325. ascending: bool_t | Sequence[bool_t] = ...,
  4326. inplace: bool_t = ...,
  4327. kind: SortKind = ...,
  4328. na_position: NaPosition = ...,
  4329. sort_remaining: bool_t = ...,
  4330. ignore_index: bool_t = ...,
  4331. key: IndexKeyFunc = ...,
  4332. ) -> NDFrameT | None:
  4333. ...
  4334. def sort_index(
  4335. self: NDFrameT,
  4336. *,
  4337. axis: Axis = 0,
  4338. level: IndexLabel = None,
  4339. ascending: bool_t | Sequence[bool_t] = True,
  4340. inplace: bool_t = False,
  4341. kind: SortKind = "quicksort",
  4342. na_position: NaPosition = "last",
  4343. sort_remaining: bool_t = True,
  4344. ignore_index: bool_t = False,
  4345. key: IndexKeyFunc = None,
  4346. ) -> NDFrameT | None:
  4347. inplace = validate_bool_kwarg(inplace, "inplace")
  4348. axis = self._get_axis_number(axis)
  4349. ascending = validate_ascending(ascending)
  4350. target = self._get_axis(axis)
  4351. indexer = get_indexer_indexer(
  4352. target, level, ascending, kind, na_position, sort_remaining, key
  4353. )
  4354. if indexer is None:
  4355. if inplace:
  4356. result = self
  4357. else:
  4358. result = self.copy(deep=None)
  4359. if ignore_index:
  4360. result.index = default_index(len(self))
  4361. if inplace:
  4362. return None
  4363. else:
  4364. return result
  4365. baxis = self._get_block_manager_axis(axis)
  4366. new_data = self._mgr.take(indexer, axis=baxis, verify=False)
  4367. # reconstruct axis if needed
  4368. new_data.set_axis(baxis, new_data.axes[baxis]._sort_levels_monotonic())
  4369. if ignore_index:
  4370. axis = 1 if isinstance(self, ABCDataFrame) else 0
  4371. new_data.set_axis(axis, default_index(len(indexer)))
  4372. result = self._constructor(new_data)
  4373. if inplace:
  4374. return self._update_inplace(result)
  4375. else:
  4376. return result.__finalize__(self, method="sort_index")
  4377. @doc(
  4378. klass=_shared_doc_kwargs["klass"],
  4379. optional_reindex="",
  4380. )
  4381. def reindex(
  4382. self: NDFrameT,
  4383. labels=None,
  4384. index=None,
  4385. columns=None,
  4386. axis: Axis | None = None,
  4387. method: str | None = None,
  4388. copy: bool_t | None = None,
  4389. level: Level | None = None,
  4390. fill_value: Scalar | None = np.nan,
  4391. limit: int | None = None,
  4392. tolerance=None,
  4393. ) -> NDFrameT:
  4394. """
  4395. Conform {klass} to new index with optional filling logic.
  4396. Places NA/NaN in locations having no value in the previous index. A new object
  4397. is produced unless the new index is equivalent to the current one and
  4398. ``copy=False``.
  4399. Parameters
  4400. ----------
  4401. {optional_reindex}
  4402. method : {{None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'}}
  4403. Method to use for filling holes in reindexed DataFrame.
  4404. Please note: this is only applicable to DataFrames/Series with a
  4405. monotonically increasing/decreasing index.
  4406. * None (default): don't fill gaps
  4407. * pad / ffill: Propagate last valid observation forward to next
  4408. valid.
  4409. * backfill / bfill: Use next valid observation to fill gap.
  4410. * nearest: Use nearest valid observations to fill gap.
  4411. copy : bool, default True
  4412. Return a new object, even if the passed indexes are the same.
  4413. level : int or name
  4414. Broadcast across a level, matching Index values on the
  4415. passed MultiIndex level.
  4416. fill_value : scalar, default np.NaN
  4417. Value to use for missing values. Defaults to NaN, but can be any
  4418. "compatible" value.
  4419. limit : int, default None
  4420. Maximum number of consecutive elements to forward or backward fill.
  4421. tolerance : optional
  4422. Maximum distance between original and new labels for inexact
  4423. matches. The values of the index at the matching locations most
  4424. satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
  4425. Tolerance may be a scalar value, which applies the same tolerance
  4426. to all values, or list-like, which applies variable tolerance per
  4427. element. List-like includes list, tuple, array, Series, and must be
  4428. the same size as the index and its dtype must exactly match the
  4429. index's type.
  4430. Returns
  4431. -------
  4432. {klass} with changed index.
  4433. See Also
  4434. --------
  4435. DataFrame.set_index : Set row labels.
  4436. DataFrame.reset_index : Remove row labels or move them to new columns.
  4437. DataFrame.reindex_like : Change to same indices as other DataFrame.
  4438. Examples
  4439. --------
  4440. ``DataFrame.reindex`` supports two calling conventions
  4441. * ``(index=index_labels, columns=column_labels, ...)``
  4442. * ``(labels, axis={{'index', 'columns'}}, ...)``
  4443. We *highly* recommend using keyword arguments to clarify your
  4444. intent.
  4445. Create a dataframe with some fictional data.
  4446. >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']
  4447. >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301],
  4448. ... 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}},
  4449. ... index=index)
  4450. >>> df
  4451. http_status response_time
  4452. Firefox 200 0.04
  4453. Chrome 200 0.02
  4454. Safari 404 0.07
  4455. IE10 404 0.08
  4456. Konqueror 301 1.00
  4457. Create a new index and reindex the dataframe. By default
  4458. values in the new index that do not have corresponding
  4459. records in the dataframe are assigned ``NaN``.
  4460. >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10',
  4461. ... 'Chrome']
  4462. >>> df.reindex(new_index)
  4463. http_status response_time
  4464. Safari 404.0 0.07
  4465. Iceweasel NaN NaN
  4466. Comodo Dragon NaN NaN
  4467. IE10 404.0 0.08
  4468. Chrome 200.0 0.02
  4469. We can fill in the missing values by passing a value to
  4470. the keyword ``fill_value``. Because the index is not monotonically
  4471. increasing or decreasing, we cannot use arguments to the keyword
  4472. ``method`` to fill the ``NaN`` values.
  4473. >>> df.reindex(new_index, fill_value=0)
  4474. http_status response_time
  4475. Safari 404 0.07
  4476. Iceweasel 0 0.00
  4477. Comodo Dragon 0 0.00
  4478. IE10 404 0.08
  4479. Chrome 200 0.02
  4480. >>> df.reindex(new_index, fill_value='missing')
  4481. http_status response_time
  4482. Safari 404 0.07
  4483. Iceweasel missing missing
  4484. Comodo Dragon missing missing
  4485. IE10 404 0.08
  4486. Chrome 200 0.02
  4487. We can also reindex the columns.
  4488. >>> df.reindex(columns=['http_status', 'user_agent'])
  4489. http_status user_agent
  4490. Firefox 200 NaN
  4491. Chrome 200 NaN
  4492. Safari 404 NaN
  4493. IE10 404 NaN
  4494. Konqueror 301 NaN
  4495. Or we can use "axis-style" keyword arguments
  4496. >>> df.reindex(['http_status', 'user_agent'], axis="columns")
  4497. http_status user_agent
  4498. Firefox 200 NaN
  4499. Chrome 200 NaN
  4500. Safari 404 NaN
  4501. IE10 404 NaN
  4502. Konqueror 301 NaN
  4503. To further illustrate the filling functionality in
  4504. ``reindex``, we will create a dataframe with a
  4505. monotonically increasing index (for example, a sequence
  4506. of dates).
  4507. >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D')
  4508. >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}},
  4509. ... index=date_index)
  4510. >>> df2
  4511. prices
  4512. 2010-01-01 100.0
  4513. 2010-01-02 101.0
  4514. 2010-01-03 NaN
  4515. 2010-01-04 100.0
  4516. 2010-01-05 89.0
  4517. 2010-01-06 88.0
  4518. Suppose we decide to expand the dataframe to cover a wider
  4519. date range.
  4520. >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')
  4521. >>> df2.reindex(date_index2)
  4522. prices
  4523. 2009-12-29 NaN
  4524. 2009-12-30 NaN
  4525. 2009-12-31 NaN
  4526. 2010-01-01 100.0
  4527. 2010-01-02 101.0
  4528. 2010-01-03 NaN
  4529. 2010-01-04 100.0
  4530. 2010-01-05 89.0
  4531. 2010-01-06 88.0
  4532. 2010-01-07 NaN
  4533. The index entries that did not have a value in the original data frame
  4534. (for example, '2009-12-29') are by default filled with ``NaN``.
  4535. If desired, we can fill in the missing values using one of several
  4536. options.
  4537. For example, to back-propagate the last valid value to fill the ``NaN``
  4538. values, pass ``bfill`` as an argument to the ``method`` keyword.
  4539. >>> df2.reindex(date_index2, method='bfill')
  4540. prices
  4541. 2009-12-29 100.0
  4542. 2009-12-30 100.0
  4543. 2009-12-31 100.0
  4544. 2010-01-01 100.0
  4545. 2010-01-02 101.0
  4546. 2010-01-03 NaN
  4547. 2010-01-04 100.0
  4548. 2010-01-05 89.0
  4549. 2010-01-06 88.0
  4550. 2010-01-07 NaN
  4551. Please note that the ``NaN`` value present in the original dataframe
  4552. (at index value 2010-01-03) will not be filled by any of the
  4553. value propagation schemes. This is because filling while reindexing
  4554. does not look at dataframe values, but only compares the original and
  4555. desired indexes. If you do want to fill in the ``NaN`` values present
  4556. in the original dataframe, use the ``fillna()`` method.
  4557. See the :ref:`user guide <basics.reindexing>` for more.
  4558. """
  4559. # TODO: Decide if we care about having different examples for different
  4560. # kinds
  4561. if index is not None and columns is not None and labels is not None:
  4562. raise TypeError("Cannot specify all of 'labels', 'index', 'columns'.")
  4563. elif index is not None or columns is not None:
  4564. if axis is not None:
  4565. raise TypeError(
  4566. "Cannot specify both 'axis' and any of 'index' or 'columns'"
  4567. )
  4568. if labels is not None:
  4569. if index is not None:
  4570. columns = labels
  4571. else:
  4572. index = labels
  4573. else:
  4574. if axis and self._get_axis_number(axis) == 1:
  4575. columns = labels
  4576. else:
  4577. index = labels
  4578. axes: dict[Literal["index", "columns"], Any] = {
  4579. "index": index,
  4580. "columns": columns,
  4581. }
  4582. method = clean_reindex_fill_method(method)
  4583. # if all axes that are requested to reindex are equal, then only copy
  4584. # if indicated must have index names equal here as well as values
  4585. if copy and using_copy_on_write():
  4586. copy = False
  4587. if all(
  4588. self._get_axis(axis_name).identical(ax)
  4589. for axis_name, ax in axes.items()
  4590. if ax is not None
  4591. ):
  4592. return self.copy(deep=copy)
  4593. # check if we are a multi reindex
  4594. if self._needs_reindex_multi(axes, method, level):
  4595. return self._reindex_multi(axes, copy, fill_value)
  4596. # perform the reindex on the axes
  4597. return self._reindex_axes(
  4598. axes, level, limit, tolerance, method, fill_value, copy
  4599. ).__finalize__(self, method="reindex")
  4600. def _reindex_axes(
  4601. self: NDFrameT, axes, level, limit, tolerance, method, fill_value, copy
  4602. ) -> NDFrameT:
  4603. """Perform the reindex for all the axes."""
  4604. obj = self
  4605. for a in self._AXIS_ORDERS:
  4606. labels = axes[a]
  4607. if labels is None:
  4608. continue
  4609. ax = self._get_axis(a)
  4610. new_index, indexer = ax.reindex(
  4611. labels, level=level, limit=limit, tolerance=tolerance, method=method
  4612. )
  4613. axis = self._get_axis_number(a)
  4614. obj = obj._reindex_with_indexers(
  4615. {axis: [new_index, indexer]},
  4616. fill_value=fill_value,
  4617. copy=copy,
  4618. allow_dups=False,
  4619. )
  4620. # If we've made a copy once, no need to make another one
  4621. copy = False
  4622. return obj
  def _needs_reindex_multi(self, axes, method, level) -> bool_t:
      """
      Decide whether the reindex should go through ``_reindex_multi``.

      True only when every axis has a target, neither ``method`` nor
      ``level`` is given, the object is not mixed-type, and it is not a
      2-D object with a single extension-dtype column.
      """
      # Every axis must have been given a non-None target.
      if common.count_not_none(*axes.values()) != self._AXIS_LEN:
          return False
      if method is not None or level is not None:
          return False
      if self._is_mixed_type:
          return False

      single_extension_column = (
          self.ndim == 2
          and len(self.dtypes) == 1
          and is_extension_array_dtype(self.dtypes.iloc[0])
      )
      return not single_extension_column
  def _reindex_multi(self, axes, copy, fill_value):
      """
      Reindex several axes in one step.

      Not implemented at this level: this version always raises
      ``AbstractMethodError``.
      """
      raise AbstractMethodError(self)
  @final
  def _reindex_with_indexers(
      self: NDFrameT,
      reindexers,
      fill_value=None,
      copy: bool_t | None = False,
      allow_dups: bool_t = False,
  ) -> NDFrameT:
      """
      Apply precomputed ``(index, indexer)`` pairs to the manager, axis by axis.

      ``reindexers`` maps an axis number to an ``(index, indexer)`` pair;
      entries whose index is None are skipped. ``allow_dups`` indicates an
      internal call here.
      """
      manager = self._mgr

      # Axes are processed in ascending axis-number order.
      for axis_number in sorted(reindexers.keys()):
          new_index, positions = reindexers[axis_number]
          block_axis = self._get_block_manager_axis(axis_number)

          if new_index is not None:
              new_index = ensure_index(new_index)
              if positions is not None:
                  positions = ensure_platform_int(positions)

              # TODO: speed up on homogeneous DataFrame objects (see _reindex_multi)
              manager = manager.reindex_indexer(
                  new_index,
                  positions,
                  axis=block_axis,
                  fill_value=fill_value,
                  allow_dups=allow_dups,
                  copy=copy,
              )
              # After the first reindex, later ones are never asked to copy.
              copy = False

      # If the manager is still the original one, hand back a copy of it
      # rather than the manager ``self`` is using.
      if manager is self._mgr:
          if using_copy_on_write():
              manager = manager.copy(deep=False)
          elif copy or copy is None:
              manager = manager.copy(deep=copy)

      return self._constructor(manager).__finalize__(self)
  def filter(
      self: NDFrameT,
      items=None,
      like: str | None = None,
      regex: str | None = None,
      axis: Axis | None = None,
  ) -> NDFrameT:
      """
      Subset the dataframe rows or columns according to the specified index labels.

      Note that this routine does not filter a dataframe on its
      contents. The filter is applied to the labels of the index.

      Parameters
      ----------
      items : list-like
          Keep labels from axis which are in items.
      like : str
          Keep labels from axis for which "like in label == True".
          An empty string is contained in every label, so ``like=""``
          keeps all labels.
      regex : str (regular expression)
          Keep labels from axis for which re.search(regex, label) == True.
          An empty pattern matches every label.
      axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
          The axis to filter on, expressed either as an index (int)
          or axis name (str). By default this is the info axis, 'columns' for
          DataFrame. For `Series` this parameter is unused and defaults to `None`.

      Returns
      -------
      same type as input object

      Raises
      ------
      TypeError
          If more than one of ``items``, ``like`` and ``regex`` is passed,
          or if none of them is passed.

      See Also
      --------
      DataFrame.loc : Access a group of rows and columns
          by label(s) or a boolean array.

      Notes
      -----
      The ``items``, ``like``, and ``regex`` parameters are
      enforced to be mutually exclusive.

      ``axis`` defaults to the info axis that is used when indexing
      with ``[]``.

      Examples
      --------
      >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
      ...                   index=['mouse', 'rabbit'],
      ...                   columns=['one', 'two', 'three'])
      >>> df
              one  two  three
      mouse     1    2      3
      rabbit    4    5      6

      >>> # select columns by name
      >>> df.filter(items=['one', 'three'])
              one  three
      mouse     1      3
      rabbit    4      6

      >>> # select columns by regular expression
      >>> df.filter(regex='e$', axis=1)
              one  three
      mouse     1      3
      rabbit    4      6

      >>> # select rows containing 'bbi'
      >>> df.filter(like='bbi', axis=0)
              one  two  three
      rabbit    4    5      6
      """
      nkw = common.count_not_none(items, like, regex)
      if nkw > 1:
          raise TypeError(
              "Keyword arguments `items`, `like`, or `regex` "
              "are mutually exclusive"
          )

      if axis is None:
          axis = self._info_axis_name
      labels = self._get_axis(axis)

      # Dispatch on ``is not None`` -- the same notion of "supplied" that the
      # mutual-exclusion check above uses -- so that an empty ``like`` or
      # ``regex`` is honoured instead of being reported as missing.
      if items is not None:
          name = self._get_axis_name(axis)
          # error: Keywords must be strings
          return self.reindex(  # type: ignore[misc]
              **{name: [r for r in items if r in labels]}  # type: ignore[arg-type]
          )
      elif like is not None:
          # Bind to a local so the closure sees a plain ``str``.
          substring = like

          def contains_substring(x) -> bool_t:
              return substring in ensure_str(x)

          values = labels.map(contains_substring)
          return self.loc(axis=axis)[values]
      elif regex is not None:
          matcher = re.compile(regex)

          def matches_pattern(x) -> bool_t:
              return matcher.search(ensure_str(x)) is not None

          values = labels.map(matches_pattern)
          return self.loc(axis=axis)[values]
      else:
          raise TypeError("Must pass either `items`, `like`, or `regex`")
  @final
  def head(self: NDFrameT, n: int = 5) -> NDFrameT:
      """
      Return the first `n` rows.

      Rows are selected by position, which makes this a quick way to
      check that an object holds the kind of data you expect.

      A negative `n` selects every row except the last `|n|` rows,
      equivalent to ``df[:n]``.

      When `n` exceeds the number of rows, all rows are returned.

      Parameters
      ----------
      n : int, default 5
          Number of rows to select.

      Returns
      -------
      same type as caller
          The first `n` rows of the caller object.

      See Also
      --------
      DataFrame.tail: Returns the last `n` rows.

      Examples
      --------
      >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
      ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
      >>> df
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey
      5     parrot
      6      shark
      7      whale
      8      zebra

      Viewing the first 5 lines

      >>> df.head()
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey

      Viewing the first `n` lines (three in this case)

      >>> df.head(3)
            animal
      0  alligator
      1        bee
      2     falcon

      For negative values of `n`

      >>> df.head(-3)
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey
      5     parrot
      """
      leading = slice(None, n)
      return self.iloc[leading]
  @final
  def tail(self: NDFrameT, n: int = 5) -> NDFrameT:
      """
      Return the last `n` rows.

      Rows are selected by position, which is handy for checking data
      after, for example, sorting or appending rows.

      A negative `n` selects every row except the first `|n|` rows,
      equivalent to ``df[|n|:]``.

      When `n` exceeds the number of rows, all rows are returned.

      Parameters
      ----------
      n : int, default 5
          Number of rows to select.

      Returns
      -------
      type of caller
          The last `n` rows of the caller object.

      See Also
      --------
      DataFrame.head : The first `n` rows of the caller object.

      Examples
      --------
      >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
      ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
      >>> df
            animal
      0  alligator
      1        bee
      2     falcon
      3       lion
      4     monkey
      5     parrot
      6      shark
      7      whale
      8      zebra

      Viewing the last 5 lines

      >>> df.tail()
         animal
      4  monkey
      5  parrot
      6   shark
      7   whale
      8   zebra

      Viewing the last `n` lines (three in this case)

      >>> df.tail(3)
         animal
      6   shark
      7   whale
      8   zebra

      For negative values of `n`

      >>> df.tail(-3)
         animal
      3    lion
      4  monkey
      5  parrot
      6   shark
      7   whale
      8   zebra
      """
      # ``-0`` is ``0``, so ``iloc[-0:]`` would select everything; the
      # zero case therefore needs its own empty slice.
      trailing = slice(0, 0) if n == 0 else slice(-n, None)
      return self.iloc[trailing]
  @final
  def sample(
      self: NDFrameT,
      n: int | None = None,
      frac: float | None = None,
      replace: bool_t = False,
      weights=None,
      random_state: RandomState | None = None,
      axis: Axis | None = None,
      ignore_index: bool_t = False,
  ) -> NDFrameT:
      """
      Return a random sample of items from an axis of object.

      You can use `random_state` for reproducibility.

      Parameters
      ----------
      n : int, optional
          Number of items from axis to return. Cannot be used with `frac`.
          Default = 1 if `frac` = None.
      frac : float, optional
          Fraction of axis items to return. Cannot be used with `n`.
      replace : bool, default False
          Allow or disallow sampling of the same row more than once.
      weights : str or ndarray-like, optional
          Default 'None' results in equal probability weighting.
          If passed a Series, will align with target object on index. Index
          values in weights not found in sampled object will be ignored and
          index values in sampled object not in weights will be assigned
          weights of zero.
          If called on a DataFrame, will accept the name of a column
          when axis = 0.
          Unless weights are a Series, weights must be same length as axis
          being sampled.
          If weights do not sum to 1, they will be normalized to sum to 1.
          Missing values in the weights column will be treated as zero.
          Infinite values not allowed.
      random_state : int, array-like, BitGenerator, np.random.RandomState, np.random.Generator, optional
          If int, array-like, or BitGenerator, seed for random number generator.
          If np.random.RandomState or np.random.Generator, use as given.

          .. versionchanged:: 1.1.0

              array-like and BitGenerator object now passed to np.random.RandomState()
              as seed

          .. versionchanged:: 1.4.0

              np.random.Generator objects now accepted

      axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
          Axis to sample. Accepts axis number or name. Default is stat axis
          for given data type. For `Series` this parameter is unused and defaults to `None`.
      ignore_index : bool, default False
          If True, the resulting index will be labeled 0, 1, …, n - 1.

          .. versionadded:: 1.3.0

      Returns
      -------
      Series or DataFrame
          A new object of same type as caller containing `n` items randomly
          sampled from the caller object.

      See Also
      --------
      DataFrameGroupBy.sample: Generates random samples from each group of a
          DataFrame object.
      SeriesGroupBy.sample: Generates random samples from each group of a
          Series object.
      numpy.random.choice: Generates a random sample from a given 1-D numpy
          array.

      Notes
      -----
      If `frac` > 1, `replace` should be set to `True`.

      Examples
      --------
      >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0],
      ...                    'num_wings': [2, 0, 0, 0],
      ...                    'num_specimen_seen': [10, 2, 1, 8]},
      ...                   index=['falcon', 'dog', 'spider', 'fish'])
      >>> df
              num_legs  num_wings  num_specimen_seen
      falcon         2          2                 10
      dog            4          0                  2
      spider         8          0                  1
      fish           0          0                  8

      Extract 3 random elements from the ``Series`` ``df['num_legs']``:
      Note that we use `random_state` to ensure the reproducibility of
      the examples.

      >>> df['num_legs'].sample(n=3, random_state=1)
      fish      0
      spider    8
      falcon    2
      Name: num_legs, dtype: int64

      A random 50% sample of the ``DataFrame`` with replacement:

      >>> df.sample(frac=0.5, replace=True, random_state=1)
            num_legs  num_wings  num_specimen_seen
      dog          4          0                  2
      fish         0          0                  8

      An upsample sample of the ``DataFrame`` with replacement:
      Note that `replace` parameter has to be `True` for `frac` parameter > 1.

      >>> df.sample(frac=2, replace=True, random_state=1)
              num_legs  num_wings  num_specimen_seen
      dog            4          0                  2
      fish           0          0                  8
      falcon         2          2                 10
      falcon         2          2                 10
      fish           0          0                  8
      dog            4          0                  2
      fish           0          0                  8
      dog            4          0                  2

      Using a DataFrame column as weights. Rows with larger value in the
      `num_specimen_seen` column are more likely to be sampled.

      >>> df.sample(n=2, weights='num_specimen_seen', random_state=1)
              num_legs  num_wings  num_specimen_seen
      falcon         2          2                 10
      fish           0          0                  8
      """  # noqa:E501
      axis_number = self._get_axis_number(
          self._stat_axis_number if axis is None else axis
      )
      n_items = self.shape[axis_number]

      # Turn the random_state argument into a generator object.
      rng = common.random_state(random_state)

      n_draws = sample.process_sampling_size(n, frac, replace)
      if n_draws is None:
          # No explicit size was resolved, so derive it from ``frac``.
          assert frac is not None
          n_draws = round(frac * n_items)

      probabilities = weights
      if probabilities is not None:
          probabilities = sample.preprocess_weights(self, probabilities, axis_number)

      positions = sample.sample(n_items, n_draws, replace, probabilities, rng)
      sampled = self.take(positions, axis=axis_number)

      if ignore_index:
          sampled.index = default_index(len(sampled))

      return sampled
  @final
  @doc(klass=_shared_doc_kwargs["klass"])
  def pipe(
      self,
      func: Callable[..., T] | tuple[Callable[..., T], str],
      *args,
      **kwargs,
  ) -> T:
      r"""
      Apply chainable functions that expect Series or DataFrames.

      Parameters
      ----------
      func : function
          Function to apply to the {klass}.
          ``args``, and ``kwargs`` are passed into ``func``.
          Alternatively a ``(callable, data_keyword)`` tuple where
          ``data_keyword`` is a string indicating the keyword of
          ``callable`` that expects the {klass}.
      args : iterable, optional
          Positional arguments passed into ``func``.
      kwargs : mapping, optional
          A dictionary of keyword arguments passed into ``func``.

      Returns
      -------
      the return type of ``func``.

      See Also
      --------
      DataFrame.apply : Apply a function along input axis of DataFrame.
      DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
      Series.map : Apply a mapping correspondence on a
          :class:`~pandas.Series`.

      Notes
      -----
      Use ``.pipe`` when chaining together functions that expect
      Series, DataFrames or GroupBy objects. Instead of writing

      >>> func(g(h(df), arg1=a), arg2=b, arg3=c)  # doctest: +SKIP

      You can write

      >>> (df.pipe(h)
      ...    .pipe(g, arg1=a)
      ...    .pipe(func, arg2=b, arg3=c)
      ... )  # doctest: +SKIP

      If you have a function that takes the data as (say) the second
      argument, pass a tuple indicating which keyword expects the
      data. For example, suppose ``func`` takes its data as ``arg2``:

      >>> (df.pipe(h)
      ...    .pipe(g, arg1=a)
      ...    .pipe((func, 'arg2'), arg1=a, arg3=c)
      ...  )  # doctest: +SKIP
      """
      # With copy-on-write enabled, ``func`` is handed ``self.copy(deep=None)``
      # rather than ``self`` itself.
      piped = self.copy(deep=None) if using_copy_on_write() else self
      return common.pipe(piped, func, *args, **kwargs)
  5069. # ----------------------------------------------------------------------
  5070. # Attribute access
  @final
  def __finalize__(
      self: NDFrameT, other, method: str | None = None, **kwargs
  ) -> NDFrameT:
      """
      Propagate metadata from other to self.

      Parameters
      ----------
      other : the object from which to get the attributes that we are going
          to propagate
      method : str, optional
          A passed method name providing context on where ``__finalize__``
          was called.

          .. warning::

             The value passed as `method` are not currently considered
             stable across pandas releases.
      """
      if isinstance(other, NDFrame):
          for key, value in other.attrs.items():
              self.attrs[key] = value

          self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels

          # For subclasses using _metadata: copy only the names both
          # objects declare.
          shared_metadata = set(self._metadata) & set(other._metadata)
          for attr_name in shared_metadata:
              assert isinstance(attr_name, str)
              object.__setattr__(self, attr_name, getattr(other, attr_name, None))

      if method == "concat":
          inputs = other.objs
          first_attrs = inputs[0].attrs
          # attrs are propagated only when every input carries equal attrs.
          if all(obj.attrs == first_attrs for obj in inputs[1:]):
              for key, value in first_attrs.items():
                  self.attrs[key] = value

          # Duplicate labels are allowed only if every input allows them.
          self.flags.allows_duplicate_labels = all(
              obj.flags.allows_duplicate_labels for obj in inputs
          )

      return self
  def __getattr__(self, name: str):
      """
      After regular attribute access, try looking up the name
      This allows simpler access to columns for interactive use.
      """
      # Note: obj.x will always call obj.__getattribute__('x') prior to
      # calling obj.__getattr__('x').
      is_reserved = (
          name in self._internal_names_set
          or name in self._metadata
          or name in self._accessors
      )
      if not is_reserved and self._info_axis._can_hold_identifiers_and_holds_name(
          name
      ):
          return self[name]
      return object.__getattribute__(self, name)
  def __setattr__(self, name: str, value) -> None:
      """
      After regular attribute access, try setting the name
      This allows simpler access to columns for interactive use.
      """
      # Try plain attribute assignment first, so that ``obj.x`` and
      # ``obj.x = 4`` always reference/modify the same attribute. An
      # AttributeError from either the lookup or the assignment sends us
      # on to the more involved handling below.
      try:
          object.__getattribute__(self, name)
          object.__setattr__(self, name, value)
      except AttributeError:
          pass
      else:
          return

      # Internal names and declared metadata are always plain attributes
      # (note that this matches __getattr__, above).
      if name in self._internal_names_set or name in self._metadata:
          object.__setattr__(self, name, value)
          return

      try:
          existing = getattr(self, name)
          if not isinstance(existing, Index) and name in self._info_axis:
              # ``name`` is a label on the info axis: assign through item
              # setting.
              self[name] = value
          else:
              object.__setattr__(self, name, value)
      except (AttributeError, TypeError):
          if isinstance(self, ABCDataFrame) and (is_list_like(value)):
              warnings.warn(
                  "Pandas doesn't allow columns to be "
                  "created via a new attribute name - see "
                  "https://pandas.pydata.org/pandas-docs/"
                  "stable/indexing.html#attribute-access",
                  stacklevel=find_stack_level(),
              )
          object.__setattr__(self, name, value)
  @final
  def _dir_additions(self) -> set[str]:
      """
      add the string-like attributes from the info_axis.
      If info_axis is a MultiIndex, its first level values are used.
      """
      names = super()._dir_additions()
      info_axis = self._info_axis
      if info_axis._can_hold_strings:
          names.update(info_axis._dir_additions_for_owner)
      return names
  5170. # ----------------------------------------------------------------------
  5171. # Consolidation of internals
  @final
  def _protect_consolidate(self, f):
      """
      Consolidate _mgr -- if the blocks have changed, then clear the
      cache

      ``f`` is called with no arguments and its result is returned.
      """
      manager = self._mgr
      if isinstance(manager, (ArrayManager, SingleArrayManager)):
          # No block count is compared for these manager types.
          return f()

      n_blocks_before = len(manager.blocks)
      outcome = f()
      # ``f`` may have rebound ``self._mgr``, so read it again.
      if n_blocks_before != len(self._mgr.blocks):
          self._clear_item_cache()
      return outcome
  @final
  def _consolidate_inplace(self) -> None:
      """Consolidate data in place and return None"""

      def replace_manager() -> None:
          # Rebind ``_mgr`` to its consolidated form.
          self._mgr = self._mgr.consolidate()

      self._protect_consolidate(replace_manager)
  @final
  def _consolidate(self):
      """
      Compute NDFrame with "consolidated" internals (data of each dtype
      grouped together in a single ndarray).

      Returns
      -------
      consolidated : same type as caller
      """

      def consolidated_manager():
          return self._mgr.consolidate()

      new_mgr = self._protect_consolidate(consolidated_manager)
      consolidated = self._constructor(new_mgr)
      return consolidated.__finalize__(self)
  @property
  def _is_mixed_type(self) -> bool_t:
      """
      Whether the object is treated as holding more than one dtype.

      A single-block manager is never mixed; a manager holding any
      extension types always is; otherwise the number of distinct dtypes
      decides.
      """
      manager = self._mgr
      if manager.is_single_block:
          return False

      if manager.any_extension_types:
          # Even if they have the same dtype, we can't consolidate them,
          # so we pretend this is "mixed'"
          return True

      n_distinct_dtypes = self.dtypes.nunique()
      return n_distinct_dtypes > 1
  @final
  def _check_inplace_setting(self, value) -> bool_t:
      """
      check whether we allow in-place setting with this type of value

      Returns True when setting is allowed and raises ``TypeError``
      otherwise.
      """
      if not self._is_mixed_type or self._mgr.is_numeric_mixed_type:
          return True

      # Mixed, non-numeric data: only an actual np.nan (or the no_default
      # sentinel) is let through.
      is_nan = is_float(value) and np.isnan(value)
      if is_nan or value is lib.no_default:
          return True

      raise TypeError(
          "Cannot do inplace boolean setting on "
          "mixed-types with a non np.nan value"
      )
  @final
  def _get_numeric_data(self: NDFrameT) -> NDFrameT:
      """
      Build a new object from the manager's ``get_numeric_data()`` result.
      """
      numeric_mgr = self._mgr.get_numeric_data()
      numeric = self._constructor(numeric_mgr)
      return numeric.__finalize__(self)
  @final
  def _get_bool_data(self):
      """
      Build a new object from the manager's ``get_bool_data()`` result.
      """
      bool_mgr = self._mgr.get_bool_data()
      booleans = self._constructor(bool_mgr)
      return booleans.__finalize__(self)
  5230. # ----------------------------------------------------------------------
  5231. # Internal Interface Methods
  @property
  def values(self):
      """
      Not implemented at this level: accessing it here always raises
      ``AbstractMethodError``.
      """
      raise AbstractMethodError(self)
  @property
  def _values(self) -> ArrayLike:
      """
      internal implementation

      Not implemented at this level: accessing it here always raises
      ``AbstractMethodError``.
      """
      raise AbstractMethodError(self)
  @property
  def dtypes(self):
      """
      Return the dtypes in the DataFrame.

      The result is a Series holding the data type of each column,
      indexed by the original DataFrame's columns. Columns with mixed
      types are stored with the ``object`` dtype. See
      :ref:`the User Guide <basics.dtypes>` for more.

      Returns
      -------
      pandas.Series
          The data type of each column.

      Examples
      --------
      >>> df = pd.DataFrame({'float': [1.0],
      ...                    'int': [1],
      ...                    'datetime': [pd.Timestamp('20180310')],
      ...                    'string': ['foo']})
      >>> df.dtypes
      float              float64
      int                  int64
      datetime    datetime64[ns]
      string              object
      dtype: object
      """
      per_column_dtypes = self._mgr.get_dtypes()
      # The dtype objects themselves are the values, hence object dtype.
      return self._constructor_sliced(
          per_column_dtypes, index=self._info_axis, dtype=np.object_
      )
  def astype(
      self: NDFrameT, dtype, copy: bool_t | None = None, errors: IgnoreRaise = "raise"
  ) -> NDFrameT:
      """
      Cast a pandas object to a specified dtype ``dtype``.

      Parameters
      ----------
      dtype : str, data type, Series or Mapping of column name -> data type
          Use a str, numpy.dtype, pandas.ExtensionDtype or Python type to
          cast entire pandas object to the same type. Alternatively, use a
          mapping, e.g. {col: dtype, ...}, where col is a column label and dtype is
          a numpy.dtype or Python type to cast one or more of the DataFrame's
          columns to column-specific types.
      copy : bool, default True
          Return a copy when ``copy=True`` (be very careful setting
          ``copy=False`` as changes to values then may propagate to other
          pandas objects).
      errors : {'raise', 'ignore'}, default 'raise'
          Control raising of exceptions on invalid data for provided dtype.

          - ``raise`` : allow exceptions to be raised
          - ``ignore`` : suppress exceptions. On error return original object.

      Returns
      -------
      same type as caller

      Raises
      ------
      KeyError
          If ``dtype`` is a mapping and, for a Series, it has more than one
          key or does not contain the Series name; or, otherwise, it has a
          key that is not a column of the object.

      See Also
      --------
      to_datetime : Convert argument to datetime.
      to_timedelta : Convert argument to timedelta.
      to_numeric : Convert argument to a numeric type.
      numpy.ndarray.astype : Cast a numpy array to a specified type.

      Notes
      -----
      .. versionchanged:: 2.0.0

          Using ``astype`` to convert from timezone-naive dtype to
          timezone-aware dtype will raise an exception.
          Use :meth:`Series.dt.tz_localize` instead.

      Examples
      --------
      Create a DataFrame:

      >>> d = {'col1': [1, 2], 'col2': [3, 4]}
      >>> df = pd.DataFrame(data=d)
      >>> df.dtypes
      col1    int64
      col2    int64
      dtype: object

      Cast all columns to int32:

      >>> df.astype('int32').dtypes
      col1    int32
      col2    int32
      dtype: object

      Cast col1 to int32 using a dictionary:

      >>> df.astype({'col1': 'int32'}).dtypes
      col1    int32
      col2    int64
      dtype: object

      Create a series:

      >>> ser = pd.Series([1, 2], dtype='int32')
      >>> ser
      0    1
      1    2
      dtype: int32
      >>> ser.astype('int64')
      0    1
      1    2
      dtype: int64

      Convert to categorical type:

      >>> ser.astype('category')
      0    1
      1    2
      dtype: category
      Categories (2, int32): [1, 2]

      Convert to ordered categorical type with custom ordering:

      >>> from pandas.api.types import CategoricalDtype
      >>> cat_dtype = CategoricalDtype(
      ...     categories=[2, 1], ordered=True)
      >>> ser.astype(cat_dtype)
      0    1
      1    2
      dtype: category
      Categories (2, int64): [2 < 1]

      Create a series of dates:

      >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
      >>> ser_date
      0   2020-01-01
      1   2020-01-02
      2   2020-01-03
      dtype: datetime64[ns]
      """
      # Under copy-on-write an explicit ``copy=True`` is downgraded to
      # False before being passed on.
      if copy and using_copy_on_write():
          copy = False

      if is_dict_like(dtype):
          if self.ndim == 1:  # i.e. Series
              if len(dtype) > 1 or self.name not in dtype:
                  raise KeyError(
                      "Only the Series name can be used for "
                      "the key in Series dtype mappings."
                  )
              new_type = dtype[self.name]
              return self.astype(new_type, copy, errors)

          # GH#44417 cast to Series so we can use .iat below; .iat looks
          # entries up by position, so the per-column lookup does not
          # depend on the column labels themselves.
          from pandas import Series

          dtype_ser = Series(dtype, dtype=object)

          for col_name in dtype_ser.index:
              if col_name not in self:
                  raise KeyError(
                      "Only a column name can be used for the "
                      "key in a dtype mappings argument. "
                      f"'{col_name}' not found in columns."
                  )

          # Align the requested dtypes with the columns; columns without
          # a requested dtype get a missing entry and are only copied.
          dtype_ser = dtype_ser.reindex(self.columns, fill_value=None, copy=False)

          results = []
          for i, (col_name, col) in enumerate(self.items()):
              cdt = dtype_ser.iat[i]
              if isna(cdt):
                  res_col = col.copy(deep=copy)
              else:
                  try:
                      res_col = col.astype(dtype=cdt, copy=copy, errors=errors)
                  except ValueError as ex:
                      # Add the offending column to the message.
                      ex.args = (
                          f"{ex}: Error while type casting for column '{col_name}'",
                      )
                      raise
              results.append(res_col)

      elif is_extension_array_dtype(dtype) and self.ndim > 1:
          # GH 18099/22869: columnwise conversion to extension dtype
          # GH 24704: use iloc to handle duplicate column names
          # TODO(EA2D): special case not needed with 2D EAs
          # ``errors`` is forwarded so that ``errors="ignore"`` is honoured
          # here just as in the other two branches.
          results = [
              self.iloc[:, i].astype(dtype, copy=copy, errors=errors)
              for i in range(len(self.columns))
          ]

      else:
          # else, only a single dtype is given
          new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
          return self._constructor(new_data).__finalize__(self, method="astype")

      # GH 33113: handle empty frame or series
      if not results:
          return self.copy(deep=None)

      # GH 19920: retain column metadata after concat
      result = concat(results, axis=1, copy=False)
      # GH#40810 retain subclass
      # error: Incompatible types in assignment
      # (expression has type "NDFrameT", variable has type "DataFrame")
      result = self._constructor(result)  # type: ignore[assignment]
      result.columns = self.columns
      result = result.__finalize__(self, method="astype")
      # https://github.com/python/mypy/issues/8354
      return cast(NDFrameT, result)
  @final
  def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT:
      """
      Make a copy of this object's indices and data.

      When ``deep=True`` (default), a new object will be created with a
      copy of the calling object's data and indices. Modifications to
      the data or indices of the copy will not be reflected in the
      original object (see notes below).

      When ``deep=False``, a new object will be created without copying
      the calling object's data or index (only references to the data
      and index are copied). Any changes to the data of the original
      will be reflected in the shallow copy (and vice versa).

      Parameters
      ----------
      deep : bool, default True
          Make a deep copy, including a copy of the data and the indices.
          With ``deep=False`` neither the indices nor the data are copied.

      Returns
      -------
      Series or DataFrame
          Object type matches caller.

      Notes
      -----
      When ``deep=True``, data is copied but actual Python objects
      will not be copied recursively, only the reference to the object.
      This is in contrast to `copy.deepcopy` in the Standard Library,
      which recursively copies object data (see examples below).

      While ``Index`` objects are copied when ``deep=True``, the underlying
      numpy array is not copied for performance reasons. Since ``Index`` is
      immutable, the underlying data can be safely shared and a copy
      is not needed.

      Since pandas is not thread safe, see the
      :ref:`gotchas <gotchas.thread-safety>` when copying in a threading
      environment.

      Examples
      --------
      >>> s = pd.Series([1, 2], index=["a", "b"])
      >>> s
      a    1
      b    2
      dtype: int64

      >>> s_copy = s.copy()
      >>> s_copy
      a    1
      b    2
      dtype: int64

      **Shallow copy versus default (deep) copy:**

      >>> s = pd.Series([1, 2], index=["a", "b"])
      >>> deep = s.copy()
      >>> shallow = s.copy(deep=False)

      Shallow copy shares data and index with original.

      >>> s is shallow
      False
      >>> s.values is shallow.values and s.index is shallow.index
      True

      Deep copy has own copy of data and index.

      >>> s is deep
      False
      >>> s.values is deep.values or s.index is deep.index
      False

      Updates to the data shared by shallow copy and original is reflected
      in both; deep copy remains unchanged.

      >>> s[0] = 3
      >>> shallow[1] = 4
      >>> s
      a    3
      b    4
      dtype: int64
      >>> shallow
      a    3
      b    4
      dtype: int64
      >>> deep
      a    1
      b    2
      dtype: int64

      Note that when copying an object containing Python objects, a deep copy
      will copy the data, but will not do so recursively. Updating a nested
      data object will be reflected in the deep copy.

      >>> s = pd.Series([[1, 2], [3, 4]])
      >>> deep = s.copy()
      >>> s[0][0] = 10
      >>> s
      0    [10, 2]
      1     [3, 4]
      dtype: object
      >>> deep
      0    [10, 2]
      1     [3, 4]
      dtype: object
      """
      copied_mgr = self._mgr.copy(deep=deep)
      # The caller's item cache is cleared on every copy.
      self._clear_item_cache()
      duplicate = self._constructor(copied_mgr)
      return duplicate.__finalize__(self, method="copy")
  5510. @final
  5511. def __copy__(self: NDFrameT, deep: bool_t = True) -> NDFrameT:
  5512. return self.copy(deep=deep)
  5513. @final
  5514. def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT:
  5515. """
  5516. Parameters
  5517. ----------
  5518. memo, default None
  5519. Standard signature. Unused
  5520. """
  5521. return self.copy(deep=True)
  5522. @final
  5523. def infer_objects(self: NDFrameT, copy: bool_t | None = None) -> NDFrameT:
  5524. """
  5525. Attempt to infer better dtypes for object columns.
  5526. Attempts soft conversion of object-dtyped
  5527. columns, leaving non-object and unconvertible
  5528. columns unchanged. The inference rules are the
  5529. same as during normal Series/DataFrame construction.
  5530. Parameters
  5531. ----------
  5532. copy : bool, default True
  5533. Whether to make a copy for non-object or non-inferrable columns
  5534. or Series.
  5535. Returns
  5536. -------
  5537. same type as input object
  5538. See Also
  5539. --------
  5540. to_datetime : Convert argument to datetime.
  5541. to_timedelta : Convert argument to timedelta.
  5542. to_numeric : Convert argument to numeric type.
  5543. convert_dtypes : Convert argument to best possible dtype.
  5544. Examples
  5545. --------
  5546. >>> df = pd.DataFrame({"A": ["a", 1, 2, 3]})
  5547. >>> df = df.iloc[1:]
  5548. >>> df
  5549. A
  5550. 1 1
  5551. 2 2
  5552. 3 3
  5553. >>> df.dtypes
  5554. A object
  5555. dtype: object
  5556. >>> df.infer_objects().dtypes
  5557. A int64
  5558. dtype: object
  5559. """
  5560. new_mgr = self._mgr.convert(copy=copy)
  5561. return self._constructor(new_mgr).__finalize__(self, method="infer_objects")
  5562. @final
  5563. def convert_dtypes(
  5564. self: NDFrameT,
  5565. infer_objects: bool_t = True,
  5566. convert_string: bool_t = True,
  5567. convert_integer: bool_t = True,
  5568. convert_boolean: bool_t = True,
  5569. convert_floating: bool_t = True,
  5570. dtype_backend: DtypeBackend = "numpy_nullable",
  5571. ) -> NDFrameT:
  5572. """
  5573. Convert columns to the best possible dtypes using dtypes supporting ``pd.NA``.
  5574. Parameters
  5575. ----------
  5576. infer_objects : bool, default True
  5577. Whether object dtypes should be converted to the best possible types.
  5578. convert_string : bool, default True
  5579. Whether object dtypes should be converted to ``StringDtype()``.
  5580. convert_integer : bool, default True
  5581. Whether, if possible, conversion can be done to integer extension types.
  5582. convert_boolean : bool, defaults True
  5583. Whether object dtypes should be converted to ``BooleanDtypes()``.
  5584. convert_floating : bool, defaults True
  5585. Whether, if possible, conversion can be done to floating extension types.
  5586. If `convert_integer` is also True, preference will be give to integer
  5587. dtypes if the floats can be faithfully casted to integers.
  5588. .. versionadded:: 1.2.0
  5589. dtype_backend : {"numpy_nullable", "pyarrow"}, default "numpy_nullable"
  5590. Which dtype_backend to use, e.g. whether a DataFrame should use nullable
  5591. dtypes for all dtypes that have a nullable
  5592. implementation when "numpy_nullable" is set, pyarrow is used for all
  5593. dtypes if "pyarrow" is set.
  5594. The dtype_backends are still experimential.
  5595. .. versionadded:: 2.0
  5596. Returns
  5597. -------
  5598. Series or DataFrame
  5599. Copy of input object with new dtype.
  5600. See Also
  5601. --------
  5602. infer_objects : Infer dtypes of objects.
  5603. to_datetime : Convert argument to datetime.
  5604. to_timedelta : Convert argument to timedelta.
  5605. to_numeric : Convert argument to a numeric type.
  5606. Notes
  5607. -----
  5608. By default, ``convert_dtypes`` will attempt to convert a Series (or each
  5609. Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
  5610. ``convert_string``, ``convert_integer``, ``convert_boolean`` and
  5611. ``convert_floating``, it is possible to turn off individual conversions
  5612. to ``StringDtype``, the integer extension types, ``BooleanDtype``
  5613. or floating extension types, respectively.
  5614. For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
  5615. rules as during normal Series/DataFrame construction. Then, if possible,
  5616. convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
  5617. or floating extension type, otherwise leave as ``object``.
  5618. If the dtype is integer, convert to an appropriate integer extension type.
  5619. If the dtype is numeric, and consists of all integers, convert to an
  5620. appropriate integer extension type. Otherwise, convert to an
  5621. appropriate floating extension type.
  5622. .. versionchanged:: 1.2
  5623. Starting with pandas 1.2, this method also converts float columns
  5624. to the nullable floating extension type.
  5625. In the future, as new dtypes are added that support ``pd.NA``, the results
  5626. of this method will change to support those new dtypes.
  5627. Examples
  5628. --------
  5629. >>> df = pd.DataFrame(
  5630. ... {
  5631. ... "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
  5632. ... "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
  5633. ... "c": pd.Series([True, False, np.nan], dtype=np.dtype("O")),
  5634. ... "d": pd.Series(["h", "i", np.nan], dtype=np.dtype("O")),
  5635. ... "e": pd.Series([10, np.nan, 20], dtype=np.dtype("float")),
  5636. ... "f": pd.Series([np.nan, 100.5, 200], dtype=np.dtype("float")),
  5637. ... }
  5638. ... )
  5639. Start with a DataFrame with default dtypes.
  5640. >>> df
  5641. a b c d e f
  5642. 0 1 x True h 10.0 NaN
  5643. 1 2 y False i NaN 100.5
  5644. 2 3 z NaN NaN 20.0 200.0
  5645. >>> df.dtypes
  5646. a int32
  5647. b object
  5648. c object
  5649. d object
  5650. e float64
  5651. f float64
  5652. dtype: object
  5653. Convert the DataFrame to use best possible dtypes.
  5654. >>> dfn = df.convert_dtypes()
  5655. >>> dfn
  5656. a b c d e f
  5657. 0 1 x True h 10 <NA>
  5658. 1 2 y False i <NA> 100.5
  5659. 2 3 z <NA> <NA> 20 200.0
  5660. >>> dfn.dtypes
  5661. a Int32
  5662. b string[python]
  5663. c boolean
  5664. d string[python]
  5665. e Int64
  5666. f Float64
  5667. dtype: object
  5668. Start with a Series of strings and missing data represented by ``np.nan``.
  5669. >>> s = pd.Series(["a", "b", np.nan])
  5670. >>> s
  5671. 0 a
  5672. 1 b
  5673. 2 NaN
  5674. dtype: object
  5675. Obtain a Series with dtype ``StringDtype``.
  5676. >>> s.convert_dtypes()
  5677. 0 a
  5678. 1 b
  5679. 2 <NA>
  5680. dtype: string
  5681. """
  5682. check_dtype_backend(dtype_backend)
  5683. if self.ndim == 1:
  5684. return self._convert_dtypes(
  5685. infer_objects,
  5686. convert_string,
  5687. convert_integer,
  5688. convert_boolean,
  5689. convert_floating,
  5690. dtype_backend=dtype_backend,
  5691. )
  5692. else:
  5693. results = [
  5694. col._convert_dtypes(
  5695. infer_objects,
  5696. convert_string,
  5697. convert_integer,
  5698. convert_boolean,
  5699. convert_floating,
  5700. dtype_backend=dtype_backend,
  5701. )
  5702. for col_name, col in self.items()
  5703. ]
  5704. if len(results) > 0:
  5705. result = concat(results, axis=1, copy=False, keys=self.columns)
  5706. cons = cast(Type["DataFrame"], self._constructor)
  5707. result = cons(result)
  5708. result = result.__finalize__(self, method="convert_dtypes")
  5709. # https://github.com/python/mypy/issues/8354
  5710. return cast(NDFrameT, result)
  5711. else:
  5712. return self.copy(deep=None)
  5713. # ----------------------------------------------------------------------
  5714. # Filling NA's
    # Typing-only overload: ``inplace=False`` (the default) yields a new object.
    @overload
    def fillna(
        self: NDFrameT,
        value: Hashable | Mapping | Series | DataFrame = ...,
        *,
        method: FillnaOptions | None = ...,
        axis: Axis | None = ...,
        inplace: Literal[False] = ...,
        limit: int | None = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT:
        ...

    # Typing-only overload: ``inplace=True`` yields ``None``.
    @overload
    def fillna(
        self,
        value: Hashable | Mapping | Series | DataFrame = ...,
        *,
        method: FillnaOptions | None = ...,
        axis: Axis | None = ...,
        inplace: Literal[True],
        limit: int | None = ...,
        downcast: dict | None = ...,
    ) -> None:
        ...

    # Typing-only overload: ``inplace`` is a plain bool, so either result is possible.
    @overload
    def fillna(
        self: NDFrameT,
        value: Hashable | Mapping | Series | DataFrame = ...,
        *,
        method: FillnaOptions | None = ...,
        axis: Axis | None = ...,
        inplace: bool_t = ...,
        limit: int | None = ...,
        downcast: dict | None = ...,
    ) -> NDFrameT | None:
        ...

    @doc(**_shared_doc_kwargs)
    def fillna(
        self: NDFrameT,
        value: Hashable | Mapping | Series | DataFrame = None,
        *,
        method: FillnaOptions | None = None,
        axis: Axis | None = None,
        inplace: bool_t = False,
        limit: int | None = None,
        downcast: dict | None = None,
    ) -> NDFrameT | None:
        """
        Fill NA/NaN values using the specified method.

        Parameters
        ----------
        value : scalar, dict, Series, or DataFrame
            Value to use to fill holes (e.g. 0), alternately a
            dict/Series/DataFrame of values specifying which value to use for
            each index (for a Series) or column (for a DataFrame).  Values not
            in the dict/Series/DataFrame will not be filled. This value cannot
            be a list.
        method : {{'backfill', 'bfill', 'ffill', None}}, default None
            Method to use for filling holes in reindexed Series:

            * ffill: propagate last valid observation forward to next valid.
            * backfill / bfill: use next valid observation to fill gap.

        axis : {axes_single_arg}
            Axis along which to fill missing values. For `Series`
            this parameter is unused and defaults to 0.
        inplace : bool, default False
            If True, fill in-place. Note: this will modify any
            other views on this object (e.g., a no-copy slice for a column in a
            DataFrame).
        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled. Must be greater than 0 if not None.
        downcast : dict, default is None
            A dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible).

        Returns
        -------
        {klass} or None
            Object with missing values filled or None if ``inplace=True``.

        See Also
        --------
        interpolate : Fill NaN values using interpolation.
        reindex : Conform object to new index.
        asfreq : Convert TimeSeries to specified frequency.

        Examples
        --------
        >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0],
        ...                    [3, 4, np.nan, 1],
        ...                    [np.nan, np.nan, np.nan, np.nan],
        ...                    [np.nan, 3, np.nan, 4]],
        ...                   columns=list("ABCD"))
        >>> df
             A    B   C    D
        0  NaN  2.0 NaN  0.0
        1  3.0  4.0 NaN  1.0
        2  NaN  NaN NaN  NaN
        3  NaN  3.0 NaN  4.0

        Replace all NaN elements with 0s.

        >>> df.fillna(0)
             A    B    C    D
        0  0.0  2.0  0.0  0.0
        1  3.0  4.0  0.0  1.0
        2  0.0  0.0  0.0  0.0
        3  0.0  3.0  0.0  4.0

        We can also propagate non-null values forward or backward.

        >>> df.fillna(method="ffill")
             A    B   C    D
        0  NaN  2.0 NaN  0.0
        1  3.0  4.0 NaN  1.0
        2  3.0  4.0 NaN  1.0
        3  3.0  3.0 NaN  4.0

        Replace all NaN elements in column 'A', 'B', 'C', and 'D', with 0, 1,
        2, and 3 respectively.

        >>> values = {{"A": 0, "B": 1, "C": 2, "D": 3}}
        >>> df.fillna(value=values)
             A    B    C    D
        0  0.0  2.0  2.0  0.0
        1  3.0  4.0  2.0  1.0
        2  0.0  1.0  2.0  3.0
        3  0.0  3.0  2.0  4.0

        Only replace the first NaN element.

        >>> df.fillna(value=values, limit=1)
             A    B    C    D
        0  0.0  2.0  2.0  0.0
        1  3.0  4.0  NaN  1.0
        2  NaN  1.0  NaN  3.0
        3  NaN  3.0  NaN  4.0

        When filling using a DataFrame, replacement happens along
        the same column names and same indices

        >>> df2 = pd.DataFrame(np.zeros((4, 4)), columns=list("ABCE"))
        >>> df.fillna(df2)
             A    B    C    D
        0  0.0  2.0  0.0  0.0
        1  3.0  4.0  0.0  1.0
        2  0.0  0.0  0.0  NaN
        3  0.0  3.0  0.0  4.0

        Note that column D is not affected since it is not present in df2.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        value, method = validate_fillna_kwargs(value, method)

        # set the default here, so functions examining the signature
        # can detect if something was set (e.g. in groupby) (GH9221)
        if axis is None:
            axis = 0
        axis = self._get_axis_number(axis)

        if value is None:
            # No fill value: this is a method-based (ffill/bfill) fill.
            if not self._mgr.is_single_block and axis == 1:
                # Multi-block data filled along columns is handled by
                # transposing, filling along the default axis, and transposing
                # back. Only ``method`` and ``limit`` are forwarded on this
                # path; ``downcast`` is not.
                if inplace:
                    raise NotImplementedError()
                result = self.T.fillna(method=method, limit=limit).T

                return result

            new_data = self._mgr.interpolate(
                method=method,
                axis=axis,
                limit=limit,
                inplace=inplace,
                downcast=downcast,
            )
        else:
            if self.ndim == 1:
                # 1-D caller: accept a scalar, or a dict/Series aligned on the index.
                if isinstance(value, (dict, ABCSeries)):
                    if not len(value):
                        # test_fillna_nonscalar
                        # An empty mapping fills nothing, so return early.
                        if inplace:
                            return None
                        return self.copy(deep=None)
                    from pandas import Series

                    # Align the fill values to our index and drop to the raw
                    # values before handing them to the manager.
                    value = Series(value)
                    value = value.reindex(self.index, copy=False)
                    value = value._values
                elif not is_list_like(value):
                    # Scalar: passed through to the manager unchanged.
                    pass
                else:
                    raise TypeError(
                        '"value" parameter must be a scalar, dict '
                        "or Series, but you passed a "
                        f'"{type(value).__name__}"'
                    )

                new_data = self._mgr.fillna(
                    value=value, limit=limit, inplace=inplace, downcast=downcast
                )

            elif isinstance(value, (dict, ABCSeries)):
                # 2-D caller with per-column fill values.
                if axis == 1:
                    raise NotImplementedError(
                        "Currently only can fill "
                        "with dict/Series column "
                        "by column"
                    )
                # With copy-on-write enabled we always work on ``self.copy(deep=None)``;
                # otherwise an in-place fill operates directly on ``self``.
                if using_copy_on_write():
                    result = self.copy(deep=None)
                else:
                    result = self if inplace else self.copy()
                is_dict = isinstance(downcast, dict)
                for k, v in value.items():
                    # Keys that are not columns of the frame are ignored.
                    if k not in result:
                        continue

                    # error: Item "None" of "Optional[Dict[Any, Any]]" has no
                    # attribute "get"
                    # A dict ``downcast`` is looked up per column; anything else
                    # is forwarded as-is.
                    downcast_k = (
                        downcast
                        if not is_dict
                        else downcast.get(k)  # type: ignore[union-attr]
                    )

                    res_k = result[k].fillna(v, limit=limit, downcast=downcast_k)

                    if not inplace:
                        result[k] = res_k
                    else:
                        # We can write into our existing column(s) iff dtype
                        #  was preserved.
                        if isinstance(res_k, ABCSeries):
                            # i.e. 'k' only shows up once in self.columns
                            if res_k.dtype == result[k].dtype:
                                result.loc[:, k] = res_k
                            else:
                                # Different dtype -> no way to do inplace.
                                result[k] = res_k
                        else:
                            # see test_fillna_dict_inplace_nonunique_columns
                            # ``k`` matches several columns: normalise whatever
                            # ``get_loc`` returned (slice, boolean mask or integer
                            # array) into integer positions.
                            locs = result.columns.get_loc(k)
                            if isinstance(locs, slice):
                                locs = np.arange(self.shape[1])[locs]
                            elif (
                                isinstance(locs, np.ndarray) and locs.dtype.kind == "b"
                            ):
                                locs = locs.nonzero()[0]
                            elif not (
                                isinstance(locs, np.ndarray) and locs.dtype.kind == "i"
                            ):
                                # Should never be reached, but let's cover our bases
                                raise NotImplementedError(
                                    "Unexpected get_loc result, please report a bug at "
                                    "https://github.com/pandas-dev/pandas"
                                )

                            # The i-th column of ``res_k`` corresponds to position
                            # ``locs[i]`` in the frame.
                            for i, loc in enumerate(locs):
                                res_loc = res_k.iloc[:, i]
                                target = self.iloc[:, loc]

                                # Same dtype: assign through ``iloc``; otherwise
                                # replace the column with ``isetitem``.
                                if res_loc.dtype == target.dtype:
                                    result.iloc[:, loc] = res_loc
                                else:
                                    result.isetitem(loc, res_loc)
                # This branch returns directly and does not reach the
                # ``__finalize__`` call at the bottom of the method.
                if inplace:
                    return self._update_inplace(result)
                else:
                    return result

            elif not is_list_like(value):
                # 2-D caller with a scalar fill value.
                if axis == 1:
                    # Fill via transpose; only ``value`` and ``limit`` are
                    # forwarded here. ``new_data`` is then an NDFrame rather
                    # than a manager when passed to ``_constructor`` below.
                    result = self.T.fillna(value=value, limit=limit).T

                    new_data = result
                else:
                    new_data = self._mgr.fillna(
                        value=value, limit=limit, inplace=inplace, downcast=downcast
                    )
            elif isinstance(value, ABCDataFrame) and self.ndim == 2:
                # DataFrame fill value: keep non-NA entries and take the rest
                # from ``value`` via ``where``.
                new_data = self.where(self.notna(), value)._mgr
            else:
                raise ValueError(f"invalid fill value with a {type(value)}")

        result = self._constructor(new_data)
        if inplace:
            return self._update_inplace(result)
        else:
            return result.__finalize__(self, method="fillna")
  5980. @overload
  5981. def ffill(
  5982. self: NDFrameT,
  5983. *,
  5984. axis: None | Axis = ...,
  5985. inplace: Literal[False] = ...,
  5986. limit: None | int = ...,
  5987. downcast: dict | None = ...,
  5988. ) -> NDFrameT:
  5989. ...
  5990. @overload
  5991. def ffill(
  5992. self,
  5993. *,
  5994. axis: None | Axis = ...,
  5995. inplace: Literal[True],
  5996. limit: None | int = ...,
  5997. downcast: dict | None = ...,
  5998. ) -> None:
  5999. ...
  6000. @overload
  6001. def ffill(
  6002. self: NDFrameT,
  6003. *,
  6004. axis: None | Axis = ...,
  6005. inplace: bool_t = ...,
  6006. limit: None | int = ...,
  6007. downcast: dict | None = ...,
  6008. ) -> NDFrameT | None:
  6009. ...
  6010. @doc(klass=_shared_doc_kwargs["klass"])
  6011. def ffill(
  6012. self: NDFrameT,
  6013. *,
  6014. axis: None | Axis = None,
  6015. inplace: bool_t = False,
  6016. limit: None | int = None,
  6017. downcast: dict | None = None,
  6018. ) -> NDFrameT | None:
  6019. """
  6020. Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
  6021. Returns
  6022. -------
  6023. {klass} or None
  6024. Object with missing values filled or None if ``inplace=True``.
  6025. """
  6026. return self.fillna(
  6027. method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
  6028. )
  6029. @doc(klass=_shared_doc_kwargs["klass"])
  6030. def pad(
  6031. self: NDFrameT,
  6032. *,
  6033. axis: None | Axis = None,
  6034. inplace: bool_t = False,
  6035. limit: None | int = None,
  6036. downcast: dict | None = None,
  6037. ) -> NDFrameT | None:
  6038. """
  6039. Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
  6040. .. deprecated:: 2.0
  6041. {klass}.pad is deprecated. Use {klass}.ffill instead.
  6042. Returns
  6043. -------
  6044. {klass} or None
  6045. Object with missing values filled or None if ``inplace=True``.
  6046. """
  6047. warnings.warn(
  6048. "DataFrame.pad/Series.pad is deprecated. Use "
  6049. "DataFrame.ffill/Series.ffill instead",
  6050. FutureWarning,
  6051. stacklevel=find_stack_level(),
  6052. )
  6053. return self.ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast)
  6054. @overload
  6055. def bfill(
  6056. self: NDFrameT,
  6057. *,
  6058. axis: None | Axis = ...,
  6059. inplace: Literal[False] = ...,
  6060. limit: None | int = ...,
  6061. downcast: dict | None = ...,
  6062. ) -> NDFrameT:
  6063. ...
  6064. @overload
  6065. def bfill(
  6066. self,
  6067. *,
  6068. axis: None | Axis = ...,
  6069. inplace: Literal[True],
  6070. limit: None | int = ...,
  6071. downcast: dict | None = ...,
  6072. ) -> None:
  6073. ...
  6074. @overload
  6075. def bfill(
  6076. self: NDFrameT,
  6077. *,
  6078. axis: None | Axis = ...,
  6079. inplace: bool_t = ...,
  6080. limit: None | int = ...,
  6081. downcast: dict | None = ...,
  6082. ) -> NDFrameT | None:
  6083. ...
  6084. @doc(klass=_shared_doc_kwargs["klass"])
  6085. def bfill(
  6086. self: NDFrameT,
  6087. *,
  6088. axis: None | Axis = None,
  6089. inplace: bool_t = False,
  6090. limit: None | int = None,
  6091. downcast: dict | None = None,
  6092. ) -> NDFrameT | None:
  6093. """
  6094. Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
  6095. Returns
  6096. -------
  6097. {klass} or None
  6098. Object with missing values filled or None if ``inplace=True``.
  6099. """
  6100. return self.fillna(
  6101. method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
  6102. )
  6103. @doc(klass=_shared_doc_kwargs["klass"])
  6104. def backfill(
  6105. self: NDFrameT,
  6106. *,
  6107. axis: None | Axis = None,
  6108. inplace: bool_t = False,
  6109. limit: None | int = None,
  6110. downcast: dict | None = None,
  6111. ) -> NDFrameT | None:
  6112. """
  6113. Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
  6114. .. deprecated:: 2.0
  6115. {klass}.backfill is deprecated. Use {klass}.bfill instead.
  6116. Returns
  6117. -------
  6118. {klass} or None
  6119. Object with missing values filled or None if ``inplace=True``.
  6120. """
  6121. warnings.warn(
  6122. "DataFrame.backfill/Series.backfill is deprecated. Use "
  6123. "DataFrame.bfill/Series.bfill instead",
  6124. FutureWarning,
  6125. stacklevel=find_stack_level(),
  6126. )
  6127. return self.bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast)
    # Typing-only overload: ``inplace=False`` (the default) yields a new object.
    @overload
    def replace(
        self: NDFrameT,
        to_replace=...,
        value=...,
        *,
        inplace: Literal[False] = ...,
        limit: int | None = ...,
        regex: bool_t = ...,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
    ) -> NDFrameT:
        ...

    # Typing-only overload: ``inplace=True`` yields ``None``.
    @overload
    def replace(
        self,
        to_replace=...,
        value=...,
        *,
        inplace: Literal[True],
        limit: int | None = ...,
        regex: bool_t = ...,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
    ) -> None:
        ...

    # Typing-only overload: ``inplace`` is a plain bool, so either result is possible.
    @overload
    def replace(
        self: NDFrameT,
        to_replace=...,
        value=...,
        *,
        inplace: bool_t = ...,
        limit: int | None = ...,
        regex: bool_t = ...,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
    ) -> NDFrameT | None:
        ...

    # The user-facing docstring is supplied by ``_shared_docs["replace"]`` through
    # the ``@doc`` decorator, so the implementation is documented with comments only.
    #
    # Overview of the dispatch below:
    #   1. No ``value`` given (or a ``method`` given): either fill matches using
    #      ``method`` via ``_replace_single``, or unpack a dict-like ``to_replace``
    #      into (to_replace, value) and recurse.
    #   2. ``value`` given: dispatch on the kind of ``to_replace`` (dict-like,
    #      list-like, None, scalar) and on the kind of ``value``.
    @doc(
        _shared_docs["replace"],
        klass=_shared_doc_kwargs["klass"],
        inplace=_shared_doc_kwargs["inplace"],
        replace_iloc=_shared_doc_kwargs["replace_iloc"],
    )
    def replace(
        self: NDFrameT,
        to_replace=None,
        value=lib.no_default,
        *,
        inplace: bool_t = False,
        limit: int | None = None,
        regex: bool_t = False,
        method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
    ) -> NDFrameT | None:
        # Reject ``to_replace`` values that are neither scalar, regex-compilable
        # nor list-like.
        if not (
            is_scalar(to_replace)
            or is_re_compilable(to_replace)
            or is_list_like(to_replace)
        ):
            raise TypeError(
                "Expecting 'to_replace' to be either a scalar, array-like, "
                "dict or None, got invalid type "
                f"{repr(type(to_replace).__name__)}"
            )

        inplace = validate_bool_kwarg(inplace, "inplace")
        # A non-bool ``regex`` carries the pattern(s) itself, so ``to_replace``
        # must then be left as None.
        if not is_bool(regex) and to_replace is not None:
            raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool")

        if value is lib.no_default or method is not lib.no_default:
            # GH#36984 if the user explicitly passes value=None we want to
            #  respect that.  We have the corner case where the user explicitly
            #  passes value=None *and* a method, which we interpret as meaning
            #  they want the (documented) default behavior.
            if method is lib.no_default:
                # TODO: get this to show up as the default in the docs?
                method = "pad"

            # passing a single value that is scalar like
            # when value is None (GH5319), for compat
            if not is_dict_like(to_replace) and not is_dict_like(regex):
                to_replace = [to_replace]

            if isinstance(to_replace, (tuple, list)):
                # TODO: Consider copy-on-write for non-replaced columns's here
                if isinstance(self, ABCDataFrame):
                    from pandas import Series

                    # DataFrame: run the Series-level method-based replacement
                    # on each column through ``apply``.
                    result = self.apply(
                        Series._replace_single,
                        args=(to_replace, method, inplace, limit),
                    )
                    if inplace:
                        return None
                    return result
                return self._replace_single(to_replace, method, inplace, limit)

            if not is_dict_like(to_replace):
                # NOTE(review): when neither ``to_replace`` nor ``regex`` is
                # dict-like, ``to_replace`` was wrapped in a list above and the
                # method already returned, so this TypeError looks unreachable
                # -- confirm before relying on it.
                if not is_dict_like(regex):
                    raise TypeError(
                        'If "to_replace" and "value" are both None '
                        'and "to_replace" is not a list, then '
                        "regex must be a mapping"
                    )
                # The mapping was supplied through ``regex``: treat it as the
                # ``to_replace`` mapping with regex matching switched on.
                to_replace = regex
                regex = True

            # Split the mapping into parallel key / value sequences.
            items = list(to_replace.items())
            if items:
                keys, values = zip(*items)
            else:
                keys, values = ([], [])

            are_mappings = [is_dict_like(v) for v in values]

            if any(are_mappings):
                if not all(are_mappings):
                    raise TypeError(
                        "If a nested mapping is passed, all values "
                        "of the top level mapping must be mappings"
                    )
                # passed a nested dict/Series
                # Build, per outer key, the list of things to replace and the
                # list of replacements from the inner mapping.
                to_rep_dict = {}
                value_dict = {}

                for k, v in items:
                    keys, values = list(zip(*v.items())) or ([], [])

                    to_rep_dict[k] = list(keys)
                    value_dict[k] = list(values)

                to_replace, value = to_rep_dict, value_dict
            else:
                to_replace, value = keys, values

            # Recurse with an explicit ``value``; ``method`` is not forwarded, so
            # the call lands in the ``else`` branch below.
            return self.replace(
                to_replace, value, inplace=inplace, limit=limit, regex=regex
            )
        else:
            # need a non-zero len on all axes
            if not self.size:
                if inplace:
                    return None
                return self.copy(deep=None)

            if is_dict_like(to_replace):
                if is_dict_like(value):  # {'A' : NA} -> {'A' : 0}
                    # Note: Checking below for `in foo.keys()` instead of
                    #  `in foo` is needed for when we have a Series and not dict
                    # Only keys present in ``to_replace``, in ``value`` and in
                    # ``self`` take part.
                    mapping = {
                        col: (to_replace[col], value[col])
                        for col in to_replace.keys()
                        if col in value.keys() and col in self
                    }
                    return self._replace_columnwise(mapping, inplace, regex)

                # {'A': NA} -> 0
                elif not is_list_like(value):
                    # Operate column-wise
                    if self.ndim == 1:
                        raise ValueError(
                            "Series.replace cannot use dict-like to_replace "
                            "and non-None value"
                        )
                    mapping = {
                        col: (to_rep, value) for col, to_rep in to_replace.items()
                    }
                    return self._replace_columnwise(mapping, inplace, regex)
                else:
                    raise TypeError("value argument must be scalar, dict, or Series")

            elif is_list_like(to_replace):
                if not is_list_like(value):
                    # e.g. to_replace = [NA, ''] and value is 0,
                    #  so we replace NA with 0 and then replace '' with 0
                    value = [value] * len(to_replace)

                # e.g. we have to_replace = [NA, ''] and value = [0, 'missing']
                if len(to_replace) != len(value):
                    raise ValueError(
                        f"Replacement lists must match in length. "
                        f"Expecting {len(to_replace)} got {len(value)} "
                    )
                new_data = self._mgr.replace_list(
                    src_list=to_replace,
                    dest_list=value,
                    inplace=inplace,
                    regex=regex,
                )

            elif to_replace is None:
                # The pattern(s) were passed through ``regex``: validate them and
                # recurse with ``regex`` in the ``to_replace`` position.
                if not (
                    is_re_compilable(regex)
                    or is_list_like(regex)
                    or is_dict_like(regex)
                ):
                    raise TypeError(
                        f"'regex' must be a string or a compiled regular expression "
                        f"or a list or dict of strings or regular expressions, "
                        f"you passed a {repr(type(regex).__name__)}"
                    )
                return self.replace(
                    regex, value, inplace=inplace, limit=limit, regex=True
                )
            else:
                # dest iterable dict-like
                if is_dict_like(value):  # NA -> {'A' : 0, 'B' : -1}
                    # Operate column-wise
                    if self.ndim == 1:
                        raise ValueError(
                            "Series.replace cannot use dict-value and "
                            "non-None to_replace"
                        )
                    mapping = {col: (to_replace, val) for col, val in value.items()}
                    return self._replace_columnwise(mapping, inplace, regex)

                elif not is_list_like(value):  # NA -> 0
                    # ``should_use_regex`` decides between the regex-based and the
                    # plain manager replacement.
                    regex = should_use_regex(regex, to_replace)
                    if regex:
                        new_data = self._mgr.replace_regex(
                            to_replace=to_replace,
                            value=value,
                            inplace=inplace,
                        )
                    else:
                        new_data = self._mgr.replace(
                            to_replace=to_replace, value=value, inplace=inplace
                        )
                else:
                    raise TypeError(
                        f'Invalid "to_replace" type: {repr(type(to_replace).__name__)}'
                    )

        # Only the paths that produced ``new_data`` reach this point; every other
        # path returned above.
        result = self._constructor(new_data)
        if inplace:
            return self._update_inplace(result)
        else:
            return result.__finalize__(self, method="replace")
  6344. def interpolate(
  6345. self: NDFrameT,
  6346. method: str = "linear",
  6347. *,
  6348. axis: Axis = 0,
  6349. limit: int | None = None,
  6350. inplace: bool_t = False,
  6351. limit_direction: str | None = None,
  6352. limit_area: str | None = None,
  6353. downcast: str | None = None,
  6354. **kwargs,
  6355. ) -> NDFrameT | None:
  6356. """
  6357. Fill NaN values using an interpolation method.
  6358. Please note that only ``method='linear'`` is supported for
  6359. DataFrame/Series with a MultiIndex.
  6360. Parameters
  6361. ----------
  6362. method : str, default 'linear'
  6363. Interpolation technique to use. One of:
  6364. * 'linear': Ignore the index and treat the values as equally
  6365. spaced. This is the only method supported on MultiIndexes.
  6366. * 'time': Works on daily and higher resolution data to interpolate
  6367. given length of interval.
  6368. * 'index', 'values': use the actual numerical values of the index.
  6369. * 'pad': Fill in NaNs using existing values.
  6370. * 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
  6371. 'barycentric', 'polynomial': Passed to
  6372. `scipy.interpolate.interp1d`, whereas 'spline' is passed to
  6373. `scipy.interpolate.UnivariateSpline`. These methods use the numerical
  6374. values of the index. Both 'polynomial' and 'spline' require that
  6375. you also specify an `order` (int), e.g.
  6376. ``df.interpolate(method='polynomial', order=5)``. Note that,
  6377. `slinear` method in Pandas refers to the Scipy first order `spline`
  6378. instead of Pandas first order `spline`.
  6379. * 'krogh', 'piecewise_polynomial', 'spline', 'pchip', 'akima',
  6380. 'cubicspline': Wrappers around the SciPy interpolation methods of
  6381. similar names. See `Notes`.
  6382. * 'from_derivatives': Refers to
  6383. `scipy.interpolate.BPoly.from_derivatives` which
  6384. replaces 'piecewise_polynomial' interpolation method in
  6385. scipy 0.18.
  6386. axis : {{0 or 'index', 1 or 'columns', None}}, default None
  6387. Axis to interpolate along. For `Series` this parameter is unused
  6388. and defaults to 0.
  6389. limit : int, optional
  6390. Maximum number of consecutive NaNs to fill. Must be greater than
  6391. 0.
  6392. inplace : bool, default False
  6393. Update the data in place if possible.
  6394. limit_direction : {{'forward', 'backward', 'both'}}, Optional
  6395. Consecutive NaNs will be filled in this direction.
  6396. If limit is specified:
  6397. * If 'method' is 'pad' or 'ffill', 'limit_direction' must be 'forward'.
  6398. * If 'method' is 'backfill' or 'bfill', 'limit_direction' must be
  6399. 'backwards'.
  6400. If 'limit' is not specified:
  6401. * If 'method' is 'backfill' or 'bfill', the default is 'backward'
  6402. * else the default is 'forward'
  6403. .. versionchanged:: 1.1.0
  6404. raises ValueError if `limit_direction` is 'forward' or 'both' and
  6405. method is 'backfill' or 'bfill'.
  6406. raises ValueError if `limit_direction` is 'backward' or 'both' and
  6407. method is 'pad' or 'ffill'.
  6408. limit_area : {{`None`, 'inside', 'outside'}}, default None
  6409. If limit is specified, consecutive NaNs will be filled with this
  6410. restriction.
  6411. * ``None``: No fill restriction.
  6412. * 'inside': Only fill NaNs surrounded by valid values
  6413. (interpolate).
  6414. * 'outside': Only fill NaNs outside valid values (extrapolate).
  6415. downcast : optional, 'infer' or None, defaults to None
  6416. Downcast dtypes if possible.
  6417. ``**kwargs`` : optional
  6418. Keyword arguments to pass on to the interpolating function.
  6419. Returns
  6420. -------
  6421. Series or DataFrame or None
  6422. Returns the same object type as the caller, interpolated at
  6423. some or all ``NaN`` values or None if ``inplace=True``.
  6424. See Also
  6425. --------
  6426. fillna : Fill missing values using different methods.
  6427. scipy.interpolate.Akima1DInterpolator : Piecewise cubic polynomials
  6428. (Akima interpolator).
  6429. scipy.interpolate.BPoly.from_derivatives : Piecewise polynomial in the
  6430. Bernstein basis.
  6431. scipy.interpolate.interp1d : Interpolate a 1-D function.
  6432. scipy.interpolate.KroghInterpolator : Interpolate polynomial (Krogh
  6433. interpolator).
  6434. scipy.interpolate.PchipInterpolator : PCHIP 1-d monotonic cubic
  6435. interpolation.
  6436. scipy.interpolate.CubicSpline : Cubic spline data interpolator.
  6437. Notes
  6438. -----
  6439. The 'krogh', 'piecewise_polynomial', 'spline', 'pchip' and 'akima'
  6440. methods are wrappers around the respective SciPy implementations of
  6441. similar names. These use the actual numerical values of the index.
  6442. For more information on their behavior, see the
  6443. `SciPy documentation
  6444. <https://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation>`__.
  6445. Examples
  6446. --------
  6447. Filling in ``NaN`` in a :class:`~pandas.Series` via linear
  6448. interpolation.
  6449. >>> s = pd.Series([0, 1, np.nan, 3])
  6450. >>> s
  6451. 0 0.0
  6452. 1 1.0
  6453. 2 NaN
  6454. 3 3.0
  6455. dtype: float64
  6456. >>> s.interpolate()
  6457. 0 0.0
  6458. 1 1.0
  6459. 2 2.0
  6460. 3 3.0
  6461. dtype: float64
  6462. Filling in ``NaN`` in a Series by padding, but filling at most two
  6463. consecutive ``NaN`` at a time.
  6464. >>> s = pd.Series([np.nan, "single_one", np.nan,
  6465. ... "fill_two_more", np.nan, np.nan, np.nan,
  6466. ... 4.71, np.nan])
  6467. >>> s
  6468. 0 NaN
  6469. 1 single_one
  6470. 2 NaN
  6471. 3 fill_two_more
  6472. 4 NaN
  6473. 5 NaN
  6474. 6 NaN
  6475. 7 4.71
  6476. 8 NaN
  6477. dtype: object
  6478. >>> s.interpolate(method='pad', limit=2)
  6479. 0 NaN
  6480. 1 single_one
  6481. 2 single_one
  6482. 3 fill_two_more
  6483. 4 fill_two_more
  6484. 5 fill_two_more
  6485. 6 NaN
  6486. 7 4.71
  6487. 8 4.71
  6488. dtype: object
  6489. Filling in ``NaN`` in a Series via polynomial interpolation or splines:
  6490. Both 'polynomial' and 'spline' methods require that you also specify
  6491. an ``order`` (int).
  6492. >>> s = pd.Series([0, 2, np.nan, 8])
  6493. >>> s.interpolate(method='polynomial', order=2)
  6494. 0 0.000000
  6495. 1 2.000000
  6496. 2 4.666667
  6497. 3 8.000000
  6498. dtype: float64
  6499. Fill the DataFrame forward (that is, going down) along each column
  6500. using linear interpolation.
  6501. Note how the last entry in column 'a' is interpolated differently,
  6502. because there is no entry after it to use for interpolation.
  6503. Note how the first entry in column 'b' remains ``NaN``, because there
  6504. is no entry before it to use for interpolation.
  6505. >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0),
  6506. ... (np.nan, 2.0, np.nan, np.nan),
  6507. ... (2.0, 3.0, np.nan, 9.0),
  6508. ... (np.nan, 4.0, -4.0, 16.0)],
  6509. ... columns=list('abcd'))
  6510. >>> df
  6511. a b c d
  6512. 0 0.0 NaN -1.0 1.0
  6513. 1 NaN 2.0 NaN NaN
  6514. 2 2.0 3.0 NaN 9.0
  6515. 3 NaN 4.0 -4.0 16.0
  6516. >>> df.interpolate(method='linear', limit_direction='forward', axis=0)
  6517. a b c d
  6518. 0 0.0 NaN -1.0 1.0
  6519. 1 1.0 2.0 -2.0 5.0
  6520. 2 2.0 3.0 -3.0 9.0
  6521. 3 2.0 4.0 -4.0 16.0
  6522. Using polynomial interpolation.
  6523. >>> df['d'].interpolate(method='polynomial', order=2)
  6524. 0 1.0
  6525. 1 4.0
  6526. 2 9.0
  6527. 3 16.0
  6528. Name: d, dtype: float64
  6529. """
  6530. inplace = validate_bool_kwarg(inplace, "inplace")
  6531. axis = self._get_axis_number(axis)
  6532. fillna_methods = ["ffill", "bfill", "pad", "backfill"]
  6533. should_transpose = axis == 1 and method not in fillna_methods
  6534. obj = self.T if should_transpose else self
  6535. if obj.empty:
  6536. return self.copy()
  6537. if method not in fillna_methods:
  6538. axis = self._info_axis_number
  6539. if isinstance(obj.index, MultiIndex) and method != "linear":
  6540. raise ValueError(
  6541. "Only `method=linear` interpolation is supported on MultiIndexes."
  6542. )
  6543. # Set `limit_direction` depending on `method`
  6544. if limit_direction is None:
  6545. limit_direction = (
  6546. "backward" if method in ("backfill", "bfill") else "forward"
  6547. )
  6548. else:
  6549. if method in ("pad", "ffill") and limit_direction != "forward":
  6550. raise ValueError(
  6551. f"`limit_direction` must be 'forward' for method `{method}`"
  6552. )
  6553. if method in ("backfill", "bfill") and limit_direction != "backward":
  6554. raise ValueError(
  6555. f"`limit_direction` must be 'backward' for method `{method}`"
  6556. )
  6557. if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")):
  6558. raise TypeError(
  6559. "Cannot interpolate with all object-dtype columns "
  6560. "in the DataFrame. Try setting at least one "
  6561. "column to a numeric dtype."
  6562. )
  6563. # create/use the index
  6564. if method == "linear":
  6565. # prior default
  6566. index = Index(np.arange(len(obj.index)))
  6567. else:
  6568. index = obj.index
  6569. methods = {"index", "values", "nearest", "time"}
  6570. is_numeric_or_datetime = (
  6571. is_numeric_dtype(index.dtype)
  6572. or is_datetime64_any_dtype(index.dtype)
  6573. or is_timedelta64_dtype(index.dtype)
  6574. )
  6575. if method not in methods and not is_numeric_or_datetime:
  6576. raise ValueError(
  6577. "Index column must be numeric or datetime type when "
  6578. f"using {method} method other than linear. "
  6579. "Try setting a numeric or datetime index column before "
  6580. "interpolating."
  6581. )
  6582. if isna(index).any():
  6583. raise NotImplementedError(
  6584. "Interpolation with NaNs in the index "
  6585. "has not been implemented. Try filling "
  6586. "those NaNs before interpolating."
  6587. )
  6588. new_data = obj._mgr.interpolate(
  6589. method=method,
  6590. axis=axis,
  6591. index=index,
  6592. limit=limit,
  6593. limit_direction=limit_direction,
  6594. limit_area=limit_area,
  6595. inplace=inplace,
  6596. downcast=downcast,
  6597. **kwargs,
  6598. )
  6599. result = self._constructor(new_data)
  6600. if should_transpose:
  6601. result = result.T
  6602. if inplace:
  6603. return self._update_inplace(result)
  6604. else:
  6605. return result.__finalize__(self, method="interpolate")
  6606. # ----------------------------------------------------------------------
    # Timeseries Methods
    @final
    def asof(self, where, subset=None):
        """
        Return the last row(s) without any NaNs before `where`.

        The last row (for each element in `where`, if list) without any
        NaN is taken.
        In case of a :class:`~pandas.DataFrame`, the last row without NaN
        considering only the subset of columns (if not `None`).

        If there is no good value, NaN is returned for a Series or
        a Series of NaN values for a DataFrame.

        Parameters
        ----------
        where : date or array-like of dates
            Date(s) before which the last row(s) are returned. A string is
            converted to a :class:`~pandas.Timestamp`.
        subset : str or array-like of str, default `None`
            For DataFrame, if not `None`, only use these columns to
            check for NaNs.

        Returns
        -------
        scalar, Series, or DataFrame

            The return can be:

            * scalar : when `self` is a Series and `where` is a scalar
            * Series: when `self` is a Series and `where` is an array-like,
              or when `self` is a DataFrame and `where` is a scalar
            * DataFrame : when `self` is a DataFrame and `where` is an
              array-like

        Raises
        ------
        ValueError
            If the index is not monotonically increasing, or if `subset`
            is given when `self` is a Series.

        See Also
        --------
        merge_asof : Perform an asof merge. Similar to left join.

        Notes
        -----
        Dates are assumed to be sorted. Raises if this is not the case.

        Examples
        --------
        A Series and a scalar `where`.

        >>> s = pd.Series([1, 2, np.nan, 4], index=[10, 20, 30, 40])
        >>> s
        10    1.0
        20    2.0
        30    NaN
        40    4.0
        dtype: float64

        >>> s.asof(20)
        2.0

        For a sequence `where`, a Series is returned. The first value is
        NaN, because the first element of `where` is before the first
        index value.

        >>> s.asof([5, 20])
        5     NaN
        20    2.0
        dtype: float64

        Missing values are not considered. The following is ``2.0``, not
        NaN, even though NaN is at the index location for ``30``.

        >>> s.asof(30)
        2.0

        Take all columns into consideration

        >>> df = pd.DataFrame({'a': [10, 20, 30, 40, 50],
        ...                    'b': [None, None, None, None, 500]},
        ...                   index=pd.DatetimeIndex(['2018-02-27 09:01:00',
        ...                                           '2018-02-27 09:02:00',
        ...                                           '2018-02-27 09:03:00',
        ...                                           '2018-02-27 09:04:00',
        ...                                           '2018-02-27 09:05:00']))
        >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
        ...                           '2018-02-27 09:04:30']))
                              a   b
        2018-02-27 09:03:30 NaN NaN
        2018-02-27 09:04:30 NaN NaN

        Take a single column into consideration

        >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30',
        ...                           '2018-02-27 09:04:30']),
        ...         subset=['a'])
                              a   b
        2018-02-27 09:03:30  30 NaN
        2018-02-27 09:04:30  40 NaN
        """
        # A string `where` is interpreted as a single timestamp.
        if isinstance(where, str):
            where = Timestamp(where)

        if not self.index.is_monotonic_increasing:
            raise ValueError("asof requires a sorted index")

        is_series = isinstance(self, ABCSeries)
        if is_series:
            if subset is not None:
                raise ValueError("subset is not valid for Series")
        else:
            # DataFrame: default to checking every column, and accept a
            # single column label by wrapping it in a list.
            if subset is None:
                subset = self.columns
            if not is_list_like(subset):
                subset = [subset]

        is_list = is_list_like(where)
        if not is_list:
            start = self.index[0]
            if isinstance(self.index, PeriodIndex):
                where = Period(where, freq=self.index.freq)

            # Scalar `where` earlier than the first label: nothing to return.
            if where < start:
                if not is_series:
                    return self._constructor_sliced(
                        index=self.columns, name=where, dtype=np.float64
                    )
                return np.nan

            # It's always much faster to use a *while* loop here for
            # Series than pre-computing all the NAs. However a
            # *while* loop is extremely expensive for DataFrame
            # so we later pre-compute all the NAs and use the same
            # code path whether *where* is a scalar or list.
            # See PR: https://github.com/pandas-dev/pandas/pull/14476
            if is_series:
                # side="right" followed by stepping back one position lands
                # on the last label that is <= `where`.
                loc = self.index.searchsorted(where, side="right")
                if loc > 0:
                    loc -= 1

                values = self._values
                # Walk backwards over NA values; the loop stops at position 0,
                # so values[0] is returned even if it is itself NA.
                while loc > 0 and isna(values[loc]):
                    loc -= 1
                return values[loc]

        # From here on `where` is always an Index (a scalar is wrapped in a
        # one-element Index), so scalar and list-like share one code path.
        if not isinstance(where, Index):
            where = Index(where) if is_list else Index([where])

        # Series: per-element NA mask. DataFrame: rows with any NA in `subset`.
        nulls = self.isna() if is_series else self[subset].isna().any(axis=1)
        if nulls.all():
            # No usable row at all: return an all-NaN object of the right shape.
            if is_series:
                self = cast("Series", self)
                return self._constructor(np.nan, index=where, name=self.name)
            elif is_list:
                self = cast("DataFrame", self)
                return self._constructor(np.nan, index=where, columns=self.columns)
            else:
                self = cast("DataFrame", self)
                return self._constructor_sliced(
                    np.nan, index=self.columns, name=where[0]
                )

        # The second argument is the mask of rows that may be selected.
        locs = self.index.asof_locs(where, ~(nulls._values))

        # Positions equal to -1 are treated as "no match" and blanked out below.
        missing = locs == -1
        data = self.take(locs)
        data.index = where
        if missing.any():
            # GH#16063 only do this setting when necessary, otherwise
            #  we'd cast e.g. bools to floats
            data.loc[missing] = np.nan
        # For a scalar `where` the result has a single row; return it unwrapped.
        return data if is_list else data.iloc[-1]
  6748. # ----------------------------------------------------------------------
  6749. # Action Methods
  6750. @doc(klass=_shared_doc_kwargs["klass"])
  6751. def isna(self: NDFrameT) -> NDFrameT:
  6752. """
  6753. Detect missing values.
  6754. Return a boolean same-sized object indicating if the values are NA.
  6755. NA values, such as None or :attr:`numpy.NaN`, gets mapped to True
  6756. values.
  6757. Everything else gets mapped to False values. Characters such as empty
  6758. strings ``''`` or :attr:`numpy.inf` are not considered NA values
  6759. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  6760. Returns
  6761. -------
  6762. {klass}
  6763. Mask of bool values for each element in {klass} that
  6764. indicates whether an element is an NA value.
  6765. See Also
  6766. --------
  6767. {klass}.isnull : Alias of isna.
  6768. {klass}.notna : Boolean inverse of isna.
  6769. {klass}.dropna : Omit axes labels with missing values.
  6770. isna : Top-level isna.
  6771. Examples
  6772. --------
  6773. Show which entries in a DataFrame are NA.
  6774. >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
  6775. ... born=[pd.NaT, pd.Timestamp('1939-05-27'),
  6776. ... pd.Timestamp('1940-04-25')],
  6777. ... name=['Alfred', 'Batman', ''],
  6778. ... toy=[None, 'Batmobile', 'Joker']))
  6779. >>> df
  6780. age born name toy
  6781. 0 5.0 NaT Alfred None
  6782. 1 6.0 1939-05-27 Batman Batmobile
  6783. 2 NaN 1940-04-25 Joker
  6784. >>> df.isna()
  6785. age born name toy
  6786. 0 False True False True
  6787. 1 False False False False
  6788. 2 True False False False
  6789. Show which entries in a Series are NA.
  6790. >>> ser = pd.Series([5, 6, np.NaN])
  6791. >>> ser
  6792. 0 5.0
  6793. 1 6.0
  6794. 2 NaN
  6795. dtype: float64
  6796. >>> ser.isna()
  6797. 0 False
  6798. 1 False
  6799. 2 True
  6800. dtype: bool
  6801. """
  6802. return isna(self).__finalize__(self, method="isna")
  6803. @doc(isna, klass=_shared_doc_kwargs["klass"])
  6804. def isnull(self: NDFrameT) -> NDFrameT:
  6805. return isna(self).__finalize__(self, method="isnull")
  6806. @doc(klass=_shared_doc_kwargs["klass"])
  6807. def notna(self: NDFrameT) -> NDFrameT:
  6808. """
  6809. Detect existing (non-missing) values.
  6810. Return a boolean same-sized object indicating if the values are not NA.
  6811. Non-missing values get mapped to True. Characters such as empty
  6812. strings ``''`` or :attr:`numpy.inf` are not considered NA values
  6813. (unless you set ``pandas.options.mode.use_inf_as_na = True``).
  6814. NA values, such as None or :attr:`numpy.NaN`, get mapped to False
  6815. values.
  6816. Returns
  6817. -------
  6818. {klass}
  6819. Mask of bool values for each element in {klass} that
  6820. indicates whether an element is not an NA value.
  6821. See Also
  6822. --------
  6823. {klass}.notnull : Alias of notna.
  6824. {klass}.isna : Boolean inverse of notna.
  6825. {klass}.dropna : Omit axes labels with missing values.
  6826. notna : Top-level notna.
  6827. Examples
  6828. --------
  6829. Show which entries in a DataFrame are not NA.
  6830. >>> df = pd.DataFrame(dict(age=[5, 6, np.NaN],
  6831. ... born=[pd.NaT, pd.Timestamp('1939-05-27'),
  6832. ... pd.Timestamp('1940-04-25')],
  6833. ... name=['Alfred', 'Batman', ''],
  6834. ... toy=[None, 'Batmobile', 'Joker']))
  6835. >>> df
  6836. age born name toy
  6837. 0 5.0 NaT Alfred None
  6838. 1 6.0 1939-05-27 Batman Batmobile
  6839. 2 NaN 1940-04-25 Joker
  6840. >>> df.notna()
  6841. age born name toy
  6842. 0 True False True False
  6843. 1 True True True True
  6844. 2 False True True True
  6845. Show which entries in a Series are not NA.
  6846. >>> ser = pd.Series([5, 6, np.NaN])
  6847. >>> ser
  6848. 0 5.0
  6849. 1 6.0
  6850. 2 NaN
  6851. dtype: float64
  6852. >>> ser.notna()
  6853. 0 True
  6854. 1 True
  6855. 2 False
  6856. dtype: bool
  6857. """
  6858. return notna(self).__finalize__(self, method="notna")
  6859. @doc(notna, klass=_shared_doc_kwargs["klass"])
  6860. def notnull(self: NDFrameT) -> NDFrameT:
  6861. return notna(self).__finalize__(self, method="notnull")
  6862. @final
  6863. def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
  6864. if (lower is not None and np.any(isna(lower))) or (
  6865. upper is not None and np.any(isna(upper))
  6866. ):
  6867. raise ValueError("Cannot use an NA value as a clip threshold")
  6868. result = self
  6869. mask = isna(self._values)
  6870. with np.errstate(all="ignore"):
  6871. if upper is not None:
  6872. subset = self <= upper
  6873. result = result.where(subset, upper, axis=None, inplace=False)
  6874. if lower is not None:
  6875. subset = self >= lower
  6876. result = result.where(subset, lower, axis=None, inplace=False)
  6877. if np.any(mask):
  6878. result[mask] = np.nan
  6879. if inplace:
  6880. return self._update_inplace(result)
  6881. else:
  6882. return result
  6883. @final
  6884. def _clip_with_one_bound(self, threshold, method, axis, inplace):
  6885. if axis is not None:
  6886. axis = self._get_axis_number(axis)
  6887. # method is self.le for upper bound and self.ge for lower bound
  6888. if is_scalar(threshold) and is_number(threshold):
  6889. if method.__name__ == "le":
  6890. return self._clip_with_scalar(None, threshold, inplace=inplace)
  6891. return self._clip_with_scalar(threshold, None, inplace=inplace)
  6892. # GH #15390
  6893. # In order for where method to work, the threshold must
  6894. # be transformed to NDFrame from other array like structure.
  6895. if (not isinstance(threshold, ABCSeries)) and is_list_like(threshold):
  6896. if isinstance(self, ABCSeries):
  6897. threshold = self._constructor(threshold, index=self.index)
  6898. else:
  6899. threshold = align_method_FRAME(self, threshold, axis, flex=None)[1]
  6900. # GH 40420
  6901. # Treat missing thresholds as no bounds, not clipping the values
  6902. if is_list_like(threshold):
  6903. fill_value = np.inf if method.__name__ == "le" else -np.inf
  6904. threshold_inf = threshold.fillna(fill_value)
  6905. else:
  6906. threshold_inf = threshold
  6907. subset = method(threshold_inf, axis=axis) | isna(self)
  6908. # GH 40420
  6909. return self.where(subset, threshold, axis=axis, inplace=inplace)
  6910. def clip(
  6911. self: NDFrameT,
  6912. lower=None,
  6913. upper=None,
  6914. *,
  6915. axis: Axis | None = None,
  6916. inplace: bool_t = False,
  6917. **kwargs,
  6918. ) -> NDFrameT | None:
  6919. """
  6920. Trim values at input threshold(s).
  6921. Assigns values outside boundary to boundary values. Thresholds
  6922. can be singular values or array like, and in the latter case
  6923. the clipping is performed element-wise in the specified axis.
  6924. Parameters
  6925. ----------
  6926. lower : float or array-like, default None
  6927. Minimum threshold value. All values below this
  6928. threshold will be set to it. A missing
  6929. threshold (e.g `NA`) will not clip the value.
  6930. upper : float or array-like, default None
  6931. Maximum threshold value. All values above this
  6932. threshold will be set to it. A missing
  6933. threshold (e.g `NA`) will not clip the value.
  6934. axis : {{0 or 'index', 1 or 'columns', None}}, default None
  6935. Align object with lower and upper along the given axis.
  6936. For `Series` this parameter is unused and defaults to `None`.
  6937. inplace : bool, default False
  6938. Whether to perform the operation in place on the data.
  6939. *args, **kwargs
  6940. Additional keywords have no effect but might be accepted
  6941. for compatibility with numpy.
  6942. Returns
  6943. -------
  6944. Series or DataFrame or None
  6945. Same type as calling object with the values outside the
  6946. clip boundaries replaced or None if ``inplace=True``.
  6947. See Also
  6948. --------
  6949. Series.clip : Trim values at input threshold in series.
  6950. DataFrame.clip : Trim values at input threshold in dataframe.
  6951. numpy.clip : Clip (limit) the values in an array.
  6952. Examples
  6953. --------
  6954. >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
  6955. >>> df = pd.DataFrame(data)
  6956. >>> df
  6957. col_0 col_1
  6958. 0 9 -2
  6959. 1 -3 -7
  6960. 2 0 6
  6961. 3 -1 8
  6962. 4 5 -5
  6963. Clips per column using lower and upper thresholds:
  6964. >>> df.clip(-4, 6)
  6965. col_0 col_1
  6966. 0 6 -2
  6967. 1 -3 -4
  6968. 2 0 6
  6969. 3 -1 6
  6970. 4 5 -4
  6971. Clips using specific lower and upper thresholds per column element:
  6972. >>> t = pd.Series([2, -4, -1, 6, 3])
  6973. >>> t
  6974. 0 2
  6975. 1 -4
  6976. 2 -1
  6977. 3 6
  6978. 4 3
  6979. dtype: int64
  6980. >>> df.clip(t, t + 4, axis=0)
  6981. col_0 col_1
  6982. 0 6 2
  6983. 1 -3 -4
  6984. 2 0 3
  6985. 3 6 8
  6986. 4 5 3
  6987. Clips using specific lower threshold per column element, with missing values:
  6988. >>> t = pd.Series([2, -4, np.NaN, 6, 3])
  6989. >>> t
  6990. 0 2.0
  6991. 1 -4.0
  6992. 2 NaN
  6993. 3 6.0
  6994. 4 3.0
  6995. dtype: float64
  6996. >>> df.clip(t, axis=0)
  6997. col_0 col_1
  6998. 0 9 2
  6999. 1 -3 -4
  7000. 2 0 6
  7001. 3 6 8
  7002. 4 5 3
  7003. """
  7004. inplace = validate_bool_kwarg(inplace, "inplace")
  7005. axis = nv.validate_clip_with_axis(axis, (), kwargs)
  7006. if axis is not None:
  7007. axis = self._get_axis_number(axis)
  7008. # GH 17276
  7009. # numpy doesn't like NaN as a clip value
  7010. # so ignore
  7011. # GH 19992
  7012. # numpy doesn't drop a list-like bound containing NaN
  7013. isna_lower = isna(lower)
  7014. if not is_list_like(lower):
  7015. if np.any(isna_lower):
  7016. lower = None
  7017. elif np.all(isna_lower):
  7018. lower = None
  7019. isna_upper = isna(upper)
  7020. if not is_list_like(upper):
  7021. if np.any(isna_upper):
  7022. upper = None
  7023. elif np.all(isna_upper):
  7024. upper = None
  7025. # GH 2747 (arguments were reversed)
  7026. if (
  7027. lower is not None
  7028. and upper is not None
  7029. and is_scalar(lower)
  7030. and is_scalar(upper)
  7031. ):
  7032. lower, upper = min(lower, upper), max(lower, upper)
  7033. # fast-path for scalars
  7034. if (lower is None or (is_scalar(lower) and is_number(lower))) and (
  7035. upper is None or (is_scalar(upper) and is_number(upper))
  7036. ):
  7037. return self._clip_with_scalar(lower, upper, inplace=inplace)
  7038. result = self
  7039. if lower is not None:
  7040. result = result._clip_with_one_bound(
  7041. lower, method=self.ge, axis=axis, inplace=inplace
  7042. )
  7043. if upper is not None:
  7044. if inplace:
  7045. result = self
  7046. result = result._clip_with_one_bound(
  7047. upper, method=self.le, axis=axis, inplace=inplace
  7048. )
  7049. return result
  7050. @doc(**_shared_doc_kwargs)
  7051. def asfreq(
  7052. self: NDFrameT,
  7053. freq: Frequency,
  7054. method: FillnaOptions | None = None,
  7055. how: str | None = None,
  7056. normalize: bool_t = False,
  7057. fill_value: Hashable = None,
  7058. ) -> NDFrameT:
  7059. """
  7060. Convert time series to specified frequency.
  7061. Returns the original data conformed to a new index with the specified
  7062. frequency.
  7063. If the index of this {klass} is a :class:`~pandas.PeriodIndex`, the new index
  7064. is the result of transforming the original index with
  7065. :meth:`PeriodIndex.asfreq <pandas.PeriodIndex.asfreq>` (so the original index
  7066. will map one-to-one to the new index).
  7067. Otherwise, the new index will be equivalent to ``pd.date_range(start, end,
  7068. freq=freq)`` where ``start`` and ``end`` are, respectively, the first and
  7069. last entries in the original index (see :func:`pandas.date_range`). The
  7070. values corresponding to any timesteps in the new index which were not present
  7071. in the original index will be null (``NaN``), unless a method for filling
  7072. such unknowns is provided (see the ``method`` parameter below).
  7073. The :meth:`resample` method is more appropriate if an operation on each group of
  7074. timesteps (such as an aggregate) is necessary to represent the data at the new
  7075. frequency.
  7076. Parameters
  7077. ----------
  7078. freq : DateOffset or str
  7079. Frequency DateOffset or string.
  7080. method : {{'backfill'/'bfill', 'pad'/'ffill'}}, default None
  7081. Method to use for filling holes in reindexed Series (note this
  7082. does not fill NaNs that already were present):
  7083. * 'pad' / 'ffill': propagate last valid observation forward to next
  7084. valid
  7085. * 'backfill' / 'bfill': use NEXT valid observation to fill.
  7086. how : {{'start', 'end'}}, default end
  7087. For PeriodIndex only (see PeriodIndex.asfreq).
  7088. normalize : bool, default False
  7089. Whether to reset output index to midnight.
  7090. fill_value : scalar, optional
  7091. Value to use for missing values, applied during upsampling (note
  7092. this does not fill NaNs that already were present).
  7093. Returns
  7094. -------
  7095. {klass}
  7096. {klass} object reindexed to the specified frequency.
  7097. See Also
  7098. --------
  7099. reindex : Conform DataFrame to new index with optional filling logic.
  7100. Notes
  7101. -----
  7102. To learn more about the frequency strings, please see `this link
  7103. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
  7104. Examples
  7105. --------
  7106. Start by creating a series with 4 one minute timestamps.
  7107. >>> index = pd.date_range('1/1/2000', periods=4, freq='T')
  7108. >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index)
  7109. >>> df = pd.DataFrame({{'s': series}})
  7110. >>> df
  7111. s
  7112. 2000-01-01 00:00:00 0.0
  7113. 2000-01-01 00:01:00 NaN
  7114. 2000-01-01 00:02:00 2.0
  7115. 2000-01-01 00:03:00 3.0
  7116. Upsample the series into 30 second bins.
  7117. >>> df.asfreq(freq='30S')
  7118. s
  7119. 2000-01-01 00:00:00 0.0
  7120. 2000-01-01 00:00:30 NaN
  7121. 2000-01-01 00:01:00 NaN
  7122. 2000-01-01 00:01:30 NaN
  7123. 2000-01-01 00:02:00 2.0
  7124. 2000-01-01 00:02:30 NaN
  7125. 2000-01-01 00:03:00 3.0
  7126. Upsample again, providing a ``fill value``.
  7127. >>> df.asfreq(freq='30S', fill_value=9.0)
  7128. s
  7129. 2000-01-01 00:00:00 0.0
  7130. 2000-01-01 00:00:30 9.0
  7131. 2000-01-01 00:01:00 NaN
  7132. 2000-01-01 00:01:30 9.0
  7133. 2000-01-01 00:02:00 2.0
  7134. 2000-01-01 00:02:30 9.0
  7135. 2000-01-01 00:03:00 3.0
  7136. Upsample again, providing a ``method``.
  7137. >>> df.asfreq(freq='30S', method='bfill')
  7138. s
  7139. 2000-01-01 00:00:00 0.0
  7140. 2000-01-01 00:00:30 NaN
  7141. 2000-01-01 00:01:00 NaN
  7142. 2000-01-01 00:01:30 2.0
  7143. 2000-01-01 00:02:00 2.0
  7144. 2000-01-01 00:02:30 3.0
  7145. 2000-01-01 00:03:00 3.0
  7146. """
  7147. from pandas.core.resample import asfreq
  7148. return asfreq(
  7149. self,
  7150. freq,
  7151. method=method,
  7152. how=how,
  7153. normalize=normalize,
  7154. fill_value=fill_value,
  7155. )
  7156. @final
  7157. def at_time(
  7158. self: NDFrameT, time, asof: bool_t = False, axis: Axis | None = None
  7159. ) -> NDFrameT:
  7160. """
  7161. Select values at particular time of day (e.g., 9:30AM).
  7162. Parameters
  7163. ----------
  7164. time : datetime.time or str
  7165. The values to select.
  7166. axis : {0 or 'index', 1 or 'columns'}, default 0
  7167. For `Series` this parameter is unused and defaults to 0.
  7168. Returns
  7169. -------
  7170. Series or DataFrame
  7171. Raises
  7172. ------
  7173. TypeError
  7174. If the index is not a :class:`DatetimeIndex`
  7175. See Also
  7176. --------
  7177. between_time : Select values between particular times of the day.
  7178. first : Select initial periods of time series based on a date offset.
  7179. last : Select final periods of time series based on a date offset.
  7180. DatetimeIndex.indexer_at_time : Get just the index locations for
  7181. values at particular time of the day.
  7182. Examples
  7183. --------
  7184. >>> i = pd.date_range('2018-04-09', periods=4, freq='12H')
  7185. >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
  7186. >>> ts
  7187. A
  7188. 2018-04-09 00:00:00 1
  7189. 2018-04-09 12:00:00 2
  7190. 2018-04-10 00:00:00 3
  7191. 2018-04-10 12:00:00 4
  7192. >>> ts.at_time('12:00')
  7193. A
  7194. 2018-04-09 12:00:00 2
  7195. 2018-04-10 12:00:00 4
  7196. """
  7197. if axis is None:
  7198. axis = self._stat_axis_number
  7199. axis = self._get_axis_number(axis)
  7200. index = self._get_axis(axis)
  7201. if not isinstance(index, DatetimeIndex):
  7202. raise TypeError("Index must be DatetimeIndex")
  7203. indexer = index.indexer_at_time(time, asof=asof)
  7204. return self._take_with_is_copy(indexer, axis=axis)
  7205. @final
  7206. def between_time(
  7207. self: NDFrameT,
  7208. start_time,
  7209. end_time,
  7210. inclusive: IntervalClosedType = "both",
  7211. axis: Axis | None = None,
  7212. ) -> NDFrameT:
  7213. """
  7214. Select values between particular times of the day (e.g., 9:00-9:30 AM).
  7215. By setting ``start_time`` to be later than ``end_time``,
  7216. you can get the times that are *not* between the two times.
  7217. Parameters
  7218. ----------
  7219. start_time : datetime.time or str
  7220. Initial time as a time filter limit.
  7221. end_time : datetime.time or str
  7222. End time as a time filter limit.
  7223. inclusive : {"both", "neither", "left", "right"}, default "both"
  7224. Include boundaries; whether to set each bound as closed or open.
  7225. axis : {0 or 'index', 1 or 'columns'}, default 0
  7226. Determine range time on index or columns value.
  7227. For `Series` this parameter is unused and defaults to 0.
  7228. Returns
  7229. -------
  7230. Series or DataFrame
  7231. Data from the original object filtered to the specified dates range.
  7232. Raises
  7233. ------
  7234. TypeError
  7235. If the index is not a :class:`DatetimeIndex`
  7236. See Also
  7237. --------
  7238. at_time : Select values at a particular time of the day.
  7239. first : Select initial periods of time series based on a date offset.
  7240. last : Select final periods of time series based on a date offset.
  7241. DatetimeIndex.indexer_between_time : Get just the index locations for
  7242. values between particular times of the day.
  7243. Examples
  7244. --------
  7245. >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min')
  7246. >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
  7247. >>> ts
  7248. A
  7249. 2018-04-09 00:00:00 1
  7250. 2018-04-10 00:20:00 2
  7251. 2018-04-11 00:40:00 3
  7252. 2018-04-12 01:00:00 4
  7253. >>> ts.between_time('0:15', '0:45')
  7254. A
  7255. 2018-04-10 00:20:00 2
  7256. 2018-04-11 00:40:00 3
  7257. You get the times that are *not* between two times by setting
  7258. ``start_time`` later than ``end_time``:
  7259. >>> ts.between_time('0:45', '0:15')
  7260. A
  7261. 2018-04-09 00:00:00 1
  7262. 2018-04-12 01:00:00 4
  7263. """
  7264. if axis is None:
  7265. axis = self._stat_axis_number
  7266. axis = self._get_axis_number(axis)
  7267. index = self._get_axis(axis)
  7268. if not isinstance(index, DatetimeIndex):
  7269. raise TypeError("Index must be DatetimeIndex")
  7270. left_inclusive, right_inclusive = validate_inclusive(inclusive)
  7271. indexer = index.indexer_between_time(
  7272. start_time,
  7273. end_time,
  7274. include_start=left_inclusive,
  7275. include_end=right_inclusive,
  7276. )
  7277. return self._take_with_is_copy(indexer, axis=axis)
  7278. @doc(**_shared_doc_kwargs)
  7279. def resample(
  7280. self,
  7281. rule,
  7282. axis: Axis = 0,
  7283. closed: str | None = None,
  7284. label: str | None = None,
  7285. convention: str = "start",
  7286. kind: str | None = None,
  7287. on: Level = None,
  7288. level: Level = None,
  7289. origin: str | TimestampConvertibleTypes = "start_day",
  7290. offset: TimedeltaConvertibleTypes | None = None,
  7291. group_keys: bool_t = False,
  7292. ) -> Resampler:
  7293. """
  7294. Resample time-series data.
  7295. Convenience method for frequency conversion and resampling of time series.
  7296. The object must have a datetime-like index (`DatetimeIndex`, `PeriodIndex`,
  7297. or `TimedeltaIndex`), or the caller must pass the label of a datetime-like
  7298. series/index to the ``on``/``level`` keyword parameter.
  7299. Parameters
  7300. ----------
  7301. rule : DateOffset, Timedelta or str
  7302. The offset string or object representing target conversion.
  7303. axis : {{0 or 'index', 1 or 'columns'}}, default 0
  7304. Which axis to use for up- or down-sampling. For `Series` this parameter
  7305. is unused and defaults to 0. Must be
  7306. `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`.
  7307. closed : {{'right', 'left'}}, default None
  7308. Which side of bin interval is closed. The default is 'left'
  7309. for all frequency offsets except for 'M', 'A', 'Q', 'BM',
  7310. 'BA', 'BQ', and 'W' which all have a default of 'right'.
  7311. label : {{'right', 'left'}}, default None
  7312. Which bin edge label to label bucket with. The default is 'left'
  7313. for all frequency offsets except for 'M', 'A', 'Q', 'BM',
  7314. 'BA', 'BQ', and 'W' which all have a default of 'right'.
  7315. convention : {{'start', 'end', 's', 'e'}}, default 'start'
  7316. For `PeriodIndex` only, controls whether to use the start or
  7317. end of `rule`.
  7318. kind : {{'timestamp', 'period'}}, optional, default None
  7319. Pass 'timestamp' to convert the resulting index to a
  7320. `DateTimeIndex` or 'period' to convert it to a `PeriodIndex`.
  7321. By default the input representation is retained.
  7322. on : str, optional
  7323. For a DataFrame, column to use instead of index for resampling.
  7324. Column must be datetime-like.
  7325. level : str or int, optional
  7326. For a MultiIndex, level (name or number) to use for
  7327. resampling. `level` must be datetime-like.
  7328. origin : Timestamp or str, default 'start_day'
  7329. The timestamp on which to adjust the grouping. The timezone of origin
  7330. must match the timezone of the index.
  7331. If string, must be one of the following:
  7332. - 'epoch': `origin` is 1970-01-01
  7333. - 'start': `origin` is the first value of the timeseries
  7334. - 'start_day': `origin` is the first day at midnight of the timeseries
  7335. .. versionadded:: 1.1.0
  7336. - 'end': `origin` is the last value of the timeseries
  7337. - 'end_day': `origin` is the ceiling midnight of the last day
  7338. .. versionadded:: 1.3.0
  7339. offset : Timedelta or str, default is None
  7340. An offset timedelta added to the origin.
  7341. .. versionadded:: 1.1.0
  7342. group_keys : bool, default False
  7343. Whether to include the group keys in the result index when using
  7344. ``.apply()`` on the resampled object.
  7345. .. versionadded:: 1.5.0
  7346. Not specifying ``group_keys`` will retain values-dependent behavior
  7347. from pandas 1.4 and earlier (see :ref:`pandas 1.5.0 Release notes
  7348. <whatsnew_150.enhancements.resample_group_keys>` for examples).
  7349. .. versionchanged:: 2.0.0
  7350. ``group_keys`` now defaults to ``False``.
  7351. Returns
  7352. -------
  7353. pandas.core.Resampler
  7354. :class:`~pandas.core.Resampler` object.
  7355. See Also
  7356. --------
  7357. Series.resample : Resample a Series.
  7358. DataFrame.resample : Resample a DataFrame.
  7359. groupby : Group {klass} by mapping, function, label, or list of labels.
  7360. asfreq : Reindex a {klass} with the given frequency without grouping.
  7361. Notes
  7362. -----
  7363. See the `user guide
  7364. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling>`__
  7365. for more.
  7366. To learn more about the offset strings, please see `this link
  7367. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects>`__.
  7368. Examples
  7369. --------
  7370. Start by creating a series with 9 one minute timestamps.
  7371. >>> index = pd.date_range('1/1/2000', periods=9, freq='T')
  7372. >>> series = pd.Series(range(9), index=index)
  7373. >>> series
  7374. 2000-01-01 00:00:00 0
  7375. 2000-01-01 00:01:00 1
  7376. 2000-01-01 00:02:00 2
  7377. 2000-01-01 00:03:00 3
  7378. 2000-01-01 00:04:00 4
  7379. 2000-01-01 00:05:00 5
  7380. 2000-01-01 00:06:00 6
  7381. 2000-01-01 00:07:00 7
  7382. 2000-01-01 00:08:00 8
  7383. Freq: T, dtype: int64
  7384. Downsample the series into 3 minute bins and sum the values
  7385. of the timestamps falling into a bin.
  7386. >>> series.resample('3T').sum()
  7387. 2000-01-01 00:00:00 3
  7388. 2000-01-01 00:03:00 12
  7389. 2000-01-01 00:06:00 21
  7390. Freq: 3T, dtype: int64
  7391. Downsample the series into 3 minute bins as above, but label each
  7392. bin using the right edge instead of the left. Please note that the
  7393. value in the bucket used as the label is not included in the bucket,
  7394. which it labels. For example, in the original series the
  7395. bucket ``2000-01-01 00:03:00`` contains the value 3, but the summed
  7396. value in the resampled bucket with the label ``2000-01-01 00:03:00``
  7397. does not include 3 (if it did, the summed value would be 6, not 3).
  7398. To include this value close the right side of the bin interval as
  7399. illustrated in the example below this one.
  7400. >>> series.resample('3T', label='right').sum()
  7401. 2000-01-01 00:03:00 3
  7402. 2000-01-01 00:06:00 12
  7403. 2000-01-01 00:09:00 21
  7404. Freq: 3T, dtype: int64
  7405. Downsample the series into 3 minute bins as above, but close the right
  7406. side of the bin interval.
  7407. >>> series.resample('3T', label='right', closed='right').sum()
  7408. 2000-01-01 00:00:00 0
  7409. 2000-01-01 00:03:00 6
  7410. 2000-01-01 00:06:00 15
  7411. 2000-01-01 00:09:00 15
  7412. Freq: 3T, dtype: int64
  7413. Upsample the series into 30 second bins.
  7414. >>> series.resample('30S').asfreq()[0:5] # Select first 5 rows
  7415. 2000-01-01 00:00:00 0.0
  7416. 2000-01-01 00:00:30 NaN
  7417. 2000-01-01 00:01:00 1.0
  7418. 2000-01-01 00:01:30 NaN
  7419. 2000-01-01 00:02:00 2.0
  7420. Freq: 30S, dtype: float64
  7421. Upsample the series into 30 second bins and fill the ``NaN``
  7422. values using the ``ffill`` method.
  7423. >>> series.resample('30S').ffill()[0:5]
  7424. 2000-01-01 00:00:00 0
  7425. 2000-01-01 00:00:30 0
  7426. 2000-01-01 00:01:00 1
  7427. 2000-01-01 00:01:30 1
  7428. 2000-01-01 00:02:00 2
  7429. Freq: 30S, dtype: int64
  7430. Upsample the series into 30 second bins and fill the
  7431. ``NaN`` values using the ``bfill`` method.
  7432. >>> series.resample('30S').bfill()[0:5]
  7433. 2000-01-01 00:00:00 0
  7434. 2000-01-01 00:00:30 1
  7435. 2000-01-01 00:01:00 1
  7436. 2000-01-01 00:01:30 2
  7437. 2000-01-01 00:02:00 2
  7438. Freq: 30S, dtype: int64
  7439. Pass a custom function via ``apply``
  7440. >>> def custom_resampler(arraylike):
  7441. ... return np.sum(arraylike) + 5
  7442. ...
  7443. >>> series.resample('3T').apply(custom_resampler)
  7444. 2000-01-01 00:00:00 8
  7445. 2000-01-01 00:03:00 17
  7446. 2000-01-01 00:06:00 26
  7447. Freq: 3T, dtype: int64
  7448. For a Series with a PeriodIndex, the keyword `convention` can be
  7449. used to control whether to use the start or end of `rule`.
  7450. Resample a year by quarter using 'start' `convention`. Values are
  7451. assigned to the first quarter of the period.
  7452. >>> s = pd.Series([1, 2], index=pd.period_range('2012-01-01',
  7453. ... freq='A',
  7454. ... periods=2))
  7455. >>> s
  7456. 2012 1
  7457. 2013 2
  7458. Freq: A-DEC, dtype: int64
  7459. >>> s.resample('Q', convention='start').asfreq()
  7460. 2012Q1 1.0
  7461. 2012Q2 NaN
  7462. 2012Q3 NaN
  7463. 2012Q4 NaN
  7464. 2013Q1 2.0
  7465. 2013Q2 NaN
  7466. 2013Q3 NaN
  7467. 2013Q4 NaN
  7468. Freq: Q-DEC, dtype: float64
  7469. Resample quarters by month using 'end' `convention`. Values are
  7470. assigned to the last month of the period.
  7471. >>> q = pd.Series([1, 2, 3, 4], index=pd.period_range('2018-01-01',
  7472. ... freq='Q',
  7473. ... periods=4))
  7474. >>> q
  7475. 2018Q1 1
  7476. 2018Q2 2
  7477. 2018Q3 3
  7478. 2018Q4 4
  7479. Freq: Q-DEC, dtype: int64
  7480. >>> q.resample('M', convention='end').asfreq()
  7481. 2018-03 1.0
  7482. 2018-04 NaN
  7483. 2018-05 NaN
  7484. 2018-06 2.0
  7485. 2018-07 NaN
  7486. 2018-08 NaN
  7487. 2018-09 3.0
  7488. 2018-10 NaN
  7489. 2018-11 NaN
  7490. 2018-12 4.0
  7491. Freq: M, dtype: float64
  7492. For DataFrame objects, the keyword `on` can be used to specify the
  7493. column instead of the index for resampling.
  7494. >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19],
  7495. ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}}
  7496. >>> df = pd.DataFrame(d)
  7497. >>> df['week_starting'] = pd.date_range('01/01/2018',
  7498. ... periods=8,
  7499. ... freq='W')
  7500. >>> df
  7501. price volume week_starting
  7502. 0 10 50 2018-01-07
  7503. 1 11 60 2018-01-14
  7504. 2 9 40 2018-01-21
  7505. 3 13 100 2018-01-28
  7506. 4 14 50 2018-02-04
  7507. 5 18 100 2018-02-11
  7508. 6 17 40 2018-02-18
  7509. 7 19 50 2018-02-25
  7510. >>> df.resample('M', on='week_starting').mean()
  7511. price volume
  7512. week_starting
  7513. 2018-01-31 10.75 62.5
  7514. 2018-02-28 17.00 60.0
  7515. For a DataFrame with MultiIndex, the keyword `level` can be used to
  7516. specify on which level the resampling needs to take place.
  7517. >>> days = pd.date_range('1/1/2000', periods=4, freq='D')
  7518. >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19],
  7519. ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}}
  7520. >>> df2 = pd.DataFrame(
  7521. ... d2,
  7522. ... index=pd.MultiIndex.from_product(
  7523. ... [days, ['morning', 'afternoon']]
  7524. ... )
  7525. ... )
  7526. >>> df2
  7527. price volume
  7528. 2000-01-01 morning 10 50
  7529. afternoon 11 60
  7530. 2000-01-02 morning 9 40
  7531. afternoon 13 100
  7532. 2000-01-03 morning 14 50
  7533. afternoon 18 100
  7534. 2000-01-04 morning 17 40
  7535. afternoon 19 50
  7536. >>> df2.resample('D', level=0).sum()
  7537. price volume
  7538. 2000-01-01 21 110
  7539. 2000-01-02 22 140
  7540. 2000-01-03 32 150
  7541. 2000-01-04 36 90
  7542. If you want to adjust the start of the bins based on a fixed timestamp:
  7543. >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00'
  7544. >>> rng = pd.date_range(start, end, freq='7min')
  7545. >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
  7546. >>> ts
  7547. 2000-10-01 23:30:00 0
  7548. 2000-10-01 23:37:00 3
  7549. 2000-10-01 23:44:00 6
  7550. 2000-10-01 23:51:00 9
  7551. 2000-10-01 23:58:00 12
  7552. 2000-10-02 00:05:00 15
  7553. 2000-10-02 00:12:00 18
  7554. 2000-10-02 00:19:00 21
  7555. 2000-10-02 00:26:00 24
  7556. Freq: 7T, dtype: int64
  7557. >>> ts.resample('17min').sum()
  7558. 2000-10-01 23:14:00 0
  7559. 2000-10-01 23:31:00 9
  7560. 2000-10-01 23:48:00 21
  7561. 2000-10-02 00:05:00 54
  7562. 2000-10-02 00:22:00 24
  7563. Freq: 17T, dtype: int64
  7564. >>> ts.resample('17min', origin='epoch').sum()
  7565. 2000-10-01 23:18:00 0
  7566. 2000-10-01 23:35:00 18
  7567. 2000-10-01 23:52:00 27
  7568. 2000-10-02 00:09:00 39
  7569. 2000-10-02 00:26:00 24
  7570. Freq: 17T, dtype: int64
  7571. >>> ts.resample('17min', origin='2000-01-01').sum()
  7572. 2000-10-01 23:24:00 3
  7573. 2000-10-01 23:41:00 15
  7574. 2000-10-01 23:58:00 45
  7575. 2000-10-02 00:15:00 45
  7576. Freq: 17T, dtype: int64
  7577. If you want to adjust the start of the bins with an `offset` Timedelta, the two
  7578. following lines are equivalent:
  7579. >>> ts.resample('17min', origin='start').sum()
  7580. 2000-10-01 23:30:00 9
  7581. 2000-10-01 23:47:00 21
  7582. 2000-10-02 00:04:00 54
  7583. 2000-10-02 00:21:00 24
  7584. Freq: 17T, dtype: int64
  7585. >>> ts.resample('17min', offset='23h30min').sum()
  7586. 2000-10-01 23:30:00 9
  7587. 2000-10-01 23:47:00 21
  7588. 2000-10-02 00:04:00 54
  7589. 2000-10-02 00:21:00 24
  7590. Freq: 17T, dtype: int64
  7591. If you want to take the largest Timestamp as the end of the bins:
  7592. >>> ts.resample('17min', origin='end').sum()
  7593. 2000-10-01 23:35:00 0
  7594. 2000-10-01 23:52:00 18
  7595. 2000-10-02 00:09:00 27
  7596. 2000-10-02 00:26:00 63
  7597. Freq: 17T, dtype: int64
  7598. In contrast with the `start_day`, you can use `end_day` to take the ceiling
  7599. midnight of the largest Timestamp as the end of the bins and drop the bins
  7600. not containing data:
  7601. >>> ts.resample('17min', origin='end_day').sum()
  7602. 2000-10-01 23:38:00 3
  7603. 2000-10-01 23:55:00 15
  7604. 2000-10-02 00:12:00 45
  7605. 2000-10-02 00:29:00 45
  7606. Freq: 17T, dtype: int64
  7607. """
  7608. from pandas.core.resample import get_resampler
  7609. axis = self._get_axis_number(axis)
  7610. return get_resampler(
  7611. cast("Series | DataFrame", self),
  7612. freq=rule,
  7613. label=label,
  7614. closed=closed,
  7615. axis=axis,
  7616. kind=kind,
  7617. convention=convention,
  7618. key=on,
  7619. level=level,
  7620. origin=origin,
  7621. offset=offset,
  7622. group_keys=group_keys,
  7623. )
  7624. @final
  7625. def first(self: NDFrameT, offset) -> NDFrameT:
  7626. """
  7627. Select initial periods of time series data based on a date offset.
  7628. For a DataFrame with a sorted DatetimeIndex, this function can
  7629. select the first few rows based on a date offset.
  7630. Parameters
  7631. ----------
  7632. offset : str, DateOffset or dateutil.relativedelta
  7633. The offset length of the data that will be selected. For instance,
  7634. '1M' will display all the rows having their index within the first month.
  7635. Returns
  7636. -------
  7637. Series or DataFrame
  7638. A subset of the caller.
  7639. Raises
  7640. ------
  7641. TypeError
  7642. If the index is not a :class:`DatetimeIndex`
  7643. See Also
  7644. --------
  7645. last : Select final periods of time series based on a date offset.
  7646. at_time : Select values at a particular time of the day.
  7647. between_time : Select values between particular times of the day.
  7648. Examples
  7649. --------
  7650. >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
  7651. >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
  7652. >>> ts
  7653. A
  7654. 2018-04-09 1
  7655. 2018-04-11 2
  7656. 2018-04-13 3
  7657. 2018-04-15 4
  7658. Get the rows for the first 3 days:
  7659. >>> ts.first('3D')
  7660. A
  7661. 2018-04-09 1
  7662. 2018-04-11 2
  7663. Notice the data for 3 first calendar days were returned, not the first
  7664. 3 days observed in the dataset, and therefore data for 2018-04-13 was
  7665. not returned.
  7666. """
  7667. if not isinstance(self.index, DatetimeIndex):
  7668. raise TypeError("'first' only supports a DatetimeIndex index")
  7669. if len(self.index) == 0:
  7670. return self.copy(deep=False)
  7671. offset = to_offset(offset)
  7672. if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]):
  7673. # GH#29623 if first value is end of period, remove offset with n = 1
  7674. # before adding the real offset
  7675. end_date = end = self.index[0] - offset.base + offset
  7676. else:
  7677. end_date = end = self.index[0] + offset
  7678. # Tick-like, e.g. 3 weeks
  7679. if isinstance(offset, Tick) and end_date in self.index:
  7680. end = self.index.searchsorted(end_date, side="left")
  7681. return self.iloc[:end]
  7682. return self.loc[:end]
  7683. @final
  7684. def last(self: NDFrameT, offset) -> NDFrameT:
  7685. """
  7686. Select final periods of time series data based on a date offset.
  7687. For a DataFrame with a sorted DatetimeIndex, this function
  7688. selects the last few rows based on a date offset.
  7689. Parameters
  7690. ----------
  7691. offset : str, DateOffset, dateutil.relativedelta
  7692. The offset length of the data that will be selected. For instance,
  7693. '3D' will display all the rows having their index within the last 3 days.
  7694. Returns
  7695. -------
  7696. Series or DataFrame
  7697. A subset of the caller.
  7698. Raises
  7699. ------
  7700. TypeError
  7701. If the index is not a :class:`DatetimeIndex`
  7702. See Also
  7703. --------
  7704. first : Select initial periods of time series based on a date offset.
  7705. at_time : Select values at a particular time of the day.
  7706. between_time : Select values between particular times of the day.
  7707. Examples
  7708. --------
  7709. >>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
  7710. >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i)
  7711. >>> ts
  7712. A
  7713. 2018-04-09 1
  7714. 2018-04-11 2
  7715. 2018-04-13 3
  7716. 2018-04-15 4
  7717. Get the rows for the last 3 days:
  7718. >>> ts.last('3D')
  7719. A
  7720. 2018-04-13 3
  7721. 2018-04-15 4
  7722. Notice the data for 3 last calendar days were returned, not the last
  7723. 3 observed days in the dataset, and therefore data for 2018-04-11 was
  7724. not returned.
  7725. """
  7726. if not isinstance(self.index, DatetimeIndex):
  7727. raise TypeError("'last' only supports a DatetimeIndex index")
  7728. if len(self.index) == 0:
  7729. return self.copy(deep=False)
  7730. offset = to_offset(offset)
  7731. start_date = self.index[-1] - offset
  7732. start = self.index.searchsorted(start_date, side="right")
  7733. return self.iloc[start:]
  7734. @final
  7735. def rank(
  7736. self: NDFrameT,
  7737. axis: Axis = 0,
  7738. method: str = "average",
  7739. numeric_only: bool_t = False,
  7740. na_option: str = "keep",
  7741. ascending: bool_t = True,
  7742. pct: bool_t = False,
  7743. ) -> NDFrameT:
  7744. """
  7745. Compute numerical data ranks (1 through n) along axis.
  7746. By default, equal values are assigned a rank that is the average of the
  7747. ranks of those values.
  7748. Parameters
  7749. ----------
  7750. axis : {0 or 'index', 1 or 'columns'}, default 0
  7751. Index to direct ranking.
  7752. For `Series` this parameter is unused and defaults to 0.
  7753. method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
  7754. How to rank the group of records that have the same value (i.e. ties):
  7755. * average: average rank of the group
  7756. * min: lowest rank in the group
  7757. * max: highest rank in the group
  7758. * first: ranks assigned in order they appear in the array
  7759. * dense: like 'min', but rank always increases by 1 between groups.
  7760. numeric_only : bool, default False
  7761. For DataFrame objects, rank only numeric columns if set to True.
  7762. .. versionchanged:: 2.0.0
  7763. The default value of ``numeric_only`` is now ``False``.
  7764. na_option : {'keep', 'top', 'bottom'}, default 'keep'
  7765. How to rank NaN values:
  7766. * keep: assign NaN rank to NaN values
  7767. * top: assign lowest rank to NaN values
  7768. * bottom: assign highest rank to NaN values
  7769. ascending : bool, default True
  7770. Whether or not the elements should be ranked in ascending order.
  7771. pct : bool, default False
  7772. Whether or not to display the returned rankings in percentile
  7773. form.
  7774. Returns
  7775. -------
  7776. same type as caller
  7777. Return a Series or DataFrame with data ranks as values.
  7778. See Also
  7779. --------
  7780. core.groupby.DataFrameGroupBy.rank : Rank of values within each group.
  7781. core.groupby.SeriesGroupBy.rank : Rank of values within each group.
  7782. Examples
  7783. --------
  7784. >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog',
  7785. ... 'spider', 'snake'],
  7786. ... 'Number_legs': [4, 2, 4, 8, np.nan]})
  7787. >>> df
  7788. Animal Number_legs
  7789. 0 cat 4.0
  7790. 1 penguin 2.0
  7791. 2 dog 4.0
  7792. 3 spider 8.0
  7793. 4 snake NaN
  7794. Ties are assigned the mean of the ranks (by default) for the group.
  7795. >>> s = pd.Series(range(5), index=list("abcde"))
  7796. >>> s["d"] = s["b"]
  7797. >>> s.rank()
  7798. a 1.0
  7799. b 2.5
  7800. c 4.0
  7801. d 2.5
  7802. e 5.0
  7803. dtype: float64
  7804. The following example shows how the method behaves with the above
  7805. parameters:
  7806. * default_rank: this is the default behaviour obtained without using
  7807. any parameter.
  7808. * max_rank: setting ``method = 'max'`` the records that have the
  7809. same values are ranked using the highest rank (e.g.: since 'cat'
  7810. and 'dog' are both in the 2nd and 3rd position, rank 3 is assigned.)
  7811. * NA_bottom: choosing ``na_option = 'bottom'``, if there are records
  7812. with NaN values they are placed at the bottom of the ranking.
  7813. * pct_rank: when setting ``pct = True``, the ranking is expressed as
  7814. percentile rank.
  7815. >>> df['default_rank'] = df['Number_legs'].rank()
  7816. >>> df['max_rank'] = df['Number_legs'].rank(method='max')
  7817. >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom')
  7818. >>> df['pct_rank'] = df['Number_legs'].rank(pct=True)
  7819. >>> df
  7820. Animal Number_legs default_rank max_rank NA_bottom pct_rank
  7821. 0 cat 4.0 2.5 3.0 2.5 0.625
  7822. 1 penguin 2.0 1.0 1.0 1.0 0.250
  7823. 2 dog 4.0 2.5 3.0 2.5 0.625
  7824. 3 spider 8.0 4.0 4.0 4.0 1.000
  7825. 4 snake NaN NaN NaN 5.0 NaN
  7826. """
  7827. axis_int = self._get_axis_number(axis)
  7828. if na_option not in {"keep", "top", "bottom"}:
  7829. msg = "na_option must be one of 'keep', 'top', or 'bottom'"
  7830. raise ValueError(msg)
  7831. def ranker(data):
  7832. if data.ndim == 2:
  7833. # i.e. DataFrame, we cast to ndarray
  7834. values = data.values
  7835. else:
  7836. # i.e. Series, can dispatch to EA
  7837. values = data._values
  7838. if isinstance(values, ExtensionArray):
  7839. ranks = values._rank(
  7840. axis=axis_int,
  7841. method=method,
  7842. ascending=ascending,
  7843. na_option=na_option,
  7844. pct=pct,
  7845. )
  7846. else:
  7847. ranks = algos.rank(
  7848. values,
  7849. axis=axis_int,
  7850. method=method,
  7851. ascending=ascending,
  7852. na_option=na_option,
  7853. pct=pct,
  7854. )
  7855. ranks_obj = self._constructor(ranks, **data._construct_axes_dict())
  7856. return ranks_obj.__finalize__(self, method="rank")
  7857. if numeric_only:
  7858. if self.ndim == 1 and not is_numeric_dtype(self.dtype):
  7859. # GH#47500
  7860. raise TypeError(
  7861. "Series.rank does not allow numeric_only=True with "
  7862. "non-numeric dtype."
  7863. )
  7864. data = self._get_numeric_data()
  7865. else:
  7866. data = self
  7867. return ranker(data)
  7868. @doc(_shared_docs["compare"], klass=_shared_doc_kwargs["klass"])
  7869. def compare(
  7870. self,
  7871. other,
  7872. align_axis: Axis = 1,
  7873. keep_shape: bool_t = False,
  7874. keep_equal: bool_t = False,
  7875. result_names: Suffixes = ("self", "other"),
  7876. ):
  7877. if type(self) is not type(other):
  7878. cls_self, cls_other = type(self).__name__, type(other).__name__
  7879. raise TypeError(
  7880. f"can only compare '{cls_self}' (not '{cls_other}') with '{cls_self}'"
  7881. )
  7882. mask = ~((self == other) | (self.isna() & other.isna()))
  7883. mask.fillna(True, inplace=True)
  7884. if not keep_equal:
  7885. self = self.where(mask)
  7886. other = other.where(mask)
  7887. if not keep_shape:
  7888. if isinstance(self, ABCDataFrame):
  7889. cmask = mask.any()
  7890. rmask = mask.any(axis=1)
  7891. self = self.loc[rmask, cmask]
  7892. other = other.loc[rmask, cmask]
  7893. else:
  7894. self = self[mask]
  7895. other = other[mask]
  7896. if not isinstance(result_names, tuple):
  7897. raise TypeError(
  7898. f"Passing 'result_names' as a {type(result_names)} is not "
  7899. "supported. Provide 'result_names' as a tuple instead."
  7900. )
  7901. if align_axis in (1, "columns"): # This is needed for Series
  7902. axis = 1
  7903. else:
  7904. axis = self._get_axis_number(align_axis)
  7905. diff = concat([self, other], axis=axis, keys=result_names)
  7906. if axis >= self.ndim:
  7907. # No need to reorganize data if stacking on new axis
  7908. # This currently applies for stacking two Series on columns
  7909. return diff
  7910. ax = diff._get_axis(axis)
  7911. ax_names = np.array(ax.names)
  7912. # set index names to positions to avoid confusion
  7913. ax.names = np.arange(len(ax_names))
  7914. # bring self-other to inner level
  7915. order = list(range(1, ax.nlevels)) + [0]
  7916. if isinstance(diff, ABCDataFrame):
  7917. diff = diff.reorder_levels(order, axis=axis)
  7918. else:
  7919. diff = diff.reorder_levels(order)
  7920. # restore the index names in order
  7921. diff._get_axis(axis=axis).names = ax_names[order]
  7922. # reorder axis to keep things organized
  7923. indices = (
  7924. np.arange(diff.shape[axis]).reshape([2, diff.shape[axis] // 2]).T.flatten()
  7925. )
  7926. diff = diff.take(indices, axis=axis)
  7927. return diff
  7928. @doc(**_shared_doc_kwargs)
  7929. def align(
  7930. self: NDFrameT,
  7931. other: NDFrameT,
  7932. join: AlignJoin = "outer",
  7933. axis: Axis | None = None,
  7934. level: Level = None,
  7935. copy: bool_t | None = None,
  7936. fill_value: Hashable = None,
  7937. method: FillnaOptions | None = None,
  7938. limit: int | None = None,
  7939. fill_axis: Axis = 0,
  7940. broadcast_axis: Axis | None = None,
  7941. ) -> NDFrameT:
  7942. """
  7943. Align two objects on their axes with the specified join method.
  7944. Join method is specified for each axis Index.
  7945. Parameters
  7946. ----------
  7947. other : DataFrame or Series
  7948. join : {{'outer', 'inner', 'left', 'right'}}, default 'outer'
  7949. axis : allowed axis of the other object, default None
  7950. Align on index (0), columns (1), or both (None).
  7951. level : int or level name, default None
  7952. Broadcast across a level, matching Index values on the
  7953. passed MultiIndex level.
  7954. copy : bool, default True
  7955. Always returns new objects. If copy=False and no reindexing is
  7956. required then original objects are returned.
  7957. fill_value : scalar, default np.NaN
  7958. Value to use for missing values. Defaults to NaN, but can be any
  7959. "compatible" value.
  7960. method : {{'backfill', 'bfill', 'pad', 'ffill', None}}, default None
  7961. Method to use for filling holes in reindexed Series:
  7962. - pad / ffill: propagate last valid observation forward to next valid.
  7963. - backfill / bfill: use NEXT valid observation to fill gap.
  7964. limit : int, default None
  7965. If method is specified, this is the maximum number of consecutive
  7966. NaN values to forward/backward fill. In other words, if there is
  7967. a gap with more than this number of consecutive NaNs, it will only
  7968. be partially filled. If method is not specified, this is the
  7969. maximum number of entries along the entire axis where NaNs will be
  7970. filled. Must be greater than 0 if not None.
  7971. fill_axis : {axes_single_arg}, default 0
  7972. Filling axis, method and limit.
  7973. broadcast_axis : {axes_single_arg}, default None
  7974. Broadcast values along this axis, if aligning two objects of
  7975. different dimensions.
  7976. Returns
  7977. -------
  7978. tuple of ({klass}, type of other)
  7979. Aligned objects.
  7980. Examples
  7981. --------
  7982. >>> df = pd.DataFrame(
  7983. ... [[1, 2, 3, 4], [6, 7, 8, 9]], columns=["D", "B", "E", "A"], index=[1, 2]
  7984. ... )
  7985. >>> other = pd.DataFrame(
  7986. ... [[10, 20, 30, 40], [60, 70, 80, 90], [600, 700, 800, 900]],
  7987. ... columns=["A", "B", "C", "D"],
  7988. ... index=[2, 3, 4],
  7989. ... )
  7990. >>> df
  7991. D B E A
  7992. 1 1 2 3 4
  7993. 2 6 7 8 9
  7994. >>> other
  7995. A B C D
  7996. 2 10 20 30 40
  7997. 3 60 70 80 90
  7998. 4 600 700 800 900
  7999. Align on columns:
  8000. >>> left, right = df.align(other, join="outer", axis=1)
  8001. >>> left
  8002. A B C D E
  8003. 1 4 2 NaN 1 3
  8004. 2 9 7 NaN 6 8
  8005. >>> right
  8006. A B C D E
  8007. 2 10 20 30 40 NaN
  8008. 3 60 70 80 90 NaN
  8009. 4 600 700 800 900 NaN
  8010. We can also align on the index:
  8011. >>> left, right = df.align(other, join="outer", axis=0)
  8012. >>> left
  8013. D B E A
  8014. 1 1.0 2.0 3.0 4.0
  8015. 2 6.0 7.0 8.0 9.0
  8016. 3 NaN NaN NaN NaN
  8017. 4 NaN NaN NaN NaN
  8018. >>> right
  8019. A B C D
  8020. 1 NaN NaN NaN NaN
  8021. 2 10.0 20.0 30.0 40.0
  8022. 3 60.0 70.0 80.0 90.0
  8023. 4 600.0 700.0 800.0 900.0
  8024. Finally, the default `axis=None` will align on both index and columns:
  8025. >>> left, right = df.align(other, join="outer", axis=None)
  8026. >>> left
  8027. A B C D E
  8028. 1 4.0 2.0 NaN 1.0 3.0
  8029. 2 9.0 7.0 NaN 6.0 8.0
  8030. 3 NaN NaN NaN NaN NaN
  8031. 4 NaN NaN NaN NaN NaN
  8032. >>> right
  8033. A B C D E
  8034. 1 NaN NaN NaN NaN NaN
  8035. 2 10.0 20.0 30.0 40.0 NaN
  8036. 3 60.0 70.0 80.0 90.0 NaN
  8037. 4 600.0 700.0 800.0 900.0 NaN
  8038. """
  8039. method = clean_fill_method(method)
  8040. if broadcast_axis == 1 and self.ndim != other.ndim:
  8041. if isinstance(self, ABCSeries):
  8042. # this means other is a DataFrame, and we need to broadcast
  8043. # self
  8044. cons = self._constructor_expanddim
  8045. df = cons(
  8046. {c: self for c in other.columns}, **other._construct_axes_dict()
  8047. )
  8048. return df._align_frame(
  8049. other,
  8050. join=join,
  8051. axis=axis,
  8052. level=level,
  8053. copy=copy,
  8054. fill_value=fill_value,
  8055. method=method,
  8056. limit=limit,
  8057. fill_axis=fill_axis,
  8058. )
  8059. elif isinstance(other, ABCSeries):
  8060. # this means self is a DataFrame, and we need to broadcast
  8061. # other
  8062. cons = other._constructor_expanddim
  8063. df = cons(
  8064. {c: other for c in self.columns}, **self._construct_axes_dict()
  8065. )
  8066. return self._align_frame(
  8067. df,
  8068. join=join,
  8069. axis=axis,
  8070. level=level,
  8071. copy=copy,
  8072. fill_value=fill_value,
  8073. method=method,
  8074. limit=limit,
  8075. fill_axis=fill_axis,
  8076. )
  8077. if axis is not None:
  8078. axis = self._get_axis_number(axis)
  8079. if isinstance(other, ABCDataFrame):
  8080. return self._align_frame(
  8081. other,
  8082. join=join,
  8083. axis=axis,
  8084. level=level,
  8085. copy=copy,
  8086. fill_value=fill_value,
  8087. method=method,
  8088. limit=limit,
  8089. fill_axis=fill_axis,
  8090. )
  8091. elif isinstance(other, ABCSeries):
  8092. return self._align_series(
  8093. other,
  8094. join=join,
  8095. axis=axis,
  8096. level=level,
  8097. copy=copy,
  8098. fill_value=fill_value,
  8099. method=method,
  8100. limit=limit,
  8101. fill_axis=fill_axis,
  8102. )
  8103. else: # pragma: no cover
  8104. raise TypeError(f"unsupported type: {type(other)}")
  8105. @final
  8106. def _align_frame(
  8107. self,
  8108. other,
  8109. join: AlignJoin = "outer",
  8110. axis: Axis | None = None,
  8111. level=None,
  8112. copy: bool_t | None = None,
  8113. fill_value=None,
  8114. method=None,
  8115. limit=None,
  8116. fill_axis: Axis = 0,
  8117. ):
  8118. # defaults
  8119. join_index, join_columns = None, None
  8120. ilidx, iridx = None, None
  8121. clidx, cridx = None, None
  8122. is_series = isinstance(self, ABCSeries)
  8123. if (axis is None or axis == 0) and not self.index.equals(other.index):
  8124. join_index, ilidx, iridx = self.index.join(
  8125. other.index, how=join, level=level, return_indexers=True
  8126. )
  8127. if (
  8128. (axis is None or axis == 1)
  8129. and not is_series
  8130. and not self.columns.equals(other.columns)
  8131. ):
  8132. join_columns, clidx, cridx = self.columns.join(
  8133. other.columns, how=join, level=level, return_indexers=True
  8134. )
  8135. if is_series:
  8136. reindexers = {0: [join_index, ilidx]}
  8137. else:
  8138. reindexers = {0: [join_index, ilidx], 1: [join_columns, clidx]}
  8139. left = self._reindex_with_indexers(
  8140. reindexers, copy=copy, fill_value=fill_value, allow_dups=True
  8141. )
  8142. # other must be always DataFrame
  8143. right = other._reindex_with_indexers(
  8144. {0: [join_index, iridx], 1: [join_columns, cridx]},
  8145. copy=copy,
  8146. fill_value=fill_value,
  8147. allow_dups=True,
  8148. )
  8149. if method is not None:
  8150. _left = left.fillna(method=method, axis=fill_axis, limit=limit)
  8151. assert _left is not None # needed for mypy
  8152. left = _left
  8153. right = right.fillna(method=method, axis=fill_axis, limit=limit)
  8154. # if DatetimeIndex have different tz, convert to UTC
  8155. left, right = _align_as_utc(left, right, join_index)
  8156. return (
  8157. left.__finalize__(self),
  8158. right.__finalize__(other),
  8159. )
  8160. @final
  8161. def _align_series(
  8162. self,
  8163. other,
  8164. join: AlignJoin = "outer",
  8165. axis: Axis | None = None,
  8166. level=None,
  8167. copy: bool_t | None = None,
  8168. fill_value=None,
  8169. method=None,
  8170. limit=None,
  8171. fill_axis: Axis = 0,
  8172. ):
  8173. is_series = isinstance(self, ABCSeries)
  8174. if copy and using_copy_on_write():
  8175. copy = False
  8176. if (not is_series and axis is None) or axis not in [None, 0, 1]:
  8177. raise ValueError("Must specify axis=0 or 1")
  8178. if is_series and axis == 1:
  8179. raise ValueError("cannot align series to a series other than axis 0")
  8180. # series/series compat, other must always be a Series
  8181. if not axis:
  8182. # equal
  8183. if self.index.equals(other.index):
  8184. join_index, lidx, ridx = None, None, None
  8185. else:
  8186. join_index, lidx, ridx = self.index.join(
  8187. other.index, how=join, level=level, return_indexers=True
  8188. )
  8189. if is_series:
  8190. left = self._reindex_indexer(join_index, lidx, copy)
  8191. elif lidx is None or join_index is None:
  8192. left = self.copy(deep=copy)
  8193. else:
  8194. left = self._constructor(
  8195. self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy)
  8196. )
  8197. right = other._reindex_indexer(join_index, ridx, copy)
  8198. else:
  8199. # one has > 1 ndim
  8200. fdata = self._mgr
  8201. join_index = self.axes[1]
  8202. lidx, ridx = None, None
  8203. if not join_index.equals(other.index):
  8204. join_index, lidx, ridx = join_index.join(
  8205. other.index, how=join, level=level, return_indexers=True
  8206. )
  8207. if lidx is not None:
  8208. bm_axis = self._get_block_manager_axis(1)
  8209. fdata = fdata.reindex_indexer(join_index, lidx, axis=bm_axis)
  8210. if copy and fdata is self._mgr:
  8211. fdata = fdata.copy()
  8212. left = self._constructor(fdata)
  8213. if ridx is None:
  8214. right = other.copy(deep=copy)
  8215. else:
  8216. right = other.reindex(join_index, level=level)
  8217. # fill
  8218. fill_na = notna(fill_value) or (method is not None)
  8219. if fill_na:
  8220. left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis)
  8221. right = right.fillna(fill_value, method=method, limit=limit)
  8222. # if DatetimeIndex have different tz, convert to UTC
  8223. if is_series or (not is_series and axis == 0):
  8224. left, right = _align_as_utc(left, right, join_index)
  8225. return (
  8226. left.__finalize__(self),
  8227. right.__finalize__(other),
  8228. )
@final
def _where(
    self,
    cond,
    other=lib.no_default,
    inplace: bool_t = False,
    axis: Axis | None = None,
    level=None,
):
    """
    Equivalent to public method `where`, except that `other` is not
    applied as a function even if callable. Used in __setitem__.

    Parameters
    ----------
    cond : NDFrame, array-like or callable
        A callable is applied to ``self`` first. An NDFrame is aligned to
        ``self``; anything else must have the same shape as ``self``.
    other : optional
        Replacement values. An NDFrame with at most ``self.ndim``
        dimensions is aligned to ``self``.
    inplace : bool, default False
        If True, the result is written back through ``putmask`` and
        ``_update_inplace``.
    axis : optional
        Alignment axis, resolved with ``_get_axis_number`` when given.
    level : optional
        Passed through to ``align`` when ``other`` is an NDFrame.

    Returns
    -------
    The return value of ``_update_inplace`` when ``inplace`` is True,
    otherwise a new object finalized against ``self``.

    Raises
    ------
    ValueError
        If an array-like ``cond`` does not match ``self.shape``, if
        ``cond`` is not boolean, or if an array ``other`` has a different
        shape than ``self`` while ``self.ndim != 1``.
    InvalidIndexError
        If ``axis`` is None and the aligned ``other`` is not indexed the
        same as ``self``.
    NotImplementedError
        If ``other`` has more dimensions than ``self``.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    if axis is not None:
        axis = self._get_axis_number(axis)

    # align the cond to same shape as myself
    cond = common.apply_if_callable(cond, self)
    if isinstance(cond, NDFrame):
        # CoW: Make sure reference is not kept alive
        cond = cond.align(self, join="right", broadcast_axis=1, copy=False)[0]
    else:
        if not hasattr(cond, "shape"):
            cond = np.asanyarray(cond)
        if cond.shape != self.shape:
            raise ValueError("Array conditional must be same shape as self")
        cond = self._constructor(cond, **self._construct_axes_dict(), copy=False)

    # make sure we are boolean
    # missing entries are filled with ``bool(inplace)``; in the in-place
    # path the whole condition is negated further down
    fill_value = bool(inplace)
    cond = cond.fillna(fill_value)

    msg = "Boolean array expected for the condition, not {dtype}"

    if not cond.empty:
        if not isinstance(cond, ABCDataFrame):
            # This is a single-dimensional object.
            if not is_bool_dtype(cond):
                raise ValueError(msg.format(dtype=cond.dtype))
        else:
            # every column of a DataFrame condition must be boolean
            for _dt in cond.dtypes:
                if not is_bool_dtype(_dt):
                    raise ValueError(msg.format(dtype=_dt))
    else:
        # GH#21947 we have an empty DataFrame/Series, could be object-dtype
        cond = cond.astype(bool)

    # NOTE(review): presumably ``putmask`` assigns where its mask is True
    # while ``where`` keeps values where ``cond`` is True, hence the
    # negation for the in-place path only -- confirm against the manager.
    cond = -cond if inplace else cond
    cond = cond.reindex(self._info_axis, axis=self._info_axis_number, copy=False)

    # try to align with other
    if isinstance(other, NDFrame):
        # align with me
        if other.ndim <= self.ndim:
            # CoW: Make sure reference is not kept alive
            other = self.align(
                other,
                join="left",
                axis=axis,
                level=level,
                fill_value=None,
                copy=False,
            )[1]

            # if we are NOT aligned, raise as we cannot where index
            if axis is None and not other._indexed_same(self):
                raise InvalidIndexError

            if other.ndim < self.ndim:
                # TODO(EA2D): avoid object-dtype cast in EA case GH#38729
                other = other._values
                # reshape to a column (axis 0) or a row (axis 1) so the
                # values broadcast to ``self.shape``
                if axis == 0:
                    other = np.reshape(other, (-1, 1))
                elif axis == 1:
                    other = np.reshape(other, (1, -1))

                other = np.broadcast_to(other, self.shape)

        # slice me out of the other
        else:
            raise NotImplementedError(
                "cannot align with a higher dimensional NDFrame"
            )

    elif not isinstance(other, (MultiIndex, NDFrame)):
        # mainly just catching Index here
        other = extract_array(other, extract_numpy=True)

    if isinstance(other, (np.ndarray, ExtensionArray)):
        if other.shape != self.shape:
            if self.ndim != 1:
                # In the ndim == 1 case we may have
                #  other length 1, which we treat as scalar (GH#2745, GH#4192)
                #  or len(other) == icond.sum(), which we treat like
                #  __setitem__ (GH#3235)
                raise ValueError(
                    "other must be the same shape as self when an ndarray"
                )

        # we are the same shape, so create an actual object for alignment
        else:
            other = self._constructor(
                other, **self._construct_axes_dict(), copy=False
            )

    if axis is None:
        axis = 0

    # ``align`` is forwarded to the manager: True when ``other`` has the
    # same ndim as ``self`` (scalars count as ndim 0), otherwise only
    # when working along axis 1
    if self.ndim == getattr(other, "ndim", 0):
        align = True
    else:
        align = self._get_axis_number(axis) == 1

    if inplace:
        # we may have different type blocks come out of putmask, so
        # reconstruct the block manager

        self._check_inplace_setting(other)
        new_data = self._mgr.putmask(mask=cond, new=other, align=align)
        result = self._constructor(new_data)
        return self._update_inplace(result)

    else:
        new_data = self._mgr.where(
            other=other,
            cond=cond,
            align=align,
        )
        result = self._constructor(new_data)
        return result.__finalize__(self)
  8342. @overload
  8343. def where(
  8344. self: NDFrameT,
  8345. cond,
  8346. other=...,
  8347. *,
  8348. inplace: Literal[False] = ...,
  8349. axis: Axis | None = ...,
  8350. level: Level = ...,
  8351. ) -> NDFrameT:
  8352. ...
  8353. @overload
  8354. def where(
  8355. self,
  8356. cond,
  8357. other=...,
  8358. *,
  8359. inplace: Literal[True],
  8360. axis: Axis | None = ...,
  8361. level: Level = ...,
  8362. ) -> None:
  8363. ...
  8364. @overload
  8365. def where(
  8366. self: NDFrameT,
  8367. cond,
  8368. other=...,
  8369. *,
  8370. inplace: bool_t = ...,
  8371. axis: Axis | None = ...,
  8372. level: Level = ...,
  8373. ) -> NDFrameT | None:
  8374. ...
  8375. @doc(
  8376. klass=_shared_doc_kwargs["klass"],
  8377. cond="True",
  8378. cond_rev="False",
  8379. name="where",
  8380. name_other="mask",
  8381. )
  8382. def where(
  8383. self: NDFrameT,
  8384. cond,
  8385. other=np.nan,
  8386. *,
  8387. inplace: bool_t = False,
  8388. axis: Axis | None = None,
  8389. level: Level = None,
  8390. ) -> NDFrameT | None:
  8391. """
  8392. Replace values where the condition is {cond_rev}.
  8393. Parameters
  8394. ----------
  8395. cond : bool {klass}, array-like, or callable
  8396. Where `cond` is {cond}, keep the original value. Where
  8397. {cond_rev}, replace with corresponding value from `other`.
  8398. If `cond` is callable, it is computed on the {klass} and
  8399. should return boolean {klass} or array. The callable must
  8400. not change input {klass} (though pandas doesn't check it).
  8401. other : scalar, {klass}, or callable
  8402. Entries where `cond` is {cond_rev} are replaced with
  8403. corresponding value from `other`.
  8404. If other is callable, it is computed on the {klass} and
  8405. should return scalar or {klass}. The callable must not
  8406. change input {klass} (though pandas doesn't check it).
  8407. If not specified, entries will be filled with the corresponding
  8408. NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension
  8409. dtypes).
  8410. inplace : bool, default False
  8411. Whether to perform the operation in place on the data.
  8412. axis : int, default None
  8413. Alignment axis if needed. For `Series` this parameter is
  8414. unused and defaults to 0.
  8415. level : int, default None
  8416. Alignment level if needed.
  8417. Returns
  8418. -------
  8419. Same type as caller or None if ``inplace=True``.
  8420. See Also
  8421. --------
  8422. :func:`DataFrame.{name_other}` : Return an object of same shape as
  8423. self.
  8424. Notes
  8425. -----
  8426. The {name} method is an application of the if-then idiom. For each
  8427. element in the calling DataFrame, if ``cond`` is ``{cond}`` the
  8428. element is used; otherwise the corresponding element from the DataFrame
  8429. ``other`` is used. If the axis of ``other`` does not align with axis of
  8430. ``cond`` {klass}, the misaligned index positions will be filled with
  8431. {cond_rev}.
  8432. The signature for :func:`DataFrame.where` differs from
  8433. :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to
  8434. ``np.where(m, df1, df2)``.
  8435. For further details and examples see the ``{name}`` documentation in
  8436. :ref:`indexing <indexing.where_mask>`.
  8437. The dtype of the object takes precedence. The fill value is casted to
  8438. the object's dtype, if this can be done losslessly.
  8439. Examples
  8440. --------
  8441. >>> s = pd.Series(range(5))
  8442. >>> s.where(s > 0)
  8443. 0 NaN
  8444. 1 1.0
  8445. 2 2.0
  8446. 3 3.0
  8447. 4 4.0
  8448. dtype: float64
  8449. >>> s.mask(s > 0)
  8450. 0 0.0
  8451. 1 NaN
  8452. 2 NaN
  8453. 3 NaN
  8454. 4 NaN
  8455. dtype: float64
  8456. >>> s = pd.Series(range(5))
  8457. >>> t = pd.Series([True, False])
  8458. >>> s.where(t, 99)
  8459. 0 0
  8460. 1 99
  8461. 2 99
  8462. 3 99
  8463. 4 99
  8464. dtype: int64
  8465. >>> s.mask(t, 99)
  8466. 0 99
  8467. 1 1
  8468. 2 99
  8469. 3 99
  8470. 4 99
  8471. dtype: int64
  8472. >>> s.where(s > 1, 10)
  8473. 0 10
  8474. 1 10
  8475. 2 2
  8476. 3 3
  8477. 4 4
  8478. dtype: int64
  8479. >>> s.mask(s > 1, 10)
  8480. 0 0
  8481. 1 1
  8482. 2 10
  8483. 3 10
  8484. 4 10
  8485. dtype: int64
  8486. >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B'])
  8487. >>> df
  8488. A B
  8489. 0 0 1
  8490. 1 2 3
  8491. 2 4 5
  8492. 3 6 7
  8493. 4 8 9
  8494. >>> m = df % 3 == 0
  8495. >>> df.where(m, -df)
  8496. A B
  8497. 0 0 -1
  8498. 1 -2 3
  8499. 2 -4 -5
  8500. 3 6 -7
  8501. 4 -8 9
  8502. >>> df.where(m, -df) == np.where(m, df, -df)
  8503. A B
  8504. 0 True True
  8505. 1 True True
  8506. 2 True True
  8507. 3 True True
  8508. 4 True True
  8509. >>> df.where(m, -df) == df.mask(~m, -df)
  8510. A B
  8511. 0 True True
  8512. 1 True True
  8513. 2 True True
  8514. 3 True True
  8515. 4 True True
  8516. """
  8517. other = common.apply_if_callable(other, self)
  8518. return self._where(cond, other, inplace, axis, level)
  8519. @overload
  8520. def mask(
  8521. self: NDFrameT,
  8522. cond,
  8523. other=...,
  8524. *,
  8525. inplace: Literal[False] = ...,
  8526. axis: Axis | None = ...,
  8527. level: Level = ...,
  8528. ) -> NDFrameT:
  8529. ...
  8530. @overload
  8531. def mask(
  8532. self,
  8533. cond,
  8534. other=...,
  8535. *,
  8536. inplace: Literal[True],
  8537. axis: Axis | None = ...,
  8538. level: Level = ...,
  8539. ) -> None:
  8540. ...
  8541. @overload
  8542. def mask(
  8543. self: NDFrameT,
  8544. cond,
  8545. other=...,
  8546. *,
  8547. inplace: bool_t = ...,
  8548. axis: Axis | None = ...,
  8549. level: Level = ...,
  8550. ) -> NDFrameT | None:
  8551. ...
  8552. @doc(
  8553. where,
  8554. klass=_shared_doc_kwargs["klass"],
  8555. cond="False",
  8556. cond_rev="True",
  8557. name="mask",
  8558. name_other="where",
  8559. )
  8560. def mask(
  8561. self: NDFrameT,
  8562. cond,
  8563. other=lib.no_default,
  8564. *,
  8565. inplace: bool_t = False,
  8566. axis: Axis | None = None,
  8567. level: Level = None,
  8568. ) -> NDFrameT | None:
  8569. inplace = validate_bool_kwarg(inplace, "inplace")
  8570. cond = common.apply_if_callable(cond, self)
  8571. # see gh-21891
  8572. if not hasattr(cond, "__invert__"):
  8573. cond = np.array(cond)
  8574. return self.where(
  8575. ~cond,
  8576. other=other,
  8577. inplace=inplace,
  8578. axis=axis,
  8579. level=level,
  8580. )
  8581. @doc(klass=_shared_doc_kwargs["klass"])
  8582. def shift(
  8583. self: NDFrameT,
  8584. periods: int = 1,
  8585. freq=None,
  8586. axis: Axis = 0,
  8587. fill_value: Hashable = None,
  8588. ) -> NDFrameT:
  8589. """
  8590. Shift index by desired number of periods with an optional time `freq`.
  8591. When `freq` is not passed, shift the index without realigning the data.
  8592. If `freq` is passed (in this case, the index must be date or datetime,
  8593. or it will raise a `NotImplementedError`), the index will be
  8594. increased using the periods and the `freq`. `freq` can be inferred
  8595. when specified as "infer" as long as either freq or inferred_freq
  8596. attribute is set in the index.
  8597. Parameters
  8598. ----------
  8599. periods : int
  8600. Number of periods to shift. Can be positive or negative.
  8601. freq : DateOffset, tseries.offsets, timedelta, or str, optional
  8602. Offset to use from the tseries module or time rule (e.g. 'EOM').
  8603. If `freq` is specified then the index values are shifted but the
  8604. data is not realigned. That is, use `freq` if you would like to
  8605. extend the index when shifting and preserve the original data.
  8606. If `freq` is specified as "infer" then it will be inferred from
  8607. the freq or inferred_freq attributes of the index. If neither of
  8608. those attributes exist, a ValueError is thrown.
  8609. axis : {{0 or 'index', 1 or 'columns', None}}, default None
  8610. Shift direction. For `Series` this parameter is unused and defaults to 0.
  8611. fill_value : object, optional
  8612. The scalar value to use for newly introduced missing values.
  8613. the default depends on the dtype of `self`.
  8614. For numeric data, ``np.nan`` is used.
  8615. For datetime, timedelta, or period data, etc. :attr:`NaT` is used.
  8616. For extension dtypes, ``self.dtype.na_value`` is used.
  8617. .. versionchanged:: 1.1.0
  8618. Returns
  8619. -------
  8620. {klass}
  8621. Copy of input object, shifted.
  8622. See Also
  8623. --------
  8624. Index.shift : Shift values of Index.
  8625. DatetimeIndex.shift : Shift values of DatetimeIndex.
  8626. PeriodIndex.shift : Shift values of PeriodIndex.
  8627. Examples
  8628. --------
  8629. >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45],
  8630. ... "Col2": [13, 23, 18, 33, 48],
  8631. ... "Col3": [17, 27, 22, 37, 52]}},
  8632. ... index=pd.date_range("2020-01-01", "2020-01-05"))
  8633. >>> df
  8634. Col1 Col2 Col3
  8635. 2020-01-01 10 13 17
  8636. 2020-01-02 20 23 27
  8637. 2020-01-03 15 18 22
  8638. 2020-01-04 30 33 37
  8639. 2020-01-05 45 48 52
  8640. >>> df.shift(periods=3)
  8641. Col1 Col2 Col3
  8642. 2020-01-01 NaN NaN NaN
  8643. 2020-01-02 NaN NaN NaN
  8644. 2020-01-03 NaN NaN NaN
  8645. 2020-01-04 10.0 13.0 17.0
  8646. 2020-01-05 20.0 23.0 27.0
  8647. >>> df.shift(periods=1, axis="columns")
  8648. Col1 Col2 Col3
  8649. 2020-01-01 NaN 10 13
  8650. 2020-01-02 NaN 20 23
  8651. 2020-01-03 NaN 15 18
  8652. 2020-01-04 NaN 30 33
  8653. 2020-01-05 NaN 45 48
  8654. >>> df.shift(periods=3, fill_value=0)
  8655. Col1 Col2 Col3
  8656. 2020-01-01 0 0 0
  8657. 2020-01-02 0 0 0
  8658. 2020-01-03 0 0 0
  8659. 2020-01-04 10 13 17
  8660. 2020-01-05 20 23 27
  8661. >>> df.shift(periods=3, freq="D")
  8662. Col1 Col2 Col3
  8663. 2020-01-04 10 13 17
  8664. 2020-01-05 20 23 27
  8665. 2020-01-06 15 18 22
  8666. 2020-01-07 30 33 37
  8667. 2020-01-08 45 48 52
  8668. >>> df.shift(periods=3, freq="infer")
  8669. Col1 Col2 Col3
  8670. 2020-01-04 10 13 17
  8671. 2020-01-05 20 23 27
  8672. 2020-01-06 15 18 22
  8673. 2020-01-07 30 33 37
  8674. 2020-01-08 45 48 52
  8675. """
  8676. if periods == 0:
  8677. return self.copy(deep=None)
  8678. if freq is None:
  8679. # when freq is None, data is shifted, index is not
  8680. axis = self._get_axis_number(axis)
  8681. new_data = self._mgr.shift(
  8682. periods=periods, axis=axis, fill_value=fill_value
  8683. )
  8684. return self._constructor(new_data).__finalize__(self, method="shift")
  8685. # when freq is given, index is shifted, data is not
  8686. index = self._get_axis(axis)
  8687. if freq == "infer":
  8688. freq = getattr(index, "freq", None)
  8689. if freq is None:
  8690. freq = getattr(index, "inferred_freq", None)
  8691. if freq is None:
  8692. msg = "Freq was not set in the index hence cannot be inferred"
  8693. raise ValueError(msg)
  8694. elif isinstance(freq, str):
  8695. freq = to_offset(freq)
  8696. if isinstance(index, PeriodIndex):
  8697. orig_freq = to_offset(index.freq)
  8698. if freq != orig_freq:
  8699. assert orig_freq is not None # for mypy
  8700. raise ValueError(
  8701. f"Given freq {freq.rule_code} does not match "
  8702. f"PeriodIndex freq {orig_freq.rule_code}"
  8703. )
  8704. new_ax = index.shift(periods)
  8705. else:
  8706. new_ax = index.shift(periods, freq)
  8707. result = self.set_axis(new_ax, axis=axis)
  8708. return result.__finalize__(self, method="shift")
  8709. def truncate(
  8710. self: NDFrameT,
  8711. before=None,
  8712. after=None,
  8713. axis: Axis | None = None,
  8714. copy: bool_t | None = None,
  8715. ) -> NDFrameT:
  8716. """
  8717. Truncate a Series or DataFrame before and after some index value.
  8718. This is a useful shorthand for boolean indexing based on index
  8719. values above or below certain thresholds.
  8720. Parameters
  8721. ----------
  8722. before : date, str, int
  8723. Truncate all rows before this index value.
  8724. after : date, str, int
  8725. Truncate all rows after this index value.
  8726. axis : {0 or 'index', 1 or 'columns'}, optional
  8727. Axis to truncate. Truncates the index (rows) by default.
  8728. For `Series` this parameter is unused and defaults to 0.
  8729. copy : bool, default is True,
  8730. Return a copy of the truncated section.
  8731. Returns
  8732. -------
  8733. type of caller
  8734. The truncated Series or DataFrame.
  8735. See Also
  8736. --------
  8737. DataFrame.loc : Select a subset of a DataFrame by label.
  8738. DataFrame.iloc : Select a subset of a DataFrame by position.
  8739. Notes
  8740. -----
  8741. If the index being truncated contains only datetime values,
  8742. `before` and `after` may be specified as strings instead of
  8743. Timestamps.
  8744. Examples
  8745. --------
  8746. >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'],
  8747. ... 'B': ['f', 'g', 'h', 'i', 'j'],
  8748. ... 'C': ['k', 'l', 'm', 'n', 'o']},
  8749. ... index=[1, 2, 3, 4, 5])
  8750. >>> df
  8751. A B C
  8752. 1 a f k
  8753. 2 b g l
  8754. 3 c h m
  8755. 4 d i n
  8756. 5 e j o
  8757. >>> df.truncate(before=2, after=4)
  8758. A B C
  8759. 2 b g l
  8760. 3 c h m
  8761. 4 d i n
  8762. The columns of a DataFrame can be truncated.
  8763. >>> df.truncate(before="A", after="B", axis="columns")
  8764. A B
  8765. 1 a f
  8766. 2 b g
  8767. 3 c h
  8768. 4 d i
  8769. 5 e j
  8770. For Series, only rows can be truncated.
  8771. >>> df['A'].truncate(before=2, after=4)
  8772. 2 b
  8773. 3 c
  8774. 4 d
  8775. Name: A, dtype: object
  8776. The index values in ``truncate`` can be datetimes or string
  8777. dates.
  8778. >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s')
  8779. >>> df = pd.DataFrame(index=dates, data={'A': 1})
  8780. >>> df.tail()
  8781. A
  8782. 2016-01-31 23:59:56 1
  8783. 2016-01-31 23:59:57 1
  8784. 2016-01-31 23:59:58 1
  8785. 2016-01-31 23:59:59 1
  8786. 2016-02-01 00:00:00 1
  8787. >>> df.truncate(before=pd.Timestamp('2016-01-05'),
  8788. ... after=pd.Timestamp('2016-01-10')).tail()
  8789. A
  8790. 2016-01-09 23:59:56 1
  8791. 2016-01-09 23:59:57 1
  8792. 2016-01-09 23:59:58 1
  8793. 2016-01-09 23:59:59 1
  8794. 2016-01-10 00:00:00 1
  8795. Because the index is a DatetimeIndex containing only dates, we can
  8796. specify `before` and `after` as strings. They will be coerced to
  8797. Timestamps before truncation.
  8798. >>> df.truncate('2016-01-05', '2016-01-10').tail()
  8799. A
  8800. 2016-01-09 23:59:56 1
  8801. 2016-01-09 23:59:57 1
  8802. 2016-01-09 23:59:58 1
  8803. 2016-01-09 23:59:59 1
  8804. 2016-01-10 00:00:00 1
  8805. Note that ``truncate`` assumes a 0 value for any unspecified time
  8806. component (midnight). This differs from partial string slicing, which
  8807. returns any partially matching dates.
  8808. >>> df.loc['2016-01-05':'2016-01-10', :].tail()
  8809. A
  8810. 2016-01-10 23:59:55 1
  8811. 2016-01-10 23:59:56 1
  8812. 2016-01-10 23:59:57 1
  8813. 2016-01-10 23:59:58 1
  8814. 2016-01-10 23:59:59 1
  8815. """
  8816. if axis is None:
  8817. axis = self._stat_axis_number
  8818. axis = self._get_axis_number(axis)
  8819. ax = self._get_axis(axis)
  8820. # GH 17935
  8821. # Check that index is sorted
  8822. if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
  8823. raise ValueError("truncate requires a sorted index")
  8824. # if we have a date index, convert to dates, otherwise
  8825. # treat like a slice
  8826. if ax._is_all_dates:
  8827. from pandas.core.tools.datetimes import to_datetime
  8828. before = to_datetime(before)
  8829. after = to_datetime(after)
  8830. if before is not None and after is not None and before > after:
  8831. raise ValueError(f"Truncate: {after} must be after {before}")
  8832. if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1:
  8833. before, after = after, before
  8834. slicer = [slice(None, None)] * self._AXIS_LEN
  8835. slicer[axis] = slice(before, after)
  8836. result = self.loc[tuple(slicer)]
  8837. if isinstance(ax, MultiIndex):
  8838. setattr(result, self._get_axis_name(axis), ax.truncate(before, after))
  8839. result = result.copy(deep=copy and not using_copy_on_write())
  8840. return result
  8841. @final
  8842. @doc(klass=_shared_doc_kwargs["klass"])
  8843. def tz_convert(
  8844. self: NDFrameT, tz, axis: Axis = 0, level=None, copy: bool_t | None = None
  8845. ) -> NDFrameT:
  8846. """
  8847. Convert tz-aware axis to target time zone.
  8848. Parameters
  8849. ----------
  8850. tz : str or tzinfo object or None
  8851. Target time zone. Passing ``None`` will convert to
  8852. UTC and remove the timezone information.
  8853. axis : {{0 or 'index', 1 or 'columns'}}, default 0
  8854. The axis to convert
  8855. level : int, str, default None
  8856. If axis is a MultiIndex, convert a specific level. Otherwise
  8857. must be None.
  8858. copy : bool, default True
  8859. Also make a copy of the underlying data.
  8860. Returns
  8861. -------
  8862. {klass}
  8863. Object with time zone converted axis.
  8864. Raises
  8865. ------
  8866. TypeError
  8867. If the axis is tz-naive.
  8868. Examples
  8869. --------
  8870. Change to another time zone:
  8871. >>> s = pd.Series(
  8872. ... [1],
  8873. ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']),
  8874. ... )
  8875. >>> s.tz_convert('Asia/Shanghai')
  8876. 2018-09-15 07:30:00+08:00 1
  8877. dtype: int64
  8878. Pass None to convert to UTC and get a tz-naive index:
  8879. >>> s = pd.Series([1],
  8880. ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
  8881. >>> s.tz_convert(None)
  8882. 2018-09-14 23:30:00 1
  8883. dtype: int64
  8884. """
  8885. axis = self._get_axis_number(axis)
  8886. ax = self._get_axis(axis)
  8887. def _tz_convert(ax, tz):
  8888. if not hasattr(ax, "tz_convert"):
  8889. if len(ax) > 0:
  8890. ax_name = self._get_axis_name(axis)
  8891. raise TypeError(
  8892. f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
  8893. )
  8894. ax = DatetimeIndex([], tz=tz)
  8895. else:
  8896. ax = ax.tz_convert(tz)
  8897. return ax
  8898. # if a level is given it must be a MultiIndex level or
  8899. # equivalent to the axis name
  8900. if isinstance(ax, MultiIndex):
  8901. level = ax._get_level_number(level)
  8902. new_level = _tz_convert(ax.levels[level], tz)
  8903. ax = ax.set_levels(new_level, level=level)
  8904. else:
  8905. if level not in (None, 0, ax.name):
  8906. raise ValueError(f"The level {level} is not valid")
  8907. ax = _tz_convert(ax, tz)
  8908. result = self.copy(deep=copy and not using_copy_on_write())
  8909. result = result.set_axis(ax, axis=axis, copy=False)
  8910. return result.__finalize__(self, method="tz_convert")
  8911. @final
  8912. @doc(klass=_shared_doc_kwargs["klass"])
  8913. def tz_localize(
  8914. self: NDFrameT,
  8915. tz,
  8916. axis: Axis = 0,
  8917. level=None,
  8918. copy: bool_t | None = None,
  8919. ambiguous: TimeAmbiguous = "raise",
  8920. nonexistent: TimeNonexistent = "raise",
  8921. ) -> NDFrameT:
  8922. """
  8923. Localize tz-naive index of a Series or DataFrame to target time zone.
  8924. This operation localizes the Index. To localize the values in a
  8925. timezone-naive Series, use :meth:`Series.dt.tz_localize`.
  8926. Parameters
  8927. ----------
  8928. tz : str or tzinfo or None
  8929. Time zone to localize. Passing ``None`` will remove the
  8930. time zone information and preserve local time.
  8931. axis : {{0 or 'index', 1 or 'columns'}}, default 0
  8932. The axis to localize
  8933. level : int, str, default None
  8934. If axis ia a MultiIndex, localize a specific level. Otherwise
  8935. must be None.
  8936. copy : bool, default True
  8937. Also make a copy of the underlying data.
  8938. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
  8939. When clocks moved backward due to DST, ambiguous times may arise.
  8940. For example in Central European Time (UTC+01), when going from
  8941. 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
  8942. 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
  8943. `ambiguous` parameter dictates how ambiguous times should be
  8944. handled.
  8945. - 'infer' will attempt to infer fall dst-transition hours based on
  8946. order
  8947. - bool-ndarray where True signifies a DST time, False designates
  8948. a non-DST time (note that this flag is only applicable for
  8949. ambiguous times)
  8950. - 'NaT' will return NaT where there are ambiguous times
  8951. - 'raise' will raise an AmbiguousTimeError if there are ambiguous
  8952. times.
  8953. nonexistent : str, default 'raise'
  8954. A nonexistent time does not exist in a particular timezone
  8955. where clocks moved forward due to DST. Valid values are:
  8956. - 'shift_forward' will shift the nonexistent time forward to the
  8957. closest existing time
  8958. - 'shift_backward' will shift the nonexistent time backward to the
  8959. closest existing time
  8960. - 'NaT' will return NaT where there are nonexistent times
  8961. - timedelta objects will shift nonexistent times by the timedelta
  8962. - 'raise' will raise an NonExistentTimeError if there are
  8963. nonexistent times.
  8964. Returns
  8965. -------
  8966. {klass}
  8967. Same type as the input.
  8968. Raises
  8969. ------
  8970. TypeError
  8971. If the TimeSeries is tz-aware and tz is not None.
  8972. Examples
  8973. --------
  8974. Localize local times:
  8975. >>> s = pd.Series(
  8976. ... [1],
  8977. ... index=pd.DatetimeIndex(['2018-09-15 01:30:00']),
  8978. ... )
  8979. >>> s.tz_localize('CET')
  8980. 2018-09-15 01:30:00+02:00 1
  8981. dtype: int64
  8982. Pass None to convert to tz-naive index and preserve local time:
  8983. >>> s = pd.Series([1],
  8984. ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']))
  8985. >>> s.tz_localize(None)
  8986. 2018-09-15 01:30:00 1
  8987. dtype: int64
  8988. Be careful with DST changes. When there is sequential data, pandas
  8989. can infer the DST time:
  8990. >>> s = pd.Series(range(7),
  8991. ... index=pd.DatetimeIndex(['2018-10-28 01:30:00',
  8992. ... '2018-10-28 02:00:00',
  8993. ... '2018-10-28 02:30:00',
  8994. ... '2018-10-28 02:00:00',
  8995. ... '2018-10-28 02:30:00',
  8996. ... '2018-10-28 03:00:00',
  8997. ... '2018-10-28 03:30:00']))
  8998. >>> s.tz_localize('CET', ambiguous='infer')
  8999. 2018-10-28 01:30:00+02:00 0
  9000. 2018-10-28 02:00:00+02:00 1
  9001. 2018-10-28 02:30:00+02:00 2
  9002. 2018-10-28 02:00:00+01:00 3
  9003. 2018-10-28 02:30:00+01:00 4
  9004. 2018-10-28 03:00:00+01:00 5
  9005. 2018-10-28 03:30:00+01:00 6
  9006. dtype: int64
  9007. In some cases, inferring the DST is impossible. In such cases, you can
  9008. pass an ndarray to the ambiguous parameter to set the DST explicitly
  9009. >>> s = pd.Series(range(3),
  9010. ... index=pd.DatetimeIndex(['2018-10-28 01:20:00',
  9011. ... '2018-10-28 02:36:00',
  9012. ... '2018-10-28 03:46:00']))
  9013. >>> s.tz_localize('CET', ambiguous=np.array([True, True, False]))
  9014. 2018-10-28 01:20:00+02:00 0
  9015. 2018-10-28 02:36:00+02:00 1
  9016. 2018-10-28 03:46:00+01:00 2
  9017. dtype: int64
  9018. If the DST transition causes nonexistent times, you can shift these
  9019. dates forward or backward with a timedelta object or `'shift_forward'`
  9020. or `'shift_backward'`.
  9021. >>> s = pd.Series(range(2),
  9022. ... index=pd.DatetimeIndex(['2015-03-29 02:30:00',
  9023. ... '2015-03-29 03:30:00']))
  9024. >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
  9025. 2015-03-29 03:00:00+02:00 0
  9026. 2015-03-29 03:30:00+02:00 1
  9027. dtype: int64
  9028. >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
  9029. 2015-03-29 01:59:59.999999999+01:00 0
  9030. 2015-03-29 03:30:00+02:00 1
  9031. dtype: int64
  9032. >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
  9033. 2015-03-29 03:30:00+02:00 0
  9034. 2015-03-29 03:30:00+02:00 1
  9035. dtype: int64
  9036. """
  9037. nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
  9038. if nonexistent not in nonexistent_options and not isinstance(
  9039. nonexistent, dt.timedelta
  9040. ):
  9041. raise ValueError(
  9042. "The nonexistent argument must be one of 'raise', "
  9043. "'NaT', 'shift_forward', 'shift_backward' or "
  9044. "a timedelta object"
  9045. )
  9046. axis = self._get_axis_number(axis)
  9047. ax = self._get_axis(axis)
  9048. def _tz_localize(ax, tz, ambiguous, nonexistent):
  9049. if not hasattr(ax, "tz_localize"):
  9050. if len(ax) > 0:
  9051. ax_name = self._get_axis_name(axis)
  9052. raise TypeError(
  9053. f"{ax_name} is not a valid DatetimeIndex or PeriodIndex"
  9054. )
  9055. ax = DatetimeIndex([], tz=tz)
  9056. else:
  9057. ax = ax.tz_localize(tz, ambiguous=ambiguous, nonexistent=nonexistent)
  9058. return ax
  9059. # if a level is given it must be a MultiIndex level or
  9060. # equivalent to the axis name
  9061. if isinstance(ax, MultiIndex):
  9062. level = ax._get_level_number(level)
  9063. new_level = _tz_localize(ax.levels[level], tz, ambiguous, nonexistent)
  9064. ax = ax.set_levels(new_level, level=level)
  9065. else:
  9066. if level not in (None, 0, ax.name):
  9067. raise ValueError(f"The level {level} is not valid")
  9068. ax = _tz_localize(ax, tz, ambiguous, nonexistent)
  9069. result = self.copy(deep=copy and not using_copy_on_write())
  9070. result = result.set_axis(ax, axis=axis, copy=False)
  9071. return result.__finalize__(self, method="tz_localize")
  9072. # ----------------------------------------------------------------------
  9073. # Numeric Methods
  @final
  def describe(
      self: NDFrameT,
      percentiles=None,
      include=None,
      exclude=None,
  ) -> NDFrameT:
      """
      Generate descriptive statistics.

      Descriptive statistics summarize the central tendency, dispersion
      and shape of a dataset's distribution, excluding ``NaN`` values.

      Both numeric and object series can be analyzed, as well as
      ``DataFrame`` column sets of mixed data types. The output varies
      depending on what is provided; see the notes below for details.

      Parameters
      ----------
      percentiles : list-like of numbers, optional
          The percentiles to include in the output. All should
          fall between 0 and 1. The default is
          ``[.25, .5, .75]``, which returns the 25th, 50th, and
          75th percentiles.
      include : 'all', list-like of dtypes or None (default), optional
          The data types to include in the result. Ignored
          for ``Series``. Here are the options:

          - 'all' : All columns of the input will be included in the output.
          - A list-like of dtypes : Limits the results to the
            provided data types.
            To limit the result to numeric types submit
            ``numpy.number``. To limit it instead to object columns submit
            the ``object`` data type. Strings
            can also be used in the style of
            ``select_dtypes`` (e.g. ``df.describe(include=['O'])``). To
            select pandas categorical columns, use ``'category'``
          - None (default) : The result will include all numeric columns.
      exclude : list-like of dtypes or None (default), optional
          The data types to omit from the result. Ignored
          for ``Series``. Here are the options:

          - A list-like of dtypes : Excludes the provided data types
            from the result. To exclude numeric types submit
            ``numpy.number``. To exclude object columns submit the data
            type ``object``. Strings can also be used in the style of
            ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``). To
            exclude pandas categorical columns, use ``'category'``
          - None (default) : The result will exclude nothing.

      Returns
      -------
      Series or DataFrame
          Summary statistics of the Series or Dataframe provided.

      See Also
      --------
      DataFrame.count: Count number of non-NA/null observations.
      DataFrame.max: Maximum of the values in the object.
      DataFrame.min: Minimum of the values in the object.
      DataFrame.mean: Mean of the values.
      DataFrame.std: Standard deviation of the observations.
      DataFrame.select_dtypes: Subset of a DataFrame including/excluding
          columns based on their dtype.

      Notes
      -----
      For numeric data, the result's index will include ``count``,
      ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
      upper percentiles. By default the lower percentile is ``25`` and the
      upper percentile is ``75``. The ``50`` percentile is the
      same as the median.

      For object data (e.g. strings), the result's index
      will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
      is the most common value. The ``freq`` is the most common value's
      frequency. Timestamp data is summarized with ``count``, ``mean``,
      ``min``, the percentiles and ``max``, as shown in the timestamp
      example below.

      If multiple object values have the highest count, then the
      ``count`` and ``top`` results will be arbitrarily chosen from
      among those with the highest count.

      For mixed data types provided via a ``DataFrame``, the default is to
      return only an analysis of numeric columns. If the dataframe consists
      only of object and categorical data without any numeric columns, the
      default is to return an analysis of both the object and categorical
      columns. If ``include='all'`` is provided as an option, the result
      will include a union of attributes of each type.

      The `include` and `exclude` parameters can be used to limit
      which columns in a ``DataFrame`` are analyzed for the output.
      The parameters are ignored when analyzing a ``Series``.

      Examples
      --------
      Describing a numeric ``Series``.

      >>> s = pd.Series([1, 2, 3])
      >>> s.describe()
      count    3.0
      mean     2.0
      std      1.0
      min      1.0
      25%      1.5
      50%      2.0
      75%      2.5
      max      3.0
      dtype: float64

      Describing a categorical ``Series``.

      >>> s = pd.Series(['a', 'a', 'b', 'c'])
      >>> s.describe()
      count     4
      unique    3
      top       a
      freq      2
      dtype: object

      Describing a timestamp ``Series``.

      >>> s = pd.Series([
      ...     np.datetime64("2000-01-01"),
      ...     np.datetime64("2010-01-01"),
      ...     np.datetime64("2010-01-01")
      ... ])
      >>> s.describe()
      count                      3
      mean     2006-09-01 08:00:00
      min      2000-01-01 00:00:00
      25%      2004-12-31 12:00:00
      50%      2010-01-01 00:00:00
      75%      2010-01-01 00:00:00
      max      2010-01-01 00:00:00
      dtype: object

      Describing a ``DataFrame``. By default only numeric fields
      are returned.

      >>> df = pd.DataFrame({'categorical': pd.Categorical(['d','e','f']),
      ...                    'numeric': [1, 2, 3],
      ...                    'object': ['a', 'b', 'c']
      ...                   })
      >>> df.describe()
             numeric
      count      3.0
      mean       2.0
      std        1.0
      min        1.0
      25%        1.5
      50%        2.0
      75%        2.5
      max        3.0

      Describing all columns of a ``DataFrame`` regardless of data type.

      >>> df.describe(include='all')  # doctest: +SKIP
             categorical  numeric object
      count            3      3.0      3
      unique           3      NaN      3
      top              f      NaN      a
      freq             1      NaN      1
      mean           NaN      2.0    NaN
      std            NaN      1.0    NaN
      min            NaN      1.0    NaN
      25%            NaN      1.5    NaN
      50%            NaN      2.0    NaN
      75%            NaN      2.5    NaN
      max            NaN      3.0    NaN

      Describing a column from a ``DataFrame`` by accessing it as
      an attribute.

      >>> df.numeric.describe()
      count    3.0
      mean     2.0
      std      1.0
      min      1.0
      25%      1.5
      50%      2.0
      75%      2.5
      max      3.0
      Name: numeric, dtype: float64

      Including only numeric columns in a ``DataFrame`` description.

      >>> df.describe(include=[np.number])
             numeric
      count      3.0
      mean       2.0
      std        1.0
      min        1.0
      25%        1.5
      50%        2.0
      75%        2.5
      max        3.0

      Including only string columns in a ``DataFrame`` description.

      >>> df.describe(include=[object])  # doctest: +SKIP
             object
      count       3
      unique      3
      top         a
      freq        1

      Including only categorical columns from a ``DataFrame`` description.

      >>> df.describe(include=['category'])
             categorical
      count            3
      unique           3
      top              d
      freq             1

      Excluding numeric columns from a ``DataFrame`` description.

      >>> df.describe(exclude=[np.number])  # doctest: +SKIP
             categorical object
      count            3      3
      unique           3      3
      top              f      a
      freq             1      1

      Excluding object columns from a ``DataFrame`` description.

      >>> df.describe(exclude=[object])  # doctest: +SKIP
             categorical  numeric
      count            3      3.0
      unique           3      NaN
      top              f      NaN
      freq             1      NaN
      mean           NaN      2.0
      std            NaN      1.0
      min            NaN      1.0
      25%            NaN      1.5
      50%            NaN      2.0
      75%            NaN      2.5
      max            NaN      3.0
      """
      # All of the work is done by the shared describe_ndframe helper.
      return describe_ndframe(
          obj=self,
          percentiles=percentiles,
          include=include,
          exclude=exclude,
      )
  @final
  def pct_change(
      self: NDFrameT,
      periods: int = 1,
      fill_method: Literal["backfill", "bfill", "pad", "ffill"] | None = "pad",
      limit=None,
      freq=None,
      **kwargs,
  ) -> NDFrameT:
      """
      Percentage change between the current and a prior element.

      By default the change is computed relative to the immediately
      previous row, which is useful for comparing the percentage of change
      in a time series of elements.

      Parameters
      ----------
      periods : int, default 1
          Periods to shift for forming percent change.
      fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
          How to handle NAs **before** computing percent changes.
          With ``None`` no filling is performed.
      limit : int, default None
          The number of consecutive NAs to fill before stopping.
      freq : DateOffset, timedelta, or str, optional
          Increment to use from time series API (e.g. 'M' or BDay()).
      **kwargs
          An ``axis`` entry, if present, selects the axis to operate along.
          The remaining keyword arguments are passed into
          `DataFrame.shift` or `Series.shift`.

      Returns
      -------
      Series or DataFrame
          The same type as the calling object.

      See Also
      --------
      Series.diff : Compute the difference of two elements in a Series.
      DataFrame.diff : Compute the difference of two elements in a DataFrame.
      Series.shift : Shift the index by some number of periods.
      DataFrame.shift : Shift the index by some number of periods.

      Examples
      --------
      **Series**

      >>> s = pd.Series([90, 91, 85])
      >>> s
      0    90
      1    91
      2    85
      dtype: int64

      >>> s.pct_change()
      0         NaN
      1    0.011111
      2   -0.065934
      dtype: float64

      >>> s.pct_change(periods=2)
      0         NaN
      1         NaN
      2   -0.055556
      dtype: float64

      See the percentage change in a Series where filling NAs with last
      valid observation forward to next valid.

      >>> s = pd.Series([90, 91, None, 85])
      >>> s
      0    90.0
      1    91.0
      2     NaN
      3    85.0
      dtype: float64

      >>> s.pct_change(fill_method='ffill')
      0         NaN
      1    0.011111
      2    0.000000
      3   -0.065934
      dtype: float64

      **DataFrame**

      Percentage change in French franc, Deutsche Mark, and Italian lira from
      1980-01-01 to 1980-03-01.

      >>> df = pd.DataFrame({
      ...     'FR': [4.0405, 4.0963, 4.3149],
      ...     'GR': [1.7246, 1.7482, 1.8519],
      ...     'IT': [804.74, 810.01, 860.13]},
      ...     index=['1980-01-01', '1980-02-01', '1980-03-01'])
      >>> df
                      FR      GR      IT
      1980-01-01  4.0405  1.7246  804.74
      1980-02-01  4.0963  1.7482  810.01
      1980-03-01  4.3149  1.8519  860.13

      >>> df.pct_change()
                        FR        GR        IT
      1980-01-01       NaN       NaN       NaN
      1980-02-01  0.013810  0.013684  0.006549
      1980-03-01  0.053365  0.059318  0.061876

      Percentage of change in GOOG and APPL stock volume. Shows computing
      the percentage change between columns.

      >>> df = pd.DataFrame({
      ...     '2016': [1769950, 30586265],
      ...     '2015': [1500923, 40912316],
      ...     '2014': [1371819, 41403351]},
      ...     index=['GOOG', 'APPL'])
      >>> df
                2016      2015      2014
      GOOG   1769950   1500923   1371819
      APPL  30586265  40912316  41403351

      >>> df.pct_change(axis='columns', periods=-1)
                2016      2015  2014
      GOOG  0.179241  0.094112   NaN
      APPL -0.252395 -0.011860   NaN
      """
      # ``axis`` travels inside **kwargs; pull it out so it is not passed twice
      # to ``shift`` below.
      axis = self._get_axis_number(kwargs.pop("axis", self._stat_axis_name))

      if fill_method is not None:
          filled = self.fillna(method=fill_method, axis=axis, limit=limit)
          assert filled is not None  # needed for mypy
          data = filled
      else:
          data = self

      previous = data.shift(periods=periods, freq=freq, axis=axis, **kwargs)
      # Unsupported left operand type for / ("NDFrameT")
      change = data / previous - 1  # type: ignore[operator]

      if freq is not None:
          # With a ``freq`` the shift moves the index rather than the values,
          # so drop duplicated labels and restore the original index.
          keep = ~change.index.duplicated()
          change = change.loc[keep]
          change = change.reindex_like(data)

      return change.__finalize__(self, method="pct_change")
  @final
  def _logical_func(
      self,
      name: str,
      func,
      axis: Axis = 0,
      bool_only: bool_t = False,
      skipna: bool_t = True,
      **kwargs,
  ) -> Series | bool_t:
      """
      Shared implementation behind the boolean reductions ``any``/``all``.

      Parameters
      ----------
      name : str
          Name of the reduction; used for argument validation and forwarded
          to the reducer.
      func : callable
          The reduction function handed to ``_reduce`` (or to
          ``_reduce_axis1`` on the fast path).
      axis : Axis, default 0
          Axis to reduce over. ``None`` on an object with more than one
          dimension reduces over axis 0 first and then reduces that result.
      bool_only : bool, default False
          Forwarded to ``_reduce`` as ``numeric_only``; on the fast path the
          object is first narrowed with ``_get_bool_data``.
      skipna : bool, default True
          Validated as a strict bool (``None`` is rejected) and forwarded.
      **kwargs
          Checked by ``nv.validate_logical_func``.
      """
      nv.validate_logical_func((), kwargs, fname=name)
      validate_bool_kwarg(skipna, "skipna", none_allowed=False)

      multi_dim = self.ndim > 1

      if multi_dim and axis is None:
          # Reduce along one dimension then the other, to simplify DataFrame._reduce
          first_pass = self._logical_func(
              name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
          )
          return first_pass._logical_func(name, func, skipna=skipna, **kwargs)

      if multi_dim and axis == 1 and not kwargs:
          arrays = self._mgr.arrays
          # TODO(EA2D): special-case not needed
          if len(arrays) > 1 and all(arr.ndim == 2 for arr in arrays):
              # Fastpath avoiding potentially expensive transpose
              target = self._get_bool_data() if bool_only else self
              return target._reduce_axis1(name, func, skipna=skipna)

      return self._reduce(
          func,
          name=name,
          axis=axis,
          skipna=skipna,
          numeric_only=bool_only,
          filter_type="bool",
      )
  def any(
      self,
      axis: Axis = 0,
      bool_only: bool_t = False,
      skipna: bool_t = True,
      **kwargs,
  ) -> DataFrame | Series | bool_t:
      """
      Run the "any" boolean reduction.

      Thin wrapper: all arguments are forwarded unchanged to
      ``_logical_func`` together with ``nanops.nanany`` as the reducer.
      """
      return self._logical_func(
          "any",
          nanops.nanany,
          axis=axis,
          bool_only=bool_only,
          skipna=skipna,
          **kwargs,
      )
  def all(
      self,
      axis: Axis = 0,
      bool_only: bool_t = False,
      skipna: bool_t = True,
      **kwargs,
  ) -> Series | bool_t:
      """
      Run the "all" boolean reduction.

      Thin wrapper: all arguments are forwarded unchanged to
      ``_logical_func`` together with ``nanops.nanall`` as the reducer.
      """
      return self._logical_func(
          "all",
          nanops.nanall,
          axis=axis,
          bool_only=bool_only,
          skipna=skipna,
          **kwargs,
      )
  9468. @final
  9469. def _accum_func(
  9470. self,
  9471. name: str,
  9472. func,
  9473. axis: Axis | None = None,
  9474. skipna: bool_t = True,
  9475. *args,
  9476. **kwargs,
  9477. ):
  9478. skipna = nv.validate_cum_func_with_skipna(skipna, args, kwargs, name)
  9479. if axis is None:
  9480. axis = self._stat_axis_number
  9481. else:
  9482. axis = self._get_axis_number(axis)
  9483. if axis == 1:
  9484. return self.T._accum_func(
  9485. name, func, axis=0, skipna=skipna, *args, **kwargs # noqa: B026
  9486. ).T
  9487. def block_accum_func(blk_values):
  9488. values = blk_values.T if hasattr(blk_values, "T") else blk_values
  9489. result: np.ndarray | ExtensionArray
  9490. if isinstance(values, ExtensionArray):
  9491. result = values._accumulate(name, skipna=skipna, **kwargs)
  9492. else:
  9493. result = nanops.na_accum_func(values, func, skipna=skipna)
  9494. result = result.T if hasattr(result, "T") else result
  9495. return result
  9496. result = self._mgr.apply(block_accum_func)
  9497. return self._constructor(result).__finalize__(self, method=name)
  def cummax(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
      """
      Cumulative maximum.

      Forwards every argument to ``_accum_func`` with
      ``np.maximum.accumulate`` as the accumulation function.
      """
      running_max = np.maximum.accumulate
      return self._accum_func("cummax", running_max, axis, skipna, *args, **kwargs)
  def cummin(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
      """
      Cumulative minimum.

      Forwards every argument to ``_accum_func`` with
      ``np.minimum.accumulate`` as the accumulation function.
      """
      running_min = np.minimum.accumulate
      return self._accum_func("cummin", running_min, axis, skipna, *args, **kwargs)
  def cumsum(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
      """
      Cumulative sum.

      Forwards every argument to ``_accum_func`` with ``np.cumsum`` as the
      accumulation function.
      """
      accumulate = np.cumsum
      return self._accum_func("cumsum", accumulate, axis, skipna, *args, **kwargs)
  def cumprod(self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs):
      """
      Cumulative product.

      Forwards every argument to ``_accum_func`` with ``np.cumprod`` as the
      accumulation function.
      """
      accumulate = np.cumprod
      return self._accum_func("cumprod", accumulate, axis, skipna, *args, **kwargs)
  @final
  def _stat_function_ddof(
      self,
      name: str,
      func,
      axis: Axis | None = None,
      skipna: bool_t = True,
      ddof: int = 1,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Shared implementation behind the ``ddof``-aware reductions
      (``sem``, ``var``, ``std``).

      Parameters
      ----------
      name : str
          Name of the reduction; used for validation and passed to ``_reduce``.
      func : callable
          Reduction function handed to ``_reduce``.
      axis : Axis or None, default None
          Axis to reduce over; ``None`` selects ``_stat_axis_number``.
      skipna : bool, default True
          Validated as a strict bool (``None`` is rejected) and forwarded.
      ddof : int, default 1
          Forwarded to ``_reduce`` unchanged.
      numeric_only : bool, default False
          Forwarded to ``_reduce`` unchanged.
      **kwargs
          Checked by ``nv.validate_stat_ddof_func``; not forwarded.
      """
      nv.validate_stat_ddof_func((), kwargs, fname=name)
      validate_bool_kwarg(skipna, "skipna", none_allowed=False)

      reduce_axis = self._stat_axis_number if axis is None else axis

      return self._reduce(
          func,
          name,
          axis=reduce_axis,
          numeric_only=numeric_only,
          skipna=skipna,
          ddof=ddof,
      )
  def sem(
      self,
      axis: Axis | None = None,
      skipna: bool_t = True,
      ddof: int = 1,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Standard error of the mean over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function_ddof`` with
      ``nanops.nansem`` as the reducer.
      """
      return self._stat_function_ddof(
          "sem",
          nanops.nansem,
          axis=axis,
          skipna=skipna,
          ddof=ddof,
          numeric_only=numeric_only,
          **kwargs,
      )
  def var(
      self,
      axis: Axis | None = None,
      skipna: bool_t = True,
      ddof: int = 1,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Variance over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function_ddof`` with
      ``nanops.nanvar`` as the reducer.
      """
      return self._stat_function_ddof(
          "var",
          nanops.nanvar,
          axis=axis,
          skipna=skipna,
          ddof=ddof,
          numeric_only=numeric_only,
          **kwargs,
      )
  def std(
      self,
      axis: Axis | None = None,
      skipna: bool_t = True,
      ddof: int = 1,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Standard deviation over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function_ddof`` with
      ``nanops.nanstd`` as the reducer.
      """
      return self._stat_function_ddof(
          "std",
          nanops.nanstd,
          axis=axis,
          skipna=skipna,
          ddof=ddof,
          numeric_only=numeric_only,
          **kwargs,
      )
  @final
  def _stat_function(
      self,
      name: str,
      func,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ):
      """
      Shared implementation behind the plain statistical reductions
      (``min``, ``max``, ``mean``, ``median``, ``skew``, ``kurt``).

      Parameters
      ----------
      name : str
          Name of the reduction. ``"median"`` is validated with
          ``nv.validate_median``; every other name with
          ``nv.validate_stat_func``.
      func : callable
          Reduction function handed to ``_reduce``.
      axis : Axis or None, default 0
          Forwarded to ``_reduce`` unchanged.
      skipna : bool, default True
          Validated as a strict bool (``None`` is rejected) and forwarded.
      numeric_only : bool, default False
          Forwarded to ``_reduce`` unchanged.
      **kwargs
          Only validated; not forwarded.
      """
      if name != "median":
          nv.validate_stat_func((), kwargs, fname=name)
      else:
          nv.validate_median((), kwargs)

      validate_bool_kwarg(skipna, "skipna", none_allowed=False)

      return self._reduce(
          func,
          name=name,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
      )
  def min(
      self,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ):
      """
      Minimum of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function`` with
      ``nanops.nanmin`` as the reducer.
      """
      return self._stat_function(
          "min",
          nanops.nanmin,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          **kwargs,
      )
  def max(
      self,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ):
      """
      Maximum of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function`` with
      ``nanops.nanmax`` as the reducer.
      """
      return self._stat_function(
          "max",
          nanops.nanmax,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          **kwargs,
      )
  def mean(
      self,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Mean of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function`` with
      ``nanops.nanmean`` as the reducer.
      """
      return self._stat_function(
          "mean",
          nanops.nanmean,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          **kwargs,
      )
  def median(
      self,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Median of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function`` with
      ``nanops.nanmedian`` as the reducer.
      """
      return self._stat_function(
          "median",
          nanops.nanmedian,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          **kwargs,
      )
  def skew(
      self,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Skew of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function`` with
      ``nanops.nanskew`` as the reducer.
      """
      return self._stat_function(
          "skew",
          nanops.nanskew,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          **kwargs,
      )
  def kurt(
      self,
      axis: Axis | None = 0,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      **kwargs,
  ) -> Series | float:
      """
      Kurtosis of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_stat_function`` with
      ``nanops.nankurt`` as the reducer.
      """
      return self._stat_function(
          "kurt",
          nanops.nankurt,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          **kwargs,
      )

  # ``kurtosis`` is an alias for ``kurt``.
  kurtosis = kurt
  @final
  def _min_count_stat_function(
      self,
      name: str,
      func,
      axis: Axis | None = None,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      min_count: int = 0,
      **kwargs,
  ):
      """
      Shared implementation behind the ``min_count``-aware reductions
      (``sum``, ``prod``).

      Parameters
      ----------
      name : str
          Name of the reduction. ``"sum"`` and ``"prod"`` have dedicated
          validators; any other name falls back to ``nv.validate_stat_func``.
      func : callable
          Reduction function handed to ``_reduce``.
      axis : Axis or None, default None
          Axis to reduce over; ``None`` selects ``_stat_axis_number``.
      skipna : bool, default True
          Validated as a strict bool (``None`` is rejected) and forwarded.
      numeric_only : bool, default False
          Forwarded to ``_reduce`` unchanged.
      min_count : int, default 0
          Forwarded to ``_reduce`` unchanged.
      **kwargs
          Only validated; not forwarded.
      """
      dedicated_validators = {"sum": nv.validate_sum, "prod": nv.validate_prod}
      validator = dedicated_validators.get(name)
      if validator is None:
          nv.validate_stat_func((), kwargs, fname=name)
      else:
          validator((), kwargs)

      validate_bool_kwarg(skipna, "skipna", none_allowed=False)

      reduce_axis = self._stat_axis_number if axis is None else axis

      return self._reduce(
          func,
          name=name,
          axis=reduce_axis,
          skipna=skipna,
          numeric_only=numeric_only,
          min_count=min_count,
      )
  def sum(
      self,
      axis: Axis | None = None,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      min_count: int = 0,
      **kwargs,
  ):
      """
      Sum of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_min_count_stat_function``
      with ``nanops.nansum`` as the reducer.
      """
      return self._min_count_stat_function(
          "sum",
          nanops.nansum,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          min_count=min_count,
          **kwargs,
      )
  def prod(
      self,
      axis: Axis | None = None,
      skipna: bool_t = True,
      numeric_only: bool_t = False,
      min_count: int = 0,
      **kwargs,
  ):
      """
      Product of the values over the requested axis.

      Thin wrapper: forwards its arguments to ``_min_count_stat_function``
      with ``nanops.nanprod`` as the reducer.
      """
      return self._min_count_stat_function(
          "prod",
          nanops.nanprod,
          axis=axis,
          skipna=skipna,
          numeric_only=numeric_only,
          min_count=min_count,
          **kwargs,
      )

  # ``product`` is an alias for ``prod``.
  product = prod
  9707. @classmethod
  9708. def _add_numeric_operations(cls) -> None:
  9709. """
  9710. Add the operations to the cls; evaluate the doc strings again
  9711. """
  9712. axis_descr, name1, name2 = _doc_params(cls)
  9713. @doc(
  9714. _bool_doc,
  9715. desc=_any_desc,
  9716. name1=name1,
  9717. name2=name2,
  9718. axis_descr=axis_descr,
  9719. see_also=_any_see_also,
  9720. examples=_any_examples,
  9721. empty_value=False,
  9722. )
  9723. def any(
  9724. self,
  9725. *,
  9726. axis: Axis = 0,
  9727. bool_only=None,
  9728. skipna: bool_t = True,
  9729. **kwargs,
  9730. ):
  9731. return NDFrame.any(
  9732. self,
  9733. axis=axis,
  9734. bool_only=bool_only,
  9735. skipna=skipna,
  9736. **kwargs,
  9737. )
  9738. setattr(cls, "any", any)
  9739. @doc(
  9740. _bool_doc,
  9741. desc=_all_desc,
  9742. name1=name1,
  9743. name2=name2,
  9744. axis_descr=axis_descr,
  9745. see_also=_all_see_also,
  9746. examples=_all_examples,
  9747. empty_value=True,
  9748. )
  9749. def all(
  9750. self,
  9751. axis: Axis = 0,
  9752. bool_only=None,
  9753. skipna: bool_t = True,
  9754. **kwargs,
  9755. ):
  9756. return NDFrame.all(self, axis, bool_only, skipna, **kwargs)
  9757. setattr(cls, "all", all)
  9758. @doc(
  9759. _num_ddof_doc,
  9760. desc="Return unbiased standard error of the mean over requested "
  9761. "axis.\n\nNormalized by N-1 by default. This can be changed "
  9762. "using the ddof argument",
  9763. name1=name1,
  9764. name2=name2,
  9765. axis_descr=axis_descr,
  9766. notes="",
  9767. examples="",
  9768. )
  9769. def sem(
  9770. self,
  9771. axis: Axis | None = None,
  9772. skipna: bool_t = True,
  9773. ddof: int = 1,
  9774. numeric_only: bool_t = False,
  9775. **kwargs,
  9776. ):
  9777. return NDFrame.sem(self, axis, skipna, ddof, numeric_only, **kwargs)
  9778. setattr(cls, "sem", sem)
  9779. @doc(
  9780. _num_ddof_doc,
  9781. desc="Return unbiased variance over requested axis.\n\nNormalized by "
  9782. "N-1 by default. This can be changed using the ddof argument.",
  9783. name1=name1,
  9784. name2=name2,
  9785. axis_descr=axis_descr,
  9786. notes="",
  9787. examples=_var_examples,
  9788. )
  9789. def var(
  9790. self,
  9791. axis: Axis | None = None,
  9792. skipna: bool_t = True,
  9793. ddof: int = 1,
  9794. numeric_only: bool_t = False,
  9795. **kwargs,
  9796. ):
  9797. return NDFrame.var(self, axis, skipna, ddof, numeric_only, **kwargs)
  9798. setattr(cls, "var", var)
  9799. @doc(
  9800. _num_ddof_doc,
  9801. desc="Return sample standard deviation over requested axis."
  9802. "\n\nNormalized by N-1 by default. This can be changed using the "
  9803. "ddof argument.",
  9804. name1=name1,
  9805. name2=name2,
  9806. axis_descr=axis_descr,
  9807. notes=_std_notes,
  9808. examples=_std_examples,
  9809. )
  9810. def std(
  9811. self,
  9812. axis: Axis | None = None,
  9813. skipna: bool_t = True,
  9814. ddof: int = 1,
  9815. numeric_only: bool_t = False,
  9816. **kwargs,
  9817. ):
  9818. return NDFrame.std(self, axis, skipna, ddof, numeric_only, **kwargs)
  9819. setattr(cls, "std", std)
  9820. @doc(
  9821. _cnum_doc,
  9822. desc="minimum",
  9823. name1=name1,
  9824. name2=name2,
  9825. axis_descr=axis_descr,
  9826. accum_func_name="min",
  9827. examples=_cummin_examples,
  9828. )
  9829. def cummin(
  9830. self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs
  9831. ):
  9832. return NDFrame.cummin(self, axis, skipna, *args, **kwargs)
  9833. setattr(cls, "cummin", cummin)
  9834. @doc(
  9835. _cnum_doc,
  9836. desc="maximum",
  9837. name1=name1,
  9838. name2=name2,
  9839. axis_descr=axis_descr,
  9840. accum_func_name="max",
  9841. examples=_cummax_examples,
  9842. )
  9843. def cummax(
  9844. self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs
  9845. ):
  9846. return NDFrame.cummax(self, axis, skipna, *args, **kwargs)
  9847. setattr(cls, "cummax", cummax)
  9848. @doc(
  9849. _cnum_doc,
  9850. desc="sum",
  9851. name1=name1,
  9852. name2=name2,
  9853. axis_descr=axis_descr,
  9854. accum_func_name="sum",
  9855. examples=_cumsum_examples,
  9856. )
  9857. def cumsum(
  9858. self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs
  9859. ):
  9860. return NDFrame.cumsum(self, axis, skipna, *args, **kwargs)
  9861. setattr(cls, "cumsum", cumsum)
  9862. @doc(
  9863. _cnum_doc,
  9864. desc="product",
  9865. name1=name1,
  9866. name2=name2,
  9867. axis_descr=axis_descr,
  9868. accum_func_name="prod",
  9869. examples=_cumprod_examples,
  9870. )
  9871. def cumprod(
  9872. self, axis: Axis | None = None, skipna: bool_t = True, *args, **kwargs
  9873. ):
  9874. return NDFrame.cumprod(self, axis, skipna, *args, **kwargs)
  9875. setattr(cls, "cumprod", cumprod)
  9876. # error: Untyped decorator makes function "sum" untyped
  9877. @doc( # type: ignore[misc]
  9878. _num_doc,
  9879. desc="Return the sum of the values over the requested axis.\n\n"
  9880. "This is equivalent to the method ``numpy.sum``.",
  9881. name1=name1,
  9882. name2=name2,
  9883. axis_descr=axis_descr,
  9884. min_count=_min_count_stub,
  9885. see_also=_stat_func_see_also,
  9886. examples=_sum_examples,
  9887. )
  9888. def sum(
  9889. self,
  9890. axis: Axis | None = None,
  9891. skipna: bool_t = True,
  9892. numeric_only: bool_t = False,
  9893. min_count: int = 0,
  9894. **kwargs,
  9895. ):
  9896. return NDFrame.sum(self, axis, skipna, numeric_only, min_count, **kwargs)
  9897. setattr(cls, "sum", sum)
  9898. @doc(
  9899. _num_doc,
  9900. desc="Return the product of the values over the requested axis.",
  9901. name1=name1,
  9902. name2=name2,
  9903. axis_descr=axis_descr,
  9904. min_count=_min_count_stub,
  9905. see_also=_stat_func_see_also,
  9906. examples=_prod_examples,
  9907. )
  9908. def prod(
  9909. self,
  9910. axis: Axis | None = None,
  9911. skipna: bool_t = True,
  9912. numeric_only: bool_t = False,
  9913. min_count: int = 0,
  9914. **kwargs,
  9915. ):
  9916. return NDFrame.prod(self, axis, skipna, numeric_only, min_count, **kwargs)
  9917. setattr(cls, "prod", prod)
  9918. cls.product = prod
  9919. @doc(
  9920. _num_doc,
  9921. desc="Return the mean of the values over the requested axis.",
  9922. name1=name1,
  9923. name2=name2,
  9924. axis_descr=axis_descr,
  9925. min_count="",
  9926. see_also="",
  9927. examples="",
  9928. )
  9929. def mean(
  9930. self,
  9931. axis: AxisInt | None = 0,
  9932. skipna: bool_t = True,
  9933. numeric_only: bool_t = False,
  9934. **kwargs,
  9935. ):
  9936. return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
  9937. setattr(cls, "mean", mean)
  9938. @doc(
  9939. _num_doc,
  9940. desc="Return unbiased skew over requested axis.\n\nNormalized by N-1.",
  9941. name1=name1,
  9942. name2=name2,
  9943. axis_descr=axis_descr,
  9944. min_count="",
  9945. see_also="",
  9946. examples="",
  9947. )
  9948. def skew(
  9949. self,
  9950. axis: AxisInt | None = 0,
  9951. skipna: bool_t = True,
  9952. numeric_only: bool_t = False,
  9953. **kwargs,
  9954. ):
  9955. return NDFrame.skew(self, axis, skipna, numeric_only, **kwargs)
  9956. setattr(cls, "skew", skew)
        # ``kurt`` reduction attached to ``cls`` below (also exposed as
        # ``kurtosis``).  ``@doc`` is given the ``_num_doc`` template together with
        # values for its placeholders.  Notes are kept as ``#`` comments rather
        # than an inline docstring because this function is ``@doc``-decorated.
        @doc(
            _num_doc,
            desc="Return unbiased kurtosis over requested axis.\n\n"
            "Kurtosis obtained using Fisher's definition of\n"
            "kurtosis (kurtosis of normal == 0.0). Normalized "
            "by N-1.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        def kurt(
            self,
            axis: Axis | None = 0,
            skipna: bool_t = True,
            numeric_only: bool_t = False,
            **kwargs,
        ):
            # Thin forwarder: arguments are handed positionally to ``NDFrame.kurt``.
            return NDFrame.kurt(self, axis, skipna, numeric_only, **kwargs)

        # Bind the wrapper under the name ``kurt`` on ``cls`` ...
        setattr(cls, "kurt", kurt)
        # ... and make ``kurtosis`` an alias for the very same function object.
        cls.kurtosis = kurt
        # ``median`` reduction attached to ``cls`` below.  ``@doc`` is given the
        # ``_num_doc`` template together with values for its placeholders;
        # ``min_count``, ``see_also`` and ``examples`` are passed as empty strings.
        # Notes are kept as ``#`` comments rather than an inline docstring because
        # this function is ``@doc``-decorated.
        @doc(
            _num_doc,
            desc="Return the median of the values over the requested axis.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also="",
            examples="",
        )
        def median(
            self,
            axis: Axis | None = 0,
            skipna: bool_t = True,
            numeric_only: bool_t = False,
            **kwargs,
        ):
            # Thin forwarder: arguments are handed positionally to
            # ``NDFrame.median``.
            return NDFrame.median(self, axis, skipna, numeric_only, **kwargs)

        # Bind the wrapper under the name ``median`` on ``cls``.
        setattr(cls, "median", median)
        # ``max`` reduction attached to ``cls`` below.  ``@doc`` is given the
        # ``_num_doc`` template together with ``_stat_func_see_also`` and
        # ``_max_examples``.  Notes are kept as ``#`` comments rather than an
        # inline docstring because this function is ``@doc``-decorated.
        # NOTE: from this ``def`` onwards the local name ``max`` shadows the
        # builtin inside the enclosing function; the name has to stay because it
        # is what gets bound on ``cls``.
        @doc(
            _num_doc,
            desc="Return the maximum of the values over the requested axis.\n\n"
            "If you want the *index* of the maximum, use ``idxmax``. This is "
            "the equivalent of the ``numpy.ndarray`` method ``argmax``.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also=_stat_func_see_also,
            examples=_max_examples,
        )
        def max(
            self,
            axis: Axis | None = 0,
            skipna: bool_t = True,
            numeric_only: bool_t = False,
            **kwargs,
        ):
            # Thin forwarder: arguments are handed positionally to ``NDFrame.max``.
            return NDFrame.max(self, axis, skipna, numeric_only, **kwargs)

        # Bind the wrapper under the name ``max`` on ``cls``.
        setattr(cls, "max", max)
        # ``min`` reduction attached to ``cls`` below.  ``@doc`` is given the
        # ``_num_doc`` template together with ``_stat_func_see_also`` and
        # ``_min_examples``.  Notes are kept as ``#`` comments rather than an
        # inline docstring because this function is ``@doc``-decorated.
        # NOTE: from this ``def`` onwards the local name ``min`` shadows the
        # builtin inside the enclosing function; the name has to stay because it
        # is what gets bound on ``cls``.
        @doc(
            _num_doc,
            desc="Return the minimum of the values over the requested axis.\n\n"
            "If you want the *index* of the minimum, use ``idxmin``. This is "
            "the equivalent of the ``numpy.ndarray`` method ``argmin``.",
            name1=name1,
            name2=name2,
            axis_descr=axis_descr,
            min_count="",
            see_also=_stat_func_see_also,
            examples=_min_examples,
        )
        def min(
            self,
            axis: Axis | None = 0,
            skipna: bool_t = True,
            numeric_only: bool_t = False,
            **kwargs,
        ):
            # Thin forwarder: arguments are handed positionally to ``NDFrame.min``.
            return NDFrame.min(self, axis, skipna, numeric_only, **kwargs)

        # Bind the wrapper under the name ``min`` on ``cls``.
        setattr(cls, "min", min)
  10041. @final
  10042. @doc(Rolling)
  10043. def rolling(
  10044. self,
  10045. window: int | dt.timedelta | str | BaseOffset | BaseIndexer,
  10046. min_periods: int | None = None,
  10047. center: bool_t = False,
  10048. win_type: str | None = None,
  10049. on: str | None = None,
  10050. axis: Axis = 0,
  10051. closed: str | None = None,
  10052. step: int | None = None,
  10053. method: str = "single",
  10054. ) -> Window | Rolling:
  10055. axis = self._get_axis_number(axis)
  10056. if win_type is not None:
  10057. return Window(
  10058. self,
  10059. window=window,
  10060. min_periods=min_periods,
  10061. center=center,
  10062. win_type=win_type,
  10063. on=on,
  10064. axis=axis,
  10065. closed=closed,
  10066. step=step,
  10067. method=method,
  10068. )
  10069. return Rolling(
  10070. self,
  10071. window=window,
  10072. min_periods=min_periods,
  10073. center=center,
  10074. win_type=win_type,
  10075. on=on,
  10076. axis=axis,
  10077. closed=closed,
  10078. step=step,
  10079. method=method,
  10080. )
  10081. @final
  10082. @doc(Expanding)
  10083. def expanding(
  10084. self,
  10085. min_periods: int = 1,
  10086. axis: Axis = 0,
  10087. method: str = "single",
  10088. ) -> Expanding:
  10089. axis = self._get_axis_number(axis)
  10090. return Expanding(self, min_periods=min_periods, axis=axis, method=method)
  10091. @final
  10092. @doc(ExponentialMovingWindow)
  10093. def ewm(
  10094. self,
  10095. com: float | None = None,
  10096. span: float | None = None,
  10097. halflife: float | TimedeltaConvertibleTypes | None = None,
  10098. alpha: float | None = None,
  10099. min_periods: int | None = 0,
  10100. adjust: bool_t = True,
  10101. ignore_na: bool_t = False,
  10102. axis: Axis = 0,
  10103. times: np.ndarray | DataFrame | Series | None = None,
  10104. method: str = "single",
  10105. ) -> ExponentialMovingWindow:
  10106. axis = self._get_axis_number(axis)
  10107. return ExponentialMovingWindow(
  10108. self,
  10109. com=com,
  10110. span=span,
  10111. halflife=halflife,
  10112. alpha=alpha,
  10113. min_periods=min_periods,
  10114. adjust=adjust,
  10115. ignore_na=ignore_na,
  10116. axis=axis,
  10117. times=times,
  10118. method=method,
  10119. )
  10120. # ----------------------------------------------------------------------
  10121. # Arithmetic Methods
  10122. @final
  10123. def _inplace_method(self, other, op):
  10124. """
  10125. Wrap arithmetic method to operate inplace.
  10126. """
  10127. result = op(self, other)
  10128. if (
  10129. self.ndim == 1
  10130. and result._indexed_same(self)
  10131. and is_dtype_equal(result.dtype, self.dtype)
  10132. ):
  10133. # GH#36498 this inplace op can _actually_ be inplace.
  10134. # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager,
  10135. # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace"
  10136. self._mgr.setitem_inplace( # type: ignore[union-attr]
  10137. slice(None), result._values
  10138. )
  10139. return self
  10140. # Delete cacher
  10141. self._reset_cacher()
  10142. # this makes sure that we are aligned like the input
  10143. # we are updating inplace so we want to ignore is_copy
  10144. self._update_inplace(
  10145. result.reindex_like(self, copy=False), verify_is_copy=False
  10146. )
  10147. return self
  10148. def __iadd__(self: NDFrameT, other) -> NDFrameT:
  10149. # error: Unsupported left operand type for + ("Type[NDFrame]")
  10150. return self._inplace_method(other, type(self).__add__) # type: ignore[operator]
  10151. def __isub__(self: NDFrameT, other) -> NDFrameT:
  10152. # error: Unsupported left operand type for - ("Type[NDFrame]")
  10153. return self._inplace_method(other, type(self).__sub__) # type: ignore[operator]
  10154. def __imul__(self: NDFrameT, other) -> NDFrameT:
  10155. # error: Unsupported left operand type for * ("Type[NDFrame]")
  10156. return self._inplace_method(other, type(self).__mul__) # type: ignore[operator]
  10157. def __itruediv__(self: NDFrameT, other) -> NDFrameT:
  10158. # error: Unsupported left operand type for / ("Type[NDFrame]")
  10159. return self._inplace_method(
  10160. other, type(self).__truediv__ # type: ignore[operator]
  10161. )
  10162. def __ifloordiv__(self: NDFrameT, other) -> NDFrameT:
  10163. # error: Unsupported left operand type for // ("Type[NDFrame]")
  10164. return self._inplace_method(
  10165. other, type(self).__floordiv__ # type: ignore[operator]
  10166. )
  10167. def __imod__(self: NDFrameT, other) -> NDFrameT:
  10168. # error: Unsupported left operand type for % ("Type[NDFrame]")
  10169. return self._inplace_method(other, type(self).__mod__) # type: ignore[operator]
  10170. def __ipow__(self: NDFrameT, other) -> NDFrameT:
  10171. # error: Unsupported left operand type for ** ("Type[NDFrame]")
  10172. return self._inplace_method(other, type(self).__pow__) # type: ignore[operator]
  10173. def __iand__(self: NDFrameT, other) -> NDFrameT:
  10174. # error: Unsupported left operand type for & ("Type[NDFrame]")
  10175. return self._inplace_method(other, type(self).__and__) # type: ignore[operator]
  10176. def __ior__(self: NDFrameT, other) -> NDFrameT:
  10177. # error: Unsupported left operand type for | ("Type[NDFrame]")
  10178. return self._inplace_method(other, type(self).__or__) # type: ignore[operator]
  10179. def __ixor__(self: NDFrameT, other) -> NDFrameT:
  10180. # error: Unsupported left operand type for ^ ("Type[NDFrame]")
  10181. return self._inplace_method(other, type(self).__xor__) # type: ignore[operator]
  10182. # ----------------------------------------------------------------------
  10183. # Misc methods
  10184. @final
  10185. def _find_valid_index(self, *, how: str) -> Hashable | None:
  10186. """
  10187. Retrieves the index of the first valid value.
  10188. Parameters
  10189. ----------
  10190. how : {'first', 'last'}
  10191. Use this parameter to change between the first or last valid index.
  10192. Returns
  10193. -------
  10194. idx_first_valid : type of index
  10195. """
  10196. idxpos = find_valid_index(self._values, how=how, is_valid=~isna(self._values))
  10197. if idxpos is None:
  10198. return None
  10199. return self.index[idxpos]
    @final
    @doc(position="first", klass=_shared_doc_kwargs["klass"])
    def first_valid_index(self) -> Hashable | None:
        """
        Return index for {position} non-NA value or None, if no non-NA value is found.

        Returns
        -------
        type of index

        Notes
        -----
        If all elements are NA/null, returns None.
        Also returns None for empty {klass}.
        """
        # ``{position}`` and ``{klass}`` above are template placeholders filled
        # from the ``@doc`` arguments; keep them intact when editing the text.
        return self._find_valid_index(how="first")
    @final
    @doc(first_valid_index, position="last", klass=_shared_doc_kwargs["klass"])
    def last_valid_index(self) -> Hashable | None:
        # ``@doc`` is handed ``first_valid_index`` with ``position="last"``, so
        # no docstring is written inline here; notes are kept as comments.
        return self._find_valid_index(how="last")
  10218. def _doc_params(cls):
  10219. """Return a tuple of the doc params."""
  10220. axis_descr = (
  10221. f"{{{', '.join([f'{a} ({i})' for i, a in enumerate(cls._AXIS_ORDERS)])}}}"
  10222. )
  10223. name = cls._constructor_sliced.__name__ if cls._AXIS_LEN > 1 else "scalar"
  10224. name2 = cls.__name__
  10225. return axis_descr, name, name2
  10226. _num_doc = """
  10227. {desc}
  10228. Parameters
  10229. ----------
  10230. axis : {axis_descr}
  10231. Axis for the function to be applied on.
  10232. For `Series` this parameter is unused and defaults to 0.
  10233. For DataFrames, specifying ``axis=None`` will apply the aggregation
  10234. across both axes.
  10235. .. versionadded:: 2.0.0
  10236. skipna : bool, default True
  10237. Exclude NA/null values when computing the result.
  10238. numeric_only : bool, default False
  10239. Include only float, int, boolean columns. Not implemented for Series.
  10240. {min_count}\
  10241. **kwargs
  10242. Additional keyword arguments to be passed to the function.
  10243. Returns
  10244. -------
  10245. {name1} or scalar\
  10246. {see_also}\
  10247. {examples}
  10248. """
  10249. _num_ddof_doc = """
  10250. {desc}
  10251. Parameters
  10252. ----------
  10253. axis : {axis_descr}
  10254. For `Series` this parameter is unused and defaults to 0.
  10255. skipna : bool, default True
  10256. Exclude NA/null values. If an entire row/column is NA, the result
  10257. will be NA.
  10258. ddof : int, default 1
  10259. Delta Degrees of Freedom. The divisor used in calculations is N - ddof,
  10260. where N represents the number of elements.
  10261. numeric_only : bool, default False
  10262. Include only float, int, boolean columns. Not implemented for Series.
  10263. Returns
  10264. -------
  10265. {name1} or {name2} (if level specified) \
  10266. {notes}\
  10267. {examples}
  10268. """
  10269. _std_notes = """
  10270. Notes
  10271. -----
  10272. To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the
  10273. default `ddof=1`)"""
  10274. _std_examples = """
  10275. Examples
  10276. --------
  10277. >>> df = pd.DataFrame({'person_id': [0, 1, 2, 3],
  10278. ... 'age': [21, 25, 62, 43],
  10279. ... 'height': [1.61, 1.87, 1.49, 2.01]}
  10280. ... ).set_index('person_id')
  10281. >>> df
  10282. age height
  10283. person_id
  10284. 0 21 1.61
  10285. 1 25 1.87
  10286. 2 62 1.49
  10287. 3 43 2.01
  10288. The standard deviation of the columns can be found as follows:
  10289. >>> df.std()
  10290. age 18.786076
  10291. height 0.237417
  10292. dtype: float64
  10293. Alternatively, `ddof=0` can be set to normalize by N instead of N-1:
  10294. >>> df.std(ddof=0)
  10295. age 16.269219
  10296. height 0.205609
  10297. dtype: float64"""
  10298. _var_examples = """
  10299. Examples
  10300. --------
  10301. >>> df = pd.DataFrame({'person_id': [0, 1, 2, 3],
  10302. ... 'age': [21, 25, 62, 43],
  10303. ... 'height': [1.61, 1.87, 1.49, 2.01]}
  10304. ... ).set_index('person_id')
  10305. >>> df
  10306. age height
  10307. person_id
  10308. 0 21 1.61
  10309. 1 25 1.87
  10310. 2 62 1.49
  10311. 3 43 2.01
  10312. >>> df.var()
  10313. age 352.916667
  10314. height 0.056367
  10315. dtype: float64
  10316. Alternatively, ``ddof=0`` can be set to normalize by N instead of N-1:
  10317. >>> df.var(ddof=0)
  10318. age 264.687500
  10319. height 0.042275
  10320. dtype: float64"""
  10321. _bool_doc = """
  10322. {desc}
  10323. Parameters
  10324. ----------
  10325. axis : {{0 or 'index', 1 or 'columns', None}}, default 0
  10326. Indicate which axis or axes should be reduced. For `Series` this parameter
  10327. is unused and defaults to 0.
  10328. * 0 / 'index' : reduce the index, return a Series whose index is the
  10329. original column labels.
  10330. * 1 / 'columns' : reduce the columns, return a Series whose index is the
  10331. original index.
  10332. * None : reduce all axes, return a scalar.
  10333. bool_only : bool, default None
  10334. Include only boolean columns. If None, will attempt to use everything,
  10335. then use only boolean data. Not implemented for Series.
  10336. skipna : bool, default True
  10337. Exclude NA/null values. If the entire row/column is NA and skipna is
  10338. True, then the result will be {empty_value}, as for an empty row/column.
  10339. If skipna is False, then NA are treated as True, because these are not
  10340. equal to zero.
  10341. **kwargs : any, default None
  10342. Additional keywords have no effect but might be accepted for
  10343. compatibility with NumPy.
  10344. Returns
  10345. -------
  10346. {name1} or {name2}
  10347. If level is specified, then, {name2} is returned; otherwise, {name1}
  10348. is returned.
  10349. {see_also}
  10350. {examples}"""
  10351. _all_desc = """\
  10352. Return whether all elements are True, potentially over an axis.
Returns True unless there is at least one element within a series or
along a DataFrame axis that is False or equivalent (e.g. zero or
  10355. empty)."""
  10356. _all_examples = """\
  10357. Examples
  10358. --------
  10359. **Series**
  10360. >>> pd.Series([True, True]).all()
  10361. True
  10362. >>> pd.Series([True, False]).all()
  10363. False
  10364. >>> pd.Series([], dtype="float64").all()
  10365. True
  10366. >>> pd.Series([np.nan]).all()
  10367. True
  10368. >>> pd.Series([np.nan]).all(skipna=False)
  10369. True
  10370. **DataFrames**
  10371. Create a dataframe from a dictionary.
  10372. >>> df = pd.DataFrame({'col1': [True, True], 'col2': [True, False]})
  10373. >>> df
  10374. col1 col2
  10375. 0 True True
  10376. 1 True False
  10377. Default behaviour checks if values in each column all return True.
  10378. >>> df.all()
  10379. col1 True
  10380. col2 False
  10381. dtype: bool
  10382. Specify ``axis='columns'`` to check if values in each row all return True.
  10383. >>> df.all(axis='columns')
  10384. 0 True
  10385. 1 False
  10386. dtype: bool
  10387. Or ``axis=None`` for whether every value is True.
  10388. >>> df.all(axis=None)
  10389. False
  10390. """
  10391. _all_see_also = """\
  10392. See Also
  10393. --------
  10394. Series.all : Return True if all elements are True.
  10395. DataFrame.any : Return True if one (or more) elements are True.
  10396. """
  10397. _cnum_doc = """
  10398. Return cumulative {desc} over a DataFrame or Series axis.
  10399. Returns a DataFrame or Series of the same size containing the cumulative
  10400. {desc}.
  10401. Parameters
  10402. ----------
  10403. axis : {{0 or 'index', 1 or 'columns'}}, default 0
  10404. The index or the name of the axis. 0 is equivalent to None or 'index'.
  10405. For `Series` this parameter is unused and defaults to 0.
  10406. skipna : bool, default True
  10407. Exclude NA/null values. If an entire row/column is NA, the result
  10408. will be NA.
  10409. *args, **kwargs
  10410. Additional keywords have no effect but might be accepted for
  10411. compatibility with NumPy.
  10412. Returns
  10413. -------
  10414. {name1} or {name2}
  10415. Return cumulative {desc} of {name1} or {name2}.
  10416. See Also
  10417. --------
  10418. core.window.expanding.Expanding.{accum_func_name} : Similar functionality
  10419. but ignores ``NaN`` values.
  10420. {name2}.{accum_func_name} : Return the {desc} over
  10421. {name2} axis.
  10422. {name2}.cummax : Return cumulative maximum over {name2} axis.
  10423. {name2}.cummin : Return cumulative minimum over {name2} axis.
  10424. {name2}.cumsum : Return cumulative sum over {name2} axis.
  10425. {name2}.cumprod : Return cumulative product over {name2} axis.
  10426. {examples}"""
  10427. _cummin_examples = """\
  10428. Examples
  10429. --------
  10430. **Series**
  10431. >>> s = pd.Series([2, np.nan, 5, -1, 0])
  10432. >>> s
  10433. 0 2.0
  10434. 1 NaN
  10435. 2 5.0
  10436. 3 -1.0
  10437. 4 0.0
  10438. dtype: float64
  10439. By default, NA values are ignored.
  10440. >>> s.cummin()
  10441. 0 2.0
  10442. 1 NaN
  10443. 2 2.0
  10444. 3 -1.0
  10445. 4 -1.0
  10446. dtype: float64
  10447. To include NA values in the operation, use ``skipna=False``
  10448. >>> s.cummin(skipna=False)
  10449. 0 2.0
  10450. 1 NaN
  10451. 2 NaN
  10452. 3 NaN
  10453. 4 NaN
  10454. dtype: float64
  10455. **DataFrame**
  10456. >>> df = pd.DataFrame([[2.0, 1.0],
  10457. ... [3.0, np.nan],
  10458. ... [1.0, 0.0]],
  10459. ... columns=list('AB'))
  10460. >>> df
  10461. A B
  10462. 0 2.0 1.0
  10463. 1 3.0 NaN
  10464. 2 1.0 0.0
  10465. By default, iterates over rows and finds the minimum
  10466. in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
  10467. >>> df.cummin()
  10468. A B
  10469. 0 2.0 1.0
  10470. 1 2.0 NaN
  10471. 2 1.0 0.0
  10472. To iterate over columns and find the minimum in each row,
  10473. use ``axis=1``
  10474. >>> df.cummin(axis=1)
  10475. A B
  10476. 0 2.0 1.0
  10477. 1 3.0 NaN
  10478. 2 1.0 0.0
  10479. """
  10480. _cumsum_examples = """\
  10481. Examples
  10482. --------
  10483. **Series**
  10484. >>> s = pd.Series([2, np.nan, 5, -1, 0])
  10485. >>> s
  10486. 0 2.0
  10487. 1 NaN
  10488. 2 5.0
  10489. 3 -1.0
  10490. 4 0.0
  10491. dtype: float64
  10492. By default, NA values are ignored.
  10493. >>> s.cumsum()
  10494. 0 2.0
  10495. 1 NaN
  10496. 2 7.0
  10497. 3 6.0
  10498. 4 6.0
  10499. dtype: float64
  10500. To include NA values in the operation, use ``skipna=False``
  10501. >>> s.cumsum(skipna=False)
  10502. 0 2.0
  10503. 1 NaN
  10504. 2 NaN
  10505. 3 NaN
  10506. 4 NaN
  10507. dtype: float64
  10508. **DataFrame**
  10509. >>> df = pd.DataFrame([[2.0, 1.0],
  10510. ... [3.0, np.nan],
  10511. ... [1.0, 0.0]],
  10512. ... columns=list('AB'))
  10513. >>> df
  10514. A B
  10515. 0 2.0 1.0
  10516. 1 3.0 NaN
  10517. 2 1.0 0.0
  10518. By default, iterates over rows and finds the sum
  10519. in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
  10520. >>> df.cumsum()
  10521. A B
  10522. 0 2.0 1.0
  10523. 1 5.0 NaN
  10524. 2 6.0 1.0
  10525. To iterate over columns and find the sum in each row,
  10526. use ``axis=1``
  10527. >>> df.cumsum(axis=1)
  10528. A B
  10529. 0 2.0 3.0
  10530. 1 3.0 NaN
  10531. 2 1.0 1.0
  10532. """
  10533. _cumprod_examples = """\
  10534. Examples
  10535. --------
  10536. **Series**
  10537. >>> s = pd.Series([2, np.nan, 5, -1, 0])
  10538. >>> s
  10539. 0 2.0
  10540. 1 NaN
  10541. 2 5.0
  10542. 3 -1.0
  10543. 4 0.0
  10544. dtype: float64
  10545. By default, NA values are ignored.
  10546. >>> s.cumprod()
  10547. 0 2.0
  10548. 1 NaN
  10549. 2 10.0
  10550. 3 -10.0
  10551. 4 -0.0
  10552. dtype: float64
  10553. To include NA values in the operation, use ``skipna=False``
  10554. >>> s.cumprod(skipna=False)
  10555. 0 2.0
  10556. 1 NaN
  10557. 2 NaN
  10558. 3 NaN
  10559. 4 NaN
  10560. dtype: float64
  10561. **DataFrame**
  10562. >>> df = pd.DataFrame([[2.0, 1.0],
  10563. ... [3.0, np.nan],
  10564. ... [1.0, 0.0]],
  10565. ... columns=list('AB'))
  10566. >>> df
  10567. A B
  10568. 0 2.0 1.0
  10569. 1 3.0 NaN
  10570. 2 1.0 0.0
  10571. By default, iterates over rows and finds the product
  10572. in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
  10573. >>> df.cumprod()
  10574. A B
  10575. 0 2.0 1.0
  10576. 1 6.0 NaN
  10577. 2 6.0 0.0
  10578. To iterate over columns and find the product in each row,
  10579. use ``axis=1``
  10580. >>> df.cumprod(axis=1)
  10581. A B
  10582. 0 2.0 2.0
  10583. 1 3.0 NaN
  10584. 2 1.0 0.0
  10585. """
  10586. _cummax_examples = """\
  10587. Examples
  10588. --------
  10589. **Series**
  10590. >>> s = pd.Series([2, np.nan, 5, -1, 0])
  10591. >>> s
  10592. 0 2.0
  10593. 1 NaN
  10594. 2 5.0
  10595. 3 -1.0
  10596. 4 0.0
  10597. dtype: float64
  10598. By default, NA values are ignored.
  10599. >>> s.cummax()
  10600. 0 2.0
  10601. 1 NaN
  10602. 2 5.0
  10603. 3 5.0
  10604. 4 5.0
  10605. dtype: float64
  10606. To include NA values in the operation, use ``skipna=False``
  10607. >>> s.cummax(skipna=False)
  10608. 0 2.0
  10609. 1 NaN
  10610. 2 NaN
  10611. 3 NaN
  10612. 4 NaN
  10613. dtype: float64
  10614. **DataFrame**
  10615. >>> df = pd.DataFrame([[2.0, 1.0],
  10616. ... [3.0, np.nan],
  10617. ... [1.0, 0.0]],
  10618. ... columns=list('AB'))
  10619. >>> df
  10620. A B
  10621. 0 2.0 1.0
  10622. 1 3.0 NaN
  10623. 2 1.0 0.0
  10624. By default, iterates over rows and finds the maximum
  10625. in each column. This is equivalent to ``axis=None`` or ``axis='index'``.
  10626. >>> df.cummax()
  10627. A B
  10628. 0 2.0 1.0
  10629. 1 3.0 NaN
  10630. 2 3.0 1.0
  10631. To iterate over columns and find the maximum in each row,
  10632. use ``axis=1``
  10633. >>> df.cummax(axis=1)
  10634. A B
  10635. 0 2.0 2.0
  10636. 1 3.0 NaN
  10637. 2 1.0 1.0
  10638. """
  10639. _any_see_also = """\
  10640. See Also
  10641. --------
  10642. numpy.any : Numpy version of this method.
  10643. Series.any : Return whether any element is True.
  10644. Series.all : Return whether all elements are True.
  10645. DataFrame.any : Return whether any element is True over requested axis.
  10646. DataFrame.all : Return whether all elements are True over requested axis.
  10647. """
  10648. _any_desc = """\
  10649. Return whether any element is True, potentially over an axis.
  10650. Returns False unless there is at least one element within a series or
along a DataFrame axis that is True or equivalent (e.g. non-zero or
  10652. non-empty)."""
  10653. _any_examples = """\
  10654. Examples
  10655. --------
  10656. **Series**
  10657. For Series input, the output is a scalar indicating whether any element
  10658. is True.
  10659. >>> pd.Series([False, False]).any()
  10660. False
  10661. >>> pd.Series([True, False]).any()
  10662. True
  10663. >>> pd.Series([], dtype="float64").any()
  10664. False
  10665. >>> pd.Series([np.nan]).any()
  10666. False
  10667. >>> pd.Series([np.nan]).any(skipna=False)
  10668. True
  10669. **DataFrame**
  10670. Whether each column contains at least one True element (the default).
  10671. >>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]})
  10672. >>> df
  10673. A B C
  10674. 0 1 0 0
  10675. 1 2 2 0
  10676. >>> df.any()
  10677. A True
  10678. B True
  10679. C False
  10680. dtype: bool
  10681. Aggregating over the columns.
  10682. >>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]})
  10683. >>> df
  10684. A B
  10685. 0 True 1
  10686. 1 False 2
  10687. >>> df.any(axis='columns')
  10688. 0 True
  10689. 1 True
  10690. dtype: bool
  10691. >>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]})
  10692. >>> df
  10693. A B
  10694. 0 True 1
  10695. 1 False 0
  10696. >>> df.any(axis='columns')
  10697. 0 True
  10698. 1 False
  10699. dtype: bool
  10700. Aggregating over the entire DataFrame with ``axis=None``.
  10701. >>> df.any(axis=None)
  10702. True
  10703. `any` for an empty DataFrame is an empty Series.
  10704. >>> pd.DataFrame([]).any()
  10705. Series([], dtype: bool)
  10706. """
  10707. _shared_docs[
  10708. "stat_func_example"
  10709. ] = """
  10710. Examples
  10711. --------
  10712. >>> idx = pd.MultiIndex.from_arrays([
  10713. ... ['warm', 'warm', 'cold', 'cold'],
  10714. ... ['dog', 'falcon', 'fish', 'spider']],
  10715. ... names=['blooded', 'animal'])
  10716. >>> s = pd.Series([4, 2, 0, 8], name='legs', index=idx)
  10717. >>> s
  10718. blooded animal
  10719. warm dog 4
  10720. falcon 2
  10721. cold fish 0
  10722. spider 8
  10723. Name: legs, dtype: int64
  10724. >>> s.{stat_func}()
  10725. {default_output}"""
  10726. _sum_examples = _shared_docs["stat_func_example"].format(
  10727. stat_func="sum", verb="Sum", default_output=14, level_output_0=6, level_output_1=8
  10728. )
  10729. _sum_examples += """
  10730. By default, the sum of an empty or all-NA Series is ``0``.
  10731. >>> pd.Series([], dtype="float64").sum() # min_count=0 is the default
  10732. 0.0
  10733. This can be controlled with the ``min_count`` parameter. For example, if
  10734. you'd like the sum of an empty series to be NaN, pass ``min_count=1``.
  10735. >>> pd.Series([], dtype="float64").sum(min_count=1)
  10736. nan
  10737. Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
  10738. empty series identically.
  10739. >>> pd.Series([np.nan]).sum()
  10740. 0.0
  10741. >>> pd.Series([np.nan]).sum(min_count=1)
  10742. nan"""
  10743. _max_examples: str = _shared_docs["stat_func_example"].format(
  10744. stat_func="max", verb="Max", default_output=8, level_output_0=4, level_output_1=8
  10745. )
  10746. _min_examples: str = _shared_docs["stat_func_example"].format(
  10747. stat_func="min", verb="Min", default_output=0, level_output_0=2, level_output_1=0
  10748. )
  10749. _stat_func_see_also = """
  10750. See Also
  10751. --------
  10752. Series.sum : Return the sum.
  10753. Series.min : Return the minimum.
  10754. Series.max : Return the maximum.
  10755. Series.idxmin : Return the index of the minimum.
  10756. Series.idxmax : Return the index of the maximum.
  10757. DataFrame.sum : Return the sum over the requested axis.
  10758. DataFrame.min : Return the minimum over the requested axis.
  10759. DataFrame.max : Return the maximum over the requested axis.
  10760. DataFrame.idxmin : Return the index of the minimum over the requested axis.
  10761. DataFrame.idxmax : Return the index of the maximum over the requested axis."""
  10762. _prod_examples = """
  10763. Examples
  10764. --------
  10765. By default, the product of an empty or all-NA Series is ``1``
  10766. >>> pd.Series([], dtype="float64").prod()
  10767. 1.0
  10768. This can be controlled with the ``min_count`` parameter
  10769. >>> pd.Series([], dtype="float64").prod(min_count=1)
  10770. nan
  10771. Thanks to the ``skipna`` parameter, ``min_count`` handles all-NA and
  10772. empty series identically.
  10773. >>> pd.Series([np.nan]).prod()
  10774. 1.0
  10775. >>> pd.Series([np.nan]).prod(min_count=1)
  10776. nan"""
  10777. _min_count_stub = """\
  10778. min_count : int, default 0
  10779. The required number of valid values to perform the operation. If fewer than
  10780. ``min_count`` non-NA values are present the result will be NA.
  10781. """
  10782. def _align_as_utc(
  10783. left: NDFrameT, right: NDFrameT, join_index: Index | None
  10784. ) -> tuple[NDFrameT, NDFrameT]:
  10785. """
  10786. If we are aligning timezone-aware DatetimeIndexes and the timezones
  10787. do not match, convert both to UTC.
  10788. """
  10789. if is_datetime64tz_dtype(left.index.dtype):
  10790. if left.index.tz != right.index.tz:
  10791. if join_index is not None:
  10792. # GH#33671 ensure we don't change the index on
  10793. # our original Series (NB: by default deep=False)
  10794. left = left.copy()
  10795. right = right.copy()
  10796. left.index = join_index
  10797. right.index = join_index
  10798. return left, right