# -*- coding: utf-8 -*-
#
# Author: Travis Oliphant 2002-2011 with contributions from
# SciPy Developers 2004-2011
#
import warnings
from collections.abc import Iterable
from functools import wraps, cached_property
import ctypes

import numpy as np
from numpy.polynomial import Polynomial

from scipy._lib.doccer import (extend_notes_in_docstring,
                               replace_notes_in_docstring,
                               inherit_docstring_from)
from scipy._lib._ccallback import LowLevelCallable
from scipy import optimize
from scipy import integrate
import scipy.special as sc
import scipy.special._ufuncs as scu
from scipy._lib._util import _lazyselect, _lazywhere

from . import _stats
from ._tukeylambda_stats import (tukeylambda_variance as _tlvar,
                                 tukeylambda_kurtosis as _tlkurt)
from ._distn_infrastructure import (
    get_distribution_names, _kurtosis,
    rv_continuous, _skew, _get_fixed_fit_value, _check_shape, _ShapeInfo)
from ._ksstats import kolmogn, kolmognp, kolmogni
from ._constants import (_XMIN, _EULER, _ZETA3, _SQRT_PI,
                         _SQRT_2_OVER_PI, _LOG_SQRT_2_OVER_PI)
import scipy.stats._boost as _boost
from scipy.optimize import root_scalar
from scipy.stats._warnings_errors import FitError
import scipy.stats as stats


def _remove_optimizer_parameters(kwds):
    """
    Remove the optimizer-related keyword arguments 'loc', 'scale',
    'optimizer' and 'method' from `kwds`.  Then check that `kwds` is empty,
    and raise `TypeError("Unknown arguments: %s." % kwds)` if it is not.

    This function is used in the fit method of distributions that override
    the default method and do not use the default optimization code.

    `kwds` is modified in-place.
    """
    kwds.pop('loc', None)
    kwds.pop('scale', None)
    kwds.pop('optimizer', None)
    kwds.pop('method', None)
    if kwds:
        raise TypeError("Unknown arguments: %s." % kwds)


def _call_super_mom(fun):
    # If the fit method is overridden only for MLE and doesn't specify what
    # to do when method == 'mm', this decorator falls back to the generic
    # implementation.
    @wraps(fun)
    def wrapper(self, *args, **kwds):
        method = kwds.get('method', 'mle').lower()
        if method != 'mle':
            return super(type(self), self).fit(*args, **kwds)
        else:
            return fun(self, *args, **kwds)
    return wrapper
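

# Rough illustration of how the decorator above plays out: `norm.fit` further
# down is wrapped with `_call_super_mom` and only implements the MLE formulas,
# so a call like
#
#     >>> from scipy import stats
#     >>> stats.norm.fit([1.0, 2.0, 4.0], method='mm')
#
# is routed to the generic `rv_continuous.fit` machinery, while the default
# `method='mle'` uses the explicit formulas in `norm_gen.fit`.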


def _get_left_bracket(fun, rbrack, lbrack=None):
    # find left bracket for `root_scalar`. A guess for lbrack may be provided.
    lbrack = lbrack or rbrack - 1
    diff = rbrack - lbrack

    # if there is no sign change in `fun` between the brackets, expand
    # rbrack - lbrack until a sign change occurs
    def interval_contains_root(lbrack, rbrack):
        # return True if the signs disagree
        return np.sign(fun(lbrack)) != np.sign(fun(rbrack))

    while not interval_contains_root(lbrack, rbrack):
        diff *= 2
        lbrack = rbrack - diff

        msg = ("The solver could not find a bracket containing a "
               "root to an MLE first order condition.")
        if np.isinf(lbrack):
            raise FitSolverError(msg)

    return lbrack
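

# Sketch of the intended use of `_get_left_bracket` (with a made-up objective
# function standing in for an MLE first-order condition):
#
#     >>> fun = lambda p: p - 2.5                 # illustrative only
#     >>> lb = _get_left_bracket(fun, rbrack=5)   # bracket widens 4 -> 3 -> 1
#     >>> root_scalar(fun, bracket=(lb, 5)).root  # approximately 2.5
#
# The loop keeps doubling the bracket width until `fun` changes sign, and
# raises `FitSolverError` if the left bracket runs off to -inf first.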


class ksone_gen(rv_continuous):
    r"""Kolmogorov-Smirnov one-sided test statistic distribution.

    This is the distribution of the one-sided Kolmogorov-Smirnov (KS)
    statistics :math:`D_n^+` and :math:`D_n^-`
    for a finite sample size ``n >= 1`` (the shape parameter).

    %(before_notes)s

    See Also
    --------
    kstwobign, kstwo, kstest

    Notes
    -----
    :math:`D_n^+` and :math:`D_n^-` are given by

    .. math::

        D_n^+ &= \text{sup}_x (F_n(x) - F(x)),\\
        D_n^- &= \text{sup}_x (F(x) - F_n(x)),\\

    where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
    `ksone` describes the distribution under the null hypothesis of the KS
    test that the empirical CDF corresponds to :math:`n` i.i.d. random
    variates with CDF :math:`F`.

    %(after_notes)s

    References
    ----------
    .. [1] Birnbaum, Z. W. and Tingey, F. H. "One-sided confidence contours
       for probability distribution functions", The Annals of Mathematical
       Statistics, 22(4), pp 592-596 (1951).

    %(example)s

    """
    def _argcheck(self, n):
        return (n >= 1) & (n == np.round(n))

    def _shape_info(self):
        return [_ShapeInfo("n", True, (1, np.inf), (True, False))]

    def _pdf(self, x, n):
        return -scu._smirnovp(n, x)

    def _cdf(self, x, n):
        return scu._smirnovc(n, x)

    def _sf(self, x, n):
        return sc.smirnov(n, x)

    def _ppf(self, q, n):
        return scu._smirnovci(n, q)

    def _isf(self, q, n):
        return sc.smirnovi(n, q)


ksone = ksone_gen(a=0.0, b=1.0, name='ksone')
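

# Usage sketch for the instance just created: critical values and p-values of
# the one-sided statistic come from the standard `rv_continuous` methods, e.g.
#
#     >>> from scipy import stats
#     >>> crit = stats.ksone.ppf(0.95, 100)   # upper 5% point of D_n^+, n = 100
#     >>> pval = stats.ksone.sf(0.12, 100)    # p-value of an observed D_n^+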


class kstwo_gen(rv_continuous):
    r"""Kolmogorov-Smirnov two-sided test statistic distribution.

    This is the distribution of the two-sided Kolmogorov-Smirnov (KS)
    statistic :math:`D_n` for a finite sample size ``n >= 1``
    (the shape parameter).

    %(before_notes)s

    See Also
    --------
    kstwobign, ksone, kstest

    Notes
    -----
    :math:`D_n` is given by

    .. math::

        D_n = \text{sup}_x |F_n(x) - F(x)|

    where :math:`F` is a (continuous) CDF and :math:`F_n` is an empirical
    CDF. `kstwo` describes the distribution under the null hypothesis of the
    KS test that the empirical CDF corresponds to :math:`n` i.i.d. random
    variates with CDF :math:`F`.

    %(after_notes)s

    References
    ----------
    .. [1] Simard, R., L'Ecuyer, P. "Computing the Two-Sided
       Kolmogorov-Smirnov Distribution", Journal of Statistical Software,
       Vol 39, 11, 1-18 (2011).

    %(example)s

    """
    def _argcheck(self, n):
        return (n >= 1) & (n == np.round(n))

    def _shape_info(self):
        return [_ShapeInfo("n", True, (1, np.inf), (True, False))]

    def _get_support(self, n):
        return (0.5/(n if not isinstance(n, Iterable) else np.asanyarray(n)),
                1.0)

    def _pdf(self, x, n):
        return kolmognp(n, x)

    def _cdf(self, x, n):
        return kolmogn(n, x)

    def _sf(self, x, n):
        return kolmogn(n, x, cdf=False)

    def _ppf(self, q, n):
        return kolmogni(n, q, cdf=True)

    def _isf(self, q, n):
        return kolmogni(n, q, cdf=False)


# Use the pdf (not the ppf) to compute moments.
kstwo = kstwo_gen(momtype=0, a=0.0, b=1.0, name='kstwo')
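

# Usage sketch: `kstwo.sf(d, n)` gives the exact finite-sample p-value of the
# two-sided statistic (this is what the exact mode of `kstest` relies on), e.g.
#
#     >>> from scipy import stats
#     >>> pval = stats.kstwo.sf(0.08, 100)    # P(D_n >= 0.08) for n = 100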


class kstwobign_gen(rv_continuous):
    r"""Limiting distribution of scaled Kolmogorov-Smirnov two-sided test statistic.

    This is the asymptotic distribution of the two-sided Kolmogorov-Smirnov
    statistic :math:`\sqrt{n} D_n` that measures the maximum absolute
    distance of the theoretical (continuous) CDF from the empirical CDF
    (see `kstest`).

    %(before_notes)s

    See Also
    --------
    ksone, kstwo, kstest

    Notes
    -----
    :math:`\sqrt{n} D_n` is given by

    .. math::

        D_n = \text{sup}_x |F_n(x) - F(x)|

    where :math:`F` is a continuous CDF and :math:`F_n` is an empirical CDF.
    `kstwobign` describes the asymptotic distribution (i.e. the limit of
    :math:`\sqrt{n} D_n`) under the null hypothesis of the KS test that the
    empirical CDF corresponds to i.i.d. random variates with CDF :math:`F`.

    %(after_notes)s

    References
    ----------
    .. [1] Feller, W. "On the Kolmogorov-Smirnov Limit Theorems for Empirical
       Distributions", Ann. Math. Statist. Vol 19, 177-189 (1948).

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        return -scu._kolmogp(x)

    def _cdf(self, x):
        return scu._kolmogc(x)

    def _sf(self, x):
        return sc.kolmogorov(x)

    def _ppf(self, q):
        return scu._kolmogci(q)

    def _isf(self, q):
        return sc.kolmogi(q)


kstwobign = kstwobign_gen(a=0.0, name='kstwobign')
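

# Usage sketch: the asymptotic 5% critical value of the scaled two-sided
# statistic is the 95th percentile of this limit law,
#
#     >>> from scipy import stats
#     >>> stats.kstwobign.ppf(0.95)           # approximately 1.36
#
# so for large n the KS test rejects when sqrt(n) * D_n exceeds roughly 1.36.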


## Normal distribution

# loc = mu, scale = std
# Keep these implementations out of the class definition so they can be reused
# by other distributions.
_norm_pdf_C = np.sqrt(2*np.pi)
_norm_pdf_logC = np.log(_norm_pdf_C)


def _norm_pdf(x):
    return np.exp(-x**2/2.0) / _norm_pdf_C


def _norm_logpdf(x):
    return -x**2 / 2.0 - _norm_pdf_logC


def _norm_cdf(x):
    return sc.ndtr(x)


def _norm_logcdf(x):
    return sc.log_ndtr(x)


def _norm_ppf(q):
    return sc.ndtri(q)


def _norm_sf(x):
    return _norm_cdf(-x)


def _norm_logsf(x):
    return _norm_logcdf(-x)


def _norm_isf(q):
    return -_norm_ppf(q)
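

# Quick sanity checks for the helpers above (standard normal identities):
#
#     >>> _norm_cdf(0.0)                           # 0.5 by symmetry
#     >>> _norm_sf(1.0) == _norm_cdf(-1.0)         # sf defined via reflection
#     >>> _norm_isf(0.975) == -_norm_ppf(0.975)    # isf defined via reflection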


class norm_gen(rv_continuous):
    r"""A normal continuous random variable.

    The location (``loc``) keyword specifies the mean.
    The scale (``scale``) keyword specifies the standard deviation.

    %(before_notes)s

    Notes
    -----
    The probability density function for `norm` is:

    .. math::

        f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}

    for a real number :math:`x`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _rvs(self, size=None, random_state=None):
        return random_state.standard_normal(size)

    def _pdf(self, x):
        # norm.pdf(x) = exp(-x**2/2)/sqrt(2*pi)
        return _norm_pdf(x)

    def _logpdf(self, x):
        return _norm_logpdf(x)

    def _cdf(self, x):
        return _norm_cdf(x)

    def _logcdf(self, x):
        return _norm_logcdf(x)

    def _sf(self, x):
        return _norm_sf(x)

    def _logsf(self, x):
        return _norm_logsf(x)

    def _ppf(self, q):
        return _norm_ppf(q)

    def _isf(self, q):
        return _norm_isf(q)

    def _stats(self):
        return 0.0, 1.0, 0.0, 0.0

    def _entropy(self):
        return 0.5*(np.log(2*np.pi)+1)

    @_call_super_mom
    @replace_notes_in_docstring(rv_continuous, notes="""\
        For the normal distribution, method of moments and maximum likelihood
        estimation give identical fits, and explicit formulas for the
        estimates are available.
        This function uses these explicit formulas for the maximum likelihood
        estimation of the normal distribution parameters, so the
        `optimizer` and `method` arguments are ignored.\n\n""")
    def fit(self, data, **kwds):
        floc = kwds.pop('floc', None)
        fscale = kwds.pop('fscale', None)

        _remove_optimizer_parameters(kwds)

        if floc is not None and fscale is not None:
            # This check is for consistency with `rv_continuous.fit`.
            # Without this check, this function would just return the
            # parameters that were given.
            raise ValueError("All parameters fixed. There is nothing to "
                             "optimize.")

        data = np.asarray(data)
        if not np.isfinite(data).all():
            raise ValueError("The data contains non-finite values.")

        if floc is None:
            loc = data.mean()
        else:
            loc = floc

        if fscale is None:
            scale = np.sqrt(((data - loc)**2).mean())
        else:
            scale = fscale

        return loc, scale

    def _munp(self, n):
        """
        @returns Moments of standard normal distribution for integer n >= 0

        See eq. 16 of https://arxiv.org/abs/1209.4340v2
        """
        if n % 2 == 0:
            return sc.factorial2(n - 1)
        else:
            return 0.


norm = norm_gen(name='norm')
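

# Usage sketch for the closed-form fit above: the fitted location is the sample
# mean and the fitted scale is the biased (ddof=0) standard deviation, which is
# the maximum likelihood estimate, e.g.
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> data = np.array([1.0, 2.0, 4.0, 7.0])
#     >>> loc, scale = stats.norm.fit(data)
#     >>> np.isclose(loc, data.mean()) and np.isclose(scale, data.std(ddof=0))
#     True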


class alpha_gen(rv_continuous):
    r"""An alpha continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `alpha` ([1]_, [2]_) is:

    .. math::

        f(x, a) = \frac{1}{x^2 \Phi(a) \sqrt{2\pi}} *
                  \exp(-\frac{1}{2} (a-1/x)^2)

    where :math:`\Phi` is the normal CDF, :math:`x > 0`, and :math:`a > 0`.

    `alpha` takes ``a`` as a shape parameter.

    %(after_notes)s

    References
    ----------
    .. [1] Johnson, Kotz, and Balakrishnan, "Continuous Univariate
           Distributions, Volume 1", Second Edition, John Wiley and Sons,
           p. 173 (1994).
    .. [2] Anthony A. Salvia, "Reliability applications of the Alpha
           Distribution", IEEE Transactions on Reliability, Vol. R-34,
           No. 3, pp. 251-252 (1985).

    %(example)s

    """
    _support_mask = rv_continuous._open_support_mask

    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (False, False))]

    def _pdf(self, x, a):
        # alpha.pdf(x, a) = 1/(x**2*Phi(a)*sqrt(2*pi)) * exp(-1/2 * (a-1/x)**2)
        return 1.0/(x**2)/_norm_cdf(a)*_norm_pdf(a-1.0/x)

    def _logpdf(self, x, a):
        return -2*np.log(x) + _norm_logpdf(a-1.0/x) - np.log(_norm_cdf(a))

    def _cdf(self, x, a):
        return _norm_cdf(a-1.0/x) / _norm_cdf(a)

    def _ppf(self, q, a):
        return 1.0/np.asarray(a-sc.ndtri(q*_norm_cdf(a)))

    def _stats(self, a):
        return [np.inf]*2 + [np.nan]*2


alpha = alpha_gen(a=0.0, name='alpha')
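

# Usage sketch: the density coded in `_pdf` above can be cross-checked against
# the docstring formula, using `scipy.special.ndtr` for the normal CDF Phi:
#
#     >>> import numpy as np
#     >>> from scipy import stats, special
#     >>> x, a = 1.5, 3.0
#     >>> manual = np.exp(-0.5*(a - 1/x)**2) / (x**2 * special.ndtr(a) * np.sqrt(2*np.pi))
#     >>> np.isclose(stats.alpha.pdf(x, a), manual)
#     True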


class anglit_gen(rv_continuous):
    r"""An anglit continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `anglit` is:

    .. math::

        f(x) = \sin(2x + \pi/2) = \cos(2x)

    for :math:`-\pi/4 \le x \le \pi/4`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # anglit.pdf(x) = sin(2*x + \pi/2) = cos(2*x)
        return np.cos(2*x)

    def _cdf(self, x):
        return np.sin(x+np.pi/4)**2.0

    def _ppf(self, q):
        return np.arcsin(np.sqrt(q))-np.pi/4

    def _stats(self):
        return 0.0, np.pi*np.pi/16-0.5, 0.0, -2*(np.pi**4-96)/(np.pi*np.pi-8)**2

    def _entropy(self):
        return 1-np.log(2)


anglit = anglit_gen(a=-np.pi/4, b=np.pi/4, name='anglit')
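

# Usage sketch: `anglit` is supported on [-pi/4, pi/4], so for example
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> stats.anglit.cdf(np.pi/4)           # 1.0 at the right endpoint
#     >>> stats.anglit.ppf(0.5)               # 0.0, since the density is symmetric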


class arcsine_gen(rv_continuous):
    r"""An arcsine continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `arcsine` is:

    .. math::

        f(x) = \frac{1}{\pi \sqrt{x (1-x)}}

    for :math:`0 < x < 1`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        # arcsine.pdf(x) = 1/(pi*sqrt(x*(1-x)))
        with np.errstate(divide='ignore'):
            return 1.0/np.pi/np.sqrt(x*(1-x))

    def _cdf(self, x):
        return 2.0/np.pi*np.arcsin(np.sqrt(x))

    def _ppf(self, q):
        return np.sin(np.pi/2.0*q)**2.0

    def _stats(self):
        mu = 0.5
        mu2 = 1.0/8
        g1 = 0
        g2 = -3.0/2.0
        return mu, mu2, g1, g2

    def _entropy(self):
        return -0.24156447527049044468


arcsine = arcsine_gen(a=0.0, b=1.0, name='arcsine')
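

# Usage sketch: the closed-form CDF and PPF above give, for example,
#
#     >>> from scipy import stats
#     >>> stats.arcsine.cdf(0.5)              # 2/pi * arcsin(sqrt(0.5)) = 0.5
#     >>> stats.arcsine.ppf(0.25)             # sin(pi/8)**2, about 0.146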


class FitDataError(ValueError):
    """Raised when input data is inconsistent with fixed parameters."""
    # This exception is raised by, for example, beta_gen.fit when both floc
    # and fscale are fixed and there are values in the data not in the open
    # interval (floc, floc+fscale).
    def __init__(self, distr, lower, upper):
        self.args = (
            "Invalid values in `data`. Maximum likelihood "
            "estimation with {distr!r} requires that {lower!r} < "
            "(x - loc)/scale < {upper!r} for each x in `data`.".format(
                distr=distr, lower=lower, upper=upper),
        )


class FitSolverError(FitError):
    """
    Raised when a solver fails to converge while fitting a distribution.
    """
    # This exception is raised by, for example, beta_gen.fit when
    # optimize.fsolve returns with ier != 1.
    def __init__(self, mesg):
        emsg = "Solver for the MLE equations failed to converge: "
        emsg += mesg.replace('\n', '')
        self.args = (emsg,)


def _beta_mle_a(a, b, n, s1):
    # The zeros of this function give the MLE for `a`, with
    # `b`, `n` and `s1` given. `s1` is the sum of the logs of
    # the data. `n` is the number of data points.
    psiab = sc.psi(a + b)
    func = s1 - n * (-psiab + sc.psi(a))
    return func


def _beta_mle_ab(theta, n, s1, s2):
    # Zeros of this function are critical points of
    # the maximum likelihood function. Solving this system
    # for theta (which contains a and b) gives the MLE for a and b
    # given `n`, `s1` and `s2`. `s1` is the sum of the logs of the data,
    # and `s2` is the sum of the logs of 1 - data. `n` is the number
    # of data points.
    a, b = theta
    psiab = sc.psi(a + b)
    func = [s1 - n * (-psiab + sc.psi(a)),
            s2 - n * (-psiab + sc.psi(b))]
    return func
  439. class beta_gen(rv_continuous):
  440. r"""A beta continuous random variable.
  441. %(before_notes)s
  442. Notes
  443. -----
  444. The probability density function for `beta` is:
  445. .. math::
  446. f(x, a, b) = \frac{\Gamma(a+b) x^{a-1} (1-x)^{b-1}}
  447. {\Gamma(a) \Gamma(b)}
  448. for :math:`0 <= x <= 1`, :math:`a > 0`, :math:`b > 0`, where
  449. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  450. `beta` takes :math:`a` and :math:`b` as shape parameters.
  451. %(after_notes)s
  452. %(example)s
  453. """
  454. def _shape_info(self):
  455. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  456. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  457. return [ia, ib]
  458. def _rvs(self, a, b, size=None, random_state=None):
  459. return random_state.beta(a, b, size)
  460. def _pdf(self, x, a, b):
  461. # gamma(a+b) * x**(a-1) * (1-x)**(b-1)
  462. # beta.pdf(x, a, b) = ------------------------------------
  463. # gamma(a)*gamma(b)
  464. return _boost._beta_pdf(x, a, b)
  465. def _logpdf(self, x, a, b):
  466. lPx = sc.xlog1py(b - 1.0, -x) + sc.xlogy(a - 1.0, x)
  467. lPx -= sc.betaln(a, b)
  468. return lPx
  469. def _cdf(self, x, a, b):
  470. return _boost._beta_cdf(x, a, b)
  471. def _sf(self, x, a, b):
  472. return _boost._beta_sf(x, a, b)
  473. def _isf(self, x, a, b):
  474. with warnings.catch_warnings():
  475. # See gh-14901
  476. message = "overflow encountered in _beta_isf"
  477. warnings.filterwarnings('ignore', message=message)
  478. return _boost._beta_isf(x, a, b)
  479. def _ppf(self, q, a, b):
  480. with warnings.catch_warnings():
  481. message = "overflow encountered in _beta_ppf"
  482. warnings.filterwarnings('ignore', message=message)
  483. return _boost._beta_ppf(q, a, b)
  484. def _stats(self, a, b):
  485. return (
  486. _boost._beta_mean(a, b),
  487. _boost._beta_variance(a, b),
  488. _boost._beta_skewness(a, b),
  489. _boost._beta_kurtosis_excess(a, b))
  490. def _fitstart(self, data):
  491. g1 = _skew(data)
  492. g2 = _kurtosis(data)
  493. def func(x):
  494. a, b = x
  495. sk = 2*(b-a)*np.sqrt(a + b + 1) / (a + b + 2) / np.sqrt(a*b)
  496. ku = a**3 - a**2*(2*b-1) + b**2*(b+1) - 2*a*b*(b+2)
  497. ku /= a*b*(a+b+2)*(a+b+3)
  498. ku *= 6
  499. return [sk-g1, ku-g2]
  500. a, b = optimize.fsolve(func, (1.0, 1.0))
  501. return super()._fitstart(data, args=(a, b))
  502. @_call_super_mom
  503. @extend_notes_in_docstring(rv_continuous, notes="""\
  504. In the special case where `method="MLE"` and
  505. both `floc` and `fscale` are given, a
  506. `ValueError` is raised if any value `x` in `data` does not satisfy
  507. `floc < x < floc + fscale`.\n\n""")
  508. def fit(self, data, *args, **kwds):
  509. # Override rv_continuous.fit, so we can more efficiently handle the
  510. # case where floc and fscale are given.
  511. floc = kwds.get('floc', None)
  512. fscale = kwds.get('fscale', None)
  513. if floc is None or fscale is None:
  514. # do general fit
  515. return super().fit(data, *args, **kwds)
  516. # We already got these from kwds, so just pop them.
  517. kwds.pop('floc', None)
  518. kwds.pop('fscale', None)
  519. f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
  520. f1 = _get_fixed_fit_value(kwds, ['f1', 'fb', 'fix_b'])
  521. _remove_optimizer_parameters(kwds)
  522. if f0 is not None and f1 is not None:
  523. # This check is for consistency with `rv_continuous.fit`.
  524. raise ValueError("All parameters fixed. There is nothing to "
  525. "optimize.")
  526. # Special case: loc and scale are constrained, so we are fitting
  527. # just the shape parameters. This can be done much more efficiently
  528. # than the method used in `rv_continuous.fit`. (See the subsection
  529. # "Two unknown parameters" in the section "Maximum likelihood" of
  530. # the Wikipedia article on the Beta distribution for the formulas.)
  531. if not np.isfinite(data).all():
  532. raise ValueError("The data contains non-finite values.")
  533. # Normalize the data to the interval [0, 1].
  534. data = (np.ravel(data) - floc) / fscale
  535. if np.any(data <= 0) or np.any(data >= 1):
  536. raise FitDataError("beta", lower=floc, upper=floc + fscale)
  537. xbar = data.mean()
  538. if f0 is not None or f1 is not None:
  539. # One of the shape parameters is fixed.
  540. if f0 is not None:
  541. # The shape parameter a is fixed, so swap the parameters
  542. # and flip the data. We always solve for `a`. The result
  543. # will be swapped back before returning.
  544. b = f0
  545. data = 1 - data
  546. xbar = 1 - xbar
  547. else:
  548. b = f1
  549. # Initial guess for a. Use the formula for the mean of the beta
  550. # distribution, E[x] = a / (a + b), to generate a reasonable
  551. # starting point based on the mean of the data and the given
  552. # value of b.
  553. a = b * xbar / (1 - xbar)
  554. # Compute the MLE for `a` by solving _beta_mle_a.
  555. theta, info, ier, mesg = optimize.fsolve(
  556. _beta_mle_a, a,
  557. args=(b, len(data), np.log(data).sum()),
  558. full_output=True
  559. )
  560. if ier != 1:
  561. raise FitSolverError(mesg=mesg)
  562. a = theta[0]
  563. if f0 is not None:
  564. # The shape parameter a was fixed, so swap back the
  565. # parameters.
  566. a, b = b, a
  567. else:
  568. # Neither of the shape parameters is fixed.
  569. # s1 and s2 are used in the extra arguments passed to _beta_mle_ab
  570. # by optimize.fsolve.
  571. s1 = np.log(data).sum()
  572. s2 = sc.log1p(-data).sum()
  573. # Use the "method of moments" to estimate the initial
  574. # guess for a and b.
  575. fac = xbar * (1 - xbar) / data.var(ddof=0) - 1
  576. a = xbar * fac
  577. b = (1 - xbar) * fac
  578. # Compute the MLE for a and b by solving _beta_mle_ab.
  579. theta, info, ier, mesg = optimize.fsolve(
  580. _beta_mle_ab, [a, b],
  581. args=(len(data), s1, s2),
  582. full_output=True
  583. )
  584. if ier != 1:
  585. raise FitSolverError(mesg=mesg)
  586. a, b = theta
  587. return a, b, floc, fscale
  588. beta = beta_gen(a=0.0, b=1.0, name='beta')
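# Editor's illustrative sketch (not part of the original source) of the fast fit
# path above: with both `floc` and `fscale` fixed, only the shapes are estimated.
# The data below are hypothetical.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> rng = np.random.default_rng(12345)
#   >>> samples = rng.beta(2.0, 5.0, size=1000)
#   >>> a, b, loc, scale = stats.beta.fit(samples, floc=0, fscale=1)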
  589. class betaprime_gen(rv_continuous):
  590. r"""A beta prime continuous random variable.
  591. %(before_notes)s
  592. Notes
  593. -----
  594. The probability density function for `betaprime` is:
  595. .. math::
  596. f(x, a, b) = \frac{x^{a-1} (1+x)^{-a-b}}{\beta(a, b)}
  597. for :math:`x >= 0`, :math:`a > 0`, :math:`b > 0`, where
  598. :math:`\beta(a, b)` is the beta function (see `scipy.special.beta`).
  599. `betaprime` takes ``a`` and ``b`` as shape parameters.
  600. %(after_notes)s
  601. %(example)s
  602. """
  603. _support_mask = rv_continuous._open_support_mask
  604. def _shape_info(self):
  605. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  606. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  607. return [ia, ib]
  608. def _rvs(self, a, b, size=None, random_state=None):
  609. u1 = gamma.rvs(a, size=size, random_state=random_state)
  610. u2 = gamma.rvs(b, size=size, random_state=random_state)
  611. return u1 / u2
  612. def _pdf(self, x, a, b):
  613. # betaprime.pdf(x, a, b) = x**(a-1) * (1+x)**(-a-b) / beta(a, b)
  614. return np.exp(self._logpdf(x, a, b))
  615. def _logpdf(self, x, a, b):
  616. return sc.xlogy(a - 1.0, x) - sc.xlog1py(a + b, x) - sc.betaln(a, b)
  617. def _cdf(self, x, a, b):
  618. return sc.betainc(a, b, x/(1.+x))
  619. def _munp(self, n, a, b):
  620. if n == 1.0:
  621. return np.where(b > 1,
  622. a/(b-1.0),
  623. np.inf)
  624. elif n == 2.0:
  625. return np.where(b > 2,
  626. a*(a+1.0)/((b-2.0)*(b-1.0)),
  627. np.inf)
  628. elif n == 3.0:
  629. return np.where(b > 3,
  630. a*(a+1.0)*(a+2.0)/((b-3.0)*(b-2.0)*(b-1.0)),
  631. np.inf)
  632. elif n == 4.0:
  633. return np.where(b > 4,
  634. (a*(a + 1.0)*(a + 2.0)*(a + 3.0) /
  635. ((b - 4.0)*(b - 3.0)*(b - 2.0)*(b - 1.0))),
  636. np.inf)
  637. else:
  638. raise NotImplementedError
  639. betaprime = betaprime_gen(a=0.0, name='betaprime')
  640. class bradford_gen(rv_continuous):
  641. r"""A Bradford continuous random variable.
  642. %(before_notes)s
  643. Notes
  644. -----
  645. The probability density function for `bradford` is:
  646. .. math::
  647. f(x, c) = \frac{c}{\log(1+c) (1+cx)}
  648. for :math:`0 <= x <= 1` and :math:`c > 0`.
  649. `bradford` takes ``c`` as a shape parameter for :math:`c`.
  650. %(after_notes)s
  651. %(example)s
  652. """
  653. def _shape_info(self):
  654. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  655. def _pdf(self, x, c):
  656. # bradford.pdf(x, c) = c / (k * (1+c*x))
  657. return c / (c*x + 1.0) / sc.log1p(c)
  658. def _cdf(self, x, c):
  659. return sc.log1p(c*x) / sc.log1p(c)
  660. def _ppf(self, q, c):
  661. return sc.expm1(q * sc.log1p(c)) / c
  662. def _stats(self, c, moments='mv'):
  663. k = np.log(1.0+c)
  664. mu = (c-k)/(c*k)
  665. mu2 = ((c+2.0)*k-2.0*c)/(2*c*k*k)
  666. g1 = None
  667. g2 = None
  668. if 's' in moments:
  669. g1 = np.sqrt(2)*(12*c*c-9*c*k*(c+2)+2*k*k*(c*(c+3)+3))
  670. g1 /= np.sqrt(c*(c*(k-2)+2*k))*(3*c*(k-2)+6*k)
  671. if 'k' in moments:
  672. g2 = (c**3*(k-3)*(k*(3*k-16)+24)+12*k*c*c*(k-4)*(k-3) +
  673. 6*c*k*k*(3*k-14) + 12*k**3)
  674. g2 /= 3*c*(c*(k-2)+2*k)**2
  675. return mu, mu2, g1, g2
  676. def _entropy(self, c):
  677. k = np.log(1+c)
  678. return k/2.0 - np.log(c/k)
  679. bradford = bradford_gen(a=0.0, b=1.0, name='bradford')
  680. class burr_gen(rv_continuous):
  681. r"""A Burr (Type III) continuous random variable.
  682. %(before_notes)s
  683. See Also
  684. --------
  685. fisk : a special case of either `burr` or `burr12` with ``d=1``
  686. burr12 : Burr Type XII distribution
  687. mielke : Mielke Beta-Kappa / Dagum distribution
  688. Notes
  689. -----
  690. The probability density function for `burr` is:
  691. .. math::
  692. f(x; c, d) = c d \frac{x^{-c - 1}}
  693. {{(1 + x^{-c})}^{d + 1}}
  694. for :math:`x >= 0` and :math:`c, d > 0`.
  695. `burr` takes ``c`` and ``d`` as shape parameters for :math:`c` and
  696. :math:`d`.
  697. This is the PDF corresponding to the third CDF given in Burr's list;
  698. specifically, it is equation (11) in Burr's paper [1]_. The distribution
  699. is also commonly referred to as the Dagum distribution [2]_. If the
  700. parameter :math:`c < 1` then the mean of the distribution does not
  701. exist and if :math:`c < 2` the variance does not exist [2]_.
  702. The PDF is finite at the left endpoint :math:`x = 0` if :math:`c * d >= 1`.
  703. %(after_notes)s
  704. References
  705. ----------
  706. .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
  707. Mathematical Statistics, 13(2), pp 215-232 (1942).
  708. .. [2] https://en.wikipedia.org/wiki/Dagum_distribution
  709. .. [3] Kleiber, Christian. "A guide to the Dagum distributions."
  710. Modeling Income Distributions and Lorenz Curves pp 97-117 (2008).
  711. %(example)s
  712. """
  713. # Do not set _support_mask to rv_continuous._open_support_mask
714. # Whether the left-hand endpoint is suitable for pdf evaluation depends on the
715. # values of c and d: if c*d >= 1, the pdf is finite at x = 0; otherwise it is infinite.
  716. def _shape_info(self):
  717. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  718. id = _ShapeInfo("d", False, (0, np.inf), (False, False))
  719. return [ic, id]
  720. def _pdf(self, x, c, d):
  721. # burr.pdf(x, c, d) = c * d * x**(-c-1) * (1+x**(-c))**(-d-1)
  722. output = _lazywhere(x == 0, [x, c, d],
  723. lambda x_, c_, d_: c_ * d_ * (x_**(c_*d_-1)) / (1 + x_**c_),
  724. f2 = lambda x_, c_, d_: (c_ * d_ * (x_ ** (-c_ - 1.0)) /
  725. ((1 + x_ ** (-c_)) ** (d_ + 1.0))))
  726. if output.ndim == 0:
  727. return output[()]
  728. return output
  729. def _logpdf(self, x, c, d):
  730. output = _lazywhere(
  731. x == 0, [x, c, d],
  732. lambda x_, c_, d_: (np.log(c_) + np.log(d_) + sc.xlogy(c_*d_ - 1, x_)
  733. - (d_+1) * sc.log1p(x_**(c_))),
  734. f2 = lambda x_, c_, d_: (np.log(c_) + np.log(d_)
  735. + sc.xlogy(-c_ - 1, x_)
  736. - sc.xlog1py(d_+1, x_**(-c_))))
  737. if output.ndim == 0:
  738. return output[()]
  739. return output
  740. def _cdf(self, x, c, d):
  741. return (1 + x**(-c))**(-d)
  742. def _logcdf(self, x, c, d):
  743. return sc.log1p(x**(-c)) * (-d)
  744. def _sf(self, x, c, d):
  745. return np.exp(self._logsf(x, c, d))
  746. def _logsf(self, x, c, d):
  747. return np.log1p(- (1 + x**(-c))**(-d))
  748. def _ppf(self, q, c, d):
  749. return (q**(-1.0/d) - 1)**(-1.0/c)
  750. def _stats(self, c, d):
  751. nc = np.arange(1, 5).reshape(4,1) / c
752. # e1..e4 are the first four raw moments: e1 is the mean, e2 - e1**2 the variance, etc.
  753. e1, e2, e3, e4 = sc.beta(d + nc, 1. - nc) * d
  754. mu = np.where(c > 1.0, e1, np.nan)
  755. mu2_if_c = e2 - mu**2
  756. mu2 = np.where(c > 2.0, mu2_if_c, np.nan)
  757. g1 = _lazywhere(
  758. c > 3.0,
  759. (c, e1, e2, e3, mu2_if_c),
  760. lambda c, e1, e2, e3, mu2_if_c: (e3 - 3*e2*e1 + 2*e1**3) / np.sqrt((mu2_if_c)**3),
  761. fillvalue=np.nan)
  762. g2 = _lazywhere(
  763. c > 4.0,
  764. (c, e1, e2, e3, e4, mu2_if_c),
  765. lambda c, e1, e2, e3, e4, mu2_if_c: (
  766. ((e4 - 4*e3*e1 + 6*e2*e1**2 - 3*e1**4) / mu2_if_c**2) - 3),
  767. fillvalue=np.nan)
  768. if np.ndim(c) == 0:
  769. return mu.item(), mu2.item(), g1.item(), g2.item()
  770. return mu, mu2, g1, g2
  771. def _munp(self, n, c, d):
  772. def __munp(n, c, d):
  773. nc = 1. * n / c
  774. return d * sc.beta(1.0 - nc, d + nc)
  775. n, c, d = np.asarray(n), np.asarray(c), np.asarray(d)
  776. return _lazywhere((c > n) & (n == n) & (d == d), (c, d, n),
  777. lambda c, d, n: __munp(n, c, d),
  778. np.nan)
  779. burr = burr_gen(a=0.0, name='burr')
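# Editor's illustrative sketch (not part of the original source): per the note in
# the docstring, the mean requires c > 1 and the variance c > 2.
#
#   >>> from scipy import stats
#   >>> stats.burr.stats(3.0, 2.0, moments='mv')   # finite mean and variance (c = 3)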
  780. class burr12_gen(rv_continuous):
  781. r"""A Burr (Type XII) continuous random variable.
  782. %(before_notes)s
  783. See Also
  784. --------
  785. fisk : a special case of either `burr` or `burr12` with ``d=1``
  786. burr : Burr Type III distribution
  787. Notes
  788. -----
  789. The probability density function for `burr12` is:
  790. .. math::
  791. f(x; c, d) = c d \frac{x^{c-1}}
  792. {(1 + x^c)^{d + 1}}
  793. for :math:`x >= 0` and :math:`c, d > 0`.
  794. `burr12` takes ``c`` and ``d`` as shape parameters for :math:`c`
  795. and :math:`d`.
  796. This is the PDF corresponding to the twelfth CDF given in Burr's list;
  797. specifically, it is equation (20) in Burr's paper [1]_.
  798. %(after_notes)s
799. The Burr type 12 distribution is also sometimes referred to as the
800. Singh-Maddala distribution; see the NIST reference [2]_.
  801. References
  802. ----------
  803. .. [1] Burr, I. W. "Cumulative frequency functions", Annals of
  804. Mathematical Statistics, 13(2), pp 215-232 (1942).
  805. .. [2] https://www.itl.nist.gov/div898/software/dataplot/refman2/auxillar/b12pdf.htm
  806. .. [3] "Burr distribution",
  807. https://en.wikipedia.org/wiki/Burr_distribution
  808. %(example)s
  809. """
  810. def _shape_info(self):
  811. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  812. id = _ShapeInfo("d", False, (0, np.inf), (False, False))
  813. return [ic, id]
  814. def _pdf(self, x, c, d):
  815. # burr12.pdf(x, c, d) = c * d * x**(c-1) * (1+x**(c))**(-d-1)
  816. return np.exp(self._logpdf(x, c, d))
  817. def _logpdf(self, x, c, d):
  818. return np.log(c) + np.log(d) + sc.xlogy(c - 1, x) + sc.xlog1py(-d-1, x**c)
  819. def _cdf(self, x, c, d):
  820. return -sc.expm1(self._logsf(x, c, d))
  821. def _logcdf(self, x, c, d):
  822. return sc.log1p(-(1 + x**c)**(-d))
  823. def _sf(self, x, c, d):
  824. return np.exp(self._logsf(x, c, d))
  825. def _logsf(self, x, c, d):
  826. return sc.xlog1py(-d, x**c)
  827. def _ppf(self, q, c, d):
  828. # The following is an implementation of
  829. # ((1 - q)**(-1.0/d) - 1)**(1.0/c)
  830. # that does a better job handling small values of q.
  831. return sc.expm1(-1/d * sc.log1p(-q))**(1/c)
  832. def _munp(self, n, c, d):
  833. nc = 1. * n / c
  834. return d * sc.beta(1.0 + nc, d - nc)
  835. burr12 = burr12_gen(a=0.0, name='burr12')
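# Editor's illustrative sketch (not part of the original source): the expm1/log1p
# form of `_ppf` above keeps the CDF/PPF round trip accurate for tiny q.
#
#   >>> from scipy import stats
#   >>> q = 1e-12
#   >>> x = stats.burr12.ppf(q, 2.0, 3.0)
#   >>> stats.burr12.cdf(x, 2.0, 3.0)    # approximately 1e-12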
  836. class fisk_gen(burr_gen):
  837. r"""A Fisk continuous random variable.
  838. The Fisk distribution is also known as the log-logistic distribution.
  839. %(before_notes)s
  840. See Also
  841. --------
  842. burr
  843. Notes
  844. -----
  845. The probability density function for `fisk` is:
  846. .. math::
  847. f(x, c) = \frac{c x^{c-1}}
  848. {(1 + x^c)^2}
  849. for :math:`x >= 0` and :math:`c > 0`.
  850. Please note that the above expression can be transformed into the following
  851. one, which is also commonly used:
  852. .. math::
  853. f(x, c) = \frac{c x^{-c-1}}
  854. {(1 + x^{-c})^2}
  855. `fisk` takes ``c`` as a shape parameter for :math:`c`.
  856. `fisk` is a special case of `burr` or `burr12` with ``d=1``.
  857. %(after_notes)s
  858. %(example)s
  859. """
  860. def _shape_info(self):
  861. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  862. def _pdf(self, x, c):
  863. # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
  864. return burr._pdf(x, c, 1.0)
  865. def _cdf(self, x, c):
  866. return burr._cdf(x, c, 1.0)
  867. def _sf(self, x, c):
  868. return burr._sf(x, c, 1.0)
  869. def _logpdf(self, x, c):
  870. # fisk.pdf(x, c) = c * x**(-c-1) * (1 + x**(-c))**(-2)
  871. return burr._logpdf(x, c, 1.0)
  872. def _logcdf(self, x, c):
  873. return burr._logcdf(x, c, 1.0)
  874. def _logsf(self, x, c):
  875. return burr._logsf(x, c, 1.0)
  876. def _ppf(self, x, c):
  877. return burr._ppf(x, c, 1.0)
  878. def _munp(self, n, c):
  879. return burr._munp(n, c, 1.0)
  880. def _stats(self, c):
  881. return burr._stats(c, 1.0)
  882. def _entropy(self, c):
  883. return 2 - np.log(c)
  884. fisk = fisk_gen(a=0.0, name='fisk')
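# Editor's illustrative sketch (not part of the original source): as stated above,
# `fisk` delegates to `burr` with d = 1.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> np.isclose(stats.fisk.pdf(1.5, 2.0), stats.burr.pdf(1.5, 2.0, 1.0))   # True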
  885. class cauchy_gen(rv_continuous):
  886. r"""A Cauchy continuous random variable.
  887. %(before_notes)s
  888. Notes
  889. -----
  890. The probability density function for `cauchy` is
  891. .. math::
  892. f(x) = \frac{1}{\pi (1 + x^2)}
  893. for a real number :math:`x`.
  894. %(after_notes)s
  895. %(example)s
  896. """
  897. def _shape_info(self):
  898. return []
  899. def _pdf(self, x):
  900. # cauchy.pdf(x) = 1 / (pi * (1 + x**2))
  901. return 1.0/np.pi/(1.0+x*x)
  902. def _cdf(self, x):
  903. return 0.5 + 1.0/np.pi*np.arctan(x)
  904. def _ppf(self, q):
  905. return np.tan(np.pi*q-np.pi/2.0)
  906. def _sf(self, x):
  907. return 0.5 - 1.0/np.pi*np.arctan(x)
  908. def _isf(self, q):
  909. return np.tan(np.pi/2.0-np.pi*q)
  910. def _stats(self):
  911. return np.nan, np.nan, np.nan, np.nan
  912. def _entropy(self):
  913. return np.log(4*np.pi)
  914. def _fitstart(self, data, args=None):
  915. # Initialize ML guesses using quartiles instead of moments.
  916. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  917. return p50, (p75 - p25)/2
  918. cauchy = cauchy_gen(name='cauchy')
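# Editor's illustrative sketch (not part of the original source): `fit` starts from
# the quartile-based guess in `_fitstart` above; the data here are hypothetical.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> rng = np.random.default_rng(0)
#   >>> samples = stats.cauchy.rvs(loc=3.0, scale=0.5, size=500, random_state=rng)
#   >>> loc, scale = stats.cauchy.fit(samples)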
  919. class chi_gen(rv_continuous):
  920. r"""A chi continuous random variable.
  921. %(before_notes)s
  922. Notes
  923. -----
  924. The probability density function for `chi` is:
  925. .. math::
  926. f(x, k) = \frac{1}{2^{k/2-1} \Gamma \left( k/2 \right)}
  927. x^{k-1} \exp \left( -x^2/2 \right)
  928. for :math:`x >= 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
  929. in the implementation). :math:`\Gamma` is the gamma function
  930. (`scipy.special.gamma`).
  931. Special cases of `chi` are:
  932. - ``chi(1, loc, scale)`` is equivalent to `halfnorm`
  933. - ``chi(2, 0, scale)`` is equivalent to `rayleigh`
  934. - ``chi(3, 0, scale)`` is equivalent to `maxwell`
  935. `chi` takes ``df`` as a shape parameter.
  936. %(after_notes)s
  937. %(example)s
  938. """
  939. def _shape_info(self):
  940. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  941. def _rvs(self, df, size=None, random_state=None):
  942. return np.sqrt(chi2.rvs(df, size=size, random_state=random_state))
  943. def _pdf(self, x, df):
  944. # x**(df-1) * exp(-x**2/2)
  945. # chi.pdf(x, df) = -------------------------
  946. # 2**(df/2-1) * gamma(df/2)
  947. return np.exp(self._logpdf(x, df))
  948. def _logpdf(self, x, df):
  949. l = np.log(2) - .5*np.log(2)*df - sc.gammaln(.5*df)
  950. return l + sc.xlogy(df - 1., x) - .5*x**2
  951. def _cdf(self, x, df):
  952. return sc.gammainc(.5*df, .5*x**2)
  953. def _sf(self, x, df):
  954. return sc.gammaincc(.5*df, .5*x**2)
  955. def _ppf(self, q, df):
  956. return np.sqrt(2*sc.gammaincinv(.5*df, q))
  957. def _isf(self, q, df):
  958. return np.sqrt(2*sc.gammainccinv(.5*df, q))
  959. def _stats(self, df):
  960. mu = np.sqrt(2)*np.exp(sc.gammaln(df/2.0+0.5)-sc.gammaln(df/2.0))
  961. mu2 = df - mu*mu
  962. g1 = (2*mu**3.0 + mu*(1-2*df))/np.asarray(np.power(mu2, 1.5))
  963. g2 = 2*df*(1.0-df)-6*mu**4 + 4*mu**2 * (2*df-1)
  964. g2 /= np.asarray(mu2**2.0)
  965. return mu, mu2, g1, g2
  966. chi = chi_gen(a=0.0, name='chi')
  967. class chi2_gen(rv_continuous):
  968. r"""A chi-squared continuous random variable.
  969. For the noncentral chi-square distribution, see `ncx2`.
  970. %(before_notes)s
  971. See Also
  972. --------
  973. ncx2
  974. Notes
  975. -----
  976. The probability density function for `chi2` is:
  977. .. math::
  978. f(x, k) = \frac{1}{2^{k/2} \Gamma \left( k/2 \right)}
  979. x^{k/2-1} \exp \left( -x/2 \right)
  980. for :math:`x > 0` and :math:`k > 0` (degrees of freedom, denoted ``df``
  981. in the implementation).
  982. `chi2` takes ``df`` as a shape parameter.
  983. The chi-squared distribution is a special case of the gamma
  984. distribution, with gamma parameters ``a = df/2``, ``loc = 0`` and
  985. ``scale = 2``.
  986. %(after_notes)s
  987. %(example)s
  988. """
  989. def _shape_info(self):
  990. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  991. def _rvs(self, df, size=None, random_state=None):
  992. return random_state.chisquare(df, size)
  993. def _pdf(self, x, df):
  994. # chi2.pdf(x, df) = 1 / (2*gamma(df/2)) * (x/2)**(df/2-1) * exp(-x/2)
  995. return np.exp(self._logpdf(x, df))
  996. def _logpdf(self, x, df):
  997. return sc.xlogy(df/2.-1, x) - x/2. - sc.gammaln(df/2.) - (np.log(2)*df)/2.
  998. def _cdf(self, x, df):
  999. return sc.chdtr(df, x)
  1000. def _sf(self, x, df):
  1001. return sc.chdtrc(df, x)
  1002. def _isf(self, p, df):
  1003. return sc.chdtri(df, p)
  1004. def _ppf(self, p, df):
  1005. return 2*sc.gammaincinv(df/2, p)
  1006. def _stats(self, df):
  1007. mu = df
  1008. mu2 = 2*df
  1009. g1 = 2*np.sqrt(2.0/df)
  1010. g2 = 12.0/df
  1011. return mu, mu2, g1, g2
  1012. chi2 = chi2_gen(a=0.0, name='chi2')
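# Editor's illustrative sketch (not part of the original source): the gamma special
# case stated in the docstring, a = df/2, loc = 0, scale = 2.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> np.isclose(stats.chi2.pdf(4.0, 5), stats.gamma.pdf(4.0, 2.5, scale=2))   # True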
  1013. class cosine_gen(rv_continuous):
  1014. r"""A cosine continuous random variable.
  1015. %(before_notes)s
  1016. Notes
  1017. -----
  1018. The cosine distribution is an approximation to the normal distribution.
  1019. The probability density function for `cosine` is:
  1020. .. math::
  1021. f(x) = \frac{1}{2\pi} (1+\cos(x))
  1022. for :math:`-\pi \le x \le \pi`.
  1023. %(after_notes)s
  1024. %(example)s
  1025. """
  1026. def _shape_info(self):
  1027. return []
  1028. def _pdf(self, x):
  1029. # cosine.pdf(x) = 1/(2*pi) * (1+cos(x))
  1030. return 1.0/2/np.pi*(1+np.cos(x))
  1031. def _logpdf(self, x):
  1032. c = np.cos(x)
  1033. return _lazywhere(c != -1, (c,),
  1034. lambda c: np.log1p(c) - np.log(2*np.pi),
  1035. fillvalue=-np.inf)
  1036. def _cdf(self, x):
  1037. return scu._cosine_cdf(x)
  1038. def _sf(self, x):
  1039. return scu._cosine_cdf(-x)
  1040. def _ppf(self, p):
  1041. return scu._cosine_invcdf(p)
  1042. def _isf(self, p):
  1043. return -scu._cosine_invcdf(p)
  1044. def _stats(self):
  1045. return 0.0, np.pi*np.pi/3.0-2.0, 0.0, -6.0*(np.pi**4-90)/(5.0*(np.pi*np.pi-6)**2)
  1046. def _entropy(self):
  1047. return np.log(4*np.pi)-1.0
  1048. cosine = cosine_gen(a=-np.pi, b=np.pi, name='cosine')
  1049. class dgamma_gen(rv_continuous):
  1050. r"""A double gamma continuous random variable.
  1051. %(before_notes)s
  1052. Notes
  1053. -----
  1054. The probability density function for `dgamma` is:
  1055. .. math::
  1056. f(x, a) = \frac{1}{2\Gamma(a)} |x|^{a-1} \exp(-|x|)
  1057. for a real number :math:`x` and :math:`a > 0`. :math:`\Gamma` is the
  1058. gamma function (`scipy.special.gamma`).
  1059. `dgamma` takes ``a`` as a shape parameter for :math:`a`.
  1060. %(after_notes)s
  1061. %(example)s
  1062. """
  1063. def _shape_info(self):
  1064. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  1065. def _rvs(self, a, size=None, random_state=None):
  1066. u = random_state.uniform(size=size)
  1067. gm = gamma.rvs(a, size=size, random_state=random_state)
  1068. return gm * np.where(u >= 0.5, 1, -1)
  1069. def _pdf(self, x, a):
  1070. # dgamma.pdf(x, a) = 1 / (2*gamma(a)) * abs(x)**(a-1) * exp(-abs(x))
  1071. ax = abs(x)
  1072. return 1.0/(2*sc.gamma(a))*ax**(a-1.0) * np.exp(-ax)
  1073. def _logpdf(self, x, a):
  1074. ax = abs(x)
  1075. return sc.xlogy(a - 1.0, ax) - ax - np.log(2) - sc.gammaln(a)
  1076. def _cdf(self, x, a):
  1077. fac = 0.5*sc.gammainc(a, abs(x))
  1078. return np.where(x > 0, 0.5 + fac, 0.5 - fac)
  1079. def _sf(self, x, a):
  1080. fac = 0.5*sc.gammainc(a, abs(x))
  1081. return np.where(x > 0, 0.5-fac, 0.5+fac)
  1082. def _ppf(self, q, a):
  1083. fac = sc.gammainccinv(a, 1-abs(2*q-1))
  1084. return np.where(q > 0.5, fac, -fac)
  1085. def _stats(self, a):
  1086. mu2 = a*(a+1.0)
  1087. return 0.0, mu2, 0.0, (a+2.0)*(a+3.0)/mu2-3.0
  1088. dgamma = dgamma_gen(name='dgamma')
  1089. class dweibull_gen(rv_continuous):
  1090. r"""A double Weibull continuous random variable.
  1091. %(before_notes)s
  1092. Notes
  1093. -----
  1094. The probability density function for `dweibull` is given by
  1095. .. math::
  1096. f(x, c) = c / 2 |x|^{c-1} \exp(-|x|^c)
  1097. for a real number :math:`x` and :math:`c > 0`.
  1098. `dweibull` takes ``c`` as a shape parameter for :math:`c`.
  1099. %(after_notes)s
  1100. %(example)s
  1101. """
  1102. def _shape_info(self):
  1103. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1104. def _rvs(self, c, size=None, random_state=None):
  1105. u = random_state.uniform(size=size)
  1106. w = weibull_min.rvs(c, size=size, random_state=random_state)
  1107. return w * (np.where(u >= 0.5, 1, -1))
  1108. def _pdf(self, x, c):
  1109. # dweibull.pdf(x, c) = c / 2 * abs(x)**(c-1) * exp(-abs(x)**c)
  1110. ax = abs(x)
  1111. Px = c / 2.0 * ax**(c-1.0) * np.exp(-ax**c)
  1112. return Px
  1113. def _logpdf(self, x, c):
  1114. ax = abs(x)
  1115. return np.log(c) - np.log(2.0) + sc.xlogy(c - 1.0, ax) - ax**c
  1116. def _cdf(self, x, c):
  1117. Cx1 = 0.5 * np.exp(-abs(x)**c)
  1118. return np.where(x > 0, 1 - Cx1, Cx1)
  1119. def _ppf(self, q, c):
  1120. fac = 2. * np.where(q <= 0.5, q, 1. - q)
  1121. fac = np.power(-np.log(fac), 1.0 / c)
  1122. return np.where(q > 0.5, fac, -fac)
  1123. def _munp(self, n, c):
  1124. return (1 - (n % 2)) * sc.gamma(1.0 + 1.0 * n / c)
1125. # Since we know that all odd moments are zero, return them at once.
  1126. # returning Nones from _stats makes the public stats call _munp
  1127. # so overall we're saving one or two gamma function evaluations here.
  1128. def _stats(self, c):
  1129. return 0, None, 0, None
  1130. dweibull = dweibull_gen(name='dweibull')
  1131. class expon_gen(rv_continuous):
  1132. r"""An exponential continuous random variable.
  1133. %(before_notes)s
  1134. Notes
  1135. -----
  1136. The probability density function for `expon` is:
  1137. .. math::
  1138. f(x) = \exp(-x)
  1139. for :math:`x \ge 0`.
  1140. %(after_notes)s
  1141. A common parameterization for `expon` is in terms of the rate parameter
  1142. ``lambda``, such that ``pdf = lambda * exp(-lambda * x)``. This
  1143. parameterization corresponds to using ``scale = 1 / lambda``.
  1144. The exponential distribution is a special case of the gamma
  1145. distributions, with gamma shape parameter ``a = 1``.
  1146. %(example)s
  1147. """
  1148. def _shape_info(self):
  1149. return []
  1150. def _rvs(self, size=None, random_state=None):
  1151. return random_state.standard_exponential(size)
  1152. def _pdf(self, x):
  1153. # expon.pdf(x) = exp(-x)
  1154. return np.exp(-x)
  1155. def _logpdf(self, x):
  1156. return -x
  1157. def _cdf(self, x):
  1158. return -sc.expm1(-x)
  1159. def _ppf(self, q):
  1160. return -sc.log1p(-q)
  1161. def _sf(self, x):
  1162. return np.exp(-x)
  1163. def _logsf(self, x):
  1164. return -x
  1165. def _isf(self, q):
  1166. return -np.log(q)
  1167. def _stats(self):
  1168. return 1.0, 1.0, 2.0, 6.0
  1169. def _entropy(self):
  1170. return 1.0
  1171. @_call_super_mom
  1172. @replace_notes_in_docstring(rv_continuous, notes="""\
  1173. When `method='MLE'`,
  1174. this function uses explicit formulas for the maximum likelihood
  1175. estimation of the exponential distribution parameters, so the
  1176. `optimizer`, `loc` and `scale` keyword arguments are
  1177. ignored.\n\n""")
  1178. def fit(self, data, *args, **kwds):
  1179. if len(args) > 0:
  1180. raise TypeError("Too many arguments.")
  1181. floc = kwds.pop('floc', None)
  1182. fscale = kwds.pop('fscale', None)
  1183. _remove_optimizer_parameters(kwds)
  1184. if floc is not None and fscale is not None:
  1185. # This check is for consistency with `rv_continuous.fit`.
  1186. raise ValueError("All parameters fixed. There is nothing to "
  1187. "optimize.")
  1188. data = np.asarray(data)
  1189. if not np.isfinite(data).all():
  1190. raise ValueError("The data contains non-finite values.")
  1191. data_min = data.min()
  1192. if floc is None:
  1193. # ML estimate of the location is the minimum of the data.
  1194. loc = data_min
  1195. else:
  1196. loc = floc
  1197. if data_min < loc:
  1198. # There are values that are less than the specified loc.
  1199. raise FitDataError("expon", lower=floc, upper=np.inf)
  1200. if fscale is None:
  1201. # ML estimate of the scale is the shifted mean.
  1202. scale = data.mean() - loc
  1203. else:
  1204. scale = fscale
  1205. # We expect the return values to be floating point, so ensure it
  1206. # by explicitly converting to float.
  1207. return float(loc), float(scale)
  1208. expon = expon_gen(a=0.0, name='expon')
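# Editor's illustrative sketch (not part of the original source): the closed-form
# MLE in `fit` above is loc = min(data) and scale = mean(data) - loc.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> samples = np.array([1.2, 0.7, 3.4, 0.9, 2.1])    # hypothetical data
#   >>> stats.expon.fit(samples)                          # (0.7, mean - 0.7)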
  1209. class exponnorm_gen(rv_continuous):
  1210. r"""An exponentially modified Normal continuous random variable.
  1211. Also known as the exponentially modified Gaussian distribution [1]_.
  1212. %(before_notes)s
  1213. Notes
  1214. -----
  1215. The probability density function for `exponnorm` is:
  1216. .. math::
  1217. f(x, K) = \frac{1}{2K} \exp\left(\frac{1}{2 K^2} - x / K \right)
  1218. \text{erfc}\left(-\frac{x - 1/K}{\sqrt{2}}\right)
  1219. where :math:`x` is a real number and :math:`K > 0`.
  1220. It can be thought of as the sum of a standard normal random variable
  1221. and an independent exponentially distributed random variable with rate
  1222. ``1/K``.
  1223. %(after_notes)s
  1224. An alternative parameterization of this distribution (for example, in
1225. the Wikipedia article [1]_) involves three parameters, :math:`\mu`,
  1226. :math:`\lambda` and :math:`\sigma`.
  1227. In the present parameterization this corresponds to having ``loc`` and
  1228. ``scale`` equal to :math:`\mu` and :math:`\sigma`, respectively, and
  1229. shape parameter :math:`K = 1/(\sigma\lambda)`.
  1230. .. versionadded:: 0.16.0
  1231. References
  1232. ----------
  1233. .. [1] Exponentially modified Gaussian distribution, Wikipedia,
  1234. https://en.wikipedia.org/wiki/Exponentially_modified_Gaussian_distribution
  1235. %(example)s
  1236. """
  1237. def _shape_info(self):
  1238. return [_ShapeInfo("K", False, (0, np.inf), (False, False))]
  1239. def _rvs(self, K, size=None, random_state=None):
  1240. expval = random_state.standard_exponential(size) * K
  1241. gval = random_state.standard_normal(size)
  1242. return expval + gval
  1243. def _pdf(self, x, K):
  1244. return np.exp(self._logpdf(x, K))
  1245. def _logpdf(self, x, K):
  1246. invK = 1.0 / K
  1247. exparg = invK * (0.5 * invK - x)
  1248. return exparg + _norm_logcdf(x - invK) - np.log(K)
  1249. def _cdf(self, x, K):
  1250. invK = 1.0 / K
  1251. expval = invK * (0.5 * invK - x)
  1252. logprod = expval + _norm_logcdf(x - invK)
  1253. return _norm_cdf(x) - np.exp(logprod)
  1254. def _sf(self, x, K):
  1255. invK = 1.0 / K
  1256. expval = invK * (0.5 * invK - x)
  1257. logprod = expval + _norm_logcdf(x - invK)
  1258. return _norm_cdf(-x) + np.exp(logprod)
  1259. def _stats(self, K):
  1260. K2 = K * K
  1261. opK2 = 1.0 + K2
  1262. skw = 2 * K**3 * opK2**(-1.5)
  1263. krt = 6.0 * K2 * K2 * opK2**(-2)
  1264. return K, opK2, skw, krt
  1265. exponnorm = exponnorm_gen(name='exponnorm')
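# Editor's illustrative sketch (not part of the original source) of the alternative
# parameterization noted above: mu, sigma, lambda map to loc = mu, scale = sigma,
# K = 1/(sigma*lambda).
#
#   >>> from scipy import stats
#   >>> mu, sigma, lam = 1.0, 2.0, 0.25
#   >>> rv = stats.exponnorm(1.0/(sigma*lam), loc=mu, scale=sigma)
#   >>> rv.mean()                         # mu + 1/lam = 5.0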
  1266. class exponweib_gen(rv_continuous):
  1267. r"""An exponentiated Weibull continuous random variable.
  1268. %(before_notes)s
  1269. See Also
  1270. --------
  1271. weibull_min, numpy.random.Generator.weibull
  1272. Notes
  1273. -----
  1274. The probability density function for `exponweib` is:
  1275. .. math::
  1276. f(x, a, c) = a c [1-\exp(-x^c)]^{a-1} \exp(-x^c) x^{c-1}
  1277. and its cumulative distribution function is:
  1278. .. math::
  1279. F(x, a, c) = [1-\exp(-x^c)]^a
  1280. for :math:`x > 0`, :math:`a > 0`, :math:`c > 0`.
  1281. `exponweib` takes :math:`a` and :math:`c` as shape parameters:
  1282. * :math:`a` is the exponentiation parameter,
  1283. with the special case :math:`a=1` corresponding to the
  1284. (non-exponentiated) Weibull distribution `weibull_min`.
  1285. * :math:`c` is the shape parameter of the non-exponentiated Weibull law.
  1286. %(after_notes)s
  1287. References
  1288. ----------
  1289. https://en.wikipedia.org/wiki/Exponentiated_Weibull_distribution
  1290. %(example)s
  1291. """
  1292. def _shape_info(self):
  1293. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  1294. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  1295. return [ia, ic]
  1296. def _pdf(self, x, a, c):
  1297. # exponweib.pdf(x, a, c) =
  1298. # a * c * (1-exp(-x**c))**(a-1) * exp(-x**c)*x**(c-1)
  1299. return np.exp(self._logpdf(x, a, c))
  1300. def _logpdf(self, x, a, c):
  1301. negxc = -x**c
  1302. exm1c = -sc.expm1(negxc)
  1303. logp = (np.log(a) + np.log(c) + sc.xlogy(a - 1.0, exm1c) +
  1304. negxc + sc.xlogy(c - 1.0, x))
  1305. return logp
  1306. def _cdf(self, x, a, c):
  1307. exm1c = -sc.expm1(-x**c)
  1308. return exm1c**a
  1309. def _ppf(self, q, a, c):
  1310. return (-sc.log1p(-q**(1.0/a)))**np.asarray(1.0/c)
  1311. exponweib = exponweib_gen(a=0.0, name='exponweib')
  1312. class exponpow_gen(rv_continuous):
  1313. r"""An exponential power continuous random variable.
  1314. %(before_notes)s
  1315. Notes
  1316. -----
  1317. The probability density function for `exponpow` is:
  1318. .. math::
  1319. f(x, b) = b x^{b-1} \exp(1 + x^b - \exp(x^b))
  1320. for :math:`x \ge 0`, :math:`b > 0`. Note that this is a different
  1321. distribution from the exponential power distribution that is also known
  1322. under the names "generalized normal" or "generalized Gaussian".
  1323. `exponpow` takes ``b`` as a shape parameter for :math:`b`.
  1324. %(after_notes)s
  1325. References
  1326. ----------
  1327. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Exponentialpower.pdf
  1328. %(example)s
  1329. """
  1330. def _shape_info(self):
  1331. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  1332. def _pdf(self, x, b):
  1333. # exponpow.pdf(x, b) = b * x**(b-1) * exp(1 + x**b - exp(x**b))
  1334. return np.exp(self._logpdf(x, b))
  1335. def _logpdf(self, x, b):
  1336. xb = x**b
  1337. f = 1 + np.log(b) + sc.xlogy(b - 1.0, x) + xb - np.exp(xb)
  1338. return f
  1339. def _cdf(self, x, b):
  1340. return -sc.expm1(-sc.expm1(x**b))
  1341. def _sf(self, x, b):
  1342. return np.exp(-sc.expm1(x**b))
  1343. def _isf(self, x, b):
  1344. return (sc.log1p(-np.log(x)))**(1./b)
  1345. def _ppf(self, q, b):
  1346. return pow(sc.log1p(-sc.log1p(-q)), 1.0/b)
  1347. exponpow = exponpow_gen(a=0.0, name='exponpow')
  1348. class fatiguelife_gen(rv_continuous):
  1349. r"""A fatigue-life (Birnbaum-Saunders) continuous random variable.
  1350. %(before_notes)s
  1351. Notes
  1352. -----
  1353. The probability density function for `fatiguelife` is:
  1354. .. math::
  1355. f(x, c) = \frac{x+1}{2c\sqrt{2\pi x^3}} \exp(-\frac{(x-1)^2}{2x c^2})
  1356. for :math:`x >= 0` and :math:`c > 0`.
  1357. `fatiguelife` takes ``c`` as a shape parameter for :math:`c`.
  1358. %(after_notes)s
  1359. References
  1360. ----------
  1361. .. [1] "Birnbaum-Saunders distribution",
  1362. https://en.wikipedia.org/wiki/Birnbaum-Saunders_distribution
  1363. %(example)s
  1364. """
  1365. _support_mask = rv_continuous._open_support_mask
  1366. def _shape_info(self):
  1367. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1368. def _rvs(self, c, size=None, random_state=None):
  1369. z = random_state.standard_normal(size)
  1370. x = 0.5*c*z
  1371. x2 = x*x
  1372. t = 1.0 + 2*x2 + 2*x*np.sqrt(1 + x2)
  1373. return t
  1374. def _pdf(self, x, c):
  1375. # fatiguelife.pdf(x, c) =
  1376. # (x+1) / (2*c*sqrt(2*pi*x**3)) * exp(-(x-1)**2/(2*x*c**2))
  1377. return np.exp(self._logpdf(x, c))
  1378. def _logpdf(self, x, c):
  1379. return (np.log(x+1) - (x-1)**2 / (2.0*x*c**2) - np.log(2*c) -
  1380. 0.5*(np.log(2*np.pi) + 3*np.log(x)))
  1381. def _cdf(self, x, c):
  1382. return _norm_cdf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
  1383. def _ppf(self, q, c):
  1384. tmp = c*sc.ndtri(q)
  1385. return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
  1386. def _sf(self, x, c):
  1387. return _norm_sf(1.0 / c * (np.sqrt(x) - 1.0/np.sqrt(x)))
  1388. def _isf(self, q, c):
  1389. tmp = -c*sc.ndtri(q)
  1390. return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2
  1391. def _stats(self, c):
1392. # NB: the formula for kurtosis on Wikipedia seems to have an error:
1393. # it's 40, not 41. At least it disagrees with the one from Wolfram
1394. # Alpha. The latter one, below, passes the tests, while the Wikipedia
1395. # one doesn't. The coefficients have not been re-derived from the
1396. # expressions for the raw moments.
  1397. c2 = c*c
  1398. mu = c2 / 2.0 + 1.0
  1399. den = 5.0 * c2 + 4.0
  1400. mu2 = c2*den / 4.0
  1401. g1 = 4 * c * (11*c2 + 6.0) / np.power(den, 1.5)
  1402. g2 = 6 * c2 * (93*c2 + 40.0) / den**2.0
  1403. return mu, mu2, g1, g2
  1404. fatiguelife = fatiguelife_gen(a=0.0, name='fatiguelife')
  1405. class foldcauchy_gen(rv_continuous):
  1406. r"""A folded Cauchy continuous random variable.
  1407. %(before_notes)s
  1408. Notes
  1409. -----
  1410. The probability density function for `foldcauchy` is:
  1411. .. math::
  1412. f(x, c) = \frac{1}{\pi (1+(x-c)^2)} + \frac{1}{\pi (1+(x+c)^2)}
  1413. for :math:`x \ge 0` and :math:`c \ge 0`.
  1414. `foldcauchy` takes ``c`` as a shape parameter for :math:`c`.
  1415. %(example)s
  1416. """
  1417. def _argcheck(self, c):
  1418. return c >= 0
  1419. def _shape_info(self):
  1420. return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
  1421. def _rvs(self, c, size=None, random_state=None):
  1422. return abs(cauchy.rvs(loc=c, size=size,
  1423. random_state=random_state))
  1424. def _pdf(self, x, c):
  1425. # foldcauchy.pdf(x, c) = 1/(pi*(1+(x-c)**2)) + 1/(pi*(1+(x+c)**2))
  1426. return 1.0/np.pi*(1.0/(1+(x-c)**2) + 1.0/(1+(x+c)**2))
  1427. def _cdf(self, x, c):
  1428. return 1.0/np.pi*(np.arctan(x-c) + np.arctan(x+c))
  1429. def _stats(self, c):
  1430. return np.inf, np.inf, np.nan, np.nan
  1431. foldcauchy = foldcauchy_gen(a=0.0, name='foldcauchy')
  1432. class f_gen(rv_continuous):
  1433. r"""An F continuous random variable.
  1434. For the noncentral F distribution, see `ncf`.
  1435. %(before_notes)s
  1436. See Also
  1437. --------
  1438. ncf
  1439. Notes
  1440. -----
  1441. The probability density function for `f` is:
  1442. .. math::
  1443. f(x, df_1, df_2) = \frac{df_2^{df_2/2} df_1^{df_1/2} x^{df_1 / 2-1}}
  1444. {(df_2+df_1 x)^{(df_1+df_2)/2}
  1445. B(df_1/2, df_2/2)}
  1446. for :math:`x > 0` and parameters :math:`df_1, df_2 > 0` .
  1447. `f` takes ``dfn`` and ``dfd`` as shape parameters.
  1448. %(after_notes)s
  1449. %(example)s
  1450. """
  1451. def _shape_info(self):
  1452. idfn = _ShapeInfo("dfn", False, (0, np.inf), (False, False))
  1453. idfd = _ShapeInfo("dfd", False, (0, np.inf), (False, False))
  1454. return [idfn, idfd]
  1455. def _rvs(self, dfn, dfd, size=None, random_state=None):
  1456. return random_state.f(dfn, dfd, size)
  1457. def _pdf(self, x, dfn, dfd):
  1458. # df2**(df2/2) * df1**(df1/2) * x**(df1/2-1)
  1459. # F.pdf(x, df1, df2) = --------------------------------------------
  1460. # (df2+df1*x)**((df1+df2)/2) * B(df1/2, df2/2)
  1461. return np.exp(self._logpdf(x, dfn, dfd))
  1462. def _logpdf(self, x, dfn, dfd):
  1463. n = 1.0 * dfn
  1464. m = 1.0 * dfd
  1465. lPx = (m/2 * np.log(m) + n/2 * np.log(n) + sc.xlogy(n/2 - 1, x)
  1466. - (((n+m)/2) * np.log(m + n*x) + sc.betaln(n/2, m/2)))
  1467. return lPx
  1468. def _cdf(self, x, dfn, dfd):
  1469. return sc.fdtr(dfn, dfd, x)
  1470. def _sf(self, x, dfn, dfd):
  1471. return sc.fdtrc(dfn, dfd, x)
  1472. def _ppf(self, q, dfn, dfd):
  1473. return sc.fdtri(dfn, dfd, q)
  1474. def _stats(self, dfn, dfd):
  1475. v1, v2 = 1. * dfn, 1. * dfd
  1476. v2_2, v2_4, v2_6, v2_8 = v2 - 2., v2 - 4., v2 - 6., v2 - 8.
  1477. mu = _lazywhere(
  1478. v2 > 2, (v2, v2_2),
  1479. lambda v2, v2_2: v2 / v2_2,
  1480. np.inf)
  1481. mu2 = _lazywhere(
  1482. v2 > 4, (v1, v2, v2_2, v2_4),
  1483. lambda v1, v2, v2_2, v2_4:
  1484. 2 * v2 * v2 * (v1 + v2_2) / (v1 * v2_2**2 * v2_4),
  1485. np.inf)
  1486. g1 = _lazywhere(
  1487. v2 > 6, (v1, v2_2, v2_4, v2_6),
  1488. lambda v1, v2_2, v2_4, v2_6:
  1489. (2 * v1 + v2_2) / v2_6 * np.sqrt(v2_4 / (v1 * (v1 + v2_2))),
  1490. np.nan)
  1491. g1 *= np.sqrt(8.)
  1492. g2 = _lazywhere(
  1493. v2 > 8, (g1, v2_6, v2_8),
  1494. lambda g1, v2_6, v2_8: (8 + g1 * g1 * v2_6) / v2_8,
  1495. np.nan)
  1496. g2 *= 3. / 2.
  1497. return mu, mu2, g1, g2
  1498. f = f_gen(a=0.0, name='f')
  1499. ## Folded Normal
  1500. ## abs(Z) where (Z is normal with mu=L and std=S so that c=abs(L)/S)
  1501. ##
1502. ## note: the 'regress' documentation has the scale parameter correct, but the
1503. ## first parameter it gives is a shape parameter, A = c * scale
  1504. ## Half-normal is folded normal with shape-parameter c=0.
  1505. class foldnorm_gen(rv_continuous):
  1506. r"""A folded normal continuous random variable.
  1507. %(before_notes)s
  1508. Notes
  1509. -----
  1510. The probability density function for `foldnorm` is:
  1511. .. math::
  1512. f(x, c) = \sqrt{2/\pi} cosh(c x) \exp(-\frac{x^2+c^2}{2})
  1513. for :math:`x \ge 0` and :math:`c \ge 0`.
  1514. `foldnorm` takes ``c`` as a shape parameter for :math:`c`.
  1515. %(after_notes)s
  1516. %(example)s
  1517. """
  1518. def _argcheck(self, c):
  1519. return c >= 0
  1520. def _shape_info(self):
  1521. return [_ShapeInfo("c", False, (0, np.inf), (True, False))]
  1522. def _rvs(self, c, size=None, random_state=None):
  1523. return abs(random_state.standard_normal(size) + c)
  1524. def _pdf(self, x, c):
  1525. # foldnormal.pdf(x, c) = sqrt(2/pi) * cosh(c*x) * exp(-(x**2+c**2)/2)
  1526. return _norm_pdf(x + c) + _norm_pdf(x-c)
  1527. def _cdf(self, x, c):
  1528. return _norm_cdf(x-c) + _norm_cdf(x+c) - 1.0
  1529. def _stats(self, c):
  1530. # Regina C. Elandt, Technometrics 3, 551 (1961)
  1531. # https://www.jstor.org/stable/1266561
  1532. #
  1533. c2 = c*c
  1534. expfac = np.exp(-0.5*c2) / np.sqrt(2.*np.pi)
  1535. mu = 2.*expfac + c * sc.erf(c/np.sqrt(2))
  1536. mu2 = c2 + 1 - mu*mu
  1537. g1 = 2. * (mu*mu*mu - c2*mu - expfac)
  1538. g1 /= np.power(mu2, 1.5)
  1539. g2 = c2 * (c2 + 6.) + 3 + 8.*expfac*mu
  1540. g2 += (2. * (c2 - 3.) - 3. * mu**2) * mu**2
  1541. g2 = g2 / mu2**2.0 - 3.
  1542. return mu, mu2, g1, g2
  1543. foldnorm = foldnorm_gen(a=0.0, name='foldnorm')
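# Editor's illustrative sketch (not part of the original source): as noted in the
# comment above the class, c = 0 recovers the half-normal distribution.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> np.isclose(stats.foldnorm.pdf(0.7, 0.0), stats.halfnorm.pdf(0.7))   # True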
  1544. class weibull_min_gen(rv_continuous):
  1545. r"""Weibull minimum continuous random variable.
  1546. The Weibull Minimum Extreme Value distribution, from extreme value theory
  1547. (Fisher-Gnedenko theorem), is also often simply called the Weibull
  1548. distribution. It arises as the limiting distribution of the rescaled
  1549. minimum of iid random variables.
  1550. %(before_notes)s
  1551. See Also
  1552. --------
  1553. weibull_max, numpy.random.Generator.weibull, exponweib
  1554. Notes
  1555. -----
  1556. The probability density function for `weibull_min` is:
  1557. .. math::
  1558. f(x, c) = c x^{c-1} \exp(-x^c)
  1559. for :math:`x > 0`, :math:`c > 0`.
  1560. `weibull_min` takes ``c`` as a shape parameter for :math:`c`.
1561. (named :math:`k` in the Wikipedia article and :math:`a` in
1562. ``numpy.random.weibull``). Special shape values are :math:`c=1` and
1563. :math:`c=2`, where the Weibull distribution reduces to the `expon` and
1564. `rayleigh` distributions, respectively.
  1565. %(after_notes)s
  1566. References
  1567. ----------
  1568. https://en.wikipedia.org/wiki/Weibull_distribution
  1569. https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
  1570. %(example)s
  1571. """
  1572. def _shape_info(self):
  1573. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1574. def _pdf(self, x, c):
  1575. # weibull_min.pdf(x, c) = c * x**(c-1) * exp(-x**c)
  1576. return c*pow(x, c-1)*np.exp(-pow(x, c))
  1577. def _logpdf(self, x, c):
  1578. return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c)
  1579. def _cdf(self, x, c):
  1580. return -sc.expm1(-pow(x, c))
  1581. def _sf(self, x, c):
  1582. return np.exp(-pow(x, c))
  1583. def _logsf(self, x, c):
  1584. return -pow(x, c)
  1585. def _ppf(self, q, c):
  1586. return pow(-sc.log1p(-q), 1.0/c)
  1587. def _munp(self, n, c):
  1588. return sc.gamma(1.0+n*1.0/c)
  1589. def _entropy(self, c):
  1590. return -_EULER / c - np.log(c) + _EULER + 1
  1591. @extend_notes_in_docstring(rv_continuous, notes="""\
  1592. If ``method='mm'``, parameters fixed by the user are respected, and the
  1593. remaining parameters are used to match distribution and sample moments
  1594. where possible. For example, if the user fixes the location with
  1595. ``floc``, the parameters will only match the distribution skewness and
  1596. variance to the sample skewness and variance; no attempt will be made
  1597. to match the means or minimize a norm of the errors.
  1598. \n\n""")
  1599. def fit(self, data, *args, **kwds):
  1600. if kwds.pop('superfit', False):
  1601. return super().fit(data, *args, **kwds)
  1602. # this extracts fixed shape, location, and scale however they
  1603. # are specified, and also leaves them in `kwds`
  1604. data, fc, floc, fscale = _check_fit_input_parameters(self, data,
  1605. args, kwds)
  1606. method = kwds.get("method", "mle").lower()
  1607. # See https://en.wikipedia.org/wiki/Weibull_distribution#Moments for
  1608. # moment formulas.
  1609. def skew(c):
  1610. gamma1 = sc.gamma(1+1/c)
  1611. gamma2 = sc.gamma(1+2/c)
  1612. gamma3 = sc.gamma(1+3/c)
  1613. num = 2 * gamma1**3 - 3*gamma1*gamma2 + gamma3
  1614. den = (gamma2 - gamma1**2)**(3/2)
  1615. return num/den
1616. # For c in [1e2, 3e4], the population skewness appears to approach an
1617. # asymptote near -1.139, but past c > 3e4, skewness begins to vary
  1618. # wildly, and MoM won't provide a good guess. Get out early.
  1619. s = stats.skew(data)
  1620. max_c = 1e4
  1621. s_min = skew(max_c)
  1622. if s < s_min and method != "mm" and fc is None and not args:
  1623. return super().fit(data, *args, **kwds)
  1624. # If method is method of moments, we don't need the user's guesses.
  1625. # Otherwise, extract the guesses from args and kwds.
  1626. if method == "mm":
  1627. c, loc, scale = None, None, None
  1628. else:
  1629. c = args[0] if len(args) else None
  1630. loc = kwds.pop('loc', None)
  1631. scale = kwds.pop('scale', None)
  1632. if fc is None and c is None: # not fixed and no guess: use MoM
  1633. # Solve for c that matches sample distribution skewness to sample
  1634. # skewness.
1635. # We start having numerical issues with `weibull_min` for
1636. # parameters outside this range - and not just in this method.
1637. # We could probably improve the situation by doing everything
1638. # in log space, but that is for another time.
  1639. c = root_scalar(lambda c: skew(c) - s, bracket=[0.02, max_c],
  1640. method='bisect').root
  1641. elif fc is not None: # fixed: use it
  1642. c = fc
  1643. if fscale is None and scale is None:
  1644. v = np.var(data)
  1645. scale = np.sqrt(v / (sc.gamma(1+2/c) - sc.gamma(1+1/c)**2))
  1646. elif fscale is not None:
  1647. scale = fscale
  1648. if floc is None and loc is None:
  1649. m = np.mean(data)
  1650. loc = m - scale*sc.gamma(1 + 1/c)
  1651. elif floc is not None:
  1652. loc = floc
  1653. if method == 'mm':
  1654. return c, loc, scale
  1655. else:
  1656. # At this point, parameter "guesses" may equal the fixed parameters
  1657. # in kwds. No harm in passing them as guesses, too.
  1658. return super().fit(data, c, loc=loc, scale=scale, **kwds)
  1659. weibull_min = weibull_min_gen(a=0.0, name='weibull_min')
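# Editor's illustrative sketch (not part of the original source) of the moment-
# matching branch of `fit` above; the data here are hypothetical.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> rng = np.random.default_rng(1)
#   >>> samples = stats.weibull_min.rvs(1.5, scale=2.0, size=2000, random_state=rng)
#   >>> c, loc, scale = stats.weibull_min.fit(samples, method="mm")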
  1660. class truncweibull_min_gen(rv_continuous):
  1661. r"""A doubly truncated Weibull minimum continuous random variable.
  1662. %(before_notes)s
  1663. See Also
  1664. --------
  1665. weibull_min, truncexpon
  1666. Notes
  1667. -----
  1668. The probability density function for `truncweibull_min` is:
  1669. .. math::
  1670. f(x, a, b, c) = \frac{c x^{c-1} \exp(-x^c)}{\exp(-a^c) - \exp(-b^c)}
  1671. for :math:`a < x <= b`, :math:`0 \le a < b` and :math:`c > 0`.
  1672. `truncweibull_min` takes :math:`a`, :math:`b`, and :math:`c` as shape
  1673. parameters.
  1674. Notice that the truncation values, :math:`a` and :math:`b`, are defined in
  1675. standardized form:
  1676. .. math::
  1677. a = (u_l - loc)/scale
  1678. b = (u_r - loc)/scale
  1679. where :math:`u_l` and :math:`u_r` are the specific left and right
  1680. truncation values, respectively. In other words, the support of the
  1681. distribution becomes :math:`(a*scale + loc) < x <= (b*scale + loc)` when
  1682. :math:`loc` and/or :math:`scale` are provided.
  1683. %(after_notes)s
  1684. References
  1685. ----------
  1686. .. [1] Rinne, H. "The Weibull Distribution: A Handbook". CRC Press (2009).
  1687. %(example)s
  1688. """
  1689. def _argcheck(self, c, a, b):
  1690. return (a >= 0.) & (b > a) & (c > 0.)
  1691. def _shape_info(self):
  1692. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  1693. ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
  1694. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  1695. return [ic, ia, ib]
  1696. def _fitstart(self, data):
  1697. # Arbitrary, but default a=b=c=1 is not valid
  1698. return super()._fitstart(data, args=(1, 0, 1))
  1699. def _get_support(self, c, a, b):
  1700. return a, b
  1701. def _pdf(self, x, c, a, b):
  1702. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1703. return (c * pow(x, c-1) * np.exp(-pow(x, c))) / denum
  1704. def _logpdf(self, x, c, a, b):
  1705. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1706. return np.log(c) + sc.xlogy(c - 1, x) - pow(x, c) - logdenum
  1707. def _cdf(self, x, c, a, b):
  1708. num = (np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
  1709. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1710. return num / denum
  1711. def _logcdf(self, x, c, a, b):
  1712. lognum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(x, c)))
  1713. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1714. return lognum - logdenum
  1715. def _sf(self, x, c, a, b):
  1716. num = (np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
  1717. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1718. return num / denum
  1719. def _logsf(self, x, c, a, b):
  1720. lognum = np.log(np.exp(-pow(x, c)) - np.exp(-pow(b, c)))
  1721. logdenum = np.log(np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1722. return lognum - logdenum
  1723. def _isf(self, q, c, a, b):
  1724. return pow(
  1725. -np.log((1 - q) * np.exp(-pow(b, c)) + q * np.exp(-pow(a, c))), 1/c
  1726. )
  1727. def _ppf(self, q, c, a, b):
  1728. return pow(
  1729. -np.log((1 - q) * np.exp(-pow(a, c)) + q * np.exp(-pow(b, c))), 1/c
  1730. )
  1731. def _munp(self, n, c, a, b):
  1732. gamma_fun = sc.gamma(n/c + 1.) * (
  1733. sc.gammainc(n/c + 1., pow(b, c)) - sc.gammainc(n/c + 1., pow(a, c))
  1734. )
  1735. denum = (np.exp(-pow(a, c)) - np.exp(-pow(b, c)))
  1736. return gamma_fun / denum
  1737. truncweibull_min = truncweibull_min_gen(name='truncweibull_min')
  1738. class weibull_max_gen(rv_continuous):
  1739. r"""Weibull maximum continuous random variable.
  1740. The Weibull Maximum Extreme Value distribution, from extreme value theory
1741. (Fisher-Gnedenko theorem), is the limiting distribution of the rescaled
1742. maximum of iid random variables. It is the distribution of ``-X``
1743. when ``X`` follows the `weibull_min` distribution.
  1744. %(before_notes)s
  1745. See Also
  1746. --------
  1747. weibull_min
  1748. Notes
  1749. -----
  1750. The probability density function for `weibull_max` is:
  1751. .. math::
  1752. f(x, c) = c (-x)^{c-1} \exp(-(-x)^c)
  1753. for :math:`x < 0`, :math:`c > 0`.
  1754. `weibull_max` takes ``c`` as a shape parameter for :math:`c`.
  1755. %(after_notes)s
  1756. References
  1757. ----------
  1758. https://en.wikipedia.org/wiki/Weibull_distribution
  1759. https://en.wikipedia.org/wiki/Fisher-Tippett-Gnedenko_theorem
  1760. %(example)s
  1761. """
  1762. def _shape_info(self):
  1763. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1764. def _pdf(self, x, c):
  1765. # weibull_max.pdf(x, c) = c * (-x)**(c-1) * exp(-(-x)**c)
  1766. return c*pow(-x, c-1)*np.exp(-pow(-x, c))
  1767. def _logpdf(self, x, c):
  1768. return np.log(c) + sc.xlogy(c-1, -x) - pow(-x, c)
  1769. def _cdf(self, x, c):
  1770. return np.exp(-pow(-x, c))
  1771. def _logcdf(self, x, c):
  1772. return -pow(-x, c)
  1773. def _sf(self, x, c):
  1774. return -sc.expm1(-pow(-x, c))
  1775. def _ppf(self, q, c):
  1776. return -pow(-np.log(q), 1.0/c)
  1777. def _munp(self, n, c):
  1778. val = sc.gamma(1.0+n*1.0/c)
  1779. if int(n) % 2:
  1780. sgn = -1
  1781. else:
  1782. sgn = 1
  1783. return sgn * val
  1784. def _entropy(self, c):
  1785. return -_EULER / c - np.log(c) + _EULER + 1
  1786. weibull_max = weibull_max_gen(b=0.0, name='weibull_max')
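# Editor's illustrative sketch (not part of the original source): `weibull_max` is
# the distribution of -X for X ~ `weibull_min`, so its CDF equals weibull_min's SF.
#
#   >>> import numpy as np
#   >>> from scipy import stats
#   >>> np.isclose(stats.weibull_max.cdf(-1.2, 2.0), stats.weibull_min.sf(1.2, 2.0))   # True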
  1787. class genlogistic_gen(rv_continuous):
  1788. r"""A generalized logistic continuous random variable.
  1789. %(before_notes)s
  1790. Notes
  1791. -----
  1792. The probability density function for `genlogistic` is:
  1793. .. math::
  1794. f(x, c) = c \frac{\exp(-x)}
  1795. {(1 + \exp(-x))^{c+1}}
1796. for real :math:`x` and :math:`c > 0`.
  1797. `genlogistic` takes ``c`` as a shape parameter for :math:`c`.
  1798. %(after_notes)s
  1799. %(example)s
  1800. """
  1801. def _shape_info(self):
  1802. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  1803. def _pdf(self, x, c):
  1804. # genlogistic.pdf(x, c) = c * exp(-x) / (1 + exp(-x))**(c+1)
  1805. return np.exp(self._logpdf(x, c))
  1806. def _logpdf(self, x, c):
  1807. # Two mathematically equivalent expressions for log(pdf(x, c)):
  1808. # log(pdf(x, c)) = log(c) - x - (c + 1)*log(1 + exp(-x))
  1809. # = log(c) + c*x - (c + 1)*log(1 + exp(x))
  1810. mult = -(c - 1) * (x < 0) - 1
  1811. absx = np.abs(x)
  1812. return np.log(c) + mult*absx - (c+1) * sc.log1p(np.exp(-absx))
  1813. def _cdf(self, x, c):
  1814. Cx = (1+np.exp(-x))**(-c)
  1815. return Cx
  1816. def _ppf(self, q, c):
  1817. vals = -np.log(pow(q, -1.0/c)-1)
  1818. return vals
  1819. def _stats(self, c):
  1820. mu = _EULER + sc.psi(c)
  1821. mu2 = np.pi*np.pi/6.0 + sc.zeta(2, c)
  1822. g1 = -2*sc.zeta(3, c) + 2*_ZETA3
  1823. g1 /= np.power(mu2, 1.5)
  1824. g2 = np.pi**4/15.0 + 6*sc.zeta(4, c)
  1825. g2 /= mu2**2.0
  1826. return mu, mu2, g1, g2
  1827. genlogistic = genlogistic_gen(name='genlogistic')
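# Illustrative sketch (not part of the original source): for c = 1 the
# generalized logistic density above reduces to the ordinary logistic
# density, which can be checked through the public scipy.stats API:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(-3, 3, 7)
# >>> np.allclose(stats.genlogistic.pdf(x, c=1), stats.logistic.pdf(x))
# True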
  1828. class genpareto_gen(rv_continuous):
  1829. r"""A generalized Pareto continuous random variable.
  1830. %(before_notes)s
  1831. Notes
  1832. -----
  1833. The probability density function for `genpareto` is:
  1834. .. math::
  1835. f(x, c) = (1 + c x)^{-1 - 1/c}
  1836. defined for :math:`x \ge 0` if :math:`c \ge 0`, and for
  1837. :math:`0 \le x \le -1/c` if :math:`c < 0`.
  1838. `genpareto` takes ``c`` as a shape parameter for :math:`c`.
  1839. For :math:`c=0`, `genpareto` reduces to the exponential
  1840. distribution, `expon`:
  1841. .. math::
  1842. f(x, 0) = \exp(-x)
  1843. For :math:`c=-1`, `genpareto` is uniform on ``[0, 1]``:
  1844. .. math::
  1845. f(x, -1) = 1
  1846. %(after_notes)s
  1847. %(example)s
  1848. """
  1849. def _argcheck(self, c):
  1850. return np.isfinite(c)
  1851. def _shape_info(self):
  1852. return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
  1853. def _get_support(self, c):
  1854. c = np.asarray(c)
  1855. b = _lazywhere(c < 0, (c,),
  1856. lambda c: -1. / c,
  1857. np.inf)
  1858. a = np.where(c >= 0, self.a, self.a)
  1859. return a, b
  1860. def _pdf(self, x, c):
  1861. # genpareto.pdf(x, c) = (1 + c * x)**(-1 - 1/c)
  1862. return np.exp(self._logpdf(x, c))
  1863. def _logpdf(self, x, c):
  1864. return _lazywhere((x == x) & (c != 0), (x, c),
  1865. lambda x, c: -sc.xlog1py(c + 1., c*x) / c,
  1866. -x)
  1867. def _cdf(self, x, c):
  1868. return -sc.inv_boxcox1p(-x, -c)
  1869. def _sf(self, x, c):
  1870. return sc.inv_boxcox(-x, -c)
  1871. def _logsf(self, x, c):
  1872. return _lazywhere((x == x) & (c != 0), (x, c),
  1873. lambda x, c: -sc.log1p(c*x) / c,
  1874. -x)
  1875. def _ppf(self, q, c):
  1876. return -sc.boxcox1p(-q, -c)
  1877. def _isf(self, q, c):
  1878. return -sc.boxcox(q, -c)
  1879. def _stats(self, c, moments='mv'):
  1880. if 'm' not in moments:
  1881. m = None
  1882. else:
  1883. m = _lazywhere(c < 1, (c,),
  1884. lambda xi: 1/(1 - xi),
  1885. np.inf)
  1886. if 'v' not in moments:
  1887. v = None
  1888. else:
  1889. v = _lazywhere(c < 1/2, (c,),
  1890. lambda xi: 1 / (1 - xi)**2 / (1 - 2*xi),
  1891. np.nan)
  1892. if 's' not in moments:
  1893. s = None
  1894. else:
  1895. s = _lazywhere(c < 1/3, (c,),
  1896. lambda xi: 2 * (1 + xi) * np.sqrt(1 - 2*xi) /
  1897. (1 - 3*xi),
  1898. np.nan)
  1899. if 'k' not in moments:
  1900. k = None
  1901. else:
  1902. k = _lazywhere(c < 1/4, (c,),
  1903. lambda xi: 3 * (1 - 2*xi) * (2*xi**2 + xi + 3) /
  1904. (1 - 3*xi) / (1 - 4*xi) - 3,
  1905. np.nan)
  1906. return m, v, s, k
  1907. def _munp(self, n, c):
  1908. def __munp(n, c):
  1909. val = 0.0
  1910. k = np.arange(0, n + 1)
  1911. for ki, cnk in zip(k, sc.comb(n, k)):
  1912. val = val + cnk * (-1) ** ki / (1.0 - c * ki)
  1913. return np.where(c * n < 1, val * (-1.0 / c) ** n, np.inf)
  1914. return _lazywhere(c != 0, (c,),
  1915. lambda c: __munp(n, c),
  1916. sc.gamma(n + 1))
  1917. def _entropy(self, c):
  1918. return 1. + c
  1919. genpareto = genpareto_gen(a=0.0, name='genpareto')
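# Illustrative sketch (not part of the original source): the special cases
# quoted in the docstring, c = 0 (exponential) and c = -1 (uniform on
# [0, 1]), checked through the public scipy.stats API:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0.1, 0.9, 5)
# >>> np.allclose(stats.genpareto.pdf(x, c=0), stats.expon.pdf(x))
# True
# >>> np.allclose(stats.genpareto.pdf(x, c=-1), stats.uniform.pdf(x))
# True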
  1920. class genexpon_gen(rv_continuous):
  1921. r"""A generalized exponential continuous random variable.
  1922. %(before_notes)s
  1923. Notes
  1924. -----
  1925. The probability density function for `genexpon` is:
  1926. .. math::
  1927. f(x, a, b, c) = (a + b (1 - \exp(-c x)))
  1928. \exp(-a x - b x + \frac{b}{c} (1-\exp(-c x)))
  1929. for :math:`x \ge 0`, :math:`a, b, c > 0`.
  1930. `genexpon` takes :math:`a`, :math:`b` and :math:`c` as shape parameters.
  1931. %(after_notes)s
  1932. References
  1933. ----------
  1934. H.K. Ryu, "An Extension of Marshall and Olkin's Bivariate Exponential
  1935. Distribution", Journal of the American Statistical Association, 1993.
1936. N. Balakrishnan and Asit P. Basu (eds.), "The Exponential Distribution:
1937. Theory, Methods and Applications".
  1938. %(example)s
  1939. """
  1940. def _shape_info(self):
  1941. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  1942. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  1943. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  1944. return [ia, ib, ic]
  1945. def _pdf(self, x, a, b, c):
  1946. # genexpon.pdf(x, a, b, c) = (a + b * (1 - exp(-c*x))) * \
  1947. # exp(-a*x - b*x + b/c * (1-exp(-c*x)))
  1948. return (a + b*(-sc.expm1(-c*x)))*np.exp((-a-b)*x +
  1949. b*(-sc.expm1(-c*x))/c)
  1950. def _logpdf(self, x, a, b, c):
  1951. return np.log(a+b*(-sc.expm1(-c*x))) + (-a-b)*x+b*(-sc.expm1(-c*x))/c
  1952. def _cdf(self, x, a, b, c):
  1953. return -sc.expm1((-a-b)*x + b*(-sc.expm1(-c*x))/c)
  1954. def _sf(self, x, a, b, c):
  1955. return np.exp((-a-b)*x + b*(-sc.expm1(-c*x))/c)
  1956. genexpon = genexpon_gen(a=0.0, name='genexpon')
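# Illustrative sketch (not part of the original source): the survival
# function implemented above has the closed form
# exp(-(a + b)*x + b*(1 - exp(-c*x))/c), which can be checked numerically
# through the public scipy.stats API:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> a, b, c, x = 1.5, 2.0, 0.75, 1.2
# >>> sf_closed = np.exp(-(a + b)*x + b*(1 - np.exp(-c*x))/c)
# >>> np.isclose(stats.genexpon.sf(x, a, b, c), sf_closed)
# True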
  1957. class genextreme_gen(rv_continuous):
  1958. r"""A generalized extreme value continuous random variable.
  1959. %(before_notes)s
  1960. See Also
  1961. --------
  1962. gumbel_r
  1963. Notes
  1964. -----
  1965. For :math:`c=0`, `genextreme` is equal to `gumbel_r` with
  1966. probability density function
  1967. .. math::
  1968. f(x) = \exp(-\exp(-x)) \exp(-x),
  1969. where :math:`-\infty < x < \infty`.
  1970. For :math:`c \ne 0`, the probability density function for `genextreme` is:
  1971. .. math::
  1972. f(x, c) = \exp(-(1-c x)^{1/c}) (1-c x)^{1/c-1},
  1973. where :math:`-\infty < x \le 1/c` if :math:`c > 0` and
  1974. :math:`1/c \le x < \infty` if :math:`c < 0`.
  1975. Note that several sources and software packages use the opposite
  1976. convention for the sign of the shape parameter :math:`c`.
  1977. `genextreme` takes ``c`` as a shape parameter for :math:`c`.
  1978. %(after_notes)s
  1979. %(example)s
  1980. """
  1981. def _argcheck(self, c):
  1982. return np.isfinite(c)
  1983. def _shape_info(self):
  1984. return [_ShapeInfo("c", False, (-np.inf, np.inf), (False, False))]
  1985. def _get_support(self, c):
  1986. _b = np.where(c > 0, 1.0 / np.maximum(c, _XMIN), np.inf)
  1987. _a = np.where(c < 0, 1.0 / np.minimum(c, -_XMIN), -np.inf)
  1988. return _a, _b
  1989. def _loglogcdf(self, x, c):
  1990. # Returns log(-log(cdf(x, c)))
  1991. return _lazywhere((x == x) & (c != 0), (x, c),
  1992. lambda x, c: sc.log1p(-c*x)/c, -x)
  1993. def _pdf(self, x, c):
  1994. # genextreme.pdf(x, c) =
  1995. # exp(-exp(-x))*exp(-x), for c==0
  1996. # exp(-(1-c*x)**(1/c))*(1-c*x)**(1/c-1), for x \le 1/c, c > 0
  1997. return np.exp(self._logpdf(x, c))
  1998. def _logpdf(self, x, c):
  1999. cx = _lazywhere((x == x) & (c != 0), (x, c), lambda x, c: c*x, 0.0)
  2000. logex2 = sc.log1p(-cx)
  2001. logpex2 = self._loglogcdf(x, c)
  2002. pex2 = np.exp(logpex2)
  2003. # Handle special cases
  2004. np.putmask(logpex2, (c == 0) & (x == -np.inf), 0.0)
  2005. logpdf = _lazywhere(~((cx == 1) | (cx == -np.inf)),
  2006. (pex2, logpex2, logex2),
  2007. lambda pex2, lpex2, lex2: -pex2 + lpex2 - lex2,
  2008. fillvalue=-np.inf)
  2009. np.putmask(logpdf, (c == 1) & (x == 1), 0.0)
  2010. return logpdf
  2011. def _logcdf(self, x, c):
  2012. return -np.exp(self._loglogcdf(x, c))
  2013. def _cdf(self, x, c):
  2014. return np.exp(self._logcdf(x, c))
  2015. def _sf(self, x, c):
  2016. return -sc.expm1(self._logcdf(x, c))
  2017. def _ppf(self, q, c):
  2018. x = -np.log(-np.log(q))
  2019. return _lazywhere((x == x) & (c != 0), (x, c),
  2020. lambda x, c: -sc.expm1(-c * x) / c, x)
  2021. def _isf(self, q, c):
  2022. x = -np.log(-sc.log1p(-q))
  2023. return _lazywhere((x == x) & (c != 0), (x, c),
  2024. lambda x, c: -sc.expm1(-c * x) / c, x)
  2025. def _stats(self, c):
  2026. g = lambda n: sc.gamma(n*c + 1)
  2027. g1 = g(1)
  2028. g2 = g(2)
  2029. g3 = g(3)
  2030. g4 = g(4)
  2031. g2mg12 = np.where(abs(c) < 1e-7, (c*np.pi)**2.0/6.0, g2-g1**2.0)
  2032. gam2k = np.where(abs(c) < 1e-7, np.pi**2.0/6.0,
  2033. sc.expm1(sc.gammaln(2.0*c+1.0)-2*sc.gammaln(c + 1.0))/c**2.0)
  2034. eps = 1e-14
  2035. gamk = np.where(abs(c) < eps, -_EULER, sc.expm1(sc.gammaln(c + 1))/c)
  2036. m = np.where(c < -1.0, np.nan, -gamk)
  2037. v = np.where(c < -0.5, np.nan, g1**2.0*gam2k)
  2038. # skewness
  2039. sk1 = _lazywhere(c >= -1./3,
  2040. (c, g1, g2, g3, g2mg12),
  2041. lambda c, g1, g2, g3, g2gm12:
  2042. np.sign(c)*(-g3 + (g2 + 2*g2mg12)*g1)/g2mg12**1.5,
  2043. fillvalue=np.nan)
  2044. sk = np.where(abs(c) <= eps**0.29, 12*np.sqrt(6)*_ZETA3/np.pi**3, sk1)
  2045. # kurtosis
  2046. ku1 = _lazywhere(c >= -1./4,
  2047. (g1, g2, g3, g4, g2mg12),
  2048. lambda g1, g2, g3, g4, g2mg12:
  2049. (g4 + (-4*g3 + 3*(g2 + g2mg12)*g1)*g1)/g2mg12**2,
  2050. fillvalue=np.nan)
  2051. ku = np.where(abs(c) <= (eps)**0.23, 12.0/5.0, ku1-3.0)
  2052. return m, v, sk, ku
  2053. def _fitstart(self, data):
  2054. # This is better than the default shape of (1,).
  2055. g = _skew(data)
  2056. if g < 0:
  2057. a = 0.5
  2058. else:
  2059. a = -0.5
  2060. return super()._fitstart(data, args=(a,))
  2061. def _munp(self, n, c):
  2062. k = np.arange(0, n+1)
  2063. vals = 1.0/c**n * np.sum(
  2064. sc.comb(n, k) * (-1)**k * sc.gamma(c*k + 1),
  2065. axis=0)
  2066. return np.where(c*n > -1, vals, np.inf)
  2067. def _entropy(self, c):
  2068. return _EULER*(1 - c) + 1
  2069. genextreme = genextreme_gen(name='genextreme')
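# Illustrative sketch (not part of the original source): as stated in the
# docstring, c = 0 recovers `gumbel_r`; note also that SciPy's shape c has
# the opposite sign of the shape parameter used by several other packages.
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(-2, 4, 7)
# >>> np.allclose(stats.genextreme.pdf(x, c=0), stats.gumbel_r.pdf(x))
# True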
  2070. def _digammainv(y):
  2071. """Inverse of the digamma function (real positive arguments only).
  2072. This function is used in the `fit` method of `gamma_gen`.
  2073. The function uses either optimize.fsolve or optimize.newton
  2074. to solve `sc.digamma(x) - y = 0`. There is probably room for
  2075. improvement, but currently it works over a wide range of y:
  2076. >>> import numpy as np
  2077. >>> rng = np.random.default_rng()
  2078. >>> y = 64*rng.standard_normal(1000000)
  2079. >>> y.min(), y.max()
  2080. (-311.43592651416662, 351.77388222276869)
  2081. >>> x = [_digammainv(t) for t in y]
  2082. >>> np.abs(sc.digamma(x) - y).max()
  2083. 1.1368683772161603e-13
  2084. """
  2085. _em = 0.5772156649015328606065120
  2086. func = lambda x: sc.digamma(x) - y
  2087. if y > -0.125:
  2088. x0 = np.exp(y) + 0.5
  2089. if y < 10:
  2090. # Some experimentation shows that newton reliably converges
2091. # much faster than fsolve in this y range. For larger y,
  2092. # newton sometimes fails to converge.
  2093. value = optimize.newton(func, x0, tol=1e-10)
  2094. return value
  2095. elif y > -3:
  2096. x0 = np.exp(y/2.332) + 0.08661
  2097. else:
  2098. x0 = 1.0 / (-y - _em)
  2099. value, info, ier, mesg = optimize.fsolve(func, x0, xtol=1e-11,
  2100. full_output=True)
  2101. if ier != 1:
  2102. raise RuntimeError("_digammainv: fsolve failed, y = %r" % y)
  2103. return value[0]
  2104. ## Gamma (Use MATLAB and MATHEMATICA (b=theta=scale, a=alpha=shape) definition)
  2105. ## gamma(a, loc, scale) with a an integer is the Erlang distribution
  2106. ## gamma(1, loc, scale) is the Exponential distribution
  2107. ## gamma(df/2, 0, 2) is the chi2 distribution with df degrees of freedom.
  2108. class gamma_gen(rv_continuous):
  2109. r"""A gamma continuous random variable.
  2110. %(before_notes)s
  2111. See Also
  2112. --------
  2113. erlang, expon
  2114. Notes
  2115. -----
  2116. The probability density function for `gamma` is:
  2117. .. math::
  2118. f(x, a) = \frac{x^{a-1} e^{-x}}{\Gamma(a)}
  2119. for :math:`x \ge 0`, :math:`a > 0`. Here :math:`\Gamma(a)` refers to the
  2120. gamma function.
  2121. `gamma` takes ``a`` as a shape parameter for :math:`a`.
  2122. When :math:`a` is an integer, `gamma` reduces to the Erlang
  2123. distribution, and when :math:`a=1` to the exponential distribution.
  2124. Gamma distributions are sometimes parameterized with two variables,
  2125. with a probability density function of:
  2126. .. math::
  2127. f(x, \alpha, \beta) = \frac{\beta^\alpha x^{\alpha - 1} e^{-\beta x }}{\Gamma(\alpha)}
  2128. Note that this parameterization is equivalent to the above, with
  2129. ``scale = 1 / beta``.
  2130. %(after_notes)s
  2131. %(example)s
  2132. """
  2133. def _shape_info(self):
  2134. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  2135. def _rvs(self, a, size=None, random_state=None):
  2136. return random_state.standard_gamma(a, size)
  2137. def _pdf(self, x, a):
  2138. # gamma.pdf(x, a) = x**(a-1) * exp(-x) / gamma(a)
  2139. return np.exp(self._logpdf(x, a))
  2140. def _logpdf(self, x, a):
  2141. return sc.xlogy(a-1.0, x) - x - sc.gammaln(a)
  2142. def _cdf(self, x, a):
  2143. return sc.gammainc(a, x)
  2144. def _sf(self, x, a):
  2145. return sc.gammaincc(a, x)
  2146. def _ppf(self, q, a):
  2147. return sc.gammaincinv(a, q)
  2148. def _isf(self, q, a):
  2149. return sc.gammainccinv(a, q)
  2150. def _stats(self, a):
  2151. return a, a, 2.0/np.sqrt(a), 6.0/a
  2152. def _entropy(self, a):
  2153. return sc.psi(a)*(1-a) + a + sc.gammaln(a)
  2154. def _fitstart(self, data):
  2155. # The skewness of the gamma distribution is `2 / np.sqrt(a)`.
  2156. # We invert that to estimate the shape `a` using the skewness
  2157. # of the data. The formula is regularized with 1e-8 in the
  2158. # denominator to allow for degenerate data where the skewness
  2159. # is close to 0.
  2160. a = 4 / (1e-8 + _skew(data)**2)
  2161. return super()._fitstart(data, args=(a,))
  2162. @extend_notes_in_docstring(rv_continuous, notes="""\
  2163. When the location is fixed by using the argument `floc`
  2164. and `method='MLE'`, this
  2165. function uses explicit formulas or solves a simpler numerical
  2166. problem than the full ML optimization problem. So in that case,
  2167. the `optimizer`, `loc` and `scale` arguments are ignored.
  2168. \n\n""")
  2169. def fit(self, data, *args, **kwds):
  2170. floc = kwds.get('floc', None)
  2171. method = kwds.get('method', 'mle')
  2172. if floc is None or method.lower() == 'mm':
  2173. # loc is not fixed. Use the default fit method.
  2174. return super().fit(data, *args, **kwds)
  2175. # We already have this value, so just pop it from kwds.
  2176. kwds.pop('floc', None)
  2177. f0 = _get_fixed_fit_value(kwds, ['f0', 'fa', 'fix_a'])
  2178. fscale = kwds.pop('fscale', None)
  2179. _remove_optimizer_parameters(kwds)
  2180. # Special case: loc is fixed.
  2181. if f0 is not None and fscale is not None:
  2182. # This check is for consistency with `rv_continuous.fit`.
  2183. # Without this check, this function would just return the
  2184. # parameters that were given.
  2185. raise ValueError("All parameters fixed. There is nothing to "
  2186. "optimize.")
  2187. # Fixed location is handled by shifting the data.
  2188. data = np.asarray(data)
  2189. if not np.isfinite(data).all():
  2190. raise ValueError("The data contains non-finite values.")
  2191. if np.any(data <= floc):
  2192. raise FitDataError("gamma", lower=floc, upper=np.inf)
  2193. if floc != 0:
  2194. # Don't do the subtraction in-place, because `data` might be a
  2195. # view of the input array.
  2196. data = data - floc
  2197. xbar = data.mean()
  2198. # Three cases to handle:
  2199. # * shape and scale both free
  2200. # * shape fixed, scale free
  2201. # * shape free, scale fixed
  2202. if fscale is None:
  2203. # scale is free
  2204. if f0 is not None:
  2205. # shape is fixed
  2206. a = f0
  2207. else:
  2208. # shape and scale are both free.
  2209. # The MLE for the shape parameter `a` is the solution to:
  2210. # np.log(a) - sc.digamma(a) - np.log(xbar) +
  2211. # np.log(data).mean() = 0
  2212. s = np.log(xbar) - np.log(data).mean()
  2213. func = lambda a: np.log(a) - sc.digamma(a) - s
  2214. aest = (3-s + np.sqrt((s-3)**2 + 24*s)) / (12*s)
  2215. xa = aest*(1-0.4)
  2216. xb = aest*(1+0.4)
  2217. a = optimize.brentq(func, xa, xb, disp=0)
  2218. # The MLE for the scale parameter is just the data mean
  2219. # divided by the shape parameter.
  2220. scale = xbar / a
  2221. else:
  2222. # scale is fixed, shape is free
  2223. # The MLE for the shape parameter `a` is the solution to:
  2224. # sc.digamma(a) - np.log(data).mean() + np.log(fscale) = 0
  2225. c = np.log(data).mean() - np.log(fscale)
  2226. a = _digammainv(c)
  2227. scale = fscale
  2228. return a, floc, scale
  2229. gamma = gamma_gen(a=0.0, name='gamma')
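# Illustrative sketch (not part of the original source): the alternative
# (alpha, beta) parameterization mentioned in the docstring corresponds to
# ``scale = 1/beta`` in SciPy, which can be checked numerically via the
# public API:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> from scipy.special import gamma as gamma_func
# >>> alpha, beta, x = 3.0, 2.0, 1.25
# >>> lhs = stats.gamma.pdf(x, a=alpha, scale=1/beta)
# >>> rhs = beta**alpha * x**(alpha - 1) * np.exp(-beta*x) / gamma_func(alpha)
# >>> np.isclose(lhs, rhs)
# True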
  2230. class erlang_gen(gamma_gen):
  2231. """An Erlang continuous random variable.
  2232. %(before_notes)s
  2233. See Also
  2234. --------
  2235. gamma
  2236. Notes
  2237. -----
  2238. The Erlang distribution is a special case of the Gamma distribution, with
  2239. the shape parameter `a` an integer. Note that this restriction is not
  2240. enforced by `erlang`. It will, however, generate a warning the first time
  2241. a non-integer value is used for the shape parameter.
  2242. Refer to `gamma` for examples.
  2243. """
  2244. def _argcheck(self, a):
  2245. allint = np.all(np.floor(a) == a)
  2246. if not allint:
  2247. # An Erlang distribution shouldn't really have a non-integer
  2248. # shape parameter, so warn the user.
  2249. warnings.warn(
  2250. 'The shape parameter of the erlang distribution '
  2251. 'has been given a non-integer value %r.' % (a,),
  2252. RuntimeWarning)
  2253. return a > 0
  2254. def _shape_info(self):
  2255. return [_ShapeInfo("a", True, (1, np.inf), (True, False))]
  2256. def _fitstart(self, data):
  2257. # Override gamma_gen_fitstart so that an integer initial value is
  2258. # used. (Also regularize the division, to avoid issues when
  2259. # _skew(data) is 0 or close to 0.)
  2260. a = int(4.0 / (1e-8 + _skew(data)**2))
  2261. return super(gamma_gen, self)._fitstart(data, args=(a,))
  2262. # Trivial override of the fit method, so we can monkey-patch its
  2263. # docstring.
  2264. @extend_notes_in_docstring(rv_continuous, notes="""\
  2265. The Erlang distribution is generally defined to have integer values
  2266. for the shape parameter. This is not enforced by the `erlang` class.
  2267. When fitting the distribution, it will generally return a non-integer
  2268. value for the shape parameter. By using the keyword argument
  2269. `f0=<integer>`, the fit method can be constrained to fit the data to
  2270. a specific integer shape parameter.""")
  2271. def fit(self, data, *args, **kwds):
  2272. return super().fit(data, *args, **kwds)
  2273. erlang = erlang_gen(a=0.0, name='erlang')
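# Illustrative sketch (not part of the original source): as described in the
# fit notes above, an integer shape can be enforced by fixing it with ``f0``:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(12345)
# >>> sample = stats.erlang.rvs(3, scale=2.0, size=500, random_state=rng)
# >>> a_hat, loc_hat, scale_hat = stats.erlang.fit(sample, f0=3, floc=0)
# >>> a_hat == 3
# True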
  2274. class gengamma_gen(rv_continuous):
  2275. r"""A generalized gamma continuous random variable.
  2276. %(before_notes)s
  2277. See Also
  2278. --------
  2279. gamma, invgamma, weibull_min
  2280. Notes
  2281. -----
  2282. The probability density function for `gengamma` is ([1]_):
  2283. .. math::
  2284. f(x, a, c) = \frac{|c| x^{c a-1} \exp(-x^c)}{\Gamma(a)}
  2285. for :math:`x \ge 0`, :math:`a > 0`, and :math:`c \ne 0`.
  2286. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  2287. `gengamma` takes :math:`a` and :math:`c` as shape parameters.
  2288. %(after_notes)s
  2289. References
  2290. ----------
  2291. .. [1] E.W. Stacy, "A Generalization of the Gamma Distribution",
  2292. Annals of Mathematical Statistics, Vol 33(3), pp. 1187--1192.
  2293. %(example)s
  2294. """
  2295. def _argcheck(self, a, c):
  2296. return (a > 0) & (c != 0)
  2297. def _shape_info(self):
  2298. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  2299. ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
  2300. return [ia, ic]
  2301. def _pdf(self, x, a, c):
  2302. return np.exp(self._logpdf(x, a, c))
  2303. def _logpdf(self, x, a, c):
  2304. return _lazywhere((x != 0) | (c > 0), (x, c),
  2305. lambda x, c: (np.log(abs(c)) + sc.xlogy(c*a - 1, x)
  2306. - x**c - sc.gammaln(a)),
  2307. fillvalue=-np.inf)
  2308. def _cdf(self, x, a, c):
  2309. xc = x**c
  2310. val1 = sc.gammainc(a, xc)
  2311. val2 = sc.gammaincc(a, xc)
  2312. return np.where(c > 0, val1, val2)
  2313. def _rvs(self, a, c, size=None, random_state=None):
  2314. r = random_state.standard_gamma(a, size=size)
  2315. return r**(1./c)
  2316. def _sf(self, x, a, c):
  2317. xc = x**c
  2318. val1 = sc.gammainc(a, xc)
  2319. val2 = sc.gammaincc(a, xc)
  2320. return np.where(c > 0, val2, val1)
  2321. def _ppf(self, q, a, c):
  2322. val1 = sc.gammaincinv(a, q)
  2323. val2 = sc.gammainccinv(a, q)
  2324. return np.where(c > 0, val1, val2)**(1.0/c)
  2325. def _isf(self, q, a, c):
  2326. val1 = sc.gammaincinv(a, q)
  2327. val2 = sc.gammainccinv(a, q)
  2328. return np.where(c > 0, val2, val1)**(1.0/c)
  2329. def _munp(self, n, a, c):
2330. # Pochhammer symbol: sc.poch(a, n) = gamma(a+n)/gamma(a)
  2331. return sc.poch(a, n*1.0/c)
  2332. def _entropy(self, a, c):
  2333. val = sc.psi(a)
  2334. return a*(1-val) + 1.0/c*val + sc.gammaln(a) - np.log(abs(c))
  2335. gengamma = gengamma_gen(a=0.0, name='gengamma')
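# Illustrative sketch (not part of the original source): the "See Also"
# distributions arise as special cases of `gengamma`; c = 1 gives `gamma`
# and a = 1 gives `weibull_min`, e.g.:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0.2, 3.0, 8)
# >>> np.allclose(stats.gengamma.pdf(x, a=2.5, c=1), stats.gamma.pdf(x, a=2.5))
# True
# >>> np.allclose(stats.gengamma.pdf(x, a=1, c=1.5),
# ...             stats.weibull_min.pdf(x, c=1.5))
# True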
  2336. class genhalflogistic_gen(rv_continuous):
  2337. r"""A generalized half-logistic continuous random variable.
  2338. %(before_notes)s
  2339. Notes
  2340. -----
  2341. The probability density function for `genhalflogistic` is:
  2342. .. math::
  2343. f(x, c) = \frac{2 (1 - c x)^{1/(c-1)}}{[1 + (1 - c x)^{1/c}]^2}
  2344. for :math:`0 \le x \le 1/c`, and :math:`c > 0`.
  2345. `genhalflogistic` takes ``c`` as a shape parameter for :math:`c`.
  2346. %(after_notes)s
  2347. %(example)s
  2348. """
  2349. def _shape_info(self):
  2350. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2351. def _get_support(self, c):
  2352. return self.a, 1.0/c
  2353. def _pdf(self, x, c):
  2354. # genhalflogistic.pdf(x, c) =
  2355. # 2 * (1-c*x)**(1/c-1) / (1+(1-c*x)**(1/c))**2
  2356. limit = 1.0/c
  2357. tmp = np.asarray(1-c*x)
  2358. tmp0 = tmp**(limit-1)
  2359. tmp2 = tmp0*tmp
  2360. return 2*tmp0 / (1+tmp2)**2
  2361. def _cdf(self, x, c):
  2362. limit = 1.0/c
  2363. tmp = np.asarray(1-c*x)
  2364. tmp2 = tmp**(limit)
  2365. return (1.0-tmp2) / (1+tmp2)
  2366. def _ppf(self, q, c):
  2367. return 1.0/c*(1-((1.0-q)/(1.0+q))**c)
  2368. def _entropy(self, c):
  2369. return 2 - (2*c+1)*np.log(2)
  2370. genhalflogistic = genhalflogistic_gen(a=0.0, name='genhalflogistic')
  2371. class genhyperbolic_gen(rv_continuous):
  2372. r"""A generalized hyperbolic continuous random variable.
  2373. %(before_notes)s
  2374. See Also
  2375. --------
  2376. t, norminvgauss, geninvgauss, laplace, cauchy
  2377. Notes
  2378. -----
  2379. The probability density function for `genhyperbolic` is:
  2380. .. math::
  2381. f(x, p, a, b) =
  2382. \frac{(a^2 - b^2)^{p/2}}
  2383. {\sqrt{2\pi}a^{p-0.5}
  2384. K_p\Big(\sqrt{a^2 - b^2}\Big)}
  2385. e^{bx} \times \frac{K_{p - 1/2}
  2386. (a \sqrt{1 + x^2})}
  2387. {(\sqrt{1 + x^2})^{1/2 - p}}
  2388. for :math:`x, p \in ( - \infty; \infty)`,
  2389. :math:`|b| < a` if :math:`p \ge 0`,
  2390. :math:`|b| \le a` if :math:`p < 0`.
  2391. :math:`K_{p}(.)` denotes the modified Bessel function of the second
2392. kind and order :math:`p` (`scipy.special.kv`).
  2393. `genhyperbolic` takes ``p`` as a tail parameter,
  2394. ``a`` as a shape parameter,
  2395. ``b`` as a skewness parameter.
  2396. %(after_notes)s
  2397. The original parameterization of the Generalized Hyperbolic Distribution
  2398. is found in [1]_ as follows
  2399. .. math::
  2400. f(x, \lambda, \alpha, \beta, \delta, \mu) =
  2401. \frac{(\gamma/\delta)^\lambda}{\sqrt{2\pi}K_\lambda(\delta \gamma)}
  2402. e^{\beta (x - \mu)} \times \frac{K_{\lambda - 1/2}
  2403. (\alpha \sqrt{\delta^2 + (x - \mu)^2})}
  2404. {(\sqrt{\delta^2 + (x - \mu)^2} / \alpha)^{1/2 - \lambda}}
  2405. for :math:`x \in ( - \infty; \infty)`,
  2406. :math:`\gamma := \sqrt{\alpha^2 - \beta^2}`,
  2407. :math:`\lambda, \mu \in ( - \infty; \infty)`,
  2408. :math:`\delta \ge 0, |\beta| < \alpha` if :math:`\lambda \ge 0`,
  2409. :math:`\delta > 0, |\beta| \le \alpha` if :math:`\lambda < 0`.
  2410. The location-scale-based parameterization implemented in
  2411. SciPy is based on [2]_, where :math:`a = \alpha\delta`,
  2412. :math:`b = \beta\delta`, :math:`p = \lambda`,
  2413. :math:`scale=\delta` and :math:`loc=\mu`
  2414. Moments are implemented based on [3]_ and [4]_.
  2415. For the distributions that are a special case such as Student's t,
  2416. it is not recommended to rely on the implementation of genhyperbolic.
  2417. To avoid potential numerical problems and for performance reasons,
  2418. the methods of the specific distributions should be used.
  2419. References
  2420. ----------
  2421. .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions
  2422. on Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
  2423. pp. 151-157, 1978. https://www.jstor.org/stable/4615705
  2424. .. [2] Eberlein E., Prause K. (2002) The Generalized Hyperbolic Model:
  2425. Financial Derivatives and Risk Measures. In: Geman H., Madan D.,
  2426. Pliska S.R., Vorst T. (eds) Mathematical Finance - Bachelier
  2427. Congress 2000. Springer Finance. Springer, Berlin, Heidelberg.
  2428. :doi:`10.1007/978-3-662-12429-1_12`
  2429. .. [3] Scott, David J, Würtz, Diethelm, Dong, Christine and Tran,
  2430. Thanh Tam, (2009), Moments of the generalized hyperbolic
  2431. distribution, MPRA Paper, University Library of Munich, Germany,
  2432. https://EconPapers.repec.org/RePEc:pra:mprapa:19081.
  2433. .. [4] E. Eberlein and E. A. von Hammerstein. Generalized hyperbolic
  2434. and inverse Gaussian distributions: Limiting cases and approximation
  2435. of processes. FDM Preprint 80, April 2003. University of Freiburg.
  2436. https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
  2437. %(example)s
  2438. """
  2439. def _argcheck(self, p, a, b):
  2440. return (np.logical_and(np.abs(b) < a, p >= 0)
  2441. | np.logical_and(np.abs(b) <= a, p < 0))
  2442. def _shape_info(self):
  2443. ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
  2444. ia = _ShapeInfo("a", False, (0, np.inf), (True, False))
  2445. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  2446. return [ip, ia, ib]
  2447. def _fitstart(self, data):
  2448. # Arbitrary, but the default a=b=1 is not valid
  2449. return super()._fitstart(data, args=(1, 1, 0.5))
  2450. def _logpdf(self, x, p, a, b):
  2451. # kve instead of kv works better for large values of p
  2452. # and smaller values of sqrt(a^2 - b^2)
  2453. @np.vectorize
  2454. def _logpdf_single(x, p, a, b):
  2455. return _stats.genhyperbolic_logpdf(x, p, a, b)
  2456. return _logpdf_single(x, p, a, b)
  2457. def _pdf(self, x, p, a, b):
  2458. # kve instead of kv works better for large values of p
  2459. # and smaller values of sqrt(a^2 - b^2)
  2460. @np.vectorize
  2461. def _pdf_single(x, p, a, b):
  2462. return _stats.genhyperbolic_pdf(x, p, a, b)
  2463. return _pdf_single(x, p, a, b)
  2464. def _cdf(self, x, p, a, b):
  2465. @np.vectorize
  2466. def _cdf_single(x, p, a, b):
  2467. user_data = np.array(
  2468. [p, a, b], float
  2469. ).ctypes.data_as(ctypes.c_void_p)
  2470. llc = LowLevelCallable.from_cython(
  2471. _stats, '_genhyperbolic_pdf', user_data
  2472. )
  2473. t1 = integrate.quad(llc, -np.inf, x)[0]
  2474. if np.isnan(t1):
  2475. msg = ("Infinite values encountered in scipy.special.kve. "
  2476. "Values replaced by NaN to avoid incorrect results.")
  2477. warnings.warn(msg, RuntimeWarning)
  2478. return t1
  2479. return _cdf_single(x, p, a, b)
  2480. def _rvs(self, p, a, b, size=None, random_state=None):
  2481. # note: X = b * V + sqrt(V) * X has a
  2482. # generalized hyperbolic distribution
  2483. # if X is standard normal and V is
  2484. # geninvgauss(p = p, b = t2, loc = loc, scale = t3)
  2485. t1 = np.float_power(a, 2) - np.float_power(b, 2)
  2486. # b in the GIG
  2487. t2 = np.float_power(t1, 0.5)
  2488. # scale in the GIG
  2489. t3 = np.float_power(t1, - 0.5)
  2490. gig = geninvgauss.rvs(
  2491. p=p,
  2492. b=t2,
  2493. scale=t3,
  2494. size=size,
  2495. random_state=random_state
  2496. )
  2497. normst = norm.rvs(size=size, random_state=random_state)
  2498. return b * gig + np.sqrt(gig) * normst
  2499. def _stats(self, p, a, b):
  2500. # https://mpra.ub.uni-muenchen.de/19081/1/MPRA_paper_19081.pdf
  2501. # https://freidok.uni-freiburg.de/fedora/objects/freidok:7974/datastreams/FILE1/content
  2502. # standardized moments
  2503. p, a, b = np.broadcast_arrays(p, a, b)
  2504. t1 = np.float_power(a, 2) - np.float_power(b, 2)
  2505. t1 = np.float_power(t1, 0.5)
  2506. t2 = np.float_power(1, 2) * np.float_power(t1, - 1)
  2507. integers = np.linspace(0, 4, 5)
  2508. # make integers perpendicular to existing dimensions
  2509. integers = integers.reshape(integers.shape + (1,) * p.ndim)
  2510. b0, b1, b2, b3, b4 = sc.kv(p + integers, t1)
  2511. r1, r2, r3, r4 = [b / b0 for b in (b1, b2, b3, b4)]
  2512. m = b * t2 * r1
  2513. v = (
  2514. t2 * r1 + np.float_power(b, 2) * np.float_power(t2, 2) *
  2515. (r2 - np.float_power(r1, 2))
  2516. )
  2517. m3e = (
  2518. np.float_power(b, 3) * np.float_power(t2, 3) *
  2519. (r3 - 3 * b2 * b1 * np.float_power(b0, -2) +
  2520. 2 * np.float_power(r1, 3)) +
  2521. 3 * b * np.float_power(t2, 2) *
  2522. (r2 - np.float_power(r1, 2))
  2523. )
  2524. s = m3e * np.float_power(v, - 3 / 2)
  2525. m4e = (
  2526. np.float_power(b, 4) * np.float_power(t2, 4) *
  2527. (r4 - 4 * b3 * b1 * np.float_power(b0, - 2) +
  2528. 6 * b2 * np.float_power(b1, 2) * np.float_power(b0, - 3) -
  2529. 3 * np.float_power(r1, 4)) +
  2530. np.float_power(b, 2) * np.float_power(t2, 3) *
  2531. (6 * r3 - 12 * b2 * b1 * np.float_power(b0, - 2) +
  2532. 6 * np.float_power(r1, 3)) +
  2533. 3 * np.float_power(t2, 2) * r2
  2534. )
  2535. k = m4e * np.float_power(v, -2) - 3
  2536. return m, v, s, k
  2537. genhyperbolic = genhyperbolic_gen(name='genhyperbolic')
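# Illustrative sketch (not part of the original source): `_rvs` above draws
# X = b*V + sqrt(V)*Z with Z standard normal and V generalized inverse
# Gaussian.  The same normal variance-mean mixture can be reproduced with
# the public scipy.stats API (a rough sample-mean check only):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(0)
# >>> p, a, b = 1.0, 2.0, 0.5
# >>> g = np.sqrt(a**2 - b**2)
# >>> v = stats.geninvgauss.rvs(p, g, scale=1/g, size=100_000,
# ...                           random_state=rng)
# >>> z = stats.norm.rvs(size=100_000, random_state=rng)
# >>> x = b*v + np.sqrt(v)*z
# >>> np.isclose(x.mean(), stats.genhyperbolic.mean(p, a, b), atol=0.05)
# True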
  2538. class gompertz_gen(rv_continuous):
  2539. r"""A Gompertz (or truncated Gumbel) continuous random variable.
  2540. %(before_notes)s
  2541. Notes
  2542. -----
  2543. The probability density function for `gompertz` is:
  2544. .. math::
  2545. f(x, c) = c \exp(x) \exp(-c (e^x-1))
  2546. for :math:`x \ge 0`, :math:`c > 0`.
  2547. `gompertz` takes ``c`` as a shape parameter for :math:`c`.
  2548. %(after_notes)s
  2549. %(example)s
  2550. """
  2551. def _shape_info(self):
  2552. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  2553. def _pdf(self, x, c):
  2554. # gompertz.pdf(x, c) = c * exp(x) * exp(-c*(exp(x)-1))
  2555. return np.exp(self._logpdf(x, c))
  2556. def _logpdf(self, x, c):
  2557. return np.log(c) + x - c * sc.expm1(x)
  2558. def _cdf(self, x, c):
  2559. return -sc.expm1(-c * sc.expm1(x))
  2560. def _ppf(self, q, c):
  2561. return sc.log1p(-1.0 / c * sc.log1p(-q))
  2562. def _entropy(self, c):
  2563. return 1.0 - np.log(c) - np.exp(c)*sc.expn(1, c)
  2564. gompertz = gompertz_gen(a=0.0, name='gompertz')
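# Illustrative sketch (not part of the original source): the "truncated
# Gumbel" reading in the docstring can be checked from the pdf above:
# gompertz(c) matches `gumbel_l` shifted by -log(c) and conditioned on
# being non-negative.
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> c, x = 0.7, 1.3
# >>> loc = -np.log(c)
# >>> rhs = stats.gumbel_l.pdf(x, loc=loc) / stats.gumbel_l.sf(0, loc=loc)
# >>> np.isclose(stats.gompertz.pdf(x, c), rhs)
# True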
  2565. def _average_with_log_weights(x, logweights):
  2566. x = np.asarray(x)
  2567. logweights = np.asarray(logweights)
  2568. maxlogw = logweights.max()
  2569. weights = np.exp(logweights - maxlogw)
  2570. return np.average(x, weights=weights)
  2571. class gumbel_r_gen(rv_continuous):
  2572. r"""A right-skewed Gumbel continuous random variable.
  2573. %(before_notes)s
  2574. See Also
  2575. --------
  2576. gumbel_l, gompertz, genextreme
  2577. Notes
  2578. -----
  2579. The probability density function for `gumbel_r` is:
  2580. .. math::
  2581. f(x) = \exp(-(x + e^{-x}))
  2582. The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
  2583. distribution. It is also related to the extreme value distribution,
  2584. log-Weibull and Gompertz distributions.
  2585. %(after_notes)s
  2586. %(example)s
  2587. """
  2588. def _shape_info(self):
  2589. return []
  2590. def _pdf(self, x):
  2591. # gumbel_r.pdf(x) = exp(-(x + exp(-x)))
  2592. return np.exp(self._logpdf(x))
  2593. def _logpdf(self, x):
  2594. return -x - np.exp(-x)
  2595. def _cdf(self, x):
  2596. return np.exp(-np.exp(-x))
  2597. def _logcdf(self, x):
  2598. return -np.exp(-x)
  2599. def _ppf(self, q):
  2600. return -np.log(-np.log(q))
  2601. def _sf(self, x):
  2602. return -sc.expm1(-np.exp(-x))
  2603. def _isf(self, p):
  2604. return -np.log(-np.log1p(-p))
  2605. def _stats(self):
  2606. return _EULER, np.pi*np.pi/6.0, 12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
  2607. def _entropy(self):
  2608. # https://en.wikipedia.org/wiki/Gumbel_distribution
  2609. return _EULER + 1.
  2610. @_call_super_mom
  2611. @inherit_docstring_from(rv_continuous)
  2612. def fit(self, data, *args, **kwds):
  2613. data, floc, fscale = _check_fit_input_parameters(self, data,
  2614. args, kwds)
  2615. # By the method of maximum likelihood, the estimators of the
  2616. # location and scale are the roots of the equations defined in
  2617. # `func` and the value of the expression for `loc` that follows.
  2618. # The first `func` is a first order derivative of the log-likelihood
  2619. # equation and the second is from Source: Statistical Distributions,
  2620. # 3rd Edition. Evans, Hastings, and Peacock (2000), Page 101.
  2621. def get_loc_from_scale(scale):
  2622. return -scale * (sc.logsumexp(-data / scale) - np.log(len(data)))
  2623. if fscale is not None:
  2624. # if the scale is fixed, the location can be analytically
  2625. # determined.
  2626. scale = fscale
  2627. loc = get_loc_from_scale(scale)
  2628. else:
  2629. # A different function is solved depending on whether the location
  2630. # is fixed.
  2631. if floc is not None:
  2632. loc = floc
  2633. # equation to use if the location is fixed.
  2634. # note that one cannot use the equation in Evans, Hastings,
  2635. # and Peacock (2000) (since it assumes that the derivative
  2636. # w.r.t. the log-likelihood is zero). however, it is easy to
  2637. # derive the MLE condition directly if loc is fixed
  2638. def func(scale):
  2639. term1 = (loc - data) * np.exp((loc - data) / scale) + data
  2640. term2 = len(data) * (loc + scale)
  2641. return term1.sum() - term2
  2642. else:
  2643. # equation to use if both location and scale are free
  2644. def func(scale):
  2645. sdata = -data / scale
  2646. wavg = _average_with_log_weights(data, logweights=sdata)
  2647. return data.mean() - wavg - scale
  2648. # set brackets for `root_scalar` to use when optimizing over the
  2649. # scale such that a root is likely between them. Use user supplied
  2650. # guess or default 1.
  2651. brack_start = kwds.get('scale', 1)
  2652. lbrack, rbrack = brack_start / 2, brack_start * 2
  2653. # if a root is not between the brackets, iteratively expand them
  2654. # until they include a sign change, checking after each bracket is
  2655. # modified.
  2656. def interval_contains_root(lbrack, rbrack):
  2657. # return true if the signs disagree.
  2658. return (np.sign(func(lbrack)) !=
  2659. np.sign(func(rbrack)))
  2660. while (not interval_contains_root(lbrack, rbrack)
  2661. and (lbrack > 0 or rbrack < np.inf)):
  2662. lbrack /= 2
  2663. rbrack *= 2
  2664. res = optimize.root_scalar(func, bracket=(lbrack, rbrack),
  2665. rtol=1e-14, xtol=1e-14)
  2666. scale = res.root
  2667. loc = floc if floc is not None else get_loc_from_scale(scale)
  2668. return loc, scale
  2669. gumbel_r = gumbel_r_gen(name='gumbel_r')
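# Illustrative sketch (not part of the original source): when the scale is
# fixed, the fit above determines the location analytically as
# loc = -scale*(logsumexp(-data/scale) - log(n)):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> from scipy.special import logsumexp
# >>> rng = np.random.default_rng(7)
# >>> data = stats.gumbel_r.rvs(loc=1.0, scale=2.0, size=1000,
# ...                           random_state=rng)
# >>> loc_hat, scale_hat = stats.gumbel_r.fit(data, fscale=2.0)
# >>> scale_hat
# 2.0
# >>> np.isclose(loc_hat, -2.0*(logsumexp(-data/2.0) - np.log(len(data))))
# True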
  2670. class gumbel_l_gen(rv_continuous):
  2671. r"""A left-skewed Gumbel continuous random variable.
  2672. %(before_notes)s
  2673. See Also
  2674. --------
  2675. gumbel_r, gompertz, genextreme
  2676. Notes
  2677. -----
  2678. The probability density function for `gumbel_l` is:
  2679. .. math::
  2680. f(x) = \exp(x - e^x)
  2681. The Gumbel distribution is sometimes referred to as a type I Fisher-Tippett
  2682. distribution. It is also related to the extreme value distribution,
  2683. log-Weibull and Gompertz distributions.
  2684. %(after_notes)s
  2685. %(example)s
  2686. """
  2687. def _shape_info(self):
  2688. return []
  2689. def _pdf(self, x):
  2690. # gumbel_l.pdf(x) = exp(x - exp(x))
  2691. return np.exp(self._logpdf(x))
  2692. def _logpdf(self, x):
  2693. return x - np.exp(x)
  2694. def _cdf(self, x):
  2695. return -sc.expm1(-np.exp(x))
  2696. def _ppf(self, q):
  2697. return np.log(-sc.log1p(-q))
  2698. def _logsf(self, x):
  2699. return -np.exp(x)
  2700. def _sf(self, x):
  2701. return np.exp(-np.exp(x))
  2702. def _isf(self, x):
  2703. return np.log(-np.log(x))
  2704. def _stats(self):
  2705. return -_EULER, np.pi*np.pi/6.0, \
  2706. -12*np.sqrt(6)/np.pi**3 * _ZETA3, 12.0/5
  2707. def _entropy(self):
  2708. return _EULER + 1.
  2709. @_call_super_mom
  2710. @inherit_docstring_from(rv_continuous)
  2711. def fit(self, data, *args, **kwds):
  2712. # The fit method of `gumbel_r` can be used for this distribution with
  2713. # small modifications. The process to do this is
  2714. # 1. pass the sign negated data into `gumbel_r.fit`
  2715. # - if the location is fixed, it should also be negated.
  2716. # 2. negate the sign of the resulting location, leaving the scale
  2717. # unmodified.
  2718. # `gumbel_r.fit` holds necessary input checks.
  2719. if kwds.get('floc') is not None:
  2720. kwds['floc'] = -kwds['floc']
  2721. loc_r, scale_r, = gumbel_r.fit(-np.asarray(data), *args, **kwds)
  2722. return -loc_r, scale_r
  2723. gumbel_l = gumbel_l_gen(name='gumbel_l')
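# Illustrative sketch (not part of the original source): as described in the
# comments above, fitting `gumbel_l` negates the data, fits `gumbel_r`, and
# negates the resulting location:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(3)
# >>> data = stats.gumbel_l.rvs(loc=0.5, scale=1.5, size=2000,
# ...                           random_state=rng)
# >>> loc_l, scale_l = stats.gumbel_l.fit(data)
# >>> loc_r, scale_r = stats.gumbel_r.fit(-data)
# >>> np.allclose([loc_l, scale_l], [-loc_r, scale_r])
# True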
  2724. class halfcauchy_gen(rv_continuous):
  2725. r"""A Half-Cauchy continuous random variable.
  2726. %(before_notes)s
  2727. Notes
  2728. -----
  2729. The probability density function for `halfcauchy` is:
  2730. .. math::
  2731. f(x) = \frac{2}{\pi (1 + x^2)}
  2732. for :math:`x \ge 0`.
  2733. %(after_notes)s
  2734. %(example)s
  2735. """
  2736. def _shape_info(self):
  2737. return []
  2738. def _pdf(self, x):
  2739. # halfcauchy.pdf(x) = 2 / (pi * (1 + x**2))
  2740. return 2.0/np.pi/(1.0+x*x)
  2741. def _logpdf(self, x):
  2742. return np.log(2.0/np.pi) - sc.log1p(x*x)
  2743. def _cdf(self, x):
  2744. return 2.0/np.pi*np.arctan(x)
  2745. def _ppf(self, q):
  2746. return np.tan(np.pi/2*q)
  2747. def _stats(self):
  2748. return np.inf, np.inf, np.nan, np.nan
  2749. def _entropy(self):
  2750. return np.log(2*np.pi)
  2751. halfcauchy = halfcauchy_gen(a=0.0, name='halfcauchy')
  2752. class halflogistic_gen(rv_continuous):
  2753. r"""A half-logistic continuous random variable.
  2754. %(before_notes)s
  2755. Notes
  2756. -----
  2757. The probability density function for `halflogistic` is:
  2758. .. math::
  2759. f(x) = \frac{ 2 e^{-x} }{ (1+e^{-x})^2 }
  2760. = \frac{1}{2} \text{sech}(x/2)^2
  2761. for :math:`x \ge 0`.
  2762. %(after_notes)s
  2763. %(example)s
  2764. """
  2765. def _shape_info(self):
  2766. return []
  2767. def _pdf(self, x):
  2768. # halflogistic.pdf(x) = 2 * exp(-x) / (1+exp(-x))**2
  2769. # = 1/2 * sech(x/2)**2
  2770. return np.exp(self._logpdf(x))
  2771. def _logpdf(self, x):
  2772. return np.log(2) - x - 2. * sc.log1p(np.exp(-x))
  2773. def _cdf(self, x):
  2774. return np.tanh(x/2.0)
  2775. def _ppf(self, q):
  2776. return 2*np.arctanh(q)
  2777. def _munp(self, n):
  2778. if n == 1:
  2779. return 2*np.log(2)
  2780. if n == 2:
  2781. return np.pi*np.pi/3.0
  2782. if n == 3:
  2783. return 9*_ZETA3
  2784. if n == 4:
  2785. return 7*np.pi**4 / 15.0
  2786. return 2*(1-pow(2.0, 1-n))*sc.gamma(n+1)*sc.zeta(n, 1)
  2787. def _entropy(self):
  2788. return 2-np.log(2)
  2789. halflogistic = halflogistic_gen(a=0.0, name='halflogistic')
  2790. class halfnorm_gen(rv_continuous):
  2791. r"""A half-normal continuous random variable.
  2792. %(before_notes)s
  2793. Notes
  2794. -----
  2795. The probability density function for `halfnorm` is:
  2796. .. math::
  2797. f(x) = \sqrt{2/\pi} \exp(-x^2 / 2)
  2798. for :math:`x >= 0`.
  2799. `halfnorm` is a special case of `chi` with ``df=1``.
  2800. %(after_notes)s
  2801. %(example)s
  2802. """
  2803. def _shape_info(self):
  2804. return []
  2805. def _rvs(self, size=None, random_state=None):
  2806. return abs(random_state.standard_normal(size=size))
  2807. def _pdf(self, x):
  2808. # halfnorm.pdf(x) = sqrt(2/pi) * exp(-x**2/2)
  2809. return np.sqrt(2.0/np.pi)*np.exp(-x*x/2.0)
  2810. def _logpdf(self, x):
  2811. return 0.5 * np.log(2.0/np.pi) - x*x/2.0
  2812. def _cdf(self, x):
  2813. return _norm_cdf(x)*2-1.0
  2814. def _ppf(self, q):
  2815. return sc.ndtri((1+q)/2.0)
  2816. def _stats(self):
  2817. return (np.sqrt(2.0/np.pi),
  2818. 1-2.0/np.pi,
  2819. np.sqrt(2)*(4-np.pi)/(np.pi-2)**1.5,
  2820. 8*(np.pi-3)/(np.pi-2)**2)
  2821. def _entropy(self):
  2822. return 0.5*np.log(np.pi/2.0)+0.5
  2823. halfnorm = halfnorm_gen(a=0.0, name='halfnorm')
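# Illustrative sketch (not part of the original source): the docstring notes
# that `halfnorm` is `chi` with one degree of freedom:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0, 3, 7)
# >>> np.allclose(stats.halfnorm.pdf(x), stats.chi.pdf(x, df=1))
# True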
  2824. class hypsecant_gen(rv_continuous):
  2825. r"""A hyperbolic secant continuous random variable.
  2826. %(before_notes)s
  2827. Notes
  2828. -----
  2829. The probability density function for `hypsecant` is:
  2830. .. math::
  2831. f(x) = \frac{1}{\pi} \text{sech}(x)
  2832. for a real number :math:`x`.
  2833. %(after_notes)s
  2834. %(example)s
  2835. """
  2836. def _shape_info(self):
  2837. return []
  2838. def _pdf(self, x):
  2839. # hypsecant.pdf(x) = 1/pi * sech(x)
  2840. return 1.0/(np.pi*np.cosh(x))
  2841. def _cdf(self, x):
  2842. return 2.0/np.pi*np.arctan(np.exp(x))
  2843. def _ppf(self, q):
  2844. return np.log(np.tan(np.pi*q/2.0))
  2845. def _stats(self):
  2846. return 0, np.pi*np.pi/4, 0, 2
  2847. def _entropy(self):
  2848. return np.log(2*np.pi)
  2849. hypsecant = hypsecant_gen(name='hypsecant')
  2850. class gausshyper_gen(rv_continuous):
  2851. r"""A Gauss hypergeometric continuous random variable.
  2852. %(before_notes)s
  2853. Notes
  2854. -----
  2855. The probability density function for `gausshyper` is:
  2856. .. math::
  2857. f(x, a, b, c, z) = C x^{a-1} (1-x)^{b-1} (1+zx)^{-c}
  2858. for :math:`0 \le x \le 1`, :math:`a,b > 0`, :math:`c` a real number,
  2859. :math:`z > -1`, and :math:`C = \frac{1}{B(a, b) F[2, 1](c, a; a+b; -z)}`.
  2860. :math:`F[2, 1]` is the Gauss hypergeometric function
  2861. `scipy.special.hyp2f1`.
  2862. `gausshyper` takes :math:`a`, :math:`b`, :math:`c` and :math:`z` as shape
  2863. parameters.
  2864. %(after_notes)s
  2865. References
  2866. ----------
  2867. .. [1] Armero, C., and M. J. Bayarri. "Prior Assessments for Prediction in
  2868. Queues." *Journal of the Royal Statistical Society*. Series D (The
  2869. Statistician) 43, no. 1 (1994): 139-53. doi:10.2307/2348939
  2870. %(example)s
  2871. """
  2872. def _argcheck(self, a, b, c, z):
  2873. # z > -1 per gh-10134
  2874. return (a > 0) & (b > 0) & (c == c) & (z > -1)
  2875. def _shape_info(self):
  2876. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  2877. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  2878. ic = _ShapeInfo("c", False, (-np.inf, np.inf), (False, False))
  2879. iz = _ShapeInfo("z", False, (-1, np.inf), (False, False))
  2880. return [ia, ib, ic, iz]
  2881. def _pdf(self, x, a, b, c, z):
  2882. # gausshyper.pdf(x, a, b, c, z) =
  2883. # C * x**(a-1) * (1-x)**(b-1) * (1+z*x)**(-c)
  2884. Cinv = sc.gamma(a)*sc.gamma(b)/sc.gamma(a+b)*sc.hyp2f1(c, a, a+b, -z)
  2885. return 1.0/Cinv * x**(a-1.0) * (1.0-x)**(b-1.0) / (1.0+z*x)**c
  2886. def _munp(self, n, a, b, c, z):
  2887. fac = sc.beta(n+a, b) / sc.beta(a, b)
  2888. num = sc.hyp2f1(c, a+n, a+b+n, -z)
  2889. den = sc.hyp2f1(c, a, a+b, -z)
  2890. return fac*num / den
  2891. gausshyper = gausshyper_gen(a=0.0, b=1.0, name='gausshyper')
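# Illustrative sketch (not part of the original source): the normalizing
# constant C defined in the docstring can be checked by integrating the pdf
# over its [0, 1] support:
#
# >>> import numpy as np
# >>> from scipy import integrate, stats
# >>> a, b, c, z = 1.5, 2.0, 1.0, 0.5
# >>> total, _ = integrate.quad(stats.gausshyper.pdf, 0, 1, args=(a, b, c, z))
# >>> np.isclose(total, 1.0)
# True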
  2892. class invgamma_gen(rv_continuous):
  2893. r"""An inverted gamma continuous random variable.
  2894. %(before_notes)s
  2895. Notes
  2896. -----
  2897. The probability density function for `invgamma` is:
  2898. .. math::
  2899. f(x, a) = \frac{x^{-a-1}}{\Gamma(a)} \exp(-\frac{1}{x})
  2900. for :math:`x >= 0`, :math:`a > 0`. :math:`\Gamma` is the gamma function
  2901. (`scipy.special.gamma`).
  2902. `invgamma` takes ``a`` as a shape parameter for :math:`a`.
  2903. `invgamma` is a special case of `gengamma` with ``c=-1``, and it is a
  2904. different parameterization of the scaled inverse chi-squared distribution.
  2905. Specifically, if the scaled inverse chi-squared distribution is
  2906. parameterized with degrees of freedom :math:`\nu` and scaling parameter
  2907. :math:`\tau^2`, then it can be modeled using `invgamma` with
  2908. ``a=`` :math:`\nu/2` and ``scale=`` :math:`\nu \tau^2/2`.
  2909. %(after_notes)s
  2910. %(example)s
  2911. """
  2912. _support_mask = rv_continuous._open_support_mask
  2913. def _shape_info(self):
2914. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  2915. def _pdf(self, x, a):
  2916. # invgamma.pdf(x, a) = x**(-a-1) / gamma(a) * exp(-1/x)
  2917. return np.exp(self._logpdf(x, a))
  2918. def _logpdf(self, x, a):
  2919. return -(a+1) * np.log(x) - sc.gammaln(a) - 1.0/x
  2920. def _cdf(self, x, a):
  2921. return sc.gammaincc(a, 1.0 / x)
  2922. def _ppf(self, q, a):
  2923. return 1.0 / sc.gammainccinv(a, q)
  2924. def _sf(self, x, a):
  2925. return sc.gammainc(a, 1.0 / x)
  2926. def _isf(self, q, a):
  2927. return 1.0 / sc.gammaincinv(a, q)
  2928. def _stats(self, a, moments='mvsk'):
  2929. m1 = _lazywhere(a > 1, (a,), lambda x: 1. / (x - 1.), np.inf)
  2930. m2 = _lazywhere(a > 2, (a,), lambda x: 1. / (x - 1.)**2 / (x - 2.),
  2931. np.inf)
  2932. g1, g2 = None, None
  2933. if 's' in moments:
  2934. g1 = _lazywhere(
  2935. a > 3, (a,),
  2936. lambda x: 4. * np.sqrt(x - 2.) / (x - 3.), np.nan)
  2937. if 'k' in moments:
  2938. g2 = _lazywhere(
  2939. a > 4, (a,),
  2940. lambda x: 6. * (5. * x - 11.) / (x - 3.) / (x - 4.), np.nan)
  2941. return m1, m2, g1, g2
  2942. def _entropy(self, a):
  2943. return a - (a+1.0) * sc.psi(a) + sc.gammaln(a)
  2944. invgamma = invgamma_gen(a=0.0, name='invgamma')
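# Illustrative sketch (not part of the original source): `invgamma` is the
# distribution of the reciprocal of a `gamma` variate (the c = -1 special
# case of `gengamma` noted above), so its CDF at x equals the gamma survival
# function at 1/x:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> a, x = 2.5, 1.7
# >>> np.isclose(stats.invgamma.cdf(x, a), stats.gamma.sf(1/x, a))
# True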
  2945. class invgauss_gen(rv_continuous):
  2946. r"""An inverse Gaussian continuous random variable.
  2947. %(before_notes)s
  2948. Notes
  2949. -----
  2950. The probability density function for `invgauss` is:
  2951. .. math::
  2952. f(x, \mu) = \frac{1}{\sqrt{2 \pi x^3}}
  2953. \exp(-\frac{(x-\mu)^2}{2 x \mu^2})
  2954. for :math:`x >= 0` and :math:`\mu > 0`.
  2955. `invgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
  2956. %(after_notes)s
  2957. %(example)s
  2958. """
  2959. _support_mask = rv_continuous._open_support_mask
  2960. def _shape_info(self):
  2961. return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
  2962. def _rvs(self, mu, size=None, random_state=None):
  2963. return random_state.wald(mu, 1.0, size=size)
  2964. def _pdf(self, x, mu):
  2965. # invgauss.pdf(x, mu) =
  2966. # 1 / sqrt(2*pi*x**3) * exp(-(x-mu)**2/(2*x*mu**2))
  2967. return 1.0/np.sqrt(2*np.pi*x**3.0)*np.exp(-1.0/(2*x)*((x-mu)/mu)**2)
  2968. def _logpdf(self, x, mu):
  2969. return -0.5*np.log(2*np.pi) - 1.5*np.log(x) - ((x-mu)/mu)**2/(2*x)
  2970. # approach adapted from equations in
  2971. # https://journal.r-project.org/archive/2016-1/giner-smyth.pdf,
  2972. # not R code. see gh-13616
  2973. def _logcdf(self, x, mu):
  2974. fac = 1 / np.sqrt(x)
  2975. a = _norm_logcdf(fac * ((x / mu) - 1))
  2976. b = 2 / mu + _norm_logcdf(-fac * ((x / mu) + 1))
  2977. return a + np.log1p(np.exp(b - a))
  2978. def _logsf(self, x, mu):
  2979. fac = 1 / np.sqrt(x)
  2980. a = _norm_logsf(fac * ((x / mu) - 1))
  2981. b = 2 / mu + _norm_logcdf(-fac * (x + mu) / mu)
  2982. return a + np.log1p(-np.exp(b - a))
  2983. def _sf(self, x, mu):
  2984. return np.exp(self._logsf(x, mu))
  2985. def _cdf(self, x, mu):
  2986. return np.exp(self._logcdf(x, mu))
  2987. def _ppf(self, x, mu):
  2988. with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
  2989. x, mu = np.broadcast_arrays(x, mu)
  2990. ppf = _boost._invgauss_ppf(x, mu, 1)
  2991. i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
  2992. ppf[i_wt] = _boost._invgauss_isf(1-x[i_wt], mu[i_wt], 1)
  2993. i_nan = np.isnan(ppf)
  2994. ppf[i_nan] = super()._ppf(x[i_nan], mu[i_nan])
  2995. return ppf
  2996. def _isf(self, x, mu):
  2997. with np.errstate(divide='ignore', over='ignore', invalid='ignore'):
  2998. x, mu = np.broadcast_arrays(x, mu)
  2999. isf = _boost._invgauss_isf(x, mu, 1)
  3000. i_wt = x > 0.5 # "wrong tail" - sometimes too inaccurate
  3001. isf[i_wt] = _boost._invgauss_ppf(1-x[i_wt], mu[i_wt], 1)
  3002. i_nan = np.isnan(isf)
  3003. isf[i_nan] = super()._isf(x[i_nan], mu[i_nan])
  3004. return isf
  3005. def _stats(self, mu):
  3006. return mu, mu**3.0, 3*np.sqrt(mu), 15*mu
  3007. @inherit_docstring_from(rv_continuous)
  3008. def fit(self, data, *args, **kwds):
  3009. method = kwds.get('method', 'mle')
  3010. if type(self) == wald_gen or method.lower() == 'mm':
  3011. return super().fit(data, *args, **kwds)
  3012. data, fshape_s, floc, fscale = _check_fit_input_parameters(self, data,
  3013. args, kwds)
  3014. '''
  3015. Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
3016. and Peacock (2000), Page 121. Their shape parameter is equivalent to
3017. SciPy's with the conversion `fshape_s = fshape / scale`.
3018. MLE formulas are not used in 3 conditions:
3019. - `loc` is not fixed
3020. - `mu` is fixed
3021. These cases fall back on the superclass fit method.
3022. - `loc` is fixed, but shifting the data by `loc` produces negative
3023. values; this raises a `FitDataError`.
  3024. '''
  3025. if floc is None or fshape_s is not None:
  3026. return super().fit(data, *args, **kwds)
  3027. elif np.any(data - floc < 0):
  3028. raise FitDataError("invgauss", lower=0, upper=np.inf)
  3029. else:
  3030. data = data - floc
  3031. fshape_n = np.mean(data)
  3032. if fscale is None:
  3033. fscale = len(data) / (np.sum(data ** -1 - fshape_n ** -1))
  3034. fshape_s = fshape_n / fscale
  3035. return fshape_s, floc, fscale
  3036. invgauss = invgauss_gen(a=0.0, name='invgauss')
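# Illustrative sketch (not part of the original source): with ``floc=0`` the
# fit above uses the closed-form MLE, scale = n / sum(1/x_i - 1/xbar) and
# mu = xbar / scale:
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(42)
# >>> data = stats.invgauss.rvs(0.5, size=1000, random_state=rng)
# >>> mu_hat, loc_hat, scale_hat = stats.invgauss.fit(data, floc=0)
# >>> xbar = data.mean()
# >>> scale_mle = len(data) / np.sum(1.0/data - 1.0/xbar)
# >>> np.allclose([loc_hat, scale_hat, mu_hat],
# ...             [0.0, scale_mle, xbar/scale_mle])
# True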
  3037. class geninvgauss_gen(rv_continuous):
  3038. r"""A Generalized Inverse Gaussian continuous random variable.
  3039. %(before_notes)s
  3040. Notes
  3041. -----
  3042. The probability density function for `geninvgauss` is:
  3043. .. math::
  3044. f(x, p, b) = x^{p-1} \exp(-b (x + 1/x) / 2) / (2 K_p(b))
3045. where `x > 0`, `p` is a real number and `b > 0` ([1]_).
  3046. :math:`K_p` is the modified Bessel function of second kind of order `p`
  3047. (`scipy.special.kv`).
  3048. %(after_notes)s
  3049. The inverse Gaussian distribution `stats.invgauss(mu)` is a special case of
  3050. `geninvgauss` with `p = -1/2`, `b = 1 / mu` and `scale = mu`.
  3051. Generating random variates is challenging for this distribution. The
  3052. implementation is based on [2]_.
  3053. References
  3054. ----------
  3055. .. [1] O. Barndorff-Nielsen, P. Blaesild, C. Halgreen, "First hitting time
  3056. models for the generalized inverse gaussian distribution",
  3057. Stochastic Processes and their Applications 7, pp. 49--54, 1978.
  3058. .. [2] W. Hoermann and J. Leydold, "Generating generalized inverse Gaussian
  3059. random variates", Statistics and Computing, 24(4), p. 547--557, 2014.
  3060. %(example)s
  3061. """
  3062. def _argcheck(self, p, b):
  3063. return (p == p) & (b > 0)
  3064. def _shape_info(self):
  3065. ip = _ShapeInfo("p", False, (-np.inf, np.inf), (False, False))
  3066. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  3067. return [ip, ib]
  3068. def _logpdf(self, x, p, b):
  3069. # kve instead of kv works better for large values of b
  3070. # warn if kve produces infinite values and replace by nan
  3071. # otherwise c = -inf and the results are often incorrect
  3072. @np.vectorize
  3073. def logpdf_single(x, p, b):
  3074. return _stats.geninvgauss_logpdf(x, p, b)
  3075. z = logpdf_single(x, p, b)
  3076. if np.isnan(z).any():
  3077. msg = ("Infinite values encountered in scipy.special.kve(p, b). "
  3078. "Values replaced by NaN to avoid incorrect results.")
  3079. warnings.warn(msg, RuntimeWarning)
  3080. return z
  3081. def _pdf(self, x, p, b):
  3082. # relying on logpdf avoids overflow of x**(p-1) for large x and p
  3083. return np.exp(self._logpdf(x, p, b))
  3084. def _cdf(self, x, *args):
  3085. _a, _b = self._get_support(*args)
  3086. @np.vectorize
  3087. def _cdf_single(x, *args):
  3088. p, b = args
  3089. user_data = np.array([p, b], float).ctypes.data_as(ctypes.c_void_p)
  3090. llc = LowLevelCallable.from_cython(_stats, '_geninvgauss_pdf',
  3091. user_data)
  3092. return integrate.quad(llc, _a, x)[0]
  3093. return _cdf_single(x, *args)
  3094. def _logquasipdf(self, x, p, b):
  3095. # log of the quasi-density (w/o normalizing constant) used in _rvs
  3096. return _lazywhere(x > 0, (x, p, b),
  3097. lambda x, p, b: (p - 1)*np.log(x) - b*(x + 1/x)/2,
  3098. -np.inf)
  3099. def _rvs(self, p, b, size=None, random_state=None):
  3100. # if p and b are scalar, use _rvs_scalar, otherwise need to create
  3101. # output by iterating over parameters
  3102. if np.isscalar(p) and np.isscalar(b):
  3103. out = self._rvs_scalar(p, b, size, random_state)
  3104. elif p.size == 1 and b.size == 1:
  3105. out = self._rvs_scalar(p.item(), b.item(), size, random_state)
  3106. else:
  3107. # When this method is called, size will be a (possibly empty)
  3108. # tuple of integers. It will not be None; if `size=None` is passed
  3109. # to `rvs()`, size will be the empty tuple ().
  3110. p, b = np.broadcast_arrays(p, b)
  3111. # p and b now have the same shape.
  3112. # `shp` is the shape of the blocks of random variates that are
  3113. # generated for each combination of parameters associated with
  3114. # broadcasting p and b.
3115. # bc is a tuple the same length as size. The values
  3116. # in bc are bools. If bc[j] is True, it means that
  3117. # entire axis is filled in for a given combination of the
  3118. # broadcast arguments.
  3119. shp, bc = _check_shape(p.shape, size)
  3120. # `numsamples` is the total number of variates to be generated
  3121. # for each combination of the input arguments.
  3122. numsamples = int(np.prod(shp))
  3123. # `out` is the array to be returned. It is filled in the
  3124. # loop below.
  3125. out = np.empty(size)
  3126. it = np.nditer([p, b],
  3127. flags=['multi_index'],
  3128. op_flags=[['readonly'], ['readonly']])
  3129. while not it.finished:
  3130. # Convert the iterator's multi_index into an index into the
  3131. # `out` array where the call to _rvs_scalar() will be stored.
  3132. # Where bc is True, we use a full slice; otherwise we use the
  3133. # index value from it.multi_index. len(it.multi_index) might
  3134. # be less than len(bc), and in that case we want to align these
  3135. # two sequences to the right, so the loop variable j runs from
  3136. # -len(size) to 0. This doesn't cause an IndexError, as
  3137. # bc[j] will be True in those cases where it.multi_index[j]
  3138. # would cause an IndexError.
  3139. idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
  3140. for j in range(-len(size), 0))
  3141. out[idx] = self._rvs_scalar(it[0], it[1], numsamples,
  3142. random_state).reshape(shp)
  3143. it.iternext()
  3144. if size == ():
  3145. out = out.item()
  3146. return out
  3147. def _rvs_scalar(self, p, b, numsamples, random_state):
  3148. # following [2], the quasi-pdf is used instead of the pdf for the
  3149. # generation of rvs
  3150. invert_res = False
  3151. if not numsamples:
  3152. numsamples = 1
  3153. if p < 0:
  3154. # note: if X is geninvgauss(p, b), then 1/X is geninvgauss(-p, b)
  3155. p = -p
  3156. invert_res = True
  3157. m = self._mode(p, b)
  3158. # determine method to be used following [2]
  3159. ratio_unif = True
  3160. if p >= 1 or b > 1:
  3161. # ratio of uniforms with mode shift below
  3162. mode_shift = True
  3163. elif b >= min(0.5, 2 * np.sqrt(1 - p) / 3):
  3164. # ratio of uniforms without mode shift below
  3165. mode_shift = False
  3166. else:
  3167. # new algorithm in [2]
  3168. ratio_unif = False
  3169. # prepare sampling of rvs
  3170. size1d = tuple(np.atleast_1d(numsamples))
  3171. N = np.prod(size1d) # number of rvs needed, reshape upon return
  3172. x = np.zeros(N)
  3173. simulated = 0
  3174. if ratio_unif:
  3175. # use ratio of uniforms method
  3176. if mode_shift:
  3177. a2 = -2 * (p + 1) / b - m
  3178. a1 = 2 * m * (p - 1) / b - 1
  3179. # find roots of x**3 + a2*x**2 + a1*x + m (Cardano's formula)
  3180. p1 = a1 - a2**2 / 3
  3181. q1 = 2 * a2**3 / 27 - a2 * a1 / 3 + m
  3182. phi = np.arccos(-q1 * np.sqrt(-27 / p1**3) / 2)
  3183. s1 = -np.sqrt(-4 * p1 / 3)
  3184. root1 = s1 * np.cos(phi / 3 + np.pi / 3) - a2 / 3
  3185. root2 = -s1 * np.cos(phi / 3) - a2 / 3
  3186. # root3 = s1 * np.cos(phi / 3 - np.pi / 3) - a2 / 3
  3187. # if g is the quasipdf, rescale: g(x) / g(m) which we can write
  3188. # as exp(log(g(x)) - log(g(m))). This is important
  3189. # since for large values of p and b, g cannot be evaluated.
  3190. # denote the rescaled quasipdf by h
  3191. lm = self._logquasipdf(m, p, b)
  3192. d1 = self._logquasipdf(root1, p, b) - lm
  3193. d2 = self._logquasipdf(root2, p, b) - lm
  3194. # compute the bounding rectangle w.r.t. h. Note that
  3195. # np.exp(0.5*d1) = np.sqrt(g(root1)/g(m)) = np.sqrt(h(root1))
  3196. vmin = (root1 - m) * np.exp(0.5 * d1)
  3197. vmax = (root2 - m) * np.exp(0.5 * d2)
  3198. umax = 1 # umax = sqrt(h(m)) = 1
  3199. logqpdf = lambda x: self._logquasipdf(x, p, b) - lm
  3200. c = m
  3201. else:
  3202. # ratio of uniforms without mode shift
  3203. # compute np.sqrt(quasipdf(m))
  3204. umax = np.exp(0.5*self._logquasipdf(m, p, b))
  3205. xplus = ((1 + p) + np.sqrt((1 + p)**2 + b**2))/b
  3206. vmin = 0
  3207. # compute xplus * np.sqrt(quasipdf(xplus))
  3208. vmax = xplus * np.exp(0.5 * self._logquasipdf(xplus, p, b))
  3209. c = 0
  3210. logqpdf = lambda x: self._logquasipdf(x, p, b)
  3211. if vmin >= vmax:
  3212. raise ValueError("vmin must be smaller than vmax.")
  3213. if umax <= 0:
  3214. raise ValueError("umax must be positive.")
  3215. i = 1
  3216. while simulated < N:
  3217. k = N - simulated
  3218. # simulate uniform rvs on [0, umax] and [vmin, vmax]
  3219. u = umax * random_state.uniform(size=k)
  3220. v = random_state.uniform(size=k)
  3221. v = vmin + (vmax - vmin) * v
  3222. rvs = v / u + c
  3223. # rewrite acceptance condition u**2 <= pdf(rvs) by taking logs
  3224. accept = (2*np.log(u) <= logqpdf(rvs))
  3225. num_accept = np.sum(accept)
  3226. if num_accept > 0:
  3227. x[simulated:(simulated + num_accept)] = rvs[accept]
  3228. simulated += num_accept
  3229. if (simulated == 0) and (i*N >= 50000):
  3230. msg = ("Not a single random variate could be generated "
  3231. "in {} attempts. Sampling does not appear to "
  3232. "work for the provided parameters.".format(i*N))
  3233. raise RuntimeError(msg)
  3234. i += 1
  3235. else:
  3236. # use new algorithm in [2]
  3237. x0 = b / (1 - p)
  3238. xs = np.max((x0, 2 / b))
  3239. k1 = np.exp(self._logquasipdf(m, p, b))
  3240. A1 = k1 * x0
  3241. if x0 < 2 / b:
  3242. k2 = np.exp(-b)
  3243. if p > 0:
  3244. A2 = k2 * ((2 / b)**p - x0**p) / p
  3245. else:
  3246. A2 = k2 * np.log(2 / b**2)
  3247. else:
  3248. k2, A2 = 0, 0
  3249. k3 = xs**(p - 1)
  3250. A3 = 2 * k3 * np.exp(-xs * b / 2) / b
  3251. A = A1 + A2 + A3
  3252. # [2]: rejection constant is < 2.73; so expected runtime is finite
  3253. while simulated < N:
  3254. k = N - simulated
  3255. h, rvs = np.zeros(k), np.zeros(k)
3256. # simulate uniform rvs on [0, 1] and [0, A]
  3257. u = random_state.uniform(size=k)
  3258. v = A * random_state.uniform(size=k)
  3259. cond1 = v <= A1
  3260. cond2 = np.logical_not(cond1) & (v <= A1 + A2)
  3261. cond3 = np.logical_not(cond1 | cond2)
  3262. # subdomain (0, x0)
  3263. rvs[cond1] = x0 * v[cond1] / A1
  3264. h[cond1] = k1
  3265. # subdomain (x0, 2 / b)
  3266. if p > 0:
  3267. rvs[cond2] = (x0**p + (v[cond2] - A1) * p / k2)**(1 / p)
  3268. else:
  3269. rvs[cond2] = b * np.exp((v[cond2] - A1) * np.exp(b))
  3270. h[cond2] = k2 * rvs[cond2]**(p - 1)
  3271. # subdomain (xs, infinity)
  3272. z = np.exp(-xs * b / 2) - b * (v[cond3] - A1 - A2) / (2 * k3)
  3273. rvs[cond3] = -2 / b * np.log(z)
  3274. h[cond3] = k3 * np.exp(-rvs[cond3] * b / 2)
  3275. # apply rejection method
  3276. accept = (np.log(u * h) <= self._logquasipdf(rvs, p, b))
3277. num_accept = np.sum(accept)
  3278. if num_accept > 0:
  3279. x[simulated:(simulated + num_accept)] = rvs[accept]
  3280. simulated += num_accept
  3281. rvs = np.reshape(x, size1d)
  3282. if invert_res:
  3283. rvs = 1 / rvs
  3284. return rvs
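# Example (illustrative sketch, not part of the module): the same log-form
# ratio-of-uniforms acceptance test as above, written for the standard normal
# quasi-pdf q(x) = exp(-x**2/2), whose bounding rectangle is u in (0, 1],
# |v| <= sqrt(2/e):
#
#     >>> import numpy as np
#     >>> rng = np.random.default_rng(123)
#     >>> u = rng.uniform(0, 1, size=100000)
#     >>> v = rng.uniform(-np.sqrt(2/np.e), np.sqrt(2/np.e), size=100000)
#     >>> x = v / u
#     >>> accepted = x[2*np.log(u) <= -x**2/2]   # u**2 <= q(v/u), in logs
#     >>> bool(abs(accepted.mean()) < 0.05 and abs(accepted.std() - 1) < 0.05)
#     True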
  3285. def _mode(self, p, b):
  3286. # distinguish cases to avoid catastrophic cancellation (see [2])
  3287. if p < 1:
  3288. return b / (np.sqrt((p - 1)**2 + b**2) + 1 - p)
  3289. else:
  3290. return (np.sqrt((1 - p)**2 + b**2) - (1 - p)) / b
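# The two branches above are the same quantity written in two algebraically
# equivalent forms: the mode m of the quasi-pdf x**(p - 1) * exp(-b*(x + 1/x)/2)
# solves b*m**2 - 2*(p - 1)*m - b = 0, i.e.
#     m = ((p - 1) + sqrt((p - 1)**2 + b**2)) / b.
# Multiplying numerator and denominator by sqrt((p - 1)**2 + b**2) - (p - 1)
# gives
#     m = b / (sqrt((p - 1)**2 + b**2) + 1 - p),
# which avoids subtracting nearly equal numbers when p < 1 and b is small.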
  3291. def _munp(self, n, p, b):
  3292. num = sc.kve(p + n, b)
  3293. denom = sc.kve(p, b)
  3294. inf_vals = np.isinf(num) | np.isinf(denom)
  3295. if inf_vals.any():
  3296. msg = ("Infinite values encountered in the moment calculation "
  3297. "involving scipy.special.kve. Values replaced by NaN to "
  3298. "avoid incorrect results.")
  3299. warnings.warn(msg, RuntimeWarning)
  3300. m = np.full_like(num, np.nan, dtype=np.double)
  3301. m[~inf_vals] = num[~inf_vals] / denom[~inf_vals]
  3302. else:
  3303. m = num / denom
  3304. return m
  3305. geninvgauss = geninvgauss_gen(a=0.0, name="geninvgauss")
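# Example (illustrative, not part of the module): the moment formula used by
# `_munp` above, E[X**n] = K_{p+n}(b) / K_p(b), checked for the first moment
# via `scipy.special.kve` (the exponential scaling factors cancel in the
# ratio):
#
#     >>> import numpy as np
#     >>> from scipy import special, stats
#     >>> p, b = 2.5, 1.5
#     >>> np.allclose(stats.geninvgauss.mean(p, b),
#     ...             special.kve(p + 1, b) / special.kve(p, b))
#     True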
  3306. class norminvgauss_gen(rv_continuous):
  3307. r"""A Normal Inverse Gaussian continuous random variable.
  3308. %(before_notes)s
  3309. Notes
  3310. -----
  3311. The probability density function for `norminvgauss` is:
  3312. .. math::
  3313. f(x, a, b) = \frac{a \, K_1(a \sqrt{1 + x^2})}{\pi \sqrt{1 + x^2}} \,
  3314. \exp(\sqrt{a^2 - b^2} + b x)
  3315. where :math:`x` is a real number, the parameter :math:`a` is the tail
  3316. heaviness and :math:`b` is the asymmetry parameter satisfying
3317. :math:`a > 0` and :math:`|b| < a`.
3318. :math:`K_1` is the modified Bessel function of the second kind
  3319. (`scipy.special.k1`).
  3320. %(after_notes)s
  3321. A normal inverse Gaussian random variable `Y` with parameters `a` and `b`
  3322. can be expressed as a normal mean-variance mixture:
  3323. `Y = b * V + sqrt(V) * X` where `X` is `norm(0,1)` and `V` is
  3324. `invgauss(mu=1/sqrt(a**2 - b**2))`. This representation is used
  3325. to generate random variates.
  3326. Another common parametrization of the distribution (see Equation 2.1 in
  3327. [2]_) is given by the following expression of the pdf:
  3328. .. math::
  3329. g(x, \alpha, \beta, \delta, \mu) =
  3330. \frac{\alpha\delta K_1\left(\alpha\sqrt{\delta^2 + (x - \mu)^2}\right)}
  3331. {\pi \sqrt{\delta^2 + (x - \mu)^2}} \,
  3332. e^{\delta \sqrt{\alpha^2 - \beta^2} + \beta (x - \mu)}
  3333. In SciPy, this corresponds to
  3334. `a = alpha * delta, b = beta * delta, loc = mu, scale=delta`.
  3335. References
  3336. ----------
  3337. .. [1] O. Barndorff-Nielsen, "Hyperbolic Distributions and Distributions on
  3338. Hyperbolae", Scandinavian Journal of Statistics, Vol. 5(3),
  3339. pp. 151-157, 1978.
  3340. .. [2] O. Barndorff-Nielsen, "Normal Inverse Gaussian Distributions and
  3341. Stochastic Volatility Modelling", Scandinavian Journal of
  3342. Statistics, Vol. 24, pp. 1-13, 1997.
  3343. %(example)s
  3344. """
  3345. _support_mask = rv_continuous._open_support_mask
  3346. def _argcheck(self, a, b):
  3347. return (a > 0) & (np.absolute(b) < a)
  3348. def _shape_info(self):
  3349. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  3350. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, False))
  3351. return [ia, ib]
  3352. def _fitstart(self, data):
  3353. # Arbitrary, but the default a=b=1 is not valid
  3354. return super()._fitstart(data, args=(1, 0.5))
  3355. def _pdf(self, x, a, b):
  3356. gamma = np.sqrt(a**2 - b**2)
  3357. fac1 = a / np.pi * np.exp(gamma)
  3358. sq = np.hypot(1, x) # reduce overflows
  3359. return fac1 * sc.k1e(a * sq) * np.exp(b*x - a*sq) / sq
  3360. def _sf(self, x, a, b):
  3361. if np.isscalar(x):
  3362. # If x is a scalar, then so are a and b.
  3363. return integrate.quad(self._pdf, x, np.inf, args=(a, b))[0]
  3364. else:
  3365. result = []
  3366. for (x0, a0, b0) in zip(x, a, b):
  3367. result.append(integrate.quad(self._pdf, x0, np.inf,
  3368. args=(a0, b0))[0])
  3369. return np.array(result)
  3370. def _isf(self, q, a, b):
  3371. def _isf_scalar(q, a, b):
  3372. def eq(x, a, b, q):
  3373. # Solve eq(x, a, b, q) = 0 to obtain isf(x, a, b) = q.
  3374. return self._sf(x, a, b) - q
  3375. # Find a bracketing interval for the root.
  3376. # Start at the mean, and grow the length of the interval
  3377. # by 2 each iteration until there is a sign change in eq.
  3378. xm = self.mean(a, b)
  3379. em = eq(xm, a, b, q)
  3380. if em == 0:
  3381. # Unlikely, but might as well check.
  3382. return xm
  3383. if em > 0:
  3384. delta = 1
  3385. left = xm
  3386. right = xm + delta
  3387. while eq(right, a, b, q) > 0:
  3388. delta = 2*delta
  3389. right = xm + delta
  3390. else:
  3391. # em < 0
  3392. delta = 1
  3393. right = xm
  3394. left = xm - delta
  3395. while eq(left, a, b, q) < 0:
  3396. delta = 2*delta
  3397. left = xm - delta
  3398. result = optimize.brentq(eq, left, right, args=(a, b, q),
  3399. xtol=self.xtol)
  3400. return result
  3401. if np.isscalar(q):
  3402. return _isf_scalar(q, a, b)
  3403. else:
  3404. result = []
  3405. for (q0, a0, b0) in zip(q, a, b):
  3406. result.append(_isf_scalar(q0, a0, b0))
  3407. return np.array(result)
  3408. def _rvs(self, a, b, size=None, random_state=None):
3409. # note: Y = b * V + sqrt(V) * X is norminvgauss(a, b) if X is standard
3410. # normal and V is invgauss(mu=1/sqrt(a**2 - b**2))
  3411. gamma = np.sqrt(a**2 - b**2)
  3412. ig = invgauss.rvs(mu=1/gamma, size=size, random_state=random_state)
  3413. return b * ig + np.sqrt(ig) * norm.rvs(size=size,
  3414. random_state=random_state)
  3415. def _stats(self, a, b):
  3416. gamma = np.sqrt(a**2 - b**2)
  3417. mean = b / gamma
  3418. variance = a**2 / gamma**3
  3419. skewness = 3.0 * b / (a * np.sqrt(gamma))
  3420. kurtosis = 3.0 * (1 + 4 * b**2 / a**2) / gamma
  3421. return mean, variance, skewness, kurtosis
  3422. norminvgauss = norminvgauss_gen(name="norminvgauss")
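# Example (illustrative, not part of the module): the normal mean-variance
# mixture representation described in the notes above, Y = b*V + sqrt(V)*X
# with V ~ invgauss(mu=1/sqrt(a**2 - b**2)), reproduces the norminvgauss
# mean and variance:
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> a, b = 2.0, 1.0
#     >>> gamma = np.sqrt(a**2 - b**2)
#     >>> rng = np.random.default_rng(0)
#     >>> v = stats.invgauss.rvs(mu=1/gamma, size=50000, random_state=rng)
#     >>> y = b * v + np.sqrt(v) * rng.standard_normal(50000)
#     >>> m, var = stats.norminvgauss.stats(a, b, moments='mv')
#     >>> np.allclose([y.mean(), y.var()], [m, var], rtol=0.1)
#     True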
  3423. class invweibull_gen(rv_continuous):
  3424. """An inverted Weibull continuous random variable.
  3425. This distribution is also known as the Fréchet distribution or the
  3426. type II extreme value distribution.
  3427. %(before_notes)s
  3428. Notes
  3429. -----
  3430. The probability density function for `invweibull` is:
  3431. .. math::
  3432. f(x, c) = c x^{-c-1} \\exp(-x^{-c})
  3433. for :math:`x > 0`, :math:`c > 0`.
  3434. `invweibull` takes ``c`` as a shape parameter for :math:`c`.
  3435. %(after_notes)s
  3436. References
  3437. ----------
3438. F.R.S. de Gusmao, E.M.M. Ortega and G.M. Cordeiro, "The generalized inverse
  3439. Weibull distribution", Stat. Papers, vol. 52, pp. 591-619, 2011.
  3440. %(example)s
  3441. """
  3442. _support_mask = rv_continuous._open_support_mask
  3443. def _shape_info(self):
  3444. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  3445. def _pdf(self, x, c):
  3446. # invweibull.pdf(x, c) = c * x**(-c-1) * exp(-x**(-c))
  3447. xc1 = np.power(x, -c - 1.0)
  3448. xc2 = np.power(x, -c)
  3449. xc2 = np.exp(-xc2)
  3450. return c * xc1 * xc2
  3451. def _cdf(self, x, c):
  3452. xc1 = np.power(x, -c)
  3453. return np.exp(-xc1)
  3454. def _sf(self, x, c):
  3455. return -np.expm1(-x**-c)
  3456. def _ppf(self, q, c):
  3457. return np.power(-np.log(q), -1.0/c)
  3458. def _isf(self, p, c):
  3459. return (-np.log1p(-p))**(-1/c)
  3460. def _munp(self, n, c):
  3461. return sc.gamma(1 - n / c)
  3462. def _entropy(self, c):
3463. return 1 + _EULER + _EULER / c - np.log(c)
  3464. def _fitstart(self, data, args=None):
  3465. # invweibull requires c > 1 for the first moment to exist, so use 2.0
  3466. args = (2.0,) if args is None else args
  3467. return super(invweibull_gen, self)._fitstart(data, args=args)
  3468. invweibull = invweibull_gen(a=0, name='invweibull')
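# Example (illustrative, not part of the module): consistent with the
# "inverted Weibull" name, if X has an invweibull(c) distribution then 1/X is
# weibull_min(c) distributed, since cdf(x, c) = exp(-x**(-c)):
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> c, x = 2.5, np.linspace(0.1, 5, 9)
#     >>> np.allclose(stats.invweibull.cdf(x, c), stats.weibull_min.sf(1/x, c))
#     True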
  3469. class johnsonsb_gen(rv_continuous):
  3470. r"""A Johnson SB continuous random variable.
  3471. %(before_notes)s
  3472. See Also
  3473. --------
  3474. johnsonsu
  3475. Notes
  3476. -----
  3477. The probability density function for `johnsonsb` is:
  3478. .. math::
  3479. f(x, a, b) = \frac{b}{x(1-x)} \phi(a + b \log \frac{x}{1-x} )
  3480. where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`
  3481. and :math:`x \in [0,1]`. :math:`\phi` is the pdf of the normal
  3482. distribution.
  3483. `johnsonsb` takes :math:`a` and :math:`b` as shape parameters.
  3484. %(after_notes)s
  3485. %(example)s
  3486. """
  3487. _support_mask = rv_continuous._open_support_mask
  3488. def _argcheck(self, a, b):
  3489. return (b > 0) & (a == a)
  3490. def _shape_info(self):
  3491. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (False, False))
  3492. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  3493. return [ia, ib]
  3494. def _pdf(self, x, a, b):
  3495. # johnsonsb.pdf(x, a, b) = b / (x*(1-x)) * phi(a + b * log(x/(1-x)))
  3496. trm = _norm_pdf(a + b*np.log(x/(1.0-x)))
  3497. return b*1.0/(x*(1-x))*trm
  3498. def _cdf(self, x, a, b):
  3499. return _norm_cdf(a + b*np.log(x/(1.0-x)))
  3500. def _ppf(self, q, a, b):
  3501. return 1.0 / (1 + np.exp(-1.0 / b * (_norm_ppf(q) - a)))
  3502. johnsonsb = johnsonsb_gen(a=0.0, b=1.0, name='johnsonsb')
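# Example (illustrative, not part of the module): the `_ppf` above says that a
# johnsonsb(a, b) variate is the logistic transform expit((Z - a)/b) of a
# standard normal Z, checked here through the quantile function:
#
#     >>> import numpy as np
#     >>> from scipy import special, stats
#     >>> a, b, q = 1.0, 2.0, np.linspace(0.05, 0.95, 7)
#     >>> z = stats.norm.ppf(q)
#     >>> np.allclose(stats.johnsonsb.ppf(q, a, b), special.expit((z - a) / b))
#     True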
  3503. class johnsonsu_gen(rv_continuous):
  3504. r"""A Johnson SU continuous random variable.
  3505. %(before_notes)s
  3506. See Also
  3507. --------
  3508. johnsonsb
  3509. Notes
  3510. -----
  3511. The probability density function for `johnsonsu` is:
  3512. .. math::
  3513. f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
  3514. \phi(a + b \log(x + \sqrt{x^2 + 1}))
  3515. where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
  3516. :math:`\phi` is the pdf of the normal distribution.
  3517. `johnsonsu` takes :math:`a` and :math:`b` as shape parameters.
  3518. %(after_notes)s
  3519. %(example)s
  3520. """
  3521. def _argcheck(self, a, b):
  3522. return (b > 0) & (a == a)
  3523. def _shape_info(self):
  3524. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (False, False))
  3525. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  3526. return [ia, ib]
  3527. def _pdf(self, x, a, b):
  3528. # johnsonsu.pdf(x, a, b) = b / sqrt(x**2 + 1) *
  3529. # phi(a + b * log(x + sqrt(x**2 + 1)))
  3530. x2 = x*x
  3531. trm = _norm_pdf(a + b * np.log(x + np.sqrt(x2+1)))
  3532. return b*1.0/np.sqrt(x2+1.0)*trm
  3533. def _cdf(self, x, a, b):
  3534. return _norm_cdf(a + b * np.log(x + np.sqrt(x*x + 1)))
  3535. def _ppf(self, q, a, b):
  3536. return np.sinh((_norm_ppf(q) - a) / b)
  3537. johnsonsu = johnsonsu_gen(name='johnsonsu')
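# Example (illustrative, not part of the module): likewise, the `_ppf` above
# says that a johnsonsu(a, b) variate is sinh((Z - a)/b) for a standard
# normal Z:
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> a, b, q = 1.0, 2.0, np.linspace(0.05, 0.95, 7)
#     >>> np.allclose(stats.johnsonsu.ppf(q, a, b),
#     ...             np.sinh((stats.norm.ppf(q) - a) / b))
#     True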
  3538. class laplace_gen(rv_continuous):
  3539. r"""A Laplace continuous random variable.
  3540. %(before_notes)s
  3541. Notes
  3542. -----
  3543. The probability density function for `laplace` is
  3544. .. math::
  3545. f(x) = \frac{1}{2} \exp(-|x|)
  3546. for a real number :math:`x`.
  3547. %(after_notes)s
  3548. %(example)s
  3549. """
  3550. def _shape_info(self):
  3551. return []
  3552. def _rvs(self, size=None, random_state=None):
  3553. return random_state.laplace(0, 1, size=size)
  3554. def _pdf(self, x):
  3555. # laplace.pdf(x) = 1/2 * exp(-abs(x))
  3556. return 0.5*np.exp(-abs(x))
  3557. def _cdf(self, x):
  3558. with np.errstate(over='ignore'):
  3559. return np.where(x > 0, 1.0 - 0.5*np.exp(-x), 0.5*np.exp(x))
  3560. def _sf(self, x):
  3561. # By symmetry...
  3562. return self._cdf(-x)
  3563. def _ppf(self, q):
  3564. return np.where(q > 0.5, -np.log(2*(1-q)), np.log(2*q))
  3565. def _isf(self, q):
  3566. # By symmetry...
  3567. return -self._ppf(q)
  3568. def _stats(self):
  3569. return 0, 2, 0, 3
  3570. def _entropy(self):
  3571. return np.log(2)+1
  3572. @_call_super_mom
  3573. @replace_notes_in_docstring(rv_continuous, notes="""\
  3574. This function uses explicit formulas for the maximum likelihood
  3575. estimation of the Laplace distribution parameters, so the keyword
  3576. arguments `loc`, `scale`, and `optimizer` are ignored.\n\n""")
  3577. def fit(self, data, *args, **kwds):
  3578. data, floc, fscale = _check_fit_input_parameters(self, data,
  3579. args, kwds)
  3580. # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
  3581. # and Peacock (2000), Page 124
  3582. if floc is None:
  3583. floc = np.median(data)
  3584. if fscale is None:
  3585. fscale = (np.sum(np.abs(data - floc))) / len(data)
  3586. return floc, fscale
  3587. laplace = laplace_gen(name='laplace')
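# Example (illustrative, not part of the module): the closed-form maximum
# likelihood estimates used by `laplace.fit` above are just the sample median
# and the mean absolute deviation about it:
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> rng = np.random.default_rng(5)
#     >>> data = stats.laplace.rvs(loc=3, scale=2, size=1000, random_state=rng)
#     >>> loc, scale = stats.laplace.fit(data)
#     >>> np.allclose((loc, scale),
#     ...             (np.median(data), np.abs(data - np.median(data)).mean()))
#     True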
  3588. class laplace_asymmetric_gen(rv_continuous):
  3589. r"""An asymmetric Laplace continuous random variable.
  3590. %(before_notes)s
  3591. See Also
  3592. --------
  3593. laplace : Laplace distribution
  3594. Notes
  3595. -----
  3596. The probability density function for `laplace_asymmetric` is
  3597. .. math::
  3598. f(x, \kappa) &= \frac{1}{\kappa+\kappa^{-1}}\exp(-x\kappa),\quad x\ge0\\
  3599. &= \frac{1}{\kappa+\kappa^{-1}}\exp(x/\kappa),\quad x<0\\
  3600. for :math:`-\infty < x < \infty`, :math:`\kappa > 0`.
  3601. `laplace_asymmetric` takes ``kappa`` as a shape parameter for
  3602. :math:`\kappa`. For :math:`\kappa = 1`, it is identical to a
  3603. Laplace distribution.
  3604. %(after_notes)s
  3605. References
  3606. ----------
  3607. .. [1] "Asymmetric Laplace distribution", Wikipedia
  3608. https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution
  3609. .. [2] Kozubowski TJ and Podgórski K. A Multivariate and
  3610. Asymmetric Generalization of Laplace Distribution,
  3611. Computational Statistics 15, 531--540 (2000).
  3612. :doi:`10.1007/PL00022717`
  3613. %(example)s
  3614. """
  3615. def _shape_info(self):
  3616. return [_ShapeInfo("kappa", False, (0, np.inf), (False, False))]
  3617. def _pdf(self, x, kappa):
  3618. return np.exp(self._logpdf(x, kappa))
  3619. def _logpdf(self, x, kappa):
  3620. kapinv = 1/kappa
  3621. lPx = x * np.where(x >= 0, -kappa, kapinv)
  3622. lPx -= np.log(kappa+kapinv)
  3623. return lPx
  3624. def _cdf(self, x, kappa):
  3625. kapinv = 1/kappa
  3626. kappkapinv = kappa+kapinv
  3627. return np.where(x >= 0,
  3628. 1 - np.exp(-x*kappa)*(kapinv/kappkapinv),
  3629. np.exp(x*kapinv)*(kappa/kappkapinv))
  3630. def _sf(self, x, kappa):
  3631. kapinv = 1/kappa
  3632. kappkapinv = kappa+kapinv
  3633. return np.where(x >= 0,
  3634. np.exp(-x*kappa)*(kapinv/kappkapinv),
  3635. 1 - np.exp(x*kapinv)*(kappa/kappkapinv))
  3636. def _ppf(self, q, kappa):
  3637. kapinv = 1/kappa
  3638. kappkapinv = kappa+kapinv
  3639. return np.where(q >= kappa/kappkapinv,
  3640. -np.log((1 - q)*kappkapinv*kappa)*kapinv,
  3641. np.log(q*kappkapinv/kappa)*kappa)
  3642. def _isf(self, q, kappa):
  3643. kapinv = 1/kappa
  3644. kappkapinv = kappa+kapinv
  3645. return np.where(q <= kapinv/kappkapinv,
  3646. -np.log(q*kappkapinv*kappa)*kapinv,
  3647. np.log((1 - q)*kappkapinv/kappa)*kappa)
  3648. def _stats(self, kappa):
  3649. kapinv = 1/kappa
  3650. mn = kapinv - kappa
  3651. var = kapinv*kapinv + kappa*kappa
  3652. g1 = 2.0*(1-np.power(kappa, 6))/np.power(1+np.power(kappa, 4), 1.5)
  3653. g2 = 6.0*(1+np.power(kappa, 8))/np.power(1+np.power(kappa, 4), 2)
  3654. return mn, var, g1, g2
  3655. def _entropy(self, kappa):
  3656. return 1 + np.log(kappa+1/kappa)
  3657. laplace_asymmetric = laplace_asymmetric_gen(name='laplace_asymmetric')
  3658. def _check_fit_input_parameters(dist, data, args, kwds):
  3659. data = np.asarray(data)
  3660. floc = kwds.get('floc', None)
  3661. fscale = kwds.get('fscale', None)
  3662. num_shapes = len(dist.shapes.split(",")) if dist.shapes else 0
  3663. fshape_keys = []
  3664. fshapes = []
  3665. # user has many options for fixing the shape, so here we standardize it
  3666. # into 'f' + the number of the shape.
  3667. # Adapted from `_reduce_func` in `_distn_infrastructure.py`:
  3668. if dist.shapes:
  3669. shapes = dist.shapes.replace(',', ' ').split()
  3670. for j, s in enumerate(shapes):
  3671. key = 'f' + str(j)
  3672. names = [key, 'f' + s, 'fix_' + s]
  3673. val = _get_fixed_fit_value(kwds, names)
  3674. fshape_keys.append(key)
  3675. fshapes.append(val)
  3676. if val is not None:
  3677. kwds[key] = val
  3678. # determine if there are any unknown arguments in kwds
  3679. known_keys = {'loc', 'scale', 'optimizer', 'method',
  3680. 'floc', 'fscale', *fshape_keys}
  3681. unknown_keys = set(kwds).difference(known_keys)
  3682. if unknown_keys:
  3683. raise TypeError(f"Unknown keyword arguments: {unknown_keys}.")
  3684. if len(args) > num_shapes:
  3685. raise TypeError("Too many positional arguments.")
  3686. if None not in {floc, fscale, *fshapes}:
  3687. # This check is for consistency with `rv_continuous.fit`.
  3688. # Without this check, this function would just return the
  3689. # parameters that were given.
  3690. raise RuntimeError("All parameters fixed. There is nothing to "
  3691. "optimize.")
  3692. if not np.isfinite(data).all():
  3693. raise ValueError("The data contains non-finite values.")
  3694. return (data, *fshapes, floc, fscale)
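# Example (illustrative, not part of the module): the "all parameters fixed"
# check above is what a user sees when every parameter of a distribution that
# uses this helper is pinned down, e.g. `laplace` with both `floc` and
# `fscale` given:
#
#     >>> from scipy import stats
#     >>> try:
#     ...     stats.laplace.fit([1.0, 2.0, 3.0], floc=0, fscale=1)
#     ... except RuntimeError as e:
#     ...     print(e)
#     All parameters fixed. There is nothing to optimize.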
  3695. class levy_gen(rv_continuous):
  3696. r"""A Levy continuous random variable.
  3697. %(before_notes)s
  3698. See Also
  3699. --------
  3700. levy_stable, levy_l
  3701. Notes
  3702. -----
  3703. The probability density function for `levy` is:
  3704. .. math::
  3705. f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp\left(-\frac{1}{2x}\right)
  3706. for :math:`x >= 0`.
  3707. This is the same as the Levy-stable distribution with :math:`a=1/2` and
  3708. :math:`b=1`.
  3709. %(after_notes)s
  3710. Examples
  3711. --------
  3712. >>> import numpy as np
  3713. >>> from scipy.stats import levy
  3714. >>> import matplotlib.pyplot as plt
  3715. >>> fig, ax = plt.subplots(1, 1)
  3716. Calculate the first four moments:
  3717. >>> mean, var, skew, kurt = levy.stats(moments='mvsk')
  3718. Display the probability density function (``pdf``):
  3719. >>> # `levy` is very heavy-tailed.
  3720. >>> # To show a nice plot, let's cut off the upper 40 percent.
  3721. >>> a, b = levy.ppf(0), levy.ppf(0.6)
  3722. >>> x = np.linspace(a, b, 100)
  3723. >>> ax.plot(x, levy.pdf(x),
  3724. ... 'r-', lw=5, alpha=0.6, label='levy pdf')
  3725. Alternatively, the distribution object can be called (as a function)
  3726. to fix the shape, location and scale parameters. This returns a "frozen"
  3727. RV object holding the given parameters fixed.
  3728. Freeze the distribution and display the frozen ``pdf``:
  3729. >>> rv = levy()
  3730. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  3731. Check accuracy of ``cdf`` and ``ppf``:
  3732. >>> vals = levy.ppf([0.001, 0.5, 0.999])
  3733. >>> np.allclose([0.001, 0.5, 0.999], levy.cdf(vals))
  3734. True
  3735. Generate random numbers:
  3736. >>> r = levy.rvs(size=1000)
  3737. And compare the histogram:
  3738. >>> # manual binning to ignore the tail
  3739. >>> bins = np.concatenate((np.linspace(a, b, 20), [np.max(r)]))
  3740. >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
  3741. >>> ax.set_xlim([x[0], x[-1]])
  3742. >>> ax.legend(loc='best', frameon=False)
  3743. >>> plt.show()
  3744. """
  3745. _support_mask = rv_continuous._open_support_mask
  3746. def _shape_info(self):
  3747. return []
  3748. def _pdf(self, x):
  3749. # levy.pdf(x) = 1 / (x * sqrt(2*pi*x)) * exp(-1/(2*x))
  3750. return 1 / np.sqrt(2*np.pi*x) / x * np.exp(-1/(2*x))
  3751. def _cdf(self, x):
  3752. # Equivalent to 2*norm.sf(np.sqrt(1/x))
  3753. return sc.erfc(np.sqrt(0.5 / x))
  3754. def _sf(self, x):
  3755. return sc.erf(np.sqrt(0.5 / x))
  3756. def _ppf(self, q):
  3757. # Equivalent to 1.0/(norm.isf(q/2)**2) or 0.5/(erfcinv(q)**2)
  3758. val = -sc.ndtri(q/2)
  3759. return 1.0 / (val * val)
  3760. def _isf(self, p):
  3761. return 1/(2*sc.erfinv(p)**2)
  3762. def _stats(self):
  3763. return np.inf, np.inf, np.nan, np.nan
  3764. levy = levy_gen(a=0.0, name="levy")
  3765. class levy_l_gen(rv_continuous):
  3766. r"""A left-skewed Levy continuous random variable.
  3767. %(before_notes)s
  3768. See Also
  3769. --------
  3770. levy, levy_stable
  3771. Notes
  3772. -----
  3773. The probability density function for `levy_l` is:
  3774. .. math::
  3775. f(x) = \frac{1}{|x| \sqrt{2\pi |x|}} \exp{ \left(-\frac{1}{2|x|} \right)}
  3776. for :math:`x <= 0`.
  3777. This is the same as the Levy-stable distribution with :math:`a=1/2` and
  3778. :math:`b=-1`.
  3779. %(after_notes)s
  3780. Examples
  3781. --------
  3782. >>> import numpy as np
  3783. >>> from scipy.stats import levy_l
  3784. >>> import matplotlib.pyplot as plt
  3785. >>> fig, ax = plt.subplots(1, 1)
  3786. Calculate the first four moments:
  3787. >>> mean, var, skew, kurt = levy_l.stats(moments='mvsk')
  3788. Display the probability density function (``pdf``):
  3789. >>> # `levy_l` is very heavy-tailed.
  3790. >>> # To show a nice plot, let's cut off the lower 40 percent.
  3791. >>> a, b = levy_l.ppf(0.4), levy_l.ppf(1)
  3792. >>> x = np.linspace(a, b, 100)
  3793. >>> ax.plot(x, levy_l.pdf(x),
  3794. ... 'r-', lw=5, alpha=0.6, label='levy_l pdf')
  3795. Alternatively, the distribution object can be called (as a function)
  3796. to fix the shape, location and scale parameters. This returns a "frozen"
  3797. RV object holding the given parameters fixed.
  3798. Freeze the distribution and display the frozen ``pdf``:
  3799. >>> rv = levy_l()
  3800. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  3801. Check accuracy of ``cdf`` and ``ppf``:
  3802. >>> vals = levy_l.ppf([0.001, 0.5, 0.999])
  3803. >>> np.allclose([0.001, 0.5, 0.999], levy_l.cdf(vals))
  3804. True
  3805. Generate random numbers:
  3806. >>> r = levy_l.rvs(size=1000)
  3807. And compare the histogram:
  3808. >>> # manual binning to ignore the tail
  3809. >>> bins = np.concatenate(([np.min(r)], np.linspace(a, b, 20)))
  3810. >>> ax.hist(r, bins=bins, density=True, histtype='stepfilled', alpha=0.2)
  3811. >>> ax.set_xlim([x[0], x[-1]])
  3812. >>> ax.legend(loc='best', frameon=False)
  3813. >>> plt.show()
  3814. """
  3815. _support_mask = rv_continuous._open_support_mask
  3816. def _shape_info(self):
  3817. return []
  3818. def _pdf(self, x):
  3819. # levy_l.pdf(x) = 1 / (abs(x) * sqrt(2*pi*abs(x))) * exp(-1/(2*abs(x)))
  3820. ax = abs(x)
  3821. return 1/np.sqrt(2*np.pi*ax)/ax*np.exp(-1/(2*ax))
  3822. def _cdf(self, x):
  3823. ax = abs(x)
  3824. return 2 * _norm_cdf(1 / np.sqrt(ax)) - 1
  3825. def _sf(self, x):
  3826. ax = abs(x)
  3827. return 2 * _norm_sf(1 / np.sqrt(ax))
  3828. def _ppf(self, q):
  3829. val = _norm_ppf((q + 1.0) / 2)
  3830. return -1.0 / (val * val)
  3831. def _isf(self, p):
  3832. return -1/_norm_isf(p/2)**2
  3833. def _stats(self):
  3834. return np.inf, np.inf, np.nan, np.nan
  3835. levy_l = levy_l_gen(b=0.0, name="levy_l")
  3836. class logistic_gen(rv_continuous):
  3837. r"""A logistic (or Sech-squared) continuous random variable.
  3838. %(before_notes)s
  3839. Notes
  3840. -----
  3841. The probability density function for `logistic` is:
  3842. .. math::
  3843. f(x) = \frac{\exp(-x)}
  3844. {(1+\exp(-x))^2}
  3845. `logistic` is a special case of `genlogistic` with ``c=1``.
3846. Note that the survival function (``logistic.sf``) is equal to the
  3847. Fermi-Dirac distribution describing fermionic statistics.
  3848. %(after_notes)s
  3849. %(example)s
  3850. """
  3851. def _shape_info(self):
  3852. return []
  3853. def _rvs(self, size=None, random_state=None):
  3854. return random_state.logistic(size=size)
  3855. def _pdf(self, x):
  3856. # logistic.pdf(x) = exp(-x) / (1+exp(-x))**2
  3857. return np.exp(self._logpdf(x))
  3858. def _logpdf(self, x):
  3859. y = -np.abs(x)
  3860. return y - 2. * sc.log1p(np.exp(y))
  3861. def _cdf(self, x):
  3862. return sc.expit(x)
  3863. def _logcdf(self, x):
  3864. return sc.log_expit(x)
  3865. def _ppf(self, q):
  3866. return sc.logit(q)
  3867. def _sf(self, x):
  3868. return sc.expit(-x)
  3869. def _logsf(self, x):
  3870. return sc.log_expit(-x)
  3871. def _isf(self, q):
  3872. return -sc.logit(q)
  3873. def _stats(self):
  3874. return 0, np.pi*np.pi/3.0, 0, 6.0/5.0
  3875. def _entropy(self):
  3876. # https://en.wikipedia.org/wiki/Logistic_distribution
  3877. return 2.0
  3878. @_call_super_mom
  3879. @inherit_docstring_from(rv_continuous)
  3880. def fit(self, data, *args, **kwds):
  3881. if kwds.pop('superfit', False):
  3882. return super().fit(data, *args, **kwds)
  3883. data, floc, fscale = _check_fit_input_parameters(self, data,
  3884. args, kwds)
  3885. n = len(data)
  3886. # rv_continuous provided guesses
  3887. loc, scale = self._fitstart(data)
  3888. # these are trumped by user-provided guesses
  3889. loc, scale = kwds.get('loc', loc), kwds.get('scale', scale)
  3890. # the maximum likelihood estimators `a` and `b` of the location and
  3891. # scale parameters are roots of the two equations described in `func`.
  3892. # Source: Statistical Distributions, 3rd Edition. Evans, Hastings, and
  3893. # Peacock (2000), Page 130
  3894. def dl_dloc(loc, scale=fscale):
  3895. c = (data - loc) / scale
  3896. return np.sum(sc.expit(c)) - n/2
  3897. def dl_dscale(scale, loc=floc):
  3898. c = (data - loc) / scale
  3899. return np.sum(c*np.tanh(c/2)) - n
  3900. def func(params):
  3901. loc, scale = params
  3902. return dl_dloc(loc, scale), dl_dscale(scale, loc)
  3903. if fscale is not None and floc is None:
  3904. res = optimize.root(dl_dloc, (loc,))
  3905. loc = res.x[0]
  3906. scale = fscale
  3907. elif floc is not None and fscale is None:
  3908. res = optimize.root(dl_dscale, (scale,))
  3909. scale = res.x[0]
  3910. loc = floc
  3911. else:
  3912. res = optimize.root(func, (loc, scale))
  3913. loc, scale = res.x
  3914. return ((loc, scale) if res.success
  3915. else super().fit(data, *args, **kwds))
  3916. logistic = logistic_gen(name='logistic')
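# Example (illustrative, not part of the module): the location and scale
# returned by the root-solving `fit` above (approximately) satisfy the two
# likelihood equations dl_dloc = 0 and dl_dscale = 0:
#
#     >>> import numpy as np
#     >>> from scipy import special, stats
#     >>> rng = np.random.default_rng(3)
#     >>> data = stats.logistic.rvs(loc=1, scale=2, size=2000, random_state=rng)
#     >>> loc, scale = stats.logistic.fit(data)
#     >>> c = (data - loc) / scale
#     >>> np.allclose([special.expit(c).sum() - len(data)/2,
#     ...              (c * np.tanh(c/2)).sum() - len(data)], 0, atol=1e-3)
#     True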
  3917. class loggamma_gen(rv_continuous):
  3918. r"""A log gamma continuous random variable.
  3919. %(before_notes)s
  3920. Notes
  3921. -----
  3922. The probability density function for `loggamma` is:
  3923. .. math::
  3924. f(x, c) = \frac{\exp(c x - \exp(x))}
  3925. {\Gamma(c)}
3926. for all real :math:`x` and :math:`c > 0`. Here, :math:`\Gamma` is the
  3927. gamma function (`scipy.special.gamma`).
  3928. `loggamma` takes ``c`` as a shape parameter for :math:`c`.
  3929. %(after_notes)s
  3930. %(example)s
  3931. """
  3932. def _shape_info(self):
  3933. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  3934. def _rvs(self, c, size=None, random_state=None):
  3935. # Use the property of the gamma distribution Gamma(c)
  3936. # Gamma(c) ~ Gamma(c + 1)*U**(1/c),
  3937. # where U is uniform on [0, 1]. (See, e.g.,
  3938. # G. Marsaglia and W.W. Tsang, "A simple method for generating gamma
  3939. # variables", https://doi.org/10.1145/358407.358414)
  3940. # So
  3941. # log(Gamma(c)) ~ log(Gamma(c + 1)) + log(U)/c
  3942. # Generating a sample with this formulation is a bit slower
  3943. # than the more obvious log(Gamma(c)), but it avoids loss
  3944. # of precision when c << 1.
  3945. return (np.log(random_state.gamma(c + 1, size=size))
  3946. + np.log(random_state.uniform(size=size))/c)
  3947. def _pdf(self, x, c):
  3948. # loggamma.pdf(x, c) = exp(c*x-exp(x)) / gamma(c)
  3949. return np.exp(c*x-np.exp(x)-sc.gammaln(c))
  3950. def _logpdf(self, x, c):
  3951. return c*x - np.exp(x) - sc.gammaln(c)
  3952. def _cdf(self, x, c):
  3953. return sc.gammainc(c, np.exp(x))
  3954. def _ppf(self, q, c):
  3955. return np.log(sc.gammaincinv(c, q))
  3956. def _sf(self, x, c):
  3957. return sc.gammaincc(c, np.exp(x))
  3958. def _isf(self, q, c):
  3959. return np.log(sc.gammainccinv(c, q))
  3960. def _stats(self, c):
  3961. # See, for example, "A Statistical Study of Log-Gamma Distribution", by
  3962. # Ping Shing Chan (thesis, McMaster University, 1993).
  3963. mean = sc.digamma(c)
  3964. var = sc.polygamma(1, c)
  3965. skewness = sc.polygamma(2, c) / np.power(var, 1.5)
  3966. excess_kurtosis = sc.polygamma(3, c) / (var*var)
  3967. return mean, var, skewness, excess_kurtosis
  3968. loggamma = loggamma_gen(name='loggamma')
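# Example (illustrative, not part of the module): the Gamma(c + 1)*U**(1/c)
# formulation used in `_rvs` above keeps the samples finite even when c is so
# small that the underlying gamma variates would underflow, and the sample
# mean still agrees with the exact mean digamma(c):
#
#     >>> import numpy as np
#     >>> from scipy import special, stats
#     >>> c = 1e-4
#     >>> r = stats.loggamma.rvs(c, size=200000, random_state=42)
#     >>> bool(np.isfinite(r).all()) and np.allclose(r.mean(),
#     ...                                            special.digamma(c),
#     ...                                            rtol=2e-2)
#     True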
  3969. class loglaplace_gen(rv_continuous):
  3970. r"""A log-Laplace continuous random variable.
  3971. %(before_notes)s
  3972. Notes
  3973. -----
  3974. The probability density function for `loglaplace` is:
  3975. .. math::
  3976. f(x, c) = \begin{cases}\frac{c}{2} x^{ c-1} &\text{for } 0 < x < 1\\
  3977. \frac{c}{2} x^{-c-1} &\text{for } x \ge 1
  3978. \end{cases}
  3979. for :math:`c > 0`.
  3980. `loglaplace` takes ``c`` as a shape parameter for :math:`c`.
  3981. %(after_notes)s
  3982. References
  3983. ----------
  3984. T.J. Kozubowski and K. Podgorski, "A log-Laplace growth rate model",
  3985. The Mathematical Scientist, vol. 28, pp. 49-60, 2003.
  3986. %(example)s
  3987. """
  3988. def _shape_info(self):
  3989. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  3990. def _pdf(self, x, c):
  3991. # loglaplace.pdf(x, c) = c / 2 * x**(c-1), for 0 < x < 1
  3992. # = c / 2 * x**(-c-1), for x >= 1
  3993. cd2 = c/2.0
  3994. c = np.where(x < 1, c, -c)
  3995. return cd2*x**(c-1)
  3996. def _cdf(self, x, c):
  3997. return np.where(x < 1, 0.5*x**c, 1-0.5*x**(-c))
  3998. def _ppf(self, q, c):
  3999. return np.where(q < 0.5, (2.0*q)**(1.0/c), (2*(1.0-q))**(-1.0/c))
  4000. def _munp(self, n, c):
  4001. return c**2 / (c**2 - n**2)
  4002. def _entropy(self, c):
  4003. return np.log(2.0/c) + 1.0
  4004. loglaplace = loglaplace_gen(a=0.0, name='loglaplace')
  4005. def _lognorm_logpdf(x, s):
  4006. return _lazywhere(x != 0, (x, s),
  4007. lambda x, s: -np.log(x)**2 / (2*s**2) - np.log(s*x*np.sqrt(2*np.pi)),
  4008. -np.inf)
  4009. class lognorm_gen(rv_continuous):
  4010. r"""A lognormal continuous random variable.
  4011. %(before_notes)s
  4012. Notes
  4013. -----
  4014. The probability density function for `lognorm` is:
  4015. .. math::
  4016. f(x, s) = \frac{1}{s x \sqrt{2\pi}}
  4017. \exp\left(-\frac{\log^2(x)}{2s^2}\right)
  4018. for :math:`x > 0`, :math:`s > 0`.
  4019. `lognorm` takes ``s`` as a shape parameter for :math:`s`.
  4020. %(after_notes)s
  4021. Suppose a normally distributed random variable ``X`` has mean ``mu`` and
  4022. standard deviation ``sigma``. Then ``Y = exp(X)`` is lognormally
  4023. distributed with ``s = sigma`` and ``scale = exp(mu)``.
  4024. %(example)s
  4025. """
  4026. _support_mask = rv_continuous._open_support_mask
  4027. def _shape_info(self):
  4028. return [_ShapeInfo("s", False, (0, np.inf), (False, False))]
  4029. def _rvs(self, s, size=None, random_state=None):
  4030. return np.exp(s * random_state.standard_normal(size))
  4031. def _pdf(self, x, s):
  4032. # lognorm.pdf(x, s) = 1 / (s*x*sqrt(2*pi)) * exp(-1/2*(log(x)/s)**2)
  4033. return np.exp(self._logpdf(x, s))
  4034. def _logpdf(self, x, s):
  4035. return _lognorm_logpdf(x, s)
  4036. def _cdf(self, x, s):
  4037. return _norm_cdf(np.log(x) / s)
  4038. def _logcdf(self, x, s):
  4039. return _norm_logcdf(np.log(x) / s)
  4040. def _ppf(self, q, s):
  4041. return np.exp(s * _norm_ppf(q))
  4042. def _sf(self, x, s):
  4043. return _norm_sf(np.log(x) / s)
  4044. def _logsf(self, x, s):
  4045. return _norm_logsf(np.log(x) / s)
  4046. def _stats(self, s):
  4047. p = np.exp(s*s)
  4048. mu = np.sqrt(p)
  4049. mu2 = p*(p-1)
  4050. g1 = np.sqrt((p-1))*(2+p)
  4051. g2 = np.polyval([1, 2, 3, 0, -6.0], p)
  4052. return mu, mu2, g1, g2
  4053. def _entropy(self, s):
  4054. return 0.5 * (1 + np.log(2*np.pi) + 2 * np.log(s))
  4055. @_call_super_mom
  4056. @extend_notes_in_docstring(rv_continuous, notes="""\
  4057. When `method='MLE'` and
  4058. the location parameter is fixed by using the `floc` argument,
  4059. this function uses explicit formulas for the maximum likelihood
  4060. estimation of the log-normal shape and scale parameters, so the
  4061. `optimizer`, `loc` and `scale` keyword arguments are ignored.
  4062. \n\n""")
  4063. def fit(self, data, *args, **kwds):
  4064. floc = kwds.get('floc', None)
  4065. if floc is None:
  4066. # fall back on the default fit method.
  4067. return super().fit(data, *args, **kwds)
  4068. f0 = (kwds.get('f0', None) or kwds.get('fs', None) or
  4069. kwds.get('fix_s', None))
  4070. fscale = kwds.get('fscale', None)
  4071. if len(args) > 1:
  4072. raise TypeError("Too many input arguments.")
  4073. for name in ['f0', 'fs', 'fix_s', 'floc', 'fscale', 'loc', 'scale',
  4074. 'optimizer', 'method']:
  4075. kwds.pop(name, None)
  4076. if kwds:
  4077. raise TypeError("Unknown arguments: %s." % kwds)
  4078. # Special case: loc is fixed. Use the maximum likelihood formulas
  4079. # instead of the numerical solver.
  4080. if f0 is not None and fscale is not None:
  4081. # This check is for consistency with `rv_continuous.fit`.
  4082. raise ValueError("All parameters fixed. There is nothing to "
  4083. "optimize.")
  4084. data = np.asarray(data)
  4085. if not np.isfinite(data).all():
  4086. raise ValueError("The data contains non-finite values.")
  4087. floc = float(floc)
  4088. if floc != 0:
  4089. # Shifting the data by floc. Don't do the subtraction in-place,
  4090. # because `data` might be a view of the input array.
  4091. data = data - floc
  4092. if np.any(data <= 0):
  4093. raise FitDataError("lognorm", lower=floc, upper=np.inf)
  4094. lndata = np.log(data)
  4095. # Three cases to handle:
  4096. # * shape and scale both free
  4097. # * shape fixed, scale free
  4098. # * shape free, scale fixed
  4099. if fscale is None:
  4100. # scale is free.
  4101. scale = np.exp(lndata.mean())
  4102. if f0 is None:
  4103. # shape is free.
  4104. shape = lndata.std()
  4105. else:
  4106. # shape is fixed.
  4107. shape = float(f0)
  4108. else:
  4109. # scale is fixed, shape is free
  4110. scale = float(fscale)
  4111. shape = np.sqrt(((lndata - np.log(scale))**2).mean())
  4112. return shape, floc, scale
  4113. lognorm = lognorm_gen(a=0.0, name='lognorm')
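# Example (illustrative, not part of the module): with `floc=0`, the explicit
# MLE formulas in `lognorm.fit` above reduce to the standard deviation and the
# exponential of the mean of log(data):
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> rng = np.random.default_rng(2)
#     >>> data = stats.lognorm.rvs(0.7, scale=3.0, size=1000, random_state=rng)
#     >>> s, loc, scale = stats.lognorm.fit(data, floc=0)
#     >>> np.allclose((s, scale),
#     ...             (np.log(data).std(), np.exp(np.log(data).mean())))
#     True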
  4114. class gibrat_gen(rv_continuous):
  4115. r"""A Gibrat continuous random variable.
  4116. %(before_notes)s
  4117. Notes
  4118. -----
  4119. The probability density function for `gibrat` is:
  4120. .. math::
  4121. f(x) = \frac{1}{x \sqrt{2\pi}} \exp(-\frac{1}{2} (\log(x))^2)
  4122. `gibrat` is a special case of `lognorm` with ``s=1``.
  4123. %(after_notes)s
  4124. %(example)s
  4125. """
  4126. _support_mask = rv_continuous._open_support_mask
  4127. def _shape_info(self):
  4128. return []
  4129. def _rvs(self, size=None, random_state=None):
  4130. return np.exp(random_state.standard_normal(size))
  4131. def _pdf(self, x):
  4132. # gibrat.pdf(x) = 1/(x*sqrt(2*pi)) * exp(-1/2*(log(x))**2)
  4133. return np.exp(self._logpdf(x))
  4134. def _logpdf(self, x):
  4135. return _lognorm_logpdf(x, 1.0)
  4136. def _cdf(self, x):
  4137. return _norm_cdf(np.log(x))
  4138. def _ppf(self, q):
  4139. return np.exp(_norm_ppf(q))
  4140. def _stats(self):
  4141. p = np.e
  4142. mu = np.sqrt(p)
  4143. mu2 = p * (p - 1)
  4144. g1 = np.sqrt((p - 1)) * (2 + p)
  4145. g2 = np.polyval([1, 2, 3, 0, -6.0], p)
  4146. return mu, mu2, g1, g2
  4147. def _entropy(self):
  4148. return 0.5 * np.log(2 * np.pi) + 0.5
  4149. # deprecation of gilbrat, see #15911
  4150. deprmsg = ("`gilbrat` is a misspelling of the correct name for the `gibrat` "
  4151. "distribution, and will be removed in SciPy 1.11.")
  4152. class gilbrat_gen(gibrat_gen):
  4153. # override __call__ protocol from rv_generic to also
  4154. # deprecate instantiation of frozen distributions
  4155. r"""
  4156. .. deprecated:: 1.9.0
  4157. `gilbrat` is deprecated, use `gibrat` instead!
  4158. `gilbrat` is a misspelling of the correct name for the `gibrat`
  4159. distribution, and will be removed in SciPy 1.11.
  4160. """
  4161. def __call__(self, *args, **kwds):
  4162. # align with warning text from np.deprecated that's used for methods
  4163. msg = "`gilbrat` is deprecated, use `gibrat` instead!\n" + deprmsg
  4164. warnings.warn(msg, DeprecationWarning, stacklevel=2)
  4165. return self.freeze(*args, **kwds)
  4166. gibrat = gibrat_gen(a=0.0, name='gibrat')
  4167. gilbrat = gilbrat_gen(a=0.0, name='gilbrat')
4168. # since the deprecated class gets instantiated upon import (and we only want to
  4169. # warn upon use), add the deprecation to each (documented) class method, c.f.
  4170. # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.gilbrat.html
  4171. _gibrat_method_names = [
  4172. "cdf", "entropy", "expect", "fit", "interval", "isf", "logcdf", "logpdf",
  4173. "logsf", "mean", "median", "moment", "pdf", "ppf", "rvs", "sf", "stats",
  4174. "std", "var"
  4175. ]
  4176. for m in _gibrat_method_names:
  4177. wrapper = np.deprecate(getattr(gilbrat, m), f"gilbrat.{m}", f"gibrat.{m}",
  4178. deprmsg)
  4179. setattr(gilbrat, m, wrapper)
  4180. class maxwell_gen(rv_continuous):
  4181. r"""A Maxwell continuous random variable.
  4182. %(before_notes)s
  4183. Notes
  4184. -----
  4185. A special case of a `chi` distribution, with ``df=3``, ``loc=0.0``,
  4186. and given ``scale = a``, where ``a`` is the parameter used in the
  4187. Mathworld description [1]_.
  4188. The probability density function for `maxwell` is:
  4189. .. math::
  4190. f(x) = \sqrt{2/\pi}x^2 \exp(-x^2/2)
  4191. for :math:`x >= 0`.
  4192. %(after_notes)s
  4193. References
  4194. ----------
  4195. .. [1] http://mathworld.wolfram.com/MaxwellDistribution.html
  4196. %(example)s
  4197. """
  4198. def _shape_info(self):
  4199. return []
  4200. def _rvs(self, size=None, random_state=None):
  4201. return chi.rvs(3.0, size=size, random_state=random_state)
  4202. def _pdf(self, x):
  4203. # maxwell.pdf(x) = sqrt(2/pi)x**2 * exp(-x**2/2)
  4204. return _SQRT_2_OVER_PI*x*x*np.exp(-x*x/2.0)
  4205. def _logpdf(self, x):
  4206. # Allow x=0 without 'divide by zero' warnings
  4207. with np.errstate(divide='ignore'):
  4208. return _LOG_SQRT_2_OVER_PI + 2*np.log(x) - 0.5*x*x
  4209. def _cdf(self, x):
  4210. return sc.gammainc(1.5, x*x/2.0)
  4211. def _ppf(self, q):
  4212. return np.sqrt(2*sc.gammaincinv(1.5, q))
  4213. def _stats(self):
  4214. val = 3*np.pi-8
  4215. return (2*np.sqrt(2.0/np.pi),
  4216. 3-8/np.pi,
  4217. np.sqrt(2)*(32-10*np.pi)/val**1.5,
  4218. (-12*np.pi*np.pi + 160*np.pi - 384) / val**2.0)
  4219. def _entropy(self):
  4220. return _EULER + 0.5*np.log(2*np.pi)-0.5
  4221. maxwell = maxwell_gen(a=0.0, name='maxwell')
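# Example (illustrative, not part of the module): the `chi` special case
# mentioned in the notes above, maxwell == chi with df=3 for any common scale:
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> x = np.linspace(0.1, 5, 9)
#     >>> np.allclose(stats.maxwell.pdf(x, scale=2.0),
#     ...             stats.chi.pdf(x, 3, scale=2.0))
#     True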
  4222. class mielke_gen(rv_continuous):
  4223. r"""A Mielke Beta-Kappa / Dagum continuous random variable.
  4224. %(before_notes)s
  4225. Notes
  4226. -----
  4227. The probability density function for `mielke` is:
  4228. .. math::
  4229. f(x, k, s) = \frac{k x^{k-1}}{(1+x^s)^{1+k/s}}
4230. for :math:`x > 0` and :math:`k, s > 0`. The distribution is sometimes
4231. called the Dagum distribution ([2]_). It was already defined in [3]_ as
4232. a Burr Type III distribution (`burr` with parameters ``c=s`` and
  4233. ``d=k/s``).
  4234. `mielke` takes ``k`` and ``s`` as shape parameters.
  4235. %(after_notes)s
  4236. References
  4237. ----------
  4238. .. [1] Mielke, P.W., 1973 "Another Family of Distributions for Describing
  4239. and Analyzing Precipitation Data." J. Appl. Meteor., 12, 275-280
  4240. .. [2] Dagum, C., 1977 "A new model for personal income distribution."
  4241. Economie Appliquee, 33, 327-367.
  4242. .. [3] Burr, I. W. "Cumulative frequency functions", Annals of
  4243. Mathematical Statistics, 13(2), pp 215-232 (1942).
  4244. %(example)s
  4245. """
  4246. def _shape_info(self):
  4247. ik = _ShapeInfo("k", False, (0, np.inf), (False, False))
  4248. i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
  4249. return [ik, i_s]
  4250. def _pdf(self, x, k, s):
  4251. return k*x**(k-1.0) / (1.0+x**s)**(1.0+k*1.0/s)
  4252. def _logpdf(self, x, k, s):
  4253. # Allow x=0 without 'divide by zero' warnings.
  4254. with np.errstate(divide='ignore'):
  4255. return np.log(k) + np.log(x)*(k - 1) - np.log1p(x**s)*(1 + k/s)
  4256. def _cdf(self, x, k, s):
  4257. return x**k / (1.0+x**s)**(k*1.0/s)
  4258. def _ppf(self, q, k, s):
  4259. qsk = pow(q, s*1.0/k)
  4260. return pow(qsk/(1.0-qsk), 1.0/s)
  4261. def _munp(self, n, k, s):
  4262. def nth_moment(n, k, s):
  4263. # n-th moment is defined for -k < n < s
  4264. return sc.gamma((k+n)/s)*sc.gamma(1-n/s)/sc.gamma(k/s)
  4265. return _lazywhere(n < s, (n, k, s), nth_moment, np.inf)
  4266. mielke = mielke_gen(a=0.0, name='mielke')
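# Example (illustrative, not part of the module): the Burr Type III relation
# stated in the notes above, mielke(k, s) == burr(c=s, d=k/s):
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> x, k, s = np.linspace(0.1, 5, 9), 3.0, 2.0
#     >>> np.allclose(stats.mielke.cdf(x, k, s), stats.burr.cdf(x, s, k/s))
#     True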
  4267. class kappa4_gen(rv_continuous):
  4268. r"""Kappa 4 parameter distribution.
  4269. %(before_notes)s
  4270. Notes
  4271. -----
  4272. The probability density function for kappa4 is:
  4273. .. math::
  4274. f(x, h, k) = (1 - k x)^{1/k - 1} (1 - h (1 - k x)^{1/k})^{1/h-1}
  4275. if :math:`h` and :math:`k` are not equal to 0.
  4276. If :math:`h` or :math:`k` are zero then the pdf can be simplified:
  4277. h = 0 and k != 0::
  4278. kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
  4279. exp(-(1.0 - k*x)**(1.0/k))
  4280. h != 0 and k = 0::
  4281. kappa4.pdf(x, h, k) = exp(-x)*(1.0 - h*exp(-x))**(1.0/h - 1.0)
  4282. h = 0 and k = 0::
  4283. kappa4.pdf(x, h, k) = exp(-x)*exp(-exp(-x))
  4284. kappa4 takes :math:`h` and :math:`k` as shape parameters.
  4285. The kappa4 distribution returns other distributions when certain
  4286. :math:`h` and :math:`k` values are used.
  4287. +------+-------------+----------------+------------------+
  4288. | h | k=0.0 | k=1.0 | -inf<=k<=inf |
  4289. +======+=============+================+==================+
  4290. | -1.0 | Logistic | | Generalized |
  4291. | | | | Logistic(1) |
  4292. | | | | |
  4293. | | logistic(x) | | |
  4294. +------+-------------+----------------+------------------+
  4295. | 0.0 | Gumbel | Reverse | Generalized |
  4296. | | | Exponential(2) | Extreme Value |
  4297. | | | | |
  4298. | | gumbel_r(x) | | genextreme(x, k) |
  4299. +------+-------------+----------------+------------------+
  4300. | 1.0 | Exponential | Uniform | Generalized |
  4301. | | | | Pareto |
  4302. | | | | |
  4303. | | expon(x) | uniform(x) | genpareto(x, -k) |
  4304. +------+-------------+----------------+------------------+
  4305. (1) There are at least five generalized logistic distributions.
  4306. Four are described here:
  4307. https://en.wikipedia.org/wiki/Generalized_logistic_distribution
  4308. The "fifth" one is the one kappa4 should match which currently
  4309. isn't implemented in scipy:
  4310. https://en.wikipedia.org/wiki/Talk:Generalized_logistic_distribution
  4311. https://www.mathwave.com/help/easyfit/html/analyses/distributions/gen_logistic.html
  4312. (2) This distribution is currently not in scipy.
  4313. References
  4314. ----------
  4315. J.C. Finney, "Optimization of a Skewed Logistic Distribution With Respect
  4316. to the Kolmogorov-Smirnov Test", A Dissertation Submitted to the Graduate
  4317. Faculty of the Louisiana State University and Agricultural and Mechanical
  4318. College, (August, 2004),
  4319. https://digitalcommons.lsu.edu/gradschool_dissertations/3672
  4320. J.R.M. Hosking, "The four-parameter kappa distribution". IBM J. Res.
4321. Develop. 38 (3), 251-258 (1994).
  4322. B. Kumphon, A. Kaew-Man, P. Seenoi, "A Rainfall Distribution for the Lampao
  4323. Site in the Chi River Basin, Thailand", Journal of Water Resource and
  4324. Protection, vol. 4, 866-869, (2012).
  4325. :doi:`10.4236/jwarp.2012.410101`
  4326. C. Winchester, "On Estimation of the Four-Parameter Kappa Distribution", A
  4327. Thesis Submitted to Dalhousie University, Halifax, Nova Scotia, (March
  4328. 2000).
  4329. http://www.nlc-bnc.ca/obj/s4/f2/dsk2/ftp01/MQ57336.pdf
  4330. %(after_notes)s
  4331. %(example)s
  4332. """
  4333. def _argcheck(self, h, k):
  4334. shape = np.broadcast_arrays(h, k)[0].shape
  4335. return np.full(shape, fill_value=True)
  4336. def _shape_info(self):
  4337. ih = _ShapeInfo("h", False, (-np.inf, np.inf), (False, False))
  4338. ik = _ShapeInfo("k", False, (-np.inf, np.inf), (False, False))
  4339. return [ih, ik]
  4340. def _get_support(self, h, k):
  4341. condlist = [np.logical_and(h > 0, k > 0),
  4342. np.logical_and(h > 0, k == 0),
  4343. np.logical_and(h > 0, k < 0),
  4344. np.logical_and(h <= 0, k > 0),
  4345. np.logical_and(h <= 0, k == 0),
  4346. np.logical_and(h <= 0, k < 0)]
  4347. def f0(h, k):
  4348. return (1.0 - np.float_power(h, -k))/k
  4349. def f1(h, k):
  4350. return np.log(h)
  4351. def f3(h, k):
  4352. a = np.empty(np.shape(h))
  4353. a[:] = -np.inf
  4354. return a
  4355. def f5(h, k):
  4356. return 1.0/k
  4357. _a = _lazyselect(condlist,
  4358. [f0, f1, f0, f3, f3, f5],
  4359. [h, k],
  4360. default=np.nan)
  4361. def f0(h, k):
  4362. return 1.0/k
  4363. def f1(h, k):
  4364. a = np.empty(np.shape(h))
  4365. a[:] = np.inf
  4366. return a
  4367. _b = _lazyselect(condlist,
  4368. [f0, f1, f1, f0, f1, f1],
  4369. [h, k],
  4370. default=np.nan)
  4371. return _a, _b
  4372. def _pdf(self, x, h, k):
  4373. # kappa4.pdf(x, h, k) = (1.0 - k*x)**(1.0/k - 1.0)*
  4374. # (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1)
  4375. return np.exp(self._logpdf(x, h, k))
  4376. def _logpdf(self, x, h, k):
  4377. condlist = [np.logical_and(h != 0, k != 0),
  4378. np.logical_and(h == 0, k != 0),
  4379. np.logical_and(h != 0, k == 0),
  4380. np.logical_and(h == 0, k == 0)]
  4381. def f0(x, h, k):
  4382. '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*(
  4383. 1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h-1.0)
  4384. logpdf = ...
  4385. '''
  4386. return (sc.xlog1py(1.0/k - 1.0, -k*x) +
  4387. sc.xlog1py(1.0/h - 1.0, -h*(1.0 - k*x)**(1.0/k)))
  4388. def f1(x, h, k):
  4389. '''pdf = (1.0 - k*x)**(1.0/k - 1.0)*np.exp(-(
  4390. 1.0 - k*x)**(1.0/k))
  4391. logpdf = ...
  4392. '''
  4393. return sc.xlog1py(1.0/k - 1.0, -k*x) - (1.0 - k*x)**(1.0/k)
  4394. def f2(x, h, k):
  4395. '''pdf = np.exp(-x)*(1.0 - h*np.exp(-x))**(1.0/h - 1.0)
  4396. logpdf = ...
  4397. '''
  4398. return -x + sc.xlog1py(1.0/h - 1.0, -h*np.exp(-x))
  4399. def f3(x, h, k):
  4400. '''pdf = np.exp(-x-np.exp(-x))
  4401. logpdf = ...
  4402. '''
  4403. return -x - np.exp(-x)
  4404. return _lazyselect(condlist,
  4405. [f0, f1, f2, f3],
  4406. [x, h, k],
  4407. default=np.nan)
  4408. def _cdf(self, x, h, k):
  4409. return np.exp(self._logcdf(x, h, k))
  4410. def _logcdf(self, x, h, k):
  4411. condlist = [np.logical_and(h != 0, k != 0),
  4412. np.logical_and(h == 0, k != 0),
  4413. np.logical_and(h != 0, k == 0),
  4414. np.logical_and(h == 0, k == 0)]
  4415. def f0(x, h, k):
  4416. '''cdf = (1.0 - h*(1.0 - k*x)**(1.0/k))**(1.0/h)
  4417. logcdf = ...
  4418. '''
  4419. return (1.0/h)*sc.log1p(-h*(1.0 - k*x)**(1.0/k))
  4420. def f1(x, h, k):
  4421. '''cdf = np.exp(-(1.0 - k*x)**(1.0/k))
  4422. logcdf = ...
  4423. '''
  4424. return -(1.0 - k*x)**(1.0/k)
  4425. def f2(x, h, k):
  4426. '''cdf = (1.0 - h*np.exp(-x))**(1.0/h)
  4427. logcdf = ...
  4428. '''
  4429. return (1.0/h)*sc.log1p(-h*np.exp(-x))
  4430. def f3(x, h, k):
  4431. '''cdf = np.exp(-np.exp(-x))
  4432. logcdf = ...
  4433. '''
  4434. return -np.exp(-x)
  4435. return _lazyselect(condlist,
  4436. [f0, f1, f2, f3],
  4437. [x, h, k],
  4438. default=np.nan)
  4439. def _ppf(self, q, h, k):
  4440. condlist = [np.logical_and(h != 0, k != 0),
  4441. np.logical_and(h == 0, k != 0),
  4442. np.logical_and(h != 0, k == 0),
  4443. np.logical_and(h == 0, k == 0)]
  4444. def f0(q, h, k):
  4445. return 1.0/k*(1.0 - ((1.0 - (q**h))/h)**k)
  4446. def f1(q, h, k):
  4447. return 1.0/k*(1.0 - (-np.log(q))**k)
  4448. def f2(q, h, k):
  4449. '''ppf = -np.log((1.0 - (q**h))/h)
  4450. '''
  4451. return -sc.log1p(-(q**h)) + np.log(h)
  4452. def f3(q, h, k):
  4453. return -np.log(-np.log(q))
  4454. return _lazyselect(condlist,
  4455. [f0, f1, f2, f3],
  4456. [q, h, k],
  4457. default=np.nan)
  4458. def _get_stats_info(self, h, k):
  4459. condlist = [
  4460. np.logical_and(h < 0, k >= 0),
  4461. k < 0,
  4462. ]
  4463. def f0(h, k):
  4464. return (-1.0/h*k).astype(int)
  4465. def f1(h, k):
  4466. return (-1.0/k).astype(int)
  4467. return _lazyselect(condlist, [f0, f1], [h, k], default=5)
  4468. def _stats(self, h, k):
  4469. maxr = self._get_stats_info(h, k)
  4470. outputs = [None if np.any(r < maxr) else np.nan for r in range(1, 5)]
  4471. return outputs[:]
  4472. def _mom1_sc(self, m, *args):
  4473. maxr = self._get_stats_info(args[0], args[1])
  4474. if m >= maxr:
  4475. return np.nan
  4476. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  4477. kappa4 = kappa4_gen(name='kappa4')
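# Example (illustrative, not part of the module): two of the special cases
# from the table in the notes above, h=0 giving the generalized extreme value
# distribution and h=1 giving the generalized Pareto distribution:
#
#     >>> import numpy as np
#     >>> from scipy import stats
#     >>> x, k = np.linspace(-0.5, 2, 6), 0.3
#     >>> np.allclose(stats.kappa4.cdf(x, 0.0, k), stats.genextreme.cdf(x, k))
#     True
#     >>> np.allclose(stats.kappa4.cdf(x, 1.0, k), stats.genpareto.cdf(x, -k))
#     True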
  4478. class kappa3_gen(rv_continuous):
  4479. r"""Kappa 3 parameter distribution.
  4480. %(before_notes)s
  4481. Notes
  4482. -----
  4483. The probability density function for `kappa3` is:
  4484. .. math::
  4485. f(x, a) = a (a + x^a)^{-(a + 1)/a}
  4486. for :math:`x > 0` and :math:`a > 0`.
  4487. `kappa3` takes ``a`` as a shape parameter for :math:`a`.
  4488. References
  4489. ----------
  4490. P.W. Mielke and E.S. Johnson, "Three-Parameter Kappa Distribution Maximum
  4491. Likelihood and Likelihood Ratio Tests", Methods in Weather Research,
  4492. 701-707, (September, 1973),
  4493. :doi:`10.1175/1520-0493(1973)101<0701:TKDMLE>2.3.CO;2`
  4494. B. Kumphon, "Maximum Entropy and Maximum Likelihood Estimation for the
  4495. Three-Parameter Kappa Distribution", Open Journal of Statistics, vol 2,
  4496. 415-419 (2012), :doi:`10.4236/ojs.2012.24050`
  4497. %(after_notes)s
  4498. %(example)s
  4499. """
  4500. def _shape_info(self):
  4501. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  4502. def _pdf(self, x, a):
  4503. # kappa3.pdf(x, a) = a*(a + x**a)**(-(a + 1)/a), for x > 0
  4504. return a*(a + x**a)**(-1.0/a-1)
  4505. def _cdf(self, x, a):
  4506. return x*(a + x**a)**(-1.0/a)
  4507. def _ppf(self, q, a):
  4508. return (a/(q**-a - 1.0))**(1.0/a)
  4509. def _stats(self, a):
  4510. outputs = [None if np.any(i < a) else np.nan for i in range(1, 5)]
  4511. return outputs[:]
  4512. def _mom1_sc(self, m, *args):
  4513. if np.any(m >= args[0]):
  4514. return np.nan
  4515. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  4516. kappa3 = kappa3_gen(a=0.0, name='kappa3')
  4517. class moyal_gen(rv_continuous):
  4518. r"""A Moyal continuous random variable.
  4519. %(before_notes)s
  4520. Notes
  4521. -----
  4522. The probability density function for `moyal` is:
  4523. .. math::
  4524. f(x) = \exp(-(x + \exp(-x))/2) / \sqrt{2\pi}
  4525. for a real number :math:`x`.
  4526. %(after_notes)s
  4527. This distribution has utility in high-energy physics and radiation
  4528. detection. It describes the energy loss of a charged relativistic
  4529. particle due to ionization of the medium [1]_. It also provides an
4530. approximation for the Landau distribution. For an in-depth description
  4531. see [2]_. For additional description, see [3]_.
  4532. References
  4533. ----------
  4534. .. [1] J.E. Moyal, "XXX. Theory of ionization fluctuations",
  4535. The London, Edinburgh, and Dublin Philosophical Magazine
  4536. and Journal of Science, vol 46, 263-280, (1955).
  4537. :doi:`10.1080/14786440308521076` (gated)
  4538. .. [2] G. Cordeiro et al., "The beta Moyal: a useful skew distribution",
  4539. International Journal of Research and Reviews in Applied Sciences,
  4540. vol 10, 171-192, (2012).
  4541. http://www.arpapress.com/Volumes/Vol10Issue2/IJRRAS_10_2_02.pdf
  4542. .. [3] C. Walck, "Handbook on Statistical Distributions for
  4543. Experimentalists; International Report SUF-PFY/96-01", Chapter 26,
  4544. University of Stockholm: Stockholm, Sweden, (2007).
  4545. http://www.stat.rice.edu/~dobelman/textfiles/DistributionsHandbook.pdf
  4546. .. versionadded:: 1.1.0
  4547. %(example)s
  4548. """
  4549. def _shape_info(self):
  4550. return []
  4551. def _rvs(self, size=None, random_state=None):
  4552. u1 = gamma.rvs(a=0.5, scale=2, size=size,
  4553. random_state=random_state)
  4554. return -np.log(u1)
  4555. def _pdf(self, x):
  4556. return np.exp(-0.5 * (x + np.exp(-x))) / np.sqrt(2*np.pi)
  4557. def _cdf(self, x):
  4558. return sc.erfc(np.exp(-0.5 * x) / np.sqrt(2))
  4559. def _sf(self, x):
  4560. return sc.erf(np.exp(-0.5 * x) / np.sqrt(2))
  4561. def _ppf(self, x):
  4562. return -np.log(2 * sc.erfcinv(x)**2)
  4563. def _stats(self):
  4564. mu = np.log(2) + np.euler_gamma
  4565. mu2 = np.pi**2 / 2
  4566. g1 = 28 * np.sqrt(2) * sc.zeta(3) / np.pi**3
  4567. g2 = 4.
  4568. return mu, mu2, g1, g2
  4569. def _munp(self, n):
  4570. if n == 1.0:
  4571. return np.log(2) + np.euler_gamma
  4572. elif n == 2.0:
  4573. return np.pi**2 / 2 + (np.log(2) + np.euler_gamma)**2
  4574. elif n == 3.0:
  4575. tmp1 = 1.5 * np.pi**2 * (np.log(2)+np.euler_gamma)
  4576. tmp2 = (np.log(2)+np.euler_gamma)**3
  4577. tmp3 = 14 * sc.zeta(3)
  4578. return tmp1 + tmp2 + tmp3
  4579. elif n == 4.0:
  4580. tmp1 = 4 * 14 * sc.zeta(3) * (np.log(2) + np.euler_gamma)
  4581. tmp2 = 3 * np.pi**2 * (np.log(2) + np.euler_gamma)**2
  4582. tmp3 = (np.log(2) + np.euler_gamma)**4
  4583. tmp4 = 7 * np.pi**4 / 4
  4584. return tmp1 + tmp2 + tmp3 + tmp4
  4585. else:
  4586. # return generic for higher moments
  4587. # return rv_continuous._mom1_sc(self, n, b)
  4588. return self._mom1_sc(n)
  4589. moyal = moyal_gen(name="moyal")
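# A minimal sketch (assuming the public `scipy.stats.moyal` wraps the class
# above): `_stats` gives mean log(2) + Euler-Mascheroni constant, and the
# gamma-based `_rvs` sampler reproduces it.
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> np.isclose(stats.moyal.mean(), np.log(2) + np.euler_gamma)
# True
# >>> x = stats.moyal.rvs(size=100_000,
# ...                     random_state=np.random.default_rng(0))
# >>> np.isclose(x.mean(), stats.moyal.mean(), rtol=5e-2)
# True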
  4590. class nakagami_gen(rv_continuous):
  4591. r"""A Nakagami continuous random variable.
  4592. %(before_notes)s
  4593. Notes
  4594. -----
  4595. The probability density function for `nakagami` is:
  4596. .. math::
  4597. f(x, \nu) = \frac{2 \nu^\nu}{\Gamma(\nu)} x^{2\nu-1} \exp(-\nu x^2)
  4598. for :math:`x >= 0`, :math:`\nu > 0`. The distribution was introduced in
  4599. [2]_, see also [1]_ for further information.
  4600. `nakagami` takes ``nu`` as a shape parameter for :math:`\nu`.
  4601. %(after_notes)s
  4602. References
  4603. ----------
  4604. .. [1] "Nakagami distribution", Wikipedia
  4605. https://en.wikipedia.org/wiki/Nakagami_distribution
  4606. .. [2] M. Nakagami, "The m-distribution - A general formula of intensity
  4607. distribution of rapid fading", Statistical methods in radio wave
  4608. propagation, Pergamon Press, 1960, 3-36.
  4609. :doi:`10.1016/B978-0-08-009306-2.50005-4`
  4610. %(example)s
  4611. """
  4612. def _shape_info(self):
  4613. return [_ShapeInfo("nu", False, (0, np.inf), (False, False))]
  4614. def _pdf(self, x, nu):
  4615. return np.exp(self._logpdf(x, nu))
  4616. def _logpdf(self, x, nu):
  4617. # nakagami.pdf(x, nu) = 2 * nu**nu / gamma(nu) *
  4618. # x**(2*nu-1) * exp(-nu*x**2)
  4619. return (np.log(2) + sc.xlogy(nu, nu) - sc.gammaln(nu) +
  4620. sc.xlogy(2*nu - 1, x) - nu*x**2)
  4621. def _cdf(self, x, nu):
  4622. return sc.gammainc(nu, nu*x*x)
  4623. def _ppf(self, q, nu):
  4624. return np.sqrt(1.0/nu*sc.gammaincinv(nu, q))
  4625. def _sf(self, x, nu):
  4626. return sc.gammaincc(nu, nu*x*x)
  4627. def _isf(self, p, nu):
  4628. return np.sqrt(1/nu * sc.gammainccinv(nu, p))
  4629. def _stats(self, nu):
  4630. mu = sc.gamma(nu+0.5)/sc.gamma(nu)/np.sqrt(nu)
  4631. mu2 = 1.0-mu*mu
  4632. g1 = mu * (1 - 4*nu*mu2) / 2.0 / nu / np.power(mu2, 1.5)
  4633. g2 = -6*mu**4*nu + (8*nu-2)*mu**2-2*nu + 1
  4634. g2 /= nu*mu2**2.0
  4635. return mu, mu2, g1, g2
  4636. def _rvs(self, nu, size=None, random_state=None):
  4637. # this relationship can be found in [1] or by a direct calculation
  4638. return np.sqrt(random_state.standard_gamma(nu, size=size) / nu)
  4639. def _fitstart(self, data, args=None):
  4640. if args is None:
  4641. args = (1.0,) * self.numargs
  4642. # Analytical justified estimates
  4643. # see: https://docs.scipy.org/doc/scipy/reference/tutorial/stats/continuous_nakagami.html
  4644. loc = np.min(data)
  4645. scale = np.sqrt(np.sum((data - loc)**2) / len(data))
  4646. return args + (loc, scale)
  4647. nakagami = nakagami_gen(a=0.0, name="nakagami")
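# A minimal sketch of the gamma relationship used by `_cdf`/`_rvs` above
# (assuming the public `scipy.stats.nakagami` and `scipy.stats.gamma`):
# if X ~ nakagami(nu), then nu*X**2 ~ gamma(nu).
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> nu, x = 2.5, np.linspace(0.1, 3, 5)
# >>> np.allclose(stats.nakagami.cdf(x, nu), stats.gamma.cdf(nu*x**2, nu))
# True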
  4648. # The function name ncx2 is an abbreviation for noncentral chi squared.
  4649. def _ncx2_log_pdf(x, df, nc):
  4650. # We use (xs**2 + ns**2)/2 = (xs - ns)**2/2 + xs*ns, and include the
  4651. # factor of exp(-xs*ns) into the ive function to improve numerical
  4652. # stability at large values of xs. See also `rice.pdf`.
  4653. df2 = df/2.0 - 1.0
  4654. xs, ns = np.sqrt(x), np.sqrt(nc)
  4655. res = sc.xlogy(df2/2.0, x/nc) - 0.5*(xs - ns)**2
  4656. corr = sc.ive(df2, xs*ns) / 2.0
  4657. # Return res + np.log(corr) avoiding np.log(0)
  4658. return _lazywhere(
  4659. corr > 0,
  4660. (res, corr),
  4661. f=lambda r, c: r + np.log(c),
  4662. fillvalue=-np.inf)
  4663. class ncx2_gen(rv_continuous):
  4664. r"""A non-central chi-squared continuous random variable.
  4665. %(before_notes)s
  4666. Notes
  4667. -----
  4668. The probability density function for `ncx2` is:
  4669. .. math::
  4670. f(x, k, \lambda) = \frac{1}{2} \exp(-(\lambda+x)/2)
  4671. (x/\lambda)^{(k-2)/4} I_{(k-2)/2}(\sqrt{\lambda x})
  4672. for :math:`x >= 0`, :math:`k > 0` and :math:`\lambda \ge 0`.
  4673. :math:`k` specifies the degrees of freedom (denoted ``df`` in the
  4674. implementation) and :math:`\lambda` is the non-centrality parameter
    (denoted ``nc`` in the implementation). :math:`I_\nu` denotes the
    modified Bessel function of the first kind of order :math:`\nu`
    (`scipy.special.iv`).
  4678. `ncx2` takes ``df`` and ``nc`` as shape parameters.
  4679. %(after_notes)s
  4680. %(example)s
  4681. """
  4682. def _argcheck(self, df, nc):
  4683. return (df > 0) & np.isfinite(df) & (nc >= 0)
  4684. def _shape_info(self):
  4685. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  4686. inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
  4687. return [idf, inc]
  4688. def _rvs(self, df, nc, size=None, random_state=None):
  4689. return random_state.noncentral_chisquare(df, nc, size)
  4690. def _logpdf(self, x, df, nc):
  4691. cond = np.ones_like(x, dtype=bool) & (nc != 0)
  4692. return _lazywhere(cond, (x, df, nc), f=_ncx2_log_pdf,
  4693. f2=lambda x, df, _: chi2._logpdf(x, df))
  4694. def _pdf(self, x, df, nc):
  4695. cond = np.ones_like(x, dtype=bool) & (nc != 0)
  4696. with warnings.catch_warnings():
  4697. message = "overflow encountered in _ncx2_pdf"
  4698. warnings.filterwarnings("ignore", message=message)
  4699. return _lazywhere(cond, (x, df, nc), f=_boost._ncx2_pdf,
  4700. f2=lambda x, df, _: chi2._pdf(x, df))
  4701. def _cdf(self, x, df, nc):
  4702. cond = np.ones_like(x, dtype=bool) & (nc != 0)
  4703. return _lazywhere(cond, (x, df, nc), f=_boost._ncx2_cdf,
  4704. f2=lambda x, df, _: chi2._cdf(x, df))
  4705. def _ppf(self, q, df, nc):
  4706. cond = np.ones_like(q, dtype=bool) & (nc != 0)
  4707. with warnings.catch_warnings():
  4708. message = "overflow encountered in _ncx2_ppf"
  4709. warnings.filterwarnings("ignore", message=message)
  4710. return _lazywhere(cond, (q, df, nc), f=_boost._ncx2_ppf,
  4711. f2=lambda x, df, _: chi2._ppf(x, df))
  4712. def _sf(self, x, df, nc):
  4713. cond = np.ones_like(x, dtype=bool) & (nc != 0)
  4714. return _lazywhere(cond, (x, df, nc), f=_boost._ncx2_sf,
  4715. f2=lambda x, df, _: chi2._sf(x, df))
  4716. def _isf(self, x, df, nc):
  4717. cond = np.ones_like(x, dtype=bool) & (nc != 0)
  4718. with warnings.catch_warnings():
  4719. message = "overflow encountered in _ncx2_isf"
  4720. warnings.filterwarnings("ignore", message=message)
  4721. return _lazywhere(cond, (x, df, nc), f=_boost._ncx2_isf,
  4722. f2=lambda x, df, _: chi2._isf(x, df))
  4723. def _stats(self, df, nc):
  4724. return (
  4725. _boost._ncx2_mean(df, nc),
  4726. _boost._ncx2_variance(df, nc),
  4727. _boost._ncx2_skewness(df, nc),
  4728. _boost._ncx2_kurtosis_excess(df, nc),
  4729. )
  4730. ncx2 = ncx2_gen(a=0.0, name='ncx2')
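# A minimal sketch of the ``nc == 0`` fallback wired through `_lazywhere`
# above (assuming the public `scipy.stats.ncx2` and `scipy.stats.chi2`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0.5, 8, 6)
# >>> np.allclose(stats.ncx2.pdf(x, df=3, nc=0), stats.chi2.pdf(x, df=3))
# True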
  4731. class ncf_gen(rv_continuous):
  4732. r"""A non-central F distribution continuous random variable.
  4733. %(before_notes)s
  4734. See Also
  4735. --------
  4736. scipy.stats.f : Fisher distribution
  4737. Notes
  4738. -----
  4739. The probability density function for `ncf` is:
    .. math::

        f(x, n_1, n_2, \lambda) =
            \exp\left(\frac{\lambda}{2} +
                      \lambda n_1 \frac{x}{2(n_1 x + n_2)}\right)
            n_1^{n_1/2} n_2^{n_2/2} x^{n_1/2 - 1} \\
            (n_2 + n_1 x)^{-(n_1 + n_2)/2}
            \gamma(n_1/2) \gamma(1 + n_2/2) \\
            \frac{L^{\frac{n_1}{2}-1}_{n_2/2}
                \left(-\lambda n_1 \frac{x}{2(n_1 x + n_2)}\right)}
            {B(n_1/2, n_2/2)
                \gamma\left(\frac{n_1 + n_2}{2}\right)}

  4752. for :math:`n_1, n_2 > 0`, :math:`\lambda \ge 0`. Here :math:`n_1` is the
  4753. degrees of freedom in the numerator, :math:`n_2` the degrees of freedom in
  4754. the denominator, :math:`\lambda` the non-centrality parameter,
  4755. :math:`\gamma` is the logarithm of the Gamma function, :math:`L_n^k` is a
  4756. generalized Laguerre polynomial and :math:`B` is the beta function.
  4757. `ncf` takes ``df1``, ``df2`` and ``nc`` as shape parameters. If ``nc=0``,
  4758. the distribution becomes equivalent to the Fisher distribution.
  4759. %(after_notes)s
  4760. %(example)s
  4761. """
  4762. def _argcheck(self, df1, df2, nc):
  4763. return (df1 > 0) & (df2 > 0) & (nc >= 0)
  4764. def _shape_info(self):
  4765. idf1 = _ShapeInfo("df1", False, (0, np.inf), (False, False))
  4766. idf2 = _ShapeInfo("df2", False, (0, np.inf), (False, False))
  4767. inc = _ShapeInfo("nc", False, (0, np.inf), (True, False))
  4768. return [idf1, idf2, inc]
  4769. def _rvs(self, dfn, dfd, nc, size=None, random_state=None):
  4770. return random_state.noncentral_f(dfn, dfd, nc, size)
  4771. def _pdf(self, x, dfn, dfd, nc):
  4772. # ncf.pdf(x, df1, df2, nc) = exp(nc/2 + nc*df1*x/(2*(df1*x+df2))) *
  4773. # df1**(df1/2) * df2**(df2/2) * x**(df1/2-1) *
  4774. # (df2+df1*x)**(-(df1+df2)/2) *
  4775. # gamma(df1/2)*gamma(1+df2/2) *
  4776. # L^{v1/2-1}^{v2/2}(-nc*v1*x/(2*(v1*x+v2))) /
  4777. # (B(v1/2, v2/2) * gamma((v1+v2)/2))
  4778. return _boost._ncf_pdf(x, dfn, dfd, nc)
  4779. def _cdf(self, x, dfn, dfd, nc):
  4780. return _boost._ncf_cdf(x, dfn, dfd, nc)
  4781. def _ppf(self, q, dfn, dfd, nc):
  4782. return _boost._ncf_ppf(q, dfn, dfd, nc)
  4783. def _sf(self, x, dfn, dfd, nc):
  4784. return _boost._ncf_sf(x, dfn, dfd, nc)
  4785. def _isf(self, x, dfn, dfd, nc):
  4786. return _boost._ncf_isf(x, dfn, dfd, nc)
  4787. def _munp(self, n, dfn, dfd, nc):
  4788. val = (dfn * 1.0/dfd)**n
  4789. term = sc.gammaln(n+0.5*dfn) + sc.gammaln(0.5*dfd-n) - sc.gammaln(dfd*0.5)
  4790. val *= np.exp(-nc / 2.0+term)
  4791. val *= sc.hyp1f1(n+0.5*dfn, 0.5*dfn, 0.5*nc)
  4792. return val
  4793. def _stats(self, dfn, dfd, nc, moments='mv'):
  4794. mu = _boost._ncf_mean(dfn, dfd, nc)
  4795. mu2 = _boost._ncf_variance(dfn, dfd, nc)
  4796. g1 = _boost._ncf_skewness(dfn, dfd, nc) if 's' in moments else None
  4797. g2 = _boost._ncf_kurtosis_excess(
  4798. dfn, dfd, nc) if 'k' in moments else None
  4799. return mu, mu2, g1, g2
  4800. ncf = ncf_gen(a=0.0, name='ncf')
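# A minimal sketch (assuming the public `scipy.stats.ncf` and
# `scipy.stats.f`): per the docstring above, ``nc=0`` reduces `ncf` to the
# central F (Fisher) distribution.
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0.2, 5, 5)
# >>> np.allclose(stats.ncf.cdf(x, 3, 8, nc=0), stats.f.cdf(x, 3, 8))
# True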
  4801. class t_gen(rv_continuous):
  4802. r"""A Student's t continuous random variable.
  4803. For the noncentral t distribution, see `nct`.
  4804. %(before_notes)s
  4805. See Also
  4806. --------
  4807. nct
  4808. Notes
  4809. -----
  4810. The probability density function for `t` is:
  4811. .. math::
  4812. f(x, \nu) = \frac{\Gamma((\nu+1)/2)}
  4813. {\sqrt{\pi \nu} \Gamma(\nu/2)}
  4814. (1+x^2/\nu)^{-(\nu+1)/2}
  4815. where :math:`x` is a real number and the degrees of freedom parameter
  4816. :math:`\nu` (denoted ``df`` in the implementation) satisfies
  4817. :math:`\nu > 0`. :math:`\Gamma` is the gamma function
  4818. (`scipy.special.gamma`).
  4819. %(after_notes)s
  4820. %(example)s
  4821. """
  4822. def _shape_info(self):
  4823. return [_ShapeInfo("df", False, (0, np.inf), (False, False))]
  4824. def _rvs(self, df, size=None, random_state=None):
  4825. return random_state.standard_t(df, size=size)
  4826. def _pdf(self, x, df):
  4827. return _lazywhere(
  4828. df == np.inf, (x, df),
  4829. f=lambda x, df: norm._pdf(x),
  4830. f2=lambda x, df: (
  4831. np.exp(sc.gammaln((df+1)/2)-sc.gammaln(df/2))
  4832. / (np.sqrt(df*np.pi)*(1+(x**2)/df)**((df+1)/2))
  4833. )
  4834. )
  4835. def _logpdf(self, x, df):
  4836. return _lazywhere(
  4837. df == np.inf, (x, df),
  4838. f=lambda x, df: norm._logpdf(x),
  4839. f2=lambda x, df: (
  4840. sc.gammaln((df+1)/2) - sc.gammaln(df/2)
  4841. - (0.5*np.log(df*np.pi)
  4842. + (df+1)/2*np.log(1+(x**2)/df))
  4843. )
  4844. )
  4845. def _cdf(self, x, df):
  4846. return sc.stdtr(df, x)
  4847. def _sf(self, x, df):
  4848. return sc.stdtr(df, -x)
  4849. def _ppf(self, q, df):
  4850. return sc.stdtrit(df, q)
  4851. def _isf(self, q, df):
  4852. return -sc.stdtrit(df, q)
  4853. def _stats(self, df):
  4854. # infinite df -> normal distribution (0.0, 1.0, 0.0, 0.0)
  4855. infinite_df = np.isposinf(df)
  4856. mu = np.where(df > 1, 0.0, np.inf)
  4857. condlist = ((df > 1) & (df <= 2),
  4858. (df > 2) & np.isfinite(df),
  4859. infinite_df)
  4860. choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
  4861. lambda df: df / (df-2.0),
  4862. lambda df: np.broadcast_to(1, df.shape))
  4863. mu2 = _lazyselect(condlist, choicelist, (df,), np.nan)
  4864. g1 = np.where(df > 3, 0.0, np.nan)
  4865. condlist = ((df > 2) & (df <= 4),
  4866. (df > 4) & np.isfinite(df),
  4867. infinite_df)
  4868. choicelist = (lambda df: np.broadcast_to(np.inf, df.shape),
  4869. lambda df: 6.0 / (df-4.0),
  4870. lambda df: np.broadcast_to(0, df.shape))
  4871. g2 = _lazyselect(condlist, choicelist, (df,), np.nan)
  4872. return mu, mu2, g1, g2
  4873. def _entropy(self, df):
  4874. if df == np.inf:
  4875. return norm._entropy()
  4876. half = df/2
  4877. half1 = (df + 1)/2
  4878. return (half1*(sc.digamma(half1) - sc.digamma(half))
  4879. + np.log(np.sqrt(df)*sc.beta(half, 0.5)))
  4880. t = t_gen(name='t')
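# A minimal sketch of the `sc.stdtr` symmetry used by `_cdf`/`_sf` above
# (assuming `scipy.special.stdtr` and the public `scipy.stats.t`):
#
# >>> import numpy as np
# >>> from scipy import special, stats
# >>> np.isclose(stats.t.sf(1.5, df=4), special.stdtr(4, -1.5))
# True
# >>> np.isclose(stats.t.sf(1.5, df=4), stats.t.cdf(-1.5, df=4))
# True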
  4881. class nct_gen(rv_continuous):
  4882. r"""A non-central Student's t continuous random variable.
  4883. %(before_notes)s
  4884. Notes
  4885. -----
  4886. If :math:`Y` is a standard normal random variable and :math:`V` is
  4887. an independent chi-square random variable (`chi2`) with :math:`k` degrees
  4888. of freedom, then
  4889. .. math::
  4890. X = \frac{Y + c}{\sqrt{V/k}}
  4891. has a non-central Student's t distribution on the real line.
  4892. The degrees of freedom parameter :math:`k` (denoted ``df`` in the
  4893. implementation) satisfies :math:`k > 0` and the noncentrality parameter
  4894. :math:`c` (denoted ``nc`` in the implementation) is a real number.
  4895. %(after_notes)s
  4896. %(example)s
  4897. """
  4898. def _argcheck(self, df, nc):
  4899. return (df > 0) & (nc == nc)
  4900. def _shape_info(self):
  4901. idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
  4902. inc = _ShapeInfo("nc", False, (-np.inf, np.inf), (False, False))
  4903. return [idf, inc]
  4904. def _rvs(self, df, nc, size=None, random_state=None):
  4905. n = norm.rvs(loc=nc, size=size, random_state=random_state)
  4906. c2 = chi2.rvs(df, size=size, random_state=random_state)
  4907. return n * np.sqrt(df) / np.sqrt(c2)
  4908. def _pdf(self, x, df, nc):
  4909. # Boost version has accuracy issues in left tail; see gh-16591
  4910. n = df*1.0
  4911. nc = nc*1.0
  4912. x2 = x*x
  4913. ncx2 = nc*nc*x2
  4914. fac1 = n + x2
  4915. trm1 = (n/2.*np.log(n) + sc.gammaln(n+1)
  4916. - (n*np.log(2) + nc*nc/2 + (n/2)*np.log(fac1)
  4917. + sc.gammaln(n/2)))
  4918. Px = np.exp(trm1)
  4919. valF = ncx2 / (2*fac1)
  4920. trm1 = (np.sqrt(2)*nc*x*sc.hyp1f1(n/2+1, 1.5, valF)
  4921. / np.asarray(fac1*sc.gamma((n+1)/2)))
  4922. trm2 = (sc.hyp1f1((n+1)/2, 0.5, valF)
  4923. / np.asarray(np.sqrt(fac1)*sc.gamma(n/2+1)))
  4924. Px *= trm1+trm2
  4925. return np.clip(Px, 0, None)
  4926. def _cdf(self, x, df, nc):
  4927. return np.clip(_boost._nct_cdf(x, df, nc), 0, 1)
  4928. def _ppf(self, q, df, nc):
  4929. return _boost._nct_ppf(q, df, nc)
  4930. def _sf(self, x, df, nc):
  4931. return np.clip(_boost._nct_sf(x, df, nc), 0, 1)
  4932. def _isf(self, x, df, nc):
  4933. return _boost._nct_isf(x, df, nc)
  4934. def _stats(self, df, nc, moments='mv'):
  4935. mu = _boost._nct_mean(df, nc)
  4936. mu2 = _boost._nct_variance(df, nc)
  4937. g1 = _boost._nct_skewness(df, nc) if 's' in moments else None
  4938. g2 = _boost._nct_kurtosis_excess(df, nc)-3 if 'k' in moments else None
  4939. return mu, mu2, g1, g2
  4940. nct = nct_gen(name="nct")
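# A minimal sketch of the construction in the docstring above,
# X = (Y + c) / sqrt(V/k) with Y standard normal and V ~ chi2(k), checked by
# simulation (assuming the public `scipy.stats.nct`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(5)
# >>> df, nc = 10, 1.5
# >>> x = ((rng.standard_normal(200_000) + nc)
# ...      / np.sqrt(rng.chisquare(df, 200_000) / df))
# >>> np.isclose(x.mean(), stats.nct.mean(df, nc), rtol=5e-2)
# True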
  4941. class pareto_gen(rv_continuous):
  4942. r"""A Pareto continuous random variable.
  4943. %(before_notes)s
  4944. Notes
  4945. -----
  4946. The probability density function for `pareto` is:
  4947. .. math::
  4948. f(x, b) = \frac{b}{x^{b+1}}
  4949. for :math:`x \ge 1`, :math:`b > 0`.
  4950. `pareto` takes ``b`` as a shape parameter for :math:`b`.
  4951. %(after_notes)s
  4952. %(example)s
  4953. """
  4954. def _shape_info(self):
  4955. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  4956. def _pdf(self, x, b):
  4957. # pareto.pdf(x, b) = b / x**(b+1)
  4958. return b * x**(-b-1)
  4959. def _cdf(self, x, b):
  4960. return 1 - x**(-b)
  4961. def _ppf(self, q, b):
  4962. return pow(1-q, -1.0/b)
  4963. def _sf(self, x, b):
  4964. return x**(-b)
  4965. def _stats(self, b, moments='mv'):
  4966. mu, mu2, g1, g2 = None, None, None, None
  4967. if 'm' in moments:
  4968. mask = b > 1
  4969. bt = np.extract(mask, b)
  4970. mu = np.full(np.shape(b), fill_value=np.inf)
  4971. np.place(mu, mask, bt / (bt-1.0))
  4972. if 'v' in moments:
  4973. mask = b > 2
  4974. bt = np.extract(mask, b)
  4975. mu2 = np.full(np.shape(b), fill_value=np.inf)
  4976. np.place(mu2, mask, bt / (bt-2.0) / (bt-1.0)**2)
  4977. if 's' in moments:
  4978. mask = b > 3
  4979. bt = np.extract(mask, b)
  4980. g1 = np.full(np.shape(b), fill_value=np.nan)
  4981. vals = 2 * (bt + 1.0) * np.sqrt(bt - 2.0) / ((bt - 3.0) * np.sqrt(bt))
  4982. np.place(g1, mask, vals)
  4983. if 'k' in moments:
  4984. mask = b > 4
  4985. bt = np.extract(mask, b)
  4986. g2 = np.full(np.shape(b), fill_value=np.nan)
  4987. vals = (6.0*np.polyval([1.0, 1.0, -6, -2], bt) /
  4988. np.polyval([1.0, -7.0, 12.0, 0.0], bt))
  4989. np.place(g2, mask, vals)
  4990. return mu, mu2, g1, g2
  4991. def _entropy(self, c):
  4992. return 1 + 1.0/c - np.log(c)
  4993. @_call_super_mom
  4994. @inherit_docstring_from(rv_continuous)
  4995. def fit(self, data, *args, **kwds):
  4996. parameters = _check_fit_input_parameters(self, data, args, kwds)
  4997. data, fshape, floc, fscale = parameters
  4998. # ensure that any fixed parameters don't violate constraints of the
  4999. # distribution before continuing.
  5000. if floc is not None and np.min(data) - floc < (fscale or 0):
  5001. raise FitDataError("pareto", lower=1, upper=np.inf)
  5002. ndata = data.shape[0]
  5003. def get_shape(scale, location):
  5004. # The first-order necessary condition on `shape` can be solved in
  5005. # closed form
  5006. return ndata / np.sum(np.log((data - location) / scale))
  5007. if floc is fscale is None:
  5008. # The support of the distribution is `(x - loc)/scale > 0`.
  5009. # The method of Lagrange multipliers turns this constraint
  5010. # into an equation that can be solved numerically.
  5011. # See gh-12545 for details.
  5012. def dL_dScale(shape, scale):
  5013. # The partial derivative of the log-likelihood function w.r.t.
  5014. # the scale.
  5015. return ndata * shape / scale
  5016. def dL_dLocation(shape, location):
  5017. # The partial derivative of the log-likelihood function w.r.t.
  5018. # the location.
  5019. return (shape + 1) * np.sum(1 / (data - location))
  5020. def fun_to_solve(scale):
  5021. # optimize the scale by setting the partial derivatives
  5022. # w.r.t. to location and scale equal and solving.
  5023. location = np.min(data) - scale
  5024. shape = fshape or get_shape(scale, location)
  5025. return dL_dLocation(shape, location) - dL_dScale(shape, scale)
  5026. def interval_contains_root(lbrack, rbrack):
  5027. # return true if the signs disagree.
  5028. return (np.sign(fun_to_solve(lbrack)) !=
  5029. np.sign(fun_to_solve(rbrack)))
  5030. # set brackets for `root_scalar` to use when optimizing over the
  5031. # scale such that a root is likely between them. Use user supplied
  5032. # guess or default 1.
  5033. brack_start = kwds.get('scale', 1)
  5034. lbrack, rbrack = brack_start / 2, brack_start * 2
  5035. # if a root is not between the brackets, iteratively expand them
  5036. # until they include a sign change, checking after each bracket is
  5037. # modified.
  5038. while (not interval_contains_root(lbrack, rbrack)
  5039. and (lbrack > 0 or rbrack < np.inf)):
  5040. lbrack /= 2
  5041. rbrack *= 2
  5042. res = root_scalar(fun_to_solve, bracket=[lbrack, rbrack])
  5043. if res.converged:
  5044. scale = res.root
  5045. loc = np.min(data) - scale
  5046. shape = fshape or get_shape(scale, loc)
  5047. # The Pareto distribution requires that its parameters satisfy
  5048. # the condition `fscale + floc <= min(data)`. However, to
  5049. # avoid numerical issues, we require that `fscale + floc`
  5050. # is strictly less than `min(data)`. If this condition
  5051. # is not satisfied, reduce the scale with `np.nextafter` to
  5052. # ensure that data does not fall outside of the support.
  5053. if not (scale + loc) < np.min(data):
  5054. scale = np.min(data) - loc
  5055. scale = np.nextafter(scale, 0)
  5056. return shape, loc, scale
  5057. else:
  5058. return super().fit(data, **kwds)
  5059. elif floc is None:
  5060. loc = np.min(data) - fscale
  5061. else:
  5062. loc = floc
  5063. # Source: Evans, Hastings, and Peacock (2000), Statistical
  5064. # Distributions, 3rd. Ed., John Wiley and Sons. Page 149.
  5065. scale = fscale or np.min(data) - loc
  5066. shape = fshape or get_shape(scale, loc)
  5067. return shape, loc, scale
  5068. pareto = pareto_gen(a=1.0, name="pareto")
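# A minimal sketch of the closed-form shape estimate used by `fit` above when
# both `floc` and `fscale` are fixed (assuming the public
# `scipy.stats.pareto`): shape = n / sum(log((x - loc) / scale)).
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> data = stats.pareto.rvs(b=3.0, size=1000,
# ...                         random_state=np.random.default_rng(8))
# >>> b_hat, loc_hat, scale_hat = stats.pareto.fit(data, floc=0, fscale=1)
# >>> np.isclose(b_hat, len(data) / np.log(data).sum())
# True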
  5069. class lomax_gen(rv_continuous):
  5070. r"""A Lomax (Pareto of the second kind) continuous random variable.
  5071. %(before_notes)s
  5072. Notes
  5073. -----
  5074. The probability density function for `lomax` is:
  5075. .. math::
  5076. f(x, c) = \frac{c}{(1+x)^{c+1}}
  5077. for :math:`x \ge 0`, :math:`c > 0`.
  5078. `lomax` takes ``c`` as a shape parameter for :math:`c`.
  5079. `lomax` is a special case of `pareto` with ``loc=-1.0``.
  5080. %(after_notes)s
  5081. %(example)s
  5082. """
  5083. def _shape_info(self):
  5084. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  5085. def _pdf(self, x, c):
  5086. # lomax.pdf(x, c) = c / (1+x)**(c+1)
  5087. return c*1.0/(1.0+x)**(c+1.0)
  5088. def _logpdf(self, x, c):
  5089. return np.log(c) - (c+1)*sc.log1p(x)
  5090. def _cdf(self, x, c):
  5091. return -sc.expm1(-c*sc.log1p(x))
  5092. def _sf(self, x, c):
  5093. return np.exp(-c*sc.log1p(x))
  5094. def _logsf(self, x, c):
  5095. return -c*sc.log1p(x)
  5096. def _ppf(self, q, c):
  5097. return sc.expm1(-sc.log1p(-q)/c)
  5098. def _stats(self, c):
  5099. mu, mu2, g1, g2 = pareto.stats(c, loc=-1.0, moments='mvsk')
  5100. return mu, mu2, g1, g2
  5101. def _entropy(self, c):
  5102. return 1+1.0/c-np.log(c)
  5103. lomax = lomax_gen(a=0.0, name="lomax")
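# A minimal sketch of the `pareto` relationship stated in the docstring above
# (assuming the public `scipy.stats.lomax` and `scipy.stats.pareto`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0, 5, 6)
# >>> np.allclose(stats.lomax.pdf(x, 2.5), stats.pareto.pdf(x, 2.5, loc=-1))
# True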
  5104. class pearson3_gen(rv_continuous):
  5105. r"""A pearson type III continuous random variable.
  5106. %(before_notes)s
  5107. Notes
  5108. -----
  5109. The probability density function for `pearson3` is:
  5110. .. math::
  5111. f(x, \kappa) = \frac{|\beta|}{\Gamma(\alpha)}
  5112. (\beta (x - \zeta))^{\alpha - 1}
  5113. \exp(-\beta (x - \zeta))
  5114. where:
  5115. .. math::
  5116. \beta = \frac{2}{\kappa}
  5117. \alpha = \beta^2 = \frac{4}{\kappa^2}
  5118. \zeta = -\frac{\alpha}{\beta} = -\beta
  5119. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  5120. Pass the skew :math:`\kappa` into `pearson3` as the shape parameter
  5121. ``skew``.
  5122. %(after_notes)s
  5123. %(example)s
  5124. References
  5125. ----------
  5126. R.W. Vogel and D.E. McMartin, "Probability Plot Goodness-of-Fit and
  5127. Skewness Estimation Procedures for the Pearson Type 3 Distribution", Water
  5128. Resources Research, Vol.27, 3149-3158 (1991).
  5129. L.R. Salvosa, "Tables of Pearson's Type III Function", Ann. Math. Statist.,
  5130. Vol.1, 191-198 (1930).
  5131. "Using Modern Computing Tools to Fit the Pearson Type III Distribution to
  5132. Aviation Loads Data", Office of Aviation Research (2003).
  5133. """
  5134. def _preprocess(self, x, skew):
  5135. # The real 'loc' and 'scale' are handled in the calling pdf(...). The
  5136. # local variables 'loc' and 'scale' within pearson3._pdf are set to
  5137. # the defaults just to keep them as part of the equations for
  5138. # documentation.
  5139. loc = 0.0
  5140. scale = 1.0
  5141. # If skew is small, return _norm_pdf. The divide between pearson3
  5142. # and norm was found by brute force and is approximately a skew of
  5143. # 0.000016. No one, I hope, would actually use a skew value even
  5144. # close to this small.
  5145. norm2pearson_transition = 0.000016
  5146. ans, x, skew = np.broadcast_arrays(1.0, x, skew)
  5147. ans = ans.copy()
  5148. # mask is True where skew is small enough to use the normal approx.
  5149. mask = np.absolute(skew) < norm2pearson_transition
  5150. invmask = ~mask
  5151. beta = 2.0 / (skew[invmask] * scale)
  5152. alpha = (scale * beta)**2
  5153. zeta = loc - alpha / beta
  5154. transx = beta * (x[invmask] - zeta)
  5155. return ans, x, transx, mask, invmask, beta, alpha, zeta
  5156. def _argcheck(self, skew):
  5157. # The _argcheck function in rv_continuous only allows positive
  5158. # arguments. The skew argument for pearson3 can be zero (which I want
  5159. # to handle inside pearson3._pdf) or negative. So just return True
  5160. # for all skew args.
  5161. return np.isfinite(skew)
  5162. def _shape_info(self):
  5163. return [_ShapeInfo("skew", False, (-np.inf, np.inf), (False, False))]
  5164. def _stats(self, skew):
  5165. m = 0.0
  5166. v = 1.0
  5167. s = skew
  5168. k = 1.5*skew**2
  5169. return m, v, s, k
  5170. def _pdf(self, x, skew):
  5171. # pearson3.pdf(x, skew) = abs(beta) / gamma(alpha) *
  5172. # (beta * (x - zeta))**(alpha - 1) * exp(-beta*(x - zeta))
  5173. # Do the calculation in _logpdf since helps to limit
  5174. # overflow/underflow problems
  5175. ans = np.exp(self._logpdf(x, skew))
  5176. if ans.ndim == 0:
  5177. if np.isnan(ans):
  5178. return 0.0
  5179. return ans
  5180. ans[np.isnan(ans)] = 0.0
  5181. return ans
  5182. def _logpdf(self, x, skew):
        # PEARSON3 logpdf                         GAMMA logpdf
        #   np.log(abs(beta))
        # + (alpha - 1)*np.log(beta*(x - zeta))   + (a - 1)*np.log(x)
        # - beta*(x - zeta)                       - x
        # - sc.gammaln(alpha)                     - sc.gammaln(a)
  5188. ans, x, transx, mask, invmask, beta, alpha, _ = (
  5189. self._preprocess(x, skew))
  5190. ans[mask] = np.log(_norm_pdf(x[mask]))
  5191. # use logpdf instead of _logpdf to fix issue mentioned in gh-12640
  5192. # (_logpdf does not return correct result for alpha = 1)
  5193. ans[invmask] = np.log(abs(beta)) + gamma.logpdf(transx, alpha)
  5194. return ans
  5195. def _cdf(self, x, skew):
  5196. ans, x, transx, mask, invmask, _, alpha, _ = (
  5197. self._preprocess(x, skew))
  5198. ans[mask] = _norm_cdf(x[mask])
  5199. skew = np.broadcast_to(skew, invmask.shape)
  5200. invmask1a = np.logical_and(invmask, skew > 0)
  5201. invmask1b = skew[invmask] > 0
  5202. # use cdf instead of _cdf to fix issue mentioned in gh-12640
  5203. # (_cdf produces NaNs for inputs outside support)
  5204. ans[invmask1a] = gamma.cdf(transx[invmask1b], alpha[invmask1b])
  5205. # The gamma._cdf approach wasn't working with negative skew.
  5206. # Note that multiplying the skew by -1 reflects about x=0.
  5207. # So instead of evaluating the CDF with negative skew at x,
  5208. # evaluate the SF with positive skew at -x.
  5209. invmask2a = np.logical_and(invmask, skew < 0)
  5210. invmask2b = skew[invmask] < 0
  5211. # gamma._sf produces NaNs when transx < 0, so use gamma.sf
  5212. ans[invmask2a] = gamma.sf(transx[invmask2b], alpha[invmask2b])
  5213. return ans
  5214. def _rvs(self, skew, size=None, random_state=None):
  5215. skew = np.broadcast_to(skew, size)
  5216. ans, _, _, mask, invmask, beta, alpha, zeta = (
  5217. self._preprocess([0], skew))
  5218. nsmall = mask.sum()
  5219. nbig = mask.size - nsmall
  5220. ans[mask] = random_state.standard_normal(nsmall)
  5221. ans[invmask] = random_state.standard_gamma(alpha, nbig)/beta + zeta
  5222. if size == ():
  5223. ans = ans[0]
  5224. return ans
  5225. def _ppf(self, q, skew):
  5226. ans, q, _, mask, invmask, beta, alpha, zeta = (
  5227. self._preprocess(q, skew))
  5228. ans[mask] = _norm_ppf(q[mask])
  5229. q = q[invmask]
  5230. q[beta < 0] = 1 - q[beta < 0] # for negative skew; see gh-17050
  5231. ans[invmask] = sc.gammaincinv(alpha, q)/beta + zeta
  5232. return ans
  5233. @_call_super_mom
  5234. @extend_notes_in_docstring(rv_continuous, notes="""\
  5235. Note that method of moments (`method='MM'`) is not
  5236. available for this distribution.\n\n""")
  5237. def fit(self, data, *args, **kwds):
  5238. if kwds.get("method", None) == 'MM':
  5239. raise NotImplementedError("Fit `method='MM'` is not available for "
  5240. "the Pearson3 distribution. Please try "
  5241. "the default `method='MLE'`.")
  5242. else:
  5243. return super(type(self), self).fit(data, *args, **kwds)
  5244. pearson3 = pearson3_gen(name="pearson3")
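# A minimal sketch of the two branches in `_preprocess` above (assuming the
# public `scipy.stats.pearson3`, `scipy.stats.norm` and `scipy.stats.gamma`):
# a tiny skew falls back to the normal pdf, otherwise the pdf is a scaled,
# shifted gamma with beta = 2/skew, alpha = beta**2, zeta = -alpha/beta.
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(-3, 3, 7)
# >>> np.allclose(stats.pearson3.pdf(x, skew=1e-7), stats.norm.pdf(x))
# True
# >>> skew = 0.5; beta = 2/skew; alpha = beta**2; zeta = -alpha/beta
# >>> np.allclose(stats.pearson3.pdf(x, skew),
# ...             beta * stats.gamma.pdf(beta*(x - zeta), alpha))
# True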
  5245. class powerlaw_gen(rv_continuous):
  5246. r"""A power-function continuous random variable.
  5247. %(before_notes)s
  5248. See Also
  5249. --------
  5250. pareto
  5251. Notes
  5252. -----
  5253. The probability density function for `powerlaw` is:
  5254. .. math::
  5255. f(x, a) = a x^{a-1}
  5256. for :math:`0 \le x \le 1`, :math:`a > 0`.
  5257. `powerlaw` takes ``a`` as a shape parameter for :math:`a`.
  5258. %(after_notes)s
  5259. For example, the support of `powerlaw` can be adjusted from the default
  5260. interval ``[0, 1]`` to the interval ``[c, c+d]`` by setting ``loc=c`` and
  5261. ``scale=d``. For a power-law distribution with infinite support, see
  5262. `pareto`.
  5263. `powerlaw` is a special case of `beta` with ``b=1``.
  5264. %(example)s
  5265. """
  5266. def _shape_info(self):
  5267. return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
  5268. def _pdf(self, x, a):
  5269. # powerlaw.pdf(x, a) = a * x**(a-1)
  5270. return a*x**(a-1.0)
  5271. def _logpdf(self, x, a):
  5272. return np.log(a) + sc.xlogy(a - 1, x)
  5273. def _cdf(self, x, a):
  5274. return x**(a*1.0)
  5275. def _logcdf(self, x, a):
  5276. return a*np.log(x)
  5277. def _ppf(self, q, a):
  5278. return pow(q, 1.0/a)
  5279. def _stats(self, a):
  5280. return (a / (a + 1.0),
  5281. a / (a + 2.0) / (a + 1.0) ** 2,
  5282. -2.0 * ((a - 1.0) / (a + 3.0)) * np.sqrt((a + 2.0) / a),
  5283. 6 * np.polyval([1, -1, -6, 2], a) / (a * (a + 3.0) * (a + 4)))
  5284. def _entropy(self, a):
  5285. return 1 - 1.0/a - np.log(a)
  5286. def _support_mask(self, x, a):
  5287. return (super(powerlaw_gen, self)._support_mask(x, a)
  5288. & ((x != 0) | (a >= 1)))
  5289. @_call_super_mom
  5290. @extend_notes_in_docstring(rv_continuous, notes="""\
  5291. Notes specifically for ``powerlaw.fit``: If the location is a free
  5292. parameter and the value returned for the shape parameter is less than
  5293. one, the true maximum likelihood approaches infinity. This causes
  5294. numerical difficulties, and the resulting estimates are approximate.
  5295. \n\n""")
  5296. def fit(self, data, *args, **kwds):
  5297. # Summary of the strategy:
  5298. #
  5299. # 1) If the scale and location are fixed, return the shape according
  5300. # to a formula.
  5301. #
  5302. # 2) If the scale is fixed, there are two possibilities for the other
  5303. # parameters - one corresponding with shape less than one, and
  5304. # another with shape greater than one. Calculate both, and return
  5305. # whichever has the better log-likelihood.
  5306. #
  5307. # At this point, the scale is known to be free.
  5308. #
  5309. # 3) If the location is fixed, return the scale and shape according to
  5310. # formulas (or, if the shape is fixed, the fixed shape).
  5311. #
  5312. # At this point, the location and scale are both free. There are
  5313. # separate equations depending on whether the shape is less than one or
  5314. # greater than one.
  5315. #
  5316. # 4a) If the shape is less than one, there are formulas for shape,
  5317. # location, and scale.
  5318. # 4b) If the shape is greater than one, there are formulas for shape
  5319. # and scale, but there is a condition for location to be solved
  5320. # numerically.
  5321. #
  5322. # If the shape is fixed and less than one, we use 4a.
  5323. # If the shape is fixed and greater than one, we use 4b.
  5324. # If the shape is also free, we calculate fits using both 4a and 4b
  5325. # and choose the one that results a better log-likelihood.
  5326. #
  5327. # In many cases, the use of `np.nextafter` is used to avoid numerical
  5328. # issues.
  5329. if kwds.pop('superfit', False):
  5330. return super().fit(data, *args, **kwds)
  5331. if len(np.unique(data)) == 1:
  5332. return super().fit(data, *args, **kwds)
  5333. data, fshape, floc, fscale = _check_fit_input_parameters(self, data,
  5334. args, kwds)
  5335. penalized_nllf_args = [data, (self._fitstart(data),)]
  5336. penalized_nllf = self._reduce_func(penalized_nllf_args, {})[1]
  5337. # ensure that any fixed parameters don't violate constraints of the
  5338. # distribution before continuing. The support of the distribution
  5339. # is `0 < (x - loc)/scale < 1`.
  5340. if floc is not None:
  5341. if not data.min() > floc:
  5342. raise FitDataError('powerlaw', 0, 1)
  5343. if fscale is not None and not data.max() <= floc + fscale:
  5344. raise FitDataError('powerlaw', 0, 1)
  5345. if fscale is not None:
  5346. if fscale <= 0:
  5347. raise ValueError("Negative or zero `fscale` is outside the "
  5348. "range allowed by the distribution.")
  5349. if fscale <= data.ptp():
  5350. msg = "`fscale` must be greater than the range of data."
  5351. raise ValueError(msg)
  5352. def get_shape(data, loc, scale):
  5353. # The first-order necessary condition on `shape` can be solved in
  5354. # closed form. It can be used no matter the assumption of the
  5355. # value of the shape.
  5356. N = len(data)
  5357. return - N / (np.sum(np.log(data - loc)) - N*np.log(scale))
  5358. def get_scale(data, loc):
  5359. # analytical solution for `scale` based on the location.
  5360. # It can be used no matter the assumption of the value of the
  5361. # shape.
  5362. return data.max() - loc
  5363. # 1) The location and scale are both fixed. Analytically determine the
  5364. # shape.
  5365. if fscale is not None and floc is not None:
  5366. return get_shape(data, floc, fscale), floc, fscale
  5367. # 2) The scale is fixed. There are two possibilities for the other
  5368. # parameters. Choose the option with better log-likelihood.
  5369. if fscale is not None:
  5370. # using `data.min()` as the optimal location
  5371. loc_lt1 = np.nextafter(data.min(), -np.inf)
  5372. shape_lt1 = fshape or get_shape(data, loc_lt1, fscale)
  5373. ll_lt1 = penalized_nllf((shape_lt1, loc_lt1, fscale), data)
  5374. # using `data.max() - scale` as the optimal location
  5375. loc_gt1 = np.nextafter(data.max() - fscale, np.inf)
  5376. shape_gt1 = fshape or get_shape(data, loc_gt1, fscale)
  5377. ll_gt1 = penalized_nllf((shape_gt1, loc_gt1, fscale), data)
  5378. if ll_lt1 < ll_gt1:
  5379. return shape_lt1, loc_lt1, fscale
  5380. else:
  5381. return shape_gt1, loc_gt1, fscale
  5382. # 3) The location is fixed. Return the analytical scale and the
  5383. # analytical (or fixed) shape.
  5384. if floc is not None:
  5385. scale = get_scale(data, floc)
  5386. shape = fshape or get_shape(data, floc, scale)
  5387. return shape, floc, scale
  5388. # 4) Location and scale are both free
  5389. # 4a) Use formulas that assume `shape <= 1`.
  5390. def fit_loc_scale_w_shape_lt_1():
  5391. loc = np.nextafter(data.min(), -np.inf)
  5392. if np.abs(loc) < np.finfo(loc.dtype).tiny:
  5393. loc = np.sign(loc) * np.finfo(loc.dtype).tiny
  5394. scale = np.nextafter(get_scale(data, loc), np.inf)
  5395. shape = fshape or get_shape(data, loc, scale)
  5396. return shape, loc, scale
  5397. # 4b) Fit under the assumption that `shape > 1`. The support
  5398. # of the distribution is `(x - loc)/scale <= 1`. The method of Lagrange
  5399. # multipliers turns this constraint into the condition that
  5400. # dL_dScale - dL_dLocation must be zero, which is solved numerically.
  5401. # (Alternatively, substitute the constraint into the objective
  5402. # function before deriving the likelihood equation for location.)
  5403. def dL_dScale(data, shape, scale):
  5404. # The partial derivative of the log-likelihood function w.r.t.
  5405. # the scale.
  5406. return -data.shape[0] * shape / scale
  5407. def dL_dLocation(data, shape, loc):
  5408. # The partial derivative of the log-likelihood function w.r.t.
  5409. # the location.
  5410. return (shape - 1) * np.sum(1 / (loc - data)) # -1/(data-loc)
  5411. def dL_dLocation_star(loc):
  5412. # The derivative of the log-likelihood function w.r.t.
  5413. # the location, given optimal shape and scale
  5414. scale = np.nextafter(get_scale(data, loc), -np.inf)
  5415. shape = fshape or get_shape(data, loc, scale)
  5416. return dL_dLocation(data, shape, loc)
  5417. def fun_to_solve(loc):
  5418. # optimize the location by setting the partial derivatives
  5419. # w.r.t. to location and scale equal and solving.
  5420. scale = np.nextafter(get_scale(data, loc), -np.inf)
  5421. shape = fshape or get_shape(data, loc, scale)
  5422. return (dL_dScale(data, shape, scale)
  5423. - dL_dLocation(data, shape, loc))
  5424. def fit_loc_scale_w_shape_gt_1():
  5425. # set brackets for `root_scalar` to use when optimizing over the
  5426. # location such that a root is likely between them.
  5427. rbrack = np.nextafter(data.min(), -np.inf)
  5428. # if the sign of `dL_dLocation_star` is positive at rbrack,
  5429. # we're not going to find the root we're looking for
  5430. delta = (data.min() - rbrack)
  5431. while dL_dLocation_star(rbrack) > 0:
  5432. rbrack = data.min() - delta
  5433. delta *= 2
  5434. def interval_contains_root(lbrack, rbrack):
  5435. # Check if the interval (lbrack, rbrack) contains the root.
  5436. return (np.sign(fun_to_solve(lbrack))
  5437. != np.sign(fun_to_solve(rbrack)))
  5438. lbrack = rbrack - 1
  5439. # if the sign doesn't change between the brackets, move the left
  5440. # bracket until it does. (The right bracket remains fixed at the
  5441. # maximum permissible value.)
  5442. i = 1.0
  5443. while (not interval_contains_root(lbrack, rbrack)
  5444. and lbrack != -np.inf):
  5445. lbrack = (data.min() - i)
  5446. i *= 2
  5447. root = optimize.root_scalar(fun_to_solve, bracket=(lbrack, rbrack))
  5448. loc = np.nextafter(root.root, -np.inf)
  5449. scale = np.nextafter(get_scale(data, loc), np.inf)
  5450. shape = fshape or get_shape(data, loc, scale)
  5451. return shape, loc, scale
  5452. # Shape is fixed - choose 4a or 4b accordingly.
  5453. if fshape is not None and fshape <= 1:
  5454. return fit_loc_scale_w_shape_lt_1()
  5455. elif fshape is not None and fshape > 1:
  5456. return fit_loc_scale_w_shape_gt_1()
  5457. # Shape is free
  5458. fit_shape_lt1 = fit_loc_scale_w_shape_lt_1()
  5459. ll_lt1 = self.nnlf(fit_shape_lt1, data)
  5460. fit_shape_gt1 = fit_loc_scale_w_shape_gt_1()
  5461. ll_gt1 = self.nnlf(fit_shape_gt1, data)
  5462. if ll_lt1 <= ll_gt1 and fit_shape_lt1[0] <= 1:
  5463. return fit_shape_lt1
  5464. elif ll_lt1 > ll_gt1 and fit_shape_gt1[0] > 1:
  5465. return fit_shape_gt1
  5466. else:
  5467. return super().fit(data, *args, **kwds)
  5468. powerlaw = powerlaw_gen(a=0.0, b=1.0, name="powerlaw")
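# A minimal sketch of the `beta` special case noted in the docstring above
# (assuming the public `scipy.stats.powerlaw` and `scipy.stats.beta`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(0.1, 0.9, 5)
# >>> np.allclose(stats.powerlaw.pdf(x, 2.5), stats.beta.pdf(x, 2.5, 1))
# True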
  5469. class powerlognorm_gen(rv_continuous):
  5470. r"""A power log-normal continuous random variable.
  5471. %(before_notes)s
  5472. Notes
  5473. -----
  5474. The probability density function for `powerlognorm` is:
  5475. .. math::
  5476. f(x, c, s) = \frac{c}{x s} \phi(\log(x)/s)
  5477. (\Phi(-\log(x)/s))^{c-1}
  5478. where :math:`\phi` is the normal pdf, and :math:`\Phi` is the normal cdf,
  5479. and :math:`x > 0`, :math:`s, c > 0`.
  5480. `powerlognorm` takes :math:`c` and :math:`s` as shape parameters.
  5481. %(after_notes)s
  5482. %(example)s
  5483. """
  5484. _support_mask = rv_continuous._open_support_mask
  5485. def _shape_info(self):
  5486. ic = _ShapeInfo("c", False, (0, np.inf), (False, False))
  5487. i_s = _ShapeInfo("s", False, (0, np.inf), (False, False))
  5488. return [ic, i_s]
  5489. def _pdf(self, x, c, s):
  5490. # powerlognorm.pdf(x, c, s) = c / (x*s) * phi(log(x)/s) *
  5491. # (Phi(-log(x)/s))**(c-1),
  5492. return (c/(x*s) * _norm_pdf(np.log(x)/s) *
  5493. pow(_norm_cdf(-np.log(x)/s), c*1.0-1.0))
  5494. def _cdf(self, x, c, s):
  5495. return 1.0 - pow(_norm_cdf(-np.log(x)/s), c*1.0)
  5496. def _ppf(self, q, c, s):
  5497. return np.exp(-s * _norm_ppf(pow(1.0 - q, 1.0 / c)))
  5498. powerlognorm = powerlognorm_gen(a=0.0, name="powerlognorm")
  5499. class powernorm_gen(rv_continuous):
  5500. r"""A power normal continuous random variable.
  5501. %(before_notes)s
  5502. Notes
  5503. -----
  5504. The probability density function for `powernorm` is:
  5505. .. math::
  5506. f(x, c) = c \phi(x) (\Phi(-x))^{c-1}
  5507. where :math:`\phi` is the normal pdf, and :math:`\Phi` is the normal cdf,
  5508. and :math:`x >= 0`, :math:`c > 0`.
  5509. `powernorm` takes ``c`` as a shape parameter for :math:`c`.
  5510. %(after_notes)s
  5511. %(example)s
  5512. """
  5513. def _shape_info(self):
  5514. return [_ShapeInfo("c", False, (0, np.inf), (False, False))]
  5515. def _pdf(self, x, c):
  5516. # powernorm.pdf(x, c) = c * phi(x) * (Phi(-x))**(c-1)
  5517. return c*_norm_pdf(x) * (_norm_cdf(-x)**(c-1.0))
  5518. def _logpdf(self, x, c):
  5519. return np.log(c) + _norm_logpdf(x) + (c-1)*_norm_logcdf(-x)
  5520. def _cdf(self, x, c):
  5521. return 1.0-_norm_cdf(-x)**(c*1.0)
  5522. def _ppf(self, q, c):
  5523. return -_norm_ppf(pow(1.0 - q, 1.0 / c))
  5524. powernorm = powernorm_gen(name='powernorm')
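# A minimal sketch of `_cdf` above, 1 - Phi(-x)**c (assuming the public
# `scipy.stats.powernorm` and `scipy.stats.norm`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x, c = np.linspace(-2, 2, 5), 2.5
# >>> np.allclose(stats.powernorm.cdf(x, c), 1 - stats.norm.sf(x)**c)
# True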
class rdist_gen(rv_continuous):
    r"""An R-distributed (symmetric beta) continuous random variable.

    %(before_notes)s

    Notes
    -----
    The probability density function for `rdist` is:

    .. math::

        f(x, c) = \frac{(1-x^2)^{c/2-1}}{B(1/2, c/2)}

    for :math:`-1 \le x \le 1`, :math:`c > 0`. `rdist` is also called the
    symmetric beta distribution: if B has a `beta` distribution with
    parameters (c/2, c/2), then X = 2*B - 1 follows an R-distribution with
    parameter c.

    `rdist` takes ``c`` as a shape parameter for :math:`c`.

    This distribution includes the following distribution kernels as
    special cases::

        c = 2:  uniform
        c = 3:  `semicircular`
        c = 4:  Epanechnikov (parabolic)
        c = 6:  quartic (biweight)
        c = 8:  triweight

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("c", False, (0, np.inf), (False, False))]

    # use relation to the beta distribution for pdf, cdf, etc
    def _pdf(self, x, c):
        return np.exp(self._logpdf(x, c))

    def _logpdf(self, x, c):
        return -np.log(2) + beta._logpdf((x + 1)/2, c/2, c/2)

    def _cdf(self, x, c):
        return beta._cdf((x + 1)/2, c/2, c/2)

    def _ppf(self, q, c):
        return 2*beta._ppf(q, c/2, c/2) - 1

    def _rvs(self, c, size=None, random_state=None):
        return 2 * random_state.beta(c/2, c/2, size) - 1

    def _munp(self, n, c):
        numerator = (1 - (n % 2)) * sc.beta((n + 1.0) / 2, c / 2.0)
        return numerator / sc.beta(1. / 2, c / 2.)


rdist = rdist_gen(a=-1.0, b=1.0, name="rdist")
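# A minimal sketch of the `beta` relationship used for `_pdf`/`_cdf` above
# (assuming the public `scipy.stats.rdist` and `scipy.stats.beta`): if
# B ~ beta(c/2, c/2), then 2*B - 1 ~ rdist(c).
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x, c = np.linspace(-0.9, 0.9, 5), 4
# >>> np.allclose(stats.rdist.pdf(x, c), stats.beta.pdf((x + 1)/2, 2, 2)/2)
# True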
  5565. class rayleigh_gen(rv_continuous):
  5566. r"""A Rayleigh continuous random variable.
  5567. %(before_notes)s
  5568. Notes
  5569. -----
  5570. The probability density function for `rayleigh` is:
  5571. .. math::
  5572. f(x) = x \exp(-x^2/2)
  5573. for :math:`x \ge 0`.
  5574. `rayleigh` is a special case of `chi` with ``df=2``.
  5575. %(after_notes)s
  5576. %(example)s
  5577. """
  5578. _support_mask = rv_continuous._open_support_mask
  5579. def _shape_info(self):
  5580. return []
  5581. def _rvs(self, size=None, random_state=None):
  5582. return chi.rvs(2, size=size, random_state=random_state)
  5583. def _pdf(self, r):
  5584. # rayleigh.pdf(r) = r * exp(-r**2/2)
  5585. return np.exp(self._logpdf(r))
  5586. def _logpdf(self, r):
  5587. return np.log(r) - 0.5 * r * r
  5588. def _cdf(self, r):
  5589. return -sc.expm1(-0.5 * r**2)
  5590. def _ppf(self, q):
  5591. return np.sqrt(-2 * sc.log1p(-q))
  5592. def _sf(self, r):
  5593. return np.exp(self._logsf(r))
  5594. def _logsf(self, r):
  5595. return -0.5 * r * r
  5596. def _isf(self, q):
  5597. return np.sqrt(-2 * np.log(q))
  5598. def _stats(self):
  5599. val = 4 - np.pi
  5600. return (np.sqrt(np.pi/2),
  5601. val/2,
  5602. 2*(np.pi-3)*np.sqrt(np.pi)/val**1.5,
  5603. 6*np.pi/val-16/val**2)
  5604. def _entropy(self):
  5605. return _EULER/2.0 + 1 - 0.5*np.log(2)
  5606. @_call_super_mom
  5607. @extend_notes_in_docstring(rv_continuous, notes="""\
  5608. Notes specifically for ``rayleigh.fit``: If the location is fixed with
  5609. the `floc` parameter, this method uses an analytical formula to find
  5610. the scale. Otherwise, this function uses a numerical root finder on
  5611. the first order conditions of the log-likelihood function to find the
  5612. MLE. Only the (optional) `loc` parameter is used as the initial guess
  5613. for the root finder; the `scale` parameter and any other parameters
  5614. for the optimizer are ignored.\n\n""")
  5615. def fit(self, data, *args, **kwds):
  5616. if kwds.pop('superfit', False):
  5617. return super().fit(data, *args, **kwds)
  5618. data, floc, fscale = _check_fit_input_parameters(self, data,
  5619. args, kwds)
  5620. def scale_mle(loc):
  5621. # Source: Statistical Distributions, 3rd Edition. Evans, Hastings,
  5622. # and Peacock (2000), Page 175
  5623. return (np.sum((data - loc) ** 2) / (2 * len(data))) ** .5
  5624. def loc_mle(loc):
  5625. # This implicit equation for `loc` is used when
  5626. # both `loc` and `scale` are free.
  5627. xm = data - loc
  5628. s1 = xm.sum()
  5629. s2 = (xm**2).sum()
  5630. s3 = (1/xm).sum()
  5631. return s1 - s2/(2*len(data))*s3
  5632. def loc_mle_scale_fixed(loc, scale=fscale):
  5633. # This implicit equation for `loc` is used when
  5634. # `scale` is fixed but `loc` is not.
  5635. xm = data - loc
  5636. return xm.sum() - scale**2 * (1/xm).sum()
  5637. if floc is not None:
  5638. # `loc` is fixed, analytically determine `scale`.
  5639. if np.any(data - floc <= 0):
  5640. raise FitDataError("rayleigh", lower=1, upper=np.inf)
  5641. else:
  5642. return floc, scale_mle(floc)
  5643. # Account for user provided guess of `loc`.
  5644. loc0 = kwds.get('loc')
  5645. if loc0 is None:
  5646. # Use _fitstart to estimate loc; ignore the returned scale.
  5647. loc0 = self._fitstart(data)[0]
  5648. fun = loc_mle if fscale is None else loc_mle_scale_fixed
  5649. rbrack = np.nextafter(np.min(data), -np.inf)
  5650. lbrack = _get_left_bracket(fun, rbrack)
  5651. res = optimize.root_scalar(fun, bracket=(lbrack, rbrack))
  5652. if not res.converged:
  5653. raise FitSolverError(res.flag)
  5654. loc = res.root
  5655. scale = fscale or scale_mle(loc)
  5656. return loc, scale
  5657. rayleigh = rayleigh_gen(a=0.0, name="rayleigh")
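# A minimal sketch of the fixed-location branch of `fit` above, which uses
# the analytical scale MLE sqrt(sum((x - loc)**2) / (2*n)) (assuming the
# public `scipy.stats.rayleigh`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> data = stats.rayleigh.rvs(scale=2.0, size=1000,
# ...                           random_state=np.random.default_rng(1))
# >>> loc_hat, scale_hat = stats.rayleigh.fit(data, floc=0)
# >>> np.isclose(scale_hat, np.sqrt(np.sum(data**2) / (2*len(data))))
# True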
  5658. class reciprocal_gen(rv_continuous):
  5659. r"""A loguniform or reciprocal continuous random variable.
  5660. %(before_notes)s
  5661. Notes
  5662. -----
  5663. The probability density function for this class is:
  5664. .. math::
  5665. f(x, a, b) = \frac{1}{x \log(b/a)}
  5666. for :math:`a \le x \le b`, :math:`b > a > 0`. This class takes
  5667. :math:`a` and :math:`b` as shape parameters.
  5668. %(after_notes)s
  5669. %(example)s
  5670. This doesn't show the equal probability of ``0.01``, ``0.1`` and
  5671. ``1``. This is best when the x-axis is log-scaled:
  5672. >>> import numpy as np
  5673. >>> fig, ax = plt.subplots(1, 1)
  5674. >>> ax.hist(np.log10(r))
  5675. >>> ax.set_ylabel("Frequency")
  5676. >>> ax.set_xlabel("Value of random variable")
  5677. >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
  5678. >>> ticks = ["$10^{{ {} }}$".format(i) for i in [-2, -1, 0]]
  5679. >>> ax.set_xticklabels(ticks) # doctest: +SKIP
  5680. >>> plt.show()
  5681. This random variable will be log-uniform regardless of the base chosen for
  5682. ``a`` and ``b``. Let's specify with base ``2`` instead:
  5683. >>> rvs = %(name)s(2**-2, 2**0).rvs(size=1000)
  5684. Values of ``1/4``, ``1/2`` and ``1`` are equally likely with this random
  5685. variable. Here's the histogram:
  5686. >>> fig, ax = plt.subplots(1, 1)
  5687. >>> ax.hist(np.log2(rvs))
  5688. >>> ax.set_ylabel("Frequency")
  5689. >>> ax.set_xlabel("Value of random variable")
  5690. >>> ax.xaxis.set_major_locator(plt.FixedLocator([-2, -1, 0]))
  5691. >>> ticks = ["$2^{{ {} }}$".format(i) for i in [-2, -1, 0]]
  5692. >>> ax.set_xticklabels(ticks) # doctest: +SKIP
  5693. >>> plt.show()
  5694. """
  5695. def _argcheck(self, a, b):
  5696. return (a > 0) & (b > a)
  5697. def _shape_info(self):
  5698. ia = _ShapeInfo("a", False, (0, np.inf), (False, False))
  5699. ib = _ShapeInfo("b", False, (0, np.inf), (False, False))
  5700. return [ia, ib]
  5701. def _fitstart(self, data):
  5702. # Reasonable, since support is [a, b]
  5703. return super()._fitstart(data, args=(np.min(data), np.max(data)))
  5704. def _get_support(self, a, b):
  5705. return a, b
  5706. def _pdf(self, x, a, b):
  5707. # reciprocal.pdf(x, a, b) = 1 / (x*log(b/a))
  5708. return 1.0 / (x * np.log(b * 1.0 / a))
  5709. def _logpdf(self, x, a, b):
  5710. return -np.log(x) - np.log(np.log(b * 1.0 / a))
  5711. def _cdf(self, x, a, b):
  5712. return (np.log(x)-np.log(a)) / np.log(b * 1.0 / a)
  5713. def _ppf(self, q, a, b):
  5714. return a*pow(b*1.0/a, q)
  5715. def _munp(self, n, a, b):
  5716. return 1.0/np.log(b*1.0/a) / n * (pow(b*1.0, n) - pow(a*1.0, n))
  5717. def _entropy(self, a, b):
  5718. return 0.5*np.log(a*b)+np.log(np.log(b*1.0/a))
  5719. fit_note = """\
  5720. `loguniform`/`reciprocal` is over-parameterized. `fit` automatically
  5721. fixes `scale` to 1 unless `fscale` is provided by the user.\n\n"""
  5722. @extend_notes_in_docstring(rv_continuous, notes=fit_note)
  5723. def fit(self, data, *args, **kwds):
  5724. fscale = kwds.pop('fscale', 1)
  5725. return super().fit(data, *args, fscale=fscale, **kwds)
  5726. loguniform = reciprocal_gen(name="loguniform")
  5727. reciprocal = reciprocal_gen(name="reciprocal")
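# A minimal sketch of `_ppf` above, a*(b/a)**q, i.e. log(X) is uniform on
# [log(a), log(b)] (assuming the public `scipy.stats.loguniform`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> q, a, b = np.array([0.25, 0.5, 0.75]), 0.01, 1.0
# >>> np.allclose(stats.loguniform.ppf(q, a, b), a * (b/a)**q)
# True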
  5728. class rice_gen(rv_continuous):
  5729. r"""A Rice continuous random variable.
  5730. %(before_notes)s
  5731. Notes
  5732. -----
  5733. The probability density function for `rice` is:
  5734. .. math::
  5735. f(x, b) = x \exp(- \frac{x^2 + b^2}{2}) I_0(x b)
  5736. for :math:`x >= 0`, :math:`b > 0`. :math:`I_0` is the modified Bessel
  5737. function of order zero (`scipy.special.i0`).
  5738. `rice` takes ``b`` as a shape parameter for :math:`b`.
  5739. %(after_notes)s
  5740. The Rice distribution describes the length, :math:`r`, of a 2-D vector with
  5741. components :math:`(U+u, V+v)`, where :math:`U, V` are constant, :math:`u,
  5742. v` are independent Gaussian random variables with standard deviation
  5743. :math:`s`. Let :math:`R = \sqrt{U^2 + V^2}`. Then the pdf of :math:`r` is
  5744. ``rice.pdf(x, R/s, scale=s)``.
  5745. %(example)s
  5746. """
  5747. def _argcheck(self, b):
  5748. return b >= 0
  5749. def _shape_info(self):
  5750. return [_ShapeInfo("b", False, (0, np.inf), (True, False))]
  5751. def _rvs(self, b, size=None, random_state=None):
  5752. # https://en.wikipedia.org/wiki/Rice_distribution
  5753. t = b/np.sqrt(2) + random_state.standard_normal(size=(2,) + size)
  5754. return np.sqrt((t*t).sum(axis=0))
  5755. def _cdf(self, x, b):
  5756. return sc.chndtr(np.square(x), 2, np.square(b))
  5757. def _ppf(self, q, b):
  5758. return np.sqrt(sc.chndtrix(q, 2, np.square(b)))
  5759. def _pdf(self, x, b):
  5760. # rice.pdf(x, b) = x * exp(-(x**2+b**2)/2) * I[0](x*b)
  5761. #
  5762. # We use (x**2 + b**2)/2 = ((x-b)**2)/2 + xb.
  5763. # The factor of np.exp(-xb) is then included in the i0e function
  5764. # in place of the modified Bessel function, i0, improving
  5765. # numerical stability for large values of xb.
  5766. return x * np.exp(-(x-b)*(x-b)/2.0) * sc.i0e(x*b)
  5767. def _munp(self, n, b):
  5768. nd2 = n/2.0
  5769. n1 = 1 + nd2
  5770. b2 = b*b/2.0
  5771. return (2.0**(nd2) * np.exp(-b2) * sc.gamma(n1) *
  5772. sc.hyp1f1(n1, 1, b2))
  5773. rice = rice_gen(a=0.0, name="rice")
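# A minimal sketch of the vector-length description in the notes above,
# checked by simulation (assuming the public `scipy.stats.rice`): the length
# of (U + u, V + v) follows ``rice(R/s, scale=s)`` with R = hypot(U, V).
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(3)
# >>> U, V, s, n = 1.0, 2.0, 0.5, 100_000
# >>> r = np.hypot(U + s*rng.standard_normal(n),
# ...              V + s*rng.standard_normal(n))
# >>> np.isclose(r.mean(), stats.rice.mean(np.hypot(U, V)/s, scale=s),
# ...            rtol=1e-2)
# True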
  5774. class recipinvgauss_gen(rv_continuous):
  5775. r"""A reciprocal inverse Gaussian continuous random variable.
  5776. %(before_notes)s
  5777. Notes
  5778. -----
  5779. The probability density function for `recipinvgauss` is:
  5780. .. math::
  5781. f(x, \mu) = \frac{1}{\sqrt{2\pi x}}
  5782. \exp\left(\frac{-(1-\mu x)^2}{2\mu^2x}\right)
  5783. for :math:`x \ge 0`.
  5784. `recipinvgauss` takes ``mu`` as a shape parameter for :math:`\mu`.
  5785. %(after_notes)s
  5786. %(example)s
  5787. """
  5788. def _shape_info(self):
  5789. return [_ShapeInfo("mu", False, (0, np.inf), (False, False))]
  5790. def _pdf(self, x, mu):
  5791. # recipinvgauss.pdf(x, mu) =
  5792. # 1/sqrt(2*pi*x) * exp(-(1-mu*x)**2/(2*x*mu**2))
  5793. return np.exp(self._logpdf(x, mu))
  5794. def _logpdf(self, x, mu):
  5795. return _lazywhere(x > 0, (x, mu),
  5796. lambda x, mu: (-(1 - mu*x)**2.0 / (2*x*mu**2.0)
  5797. - 0.5*np.log(2*np.pi*x)),
  5798. fillvalue=-np.inf)
  5799. def _cdf(self, x, mu):
  5800. trm1 = 1.0/mu - x
  5801. trm2 = 1.0/mu + x
  5802. isqx = 1.0/np.sqrt(x)
  5803. return _norm_cdf(-isqx*trm1) - np.exp(2.0/mu)*_norm_cdf(-isqx*trm2)
  5804. def _sf(self, x, mu):
  5805. trm1 = 1.0/mu - x
  5806. trm2 = 1.0/mu + x
  5807. isqx = 1.0/np.sqrt(x)
  5808. return _norm_cdf(isqx*trm1) + np.exp(2.0/mu)*_norm_cdf(-isqx*trm2)
  5809. def _rvs(self, mu, size=None, random_state=None):
  5810. return 1.0/random_state.wald(mu, 1.0, size=size)
  5811. recipinvgauss = recipinvgauss_gen(a=0.0, name='recipinvgauss')
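# A minimal sketch of `_rvs` above, which returns the reciprocal of a Wald
# (inverse Gaussian) variate, checked by simulation (assuming the public
# `scipy.stats.recipinvgauss` and `scipy.stats.invgauss` with unit scale):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> rng = np.random.default_rng(7)
# >>> y = 1.0 / stats.invgauss.rvs(0.5, size=200_000, random_state=rng)
# >>> np.isclose((y <= 2.0).mean(), stats.recipinvgauss.cdf(2.0, 0.5),
# ...            atol=1e-2)
# True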
class semicircular_gen(rv_continuous):
    r"""A semicircular continuous random variable.

    %(before_notes)s

    See Also
    --------
    rdist

    Notes
    -----
    The probability density function for `semicircular` is:

    .. math::

        f(x) = \frac{2}{\pi} \sqrt{1-x^2}

    for :math:`-1 \le x \le 1`.

    The distribution is a special case of `rdist` with `c = 3`.

    %(after_notes)s

    References
    ----------
    .. [1] "Wigner semicircle distribution",
           https://en.wikipedia.org/wiki/Wigner_semicircle_distribution

    %(example)s

    """
    def _shape_info(self):
        return []

    def _pdf(self, x):
        return 2.0/np.pi*np.sqrt(1-x*x)

    def _logpdf(self, x):
        return np.log(2/np.pi) + 0.5*sc.log1p(-x*x)

    def _cdf(self, x):
        return 0.5+1.0/np.pi*(x*np.sqrt(1-x*x) + np.arcsin(x))

    def _ppf(self, q):
        return rdist._ppf(q, 3)

    def _rvs(self, size=None, random_state=None):
        # generate values uniformly distributed on the area under the pdf
        # (semi-circle) by randomly generating the radius and angle
        r = np.sqrt(random_state.uniform(size=size))
        a = np.cos(np.pi * random_state.uniform(size=size))
        return r * a

    def _stats(self):
        return 0, 0.25, 0, -1.0

    def _entropy(self):
        return 0.64472988584940017414


semicircular = semicircular_gen(a=-1.0, b=1.0, name="semicircular")
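# A minimal sketch of the `rdist` special case noted in the docstring above
# (assuming the public `scipy.stats.semicircular` and `scipy.stats.rdist`):
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> x = np.linspace(-0.9, 0.9, 5)
# >>> np.allclose(stats.semicircular.pdf(x), stats.rdist.pdf(x, 3))
# True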
  5853. class skewcauchy_gen(rv_continuous):
  5854. r"""A skewed Cauchy random variable.
  5855. %(before_notes)s
  5856. See Also
  5857. --------
  5858. cauchy : Cauchy distribution
  5859. Notes
  5860. -----
  5861. The probability density function for `skewcauchy` is:
  5862. .. math::
  5863. f(x) = \frac{1}{\pi \left(\frac{x^2}{\left(a\, \text{sign}(x) + 1
  5864. \right)^2} + 1 \right)}
  5865. for a real number :math:`x` and skewness parameter :math:`-1 < a < 1`.
  5866. When :math:`a=0`, the distribution reduces to the usual Cauchy
  5867. distribution.
  5868. %(after_notes)s
  5869. References
  5870. ----------
  5871. .. [1] "Skewed generalized *t* distribution", Wikipedia
  5872. https://en.wikipedia.org/wiki/Skewed_generalized_t_distribution#Skewed_Cauchy_distribution
  5873. %(example)s
  5874. """
  5875. def _argcheck(self, a):
  5876. return np.abs(a) < 1
  5877. def _shape_info(self):
  5878. return [_ShapeInfo("a", False, (-1.0, 1.0), (False, False))]
  5879. def _pdf(self, x, a):
  5880. return 1 / (np.pi * (x**2 / (a * np.sign(x) + 1)**2 + 1))
  5881. def _cdf(self, x, a):
  5882. return np.where(x <= 0,
  5883. (1 - a) / 2 + (1 - a) / np.pi * np.arctan(x / (1 - a)),
  5884. (1 - a) / 2 + (1 + a) / np.pi * np.arctan(x / (1 + a)))
  5885. def _ppf(self, x, a):
  5886. i = x < self._cdf(0, a)
  5887. return np.where(i,
  5888. np.tan(np.pi / (1 - a) * (x - (1 - a) / 2)) * (1 - a),
  5889. np.tan(np.pi / (1 + a) * (x - (1 - a) / 2)) * (1 + a))
  5890. def _stats(self, a, moments='mvsk'):
  5891. return np.nan, np.nan, np.nan, np.nan
  5892. def _fitstart(self, data):
  5893. # Use 0 as the initial guess of the skewness shape parameter.
  5894. # For the location and scale, estimate using the median and
  5895. # quartiles.
  5896. p25, p50, p75 = np.percentile(data, [25, 50, 75])
  5897. return 0.0, p50, (p75 - p25)/2
  5898. skewcauchy = skewcauchy_gen(name='skewcauchy')
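# Illustrative sketch (hypothetical helper, not part of scipy's API): with
# a = 0 the density above reduces to the standard Cauchy, as the docstring
# notes.
def _demo_skewcauchy_reduces_to_cauchy(x=1.7):
    import numpy as np
    from scipy import stats
    return np.isclose(stats.skewcauchy.pdf(x, 0.0), stats.cauchy.pdf(x))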
  5899. class skew_norm_gen(rv_continuous):
  5900. r"""A skew-normal random variable.
  5901. %(before_notes)s
  5902. Notes
  5903. -----
  5904. The pdf is::
  5905. skewnorm.pdf(x, a) = 2 * norm.pdf(x) * norm.cdf(a*x)
5906. `skewnorm` takes a real number :math:`a` as a skewness parameter.
5907. When ``a = 0``, the distribution is identical to a normal distribution
  5908. (`norm`). `rvs` implements the method of [1]_.
  5909. %(after_notes)s
  5910. %(example)s
  5911. References
  5912. ----------
  5913. .. [1] A. Azzalini and A. Capitanio (1999). Statistical applications of
  5914. the multivariate skew-normal distribution. J. Roy. Statist. Soc.,
  5915. B 61, 579-602. :arxiv:`0911.2093`
  5916. """
  5917. def _argcheck(self, a):
  5918. return np.isfinite(a)
  5919. def _shape_info(self):
  5920. return [_ShapeInfo("a", False, (-np.inf, np.inf), (False, False))]
  5921. def _pdf(self, x, a):
  5922. return _lazywhere(
  5923. a == 0, (x, a), lambda x, a: _norm_pdf(x),
  5924. f2=lambda x, a: 2.*_norm_pdf(x)*_norm_cdf(a*x)
  5925. )
  5926. def _cdf(self, x, a):
  5927. cdf = _boost._skewnorm_cdf(x, 0, 1, a)
  5928. # for some reason, a isn't broadcasted if some of x are invalid
  5929. a = np.broadcast_to(a, cdf.shape)
  5930. # Boost is not accurate in left tail when a > 0
  5931. i_small_cdf = (cdf < 1e-6) & (a > 0)
  5932. cdf[i_small_cdf] = super()._cdf(x[i_small_cdf], a[i_small_cdf])
  5933. return np.clip(cdf, 0, 1)
  5934. def _ppf(self, x, a):
  5935. return _boost._skewnorm_ppf(x, 0, 1, a)
  5936. def _sf(self, x, a):
  5937. # Boost's SF is implemented this way. Use whatever customizations
  5938. # we made in the _cdf.
  5939. return self._cdf(-x, -a)
  5940. def _isf(self, x, a):
  5941. return _boost._skewnorm_isf(x, 0, 1, a)
  5942. def _rvs(self, a, size=None, random_state=None):
  5943. u0 = random_state.normal(size=size)
  5944. v = random_state.normal(size=size)
  5945. d = a/np.sqrt(1 + a**2)
  5946. u1 = d*u0 + v*np.sqrt(1 - d**2)
  5947. return np.where(u0 >= 0, u1, -u1)
  5948. def _stats(self, a, moments='mvsk'):
  5949. output = [None, None, None, None]
  5950. const = np.sqrt(2/np.pi) * a/np.sqrt(1 + a**2)
  5951. if 'm' in moments:
  5952. output[0] = const
  5953. if 'v' in moments:
  5954. output[1] = 1 - const**2
  5955. if 's' in moments:
  5956. output[2] = ((4 - np.pi)/2) * (const/np.sqrt(1 - const**2))**3
  5957. if 'k' in moments:
  5958. output[3] = (2*(np.pi - 3)) * (const**4/(1 - const**2)**2)
  5959. return output
5960. # For odd order, each noncentral moment of the skew-normal distribution
  5961. # with location 0 and scale 1 can be expressed as a polynomial in delta,
  5962. # where delta = a/sqrt(1 + a**2) and `a` is the skew-normal shape
  5963. # parameter. The dictionary _skewnorm_odd_moments defines those
  5964. # polynomials for orders up to 19. The dict is implemented as a cached
  5965. # property to reduce the impact of the creation of the dict on import time.
  5966. @cached_property
  5967. def _skewnorm_odd_moments(self):
  5968. skewnorm_odd_moments = {
  5969. 1: Polynomial([1]),
  5970. 3: Polynomial([3, -1]),
  5971. 5: Polynomial([15, -10, 3]),
  5972. 7: Polynomial([105, -105, 63, -15]),
  5973. 9: Polynomial([945, -1260, 1134, -540, 105]),
  5974. 11: Polynomial([10395, -17325, 20790, -14850, 5775, -945]),
  5975. 13: Polynomial([135135, -270270, 405405, -386100, 225225, -73710,
  5976. 10395]),
  5977. 15: Polynomial([2027025, -4729725, 8513505, -10135125, 7882875,
  5978. -3869775, 1091475, -135135]),
  5979. 17: Polynomial([34459425, -91891800, 192972780, -275675400,
  5980. 268017750, -175429800, 74220300, -18378360,
  5981. 2027025]),
  5982. 19: Polynomial([654729075, -1964187225, 4714049340, -7856748900,
  5983. 9166207050, -7499623950, 4230557100, -1571349780,
  5984. 346621275, -34459425]),
  5985. }
  5986. return skewnorm_odd_moments
  5987. def _munp(self, order, a):
  5988. if order & 1:
  5989. if order > 19:
  5990. raise NotImplementedError("skewnorm noncentral moments not "
  5991. "implemented for odd orders greater "
  5992. "than 19.")
  5993. # Use the precomputed polynomials that were derived from the
  5994. # moment generating function.
  5995. delta = a/np.sqrt(1 + a**2)
  5996. return (delta * self._skewnorm_odd_moments[order](delta**2)
  5997. * _SQRT_2_OVER_PI)
  5998. else:
  5999. # For even order, the moment is just (order-1)!!, where !! is the
  6000. # notation for the double factorial; for an odd integer m, m!! is
  6001. # m*(m-2)*...*3*1.
  6002. # We could use special.factorial2, but we know the argument is odd,
  6003. # so avoid the overhead of that function and compute the result
  6004. # directly here.
  6005. return sc.gamma((order + 1)/2) * 2**(order/2) / _SQRT_PI
  6006. @extend_notes_in_docstring(rv_continuous, notes="""\
  6007. If ``method='mm'``, parameters fixed by the user are respected, and the
  6008. remaining parameters are used to match distribution and sample moments
  6009. where possible. For example, if the user fixes the location with
  6010. ``floc``, the parameters will only match the distribution skewness and
  6011. variance to the sample skewness and variance; no attempt will be made
  6012. to match the means or minimize a norm of the errors.
  6013. Note that the maximum possible skewness magnitude of a
  6014. `scipy.stats.skewnorm` distribution is approximately 0.9952717; if the
  6015. magnitude of the data's sample skewness exceeds this, the returned
  6016. shape parameter ``a`` will be infinite.
  6017. \n\n""")
  6018. def fit(self, data, *args, **kwds):
  6019. # this extracts fixed shape, location, and scale however they
  6020. # are specified, and also leaves them in `kwds`
  6021. data, fa, floc, fscale = _check_fit_input_parameters(self, data,
  6022. args, kwds)
  6023. method = kwds.get("method", "mle").lower()
  6024. # See https://en.wikipedia.org/wiki/Skew_normal_distribution for
  6025. # moment formulas.
  6026. def skew_d(d): # skewness in terms of delta
  6027. return (4-np.pi)/2 * ((d * np.sqrt(2 / np.pi))**3
  6028. / (1 - 2*d**2 / np.pi)**(3/2))
  6029. # If skewness of data is greater than max possible population skewness,
  6030. # MoM won't provide a good guess. Get out early.
  6031. s = stats.skew(data)
  6032. s_max = skew_d(1)
  6033. if abs(s) >= s_max and method != "mm" and fa is None and not args:
  6034. return super().fit(data, *args, **kwds)
  6035. # If method is method of moments, we don't need the user's guesses.
  6036. # Otherwise, extract the guesses from args and kwds.
  6037. if method == "mm":
  6038. a, loc, scale = None, None, None
  6039. else:
  6040. a = args[0] if len(args) else None
  6041. loc = kwds.pop('loc', None)
  6042. scale = kwds.pop('scale', None)
  6043. if fa is None and a is None: # not fixed and no guess: use MoM
  6044. # Solve for a that matches sample distribution skewness to sample
  6045. # skewness.
  6046. s = np.clip(s, -s_max, s_max)
  6047. d = root_scalar(lambda d: skew_d(d) - s, bracket=[-1, 1]).root
  6048. with np.errstate(divide='ignore'):
  6049. a = np.sqrt(np.divide(d**2, (1-d**2)))*np.sign(s)
  6050. else:
  6051. a = fa if fa is not None else a
  6052. d = a / np.sqrt(1 + a**2)
  6053. if fscale is None and scale is None:
  6054. v = np.var(data)
  6055. scale = np.sqrt(v / (1 - 2*d**2/np.pi))
  6056. elif fscale is not None:
  6057. scale = fscale
  6058. if floc is None and loc is None:
  6059. m = np.mean(data)
  6060. loc = m - scale*d*np.sqrt(2/np.pi)
  6061. elif floc is not None:
  6062. loc = floc
  6063. if method == 'mm':
  6064. return a, loc, scale
  6065. else:
  6066. # At this point, parameter "guesses" may equal the fixed parameters
  6067. # in kwds. No harm in passing them as guesses, too.
  6068. return super().fit(data, a, loc=loc, scale=scale, **kwds)
  6069. skewnorm = skew_norm_gen(name='skewnorm')
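# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# method-of-moments branch of `fit` above matches sample moments where
# possible; with no fixed parameters the recovered values should sit near
# the generating ones.
def _demo_skewnorm_mm_fit(seed=5):
    import numpy as np
    from scipy import stats
    rng = np.random.default_rng(seed)
    data = stats.skewnorm.rvs(4.0, loc=1.0, scale=2.0, size=5_000,
                              random_state=rng)
    a, loc, scale = stats.skewnorm.fit(data, method='mm')
    return a, loc, scale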
  6070. class trapezoid_gen(rv_continuous):
  6071. r"""A trapezoidal continuous random variable.
  6072. %(before_notes)s
  6073. Notes
  6074. -----
  6075. The trapezoidal distribution can be represented with an up-sloping line
  6076. from ``loc`` to ``(loc + c*scale)``, then constant to ``(loc + d*scale)``
  6077. and then downsloping from ``(loc + d*scale)`` to ``(loc+scale)``. This
  6078. defines the trapezoid base from ``loc`` to ``(loc+scale)`` and the flat
6079. top from ``c`` to ``d``, where ``c`` and ``d`` are given as fractions of the base,
  6080. with ``0 <= c <= d <= 1``. When ``c=d``, this is equivalent to `triang`
  6081. with the same values for `loc`, `scale` and `c`.
  6082. The method of [1]_ is used for computing moments.
  6083. `trapezoid` takes :math:`c` and :math:`d` as shape parameters.
  6084. %(after_notes)s
6085. The standard form is in the range [0, 1], with the flat top running from ``c`` to ``d``.
  6086. The location parameter shifts the start to `loc`.
  6087. The scale parameter changes the width from 1 to `scale`.
  6088. %(example)s
  6089. References
  6090. ----------
  6091. .. [1] Kacker, R.N. and Lawrence, J.F. (2007). Trapezoidal and triangular
  6092. distributions for Type B evaluation of standard uncertainty.
  6093. Metrologia 44, 117-127. :doi:`10.1088/0026-1394/44/2/003`
  6094. """
  6095. def _argcheck(self, c, d):
  6096. return (c >= 0) & (c <= 1) & (d >= 0) & (d <= 1) & (d >= c)
  6097. def _shape_info(self):
  6098. ic = _ShapeInfo("c", False, (0, 1.0), (True, True))
  6099. id = _ShapeInfo("d", False, (0, 1.0), (True, True))
  6100. return [ic, id]
  6101. def _pdf(self, x, c, d):
  6102. u = 2 / (d-c+1)
  6103. return _lazyselect([x < c,
  6104. (c <= x) & (x <= d),
  6105. x > d],
  6106. [lambda x, c, d, u: u * x / c,
  6107. lambda x, c, d, u: u,
  6108. lambda x, c, d, u: u * (1-x) / (1-d)],
  6109. (x, c, d, u))
  6110. def _cdf(self, x, c, d):
  6111. return _lazyselect([x < c,
  6112. (c <= x) & (x <= d),
  6113. x > d],
  6114. [lambda x, c, d: x**2 / c / (d-c+1),
  6115. lambda x, c, d: (c + 2 * (x-c)) / (d-c+1),
  6116. lambda x, c, d: 1-((1-x) ** 2
  6117. / (d-c+1) / (1-d))],
  6118. (x, c, d))
  6119. def _ppf(self, q, c, d):
  6120. qc, qd = self._cdf(c, c, d), self._cdf(d, c, d)
  6121. condlist = [q < qc, q <= qd, q > qd]
  6122. choicelist = [np.sqrt(q * c * (1 + d - c)),
  6123. 0.5 * q * (1 + d - c) + 0.5 * c,
  6124. 1 - np.sqrt((1 - q) * (d - c + 1) * (1 - d))]
  6125. return np.select(condlist, choicelist)
  6126. def _munp(self, n, c, d):
  6127. # Using the parameterization from Kacker, 2007, with
  6128. # a=bottom left, c=top left, d=top right, b=bottom right, then
  6129. # E[X^n] = h/(n+1)/(n+2) [(b^{n+2}-d^{n+2})/(b-d)
6130. # - (c^{n+2} - a^{n+2})/(c-a)]
  6131. # with h = 2/((b-a) - (d-c)). The corresponding parameterization
  6132. # in scipy, has a'=loc, c'=loc+c*scale, d'=loc+d*scale, b'=loc+scale,
  6133. # which for standard form reduces to a'=0, b'=1, c'=c, d'=d.
  6134. # Substituting into E[X^n] gives the bd' term as (1 - d^{n+2})/(1 - d)
6135. # and the ac' term as c^{n+1} for the standard form. The bd' term has
  6136. # numerical difficulties near d=1, so replace (1 - d^{n+2})/(1-d)
  6137. # with expm1((n+2)*log(d))/(d-1).
  6138. # Testing with n=18 for c=(1e-30,1-eps) shows that this is stable.
  6139. # We still require an explicit test for d=1 to prevent divide by zero,
  6140. # and now a test for d=0 to prevent log(0).
  6141. ab_term = c**(n+1)
  6142. dc_term = _lazyselect(
  6143. [d == 0.0, (0.0 < d) & (d < 1.0), d == 1.0],
  6144. [lambda d: 1.0,
  6145. lambda d: np.expm1((n+2) * np.log(d)) / (d-1.0),
  6146. lambda d: n+2],
  6147. [d])
  6148. val = 2.0 / (1.0+d-c) * (dc_term - ab_term) / ((n+1) * (n+2))
  6149. return val
  6150. def _entropy(self, c, d):
  6151. # Using the parameterization from Wikipedia (van Dorp, 2003)
  6152. # with a=bottom left, c=top left, d=top right, b=bottom right
  6153. # gives a'=loc, b'=loc+c*scale, c'=loc+d*scale, d'=loc+scale,
  6154. # which for loc=0, scale=1 is a'=0, b'=c, c'=d, d'=1.
  6155. # Substituting into the entropy formula from Wikipedia gives
  6156. # the following result.
  6157. return 0.5 * (1.0-d+c) / (1.0+d-c) + np.log(0.5 * (1.0+d-c))
  6158. trapezoid = trapezoid_gen(a=0.0, b=1.0, name="trapezoid")
  6159. # Note: alias kept for backwards compatibility. Rename was done
  6160. # because trapz is a slur in colloquial English (see gh-12924).
  6161. trapz = trapezoid_gen(a=0.0, b=1.0, name="trapz")
  6162. if trapz.__doc__:
  6163. trapz.__doc__ = "trapz is an alias for `trapezoid`"
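# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# Notes above say that with c == d the trapezoid collapses to `triang` with
# the same mode; a one-point numerical check.
def _demo_trapezoid_collapses_to_triang(x=0.4, c=0.3):
    import numpy as np
    from scipy import stats
    return np.isclose(stats.trapezoid.pdf(x, c, c), stats.triang.pdf(x, c))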
  6164. class triang_gen(rv_continuous):
  6165. r"""A triangular continuous random variable.
  6166. %(before_notes)s
  6167. Notes
  6168. -----
  6169. The triangular distribution can be represented with an up-sloping line from
6170. ``loc`` to ``(loc + c*scale)`` and then downsloping from ``(loc + c*scale)``
  6171. to ``(loc + scale)``.
  6172. `triang` takes ``c`` as a shape parameter for :math:`0 \le c \le 1`.
  6173. %(after_notes)s
  6174. The standard form is in the range [0, 1] with c the mode.
  6175. The location parameter shifts the start to `loc`.
  6176. The scale parameter changes the width from 1 to `scale`.
  6177. %(example)s
  6178. """
  6179. def _rvs(self, c, size=None, random_state=None):
  6180. return random_state.triangular(0, c, 1, size)
  6181. def _argcheck(self, c):
  6182. return (c >= 0) & (c <= 1)
  6183. def _shape_info(self):
  6184. return [_ShapeInfo("c", False, (0, 1.0), (True, True))]
  6185. def _pdf(self, x, c):
  6186. # 0: edge case where c=0
  6187. # 1: generalised case for x < c, don't use x <= c, as it doesn't cope
  6188. # with c = 0.
  6189. # 2: generalised case for x >= c, but doesn't cope with c = 1
  6190. # 3: edge case where c=1
  6191. r = _lazyselect([c == 0,
  6192. x < c,
  6193. (x >= c) & (c != 1),
  6194. c == 1],
  6195. [lambda x, c: 2 - 2 * x,
  6196. lambda x, c: 2 * x / c,
  6197. lambda x, c: 2 * (1 - x) / (1 - c),
  6198. lambda x, c: 2 * x],
  6199. (x, c))
  6200. return r
  6201. def _cdf(self, x, c):
  6202. r = _lazyselect([c == 0,
  6203. x < c,
  6204. (x >= c) & (c != 1),
  6205. c == 1],
  6206. [lambda x, c: 2*x - x*x,
  6207. lambda x, c: x * x / c,
  6208. lambda x, c: (x*x - 2*x + c) / (c-1),
  6209. lambda x, c: x * x],
  6210. (x, c))
  6211. return r
  6212. def _ppf(self, q, c):
  6213. return np.where(q < c, np.sqrt(c * q), 1-np.sqrt((1-c) * (1-q)))
  6214. def _stats(self, c):
  6215. return ((c+1.0)/3.0,
  6216. (1.0-c+c*c)/18,
  6217. np.sqrt(2)*(2*c-1)*(c+1)*(c-2) / (5*np.power((1.0-c+c*c), 1.5)),
  6218. -3.0/5.0)
  6219. def _entropy(self, c):
  6220. return 0.5-np.log(2)
  6221. triang = triang_gen(a=0.0, b=1.0, name="triang")
  6222. class truncexpon_gen(rv_continuous):
  6223. r"""A truncated exponential continuous random variable.
  6224. %(before_notes)s
  6225. Notes
  6226. -----
  6227. The probability density function for `truncexpon` is:
  6228. .. math::
  6229. f(x, b) = \frac{\exp(-x)}{1 - \exp(-b)}
6230. for :math:`0 \le x \le b`.
  6231. `truncexpon` takes ``b`` as a shape parameter for :math:`b`.
  6232. %(after_notes)s
  6233. %(example)s
  6234. """
  6235. def _shape_info(self):
  6236. return [_ShapeInfo("b", False, (0, np.inf), (False, False))]
  6237. def _get_support(self, b):
  6238. return self.a, b
  6239. def _pdf(self, x, b):
  6240. # truncexpon.pdf(x, b) = exp(-x) / (1-exp(-b))
  6241. return np.exp(-x)/(-sc.expm1(-b))
  6242. def _logpdf(self, x, b):
  6243. return -x - np.log(-sc.expm1(-b))
  6244. def _cdf(self, x, b):
  6245. return sc.expm1(-x)/sc.expm1(-b)
  6246. def _ppf(self, q, b):
  6247. return -sc.log1p(q*sc.expm1(-b))
  6248. def _munp(self, n, b):
  6249. # wrong answer with formula, same as in continuous.pdf
6250. # return sc.gamma(n+1) - sc.gammainc(1+n, b)
  6251. if n == 1:
  6252. return (1-(b+1)*np.exp(-b))/(-sc.expm1(-b))
  6253. elif n == 2:
  6254. return 2*(1-0.5*(b*b+2*b+2)*np.exp(-b))/(-sc.expm1(-b))
  6255. else:
  6256. # return generic for higher moments
  6257. # return rv_continuous._mom1_sc(self, n, b)
  6258. return self._mom1_sc(n, b)
  6259. def _entropy(self, b):
  6260. eB = np.exp(b)
  6261. return np.log(eB-1)+(1+eB*(b-1.0))/(1.0-eB)
  6262. truncexpon = truncexpon_gen(a=0.0, name='truncexpon')
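# Illustrative sketch (hypothetical helper, not part of scipy's API):
# `truncexpon` is a standard exponential conditioned on [0, b], so on the
# support its CDF should equal expon.cdf(x) / expon.cdf(b).
def _demo_truncexpon_is_conditioned_expon(x=0.8, b=2.0):
    import numpy as np
    from scipy import stats
    return np.isclose(stats.truncexpon.cdf(x, b),
                      stats.expon.cdf(x) / stats.expon.cdf(b))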
  6263. # logsumexp trick for log(p + q) with only log(p) and log(q)
  6264. def _log_sum(log_p, log_q):
  6265. return sc.logsumexp([log_p, log_q], axis=0)
  6266. # same as above, but using -exp(x) = exp(x + πi)
  6267. def _log_diff(log_p, log_q):
  6268. return sc.logsumexp([log_p, log_q+np.pi*1j], axis=0)
  6269. def _log_gauss_mass(a, b):
  6270. """Log of Gaussian probability mass within an interval"""
  6271. a, b = np.atleast_1d(a), np.atleast_1d(b)
  6272. a, b = np.broadcast_arrays(a, b)
  6273. # Calculations in right tail are inaccurate, so we'll exploit the
  6274. # symmetry and work only in the left tail
  6275. case_left = b <= 0
  6276. case_right = a > 0
  6277. case_central = ~(case_left | case_right)
  6278. def mass_case_left(a, b):
  6279. return _log_diff(sc.log_ndtr(b), sc.log_ndtr(a))
  6280. def mass_case_right(a, b):
  6281. return mass_case_left(-b, -a)
  6282. def mass_case_central(a, b):
  6283. # Previously, this was implemented as:
  6284. # left_mass = mass_case_left(a, 0)
  6285. # right_mass = mass_case_right(0, b)
  6286. # return _log_sum(left_mass, right_mass)
  6287. # Catastrophic cancellation occurs as np.exp(log_mass) approaches 1.
  6288. # Correct for this with an alternative formulation.
  6289. # We're not concerned with underflow here: if only one term
  6290. # underflows, it was insignificant; if both terms underflow,
  6291. # the result can't accurately be represented in logspace anyway
  6292. # because sc.log1p(x) ~ x for small x.
  6293. return sc.log1p(-sc.ndtr(a) - sc.ndtr(-b))
  6294. # _lazyselect not working; don't care to debug it
  6295. out = np.full_like(a, fill_value=np.nan, dtype=np.complex128)
  6296. if a[case_left].size:
  6297. out[case_left] = mass_case_left(a[case_left], b[case_left])
  6298. if a[case_right].size:
  6299. out[case_right] = mass_case_right(a[case_right], b[case_right])
  6300. if a[case_central].size:
  6301. out[case_central] = mass_case_central(a[case_central], b[case_central])
  6302. return np.real(out) # discard ~0j
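# Illustrative sketch (hypothetical helper, not part of scipy's API): why the
# mass is computed in log space above.  A naive difference of normal CDFs
# loses all precision in the far tail, while the log-space expression stays
# informative.  Only public scipy.special functions are used here.
def _demo_log_gauss_mass_tail(a=10.0, b=11.0):
    import numpy as np
    from scipy import special
    naive = special.ndtr(b) - special.ndtr(a)  # rounds to 0.0 in double
    log_mass = special.log_ndtr(-a) + np.log1p(
        -np.exp(special.log_ndtr(-b) - special.log_ndtr(-a)))
    return naive, log_mass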
  6303. class truncnorm_gen(rv_continuous):
  6304. r"""A truncated normal continuous random variable.
  6305. %(before_notes)s
  6306. Notes
  6307. -----
  6308. This distribution is the normal distribution centered on ``loc`` (default
  6309. 0), with standard deviation ``scale`` (default 1), and clipped at ``a``,
  6310. ``b`` standard deviations to the left, right (respectively) from ``loc``.
  6311. If ``myclip_a`` and ``myclip_b`` are clip values in the sample space (as
  6312. opposed to the number of standard deviations) then they can be converted
  6313. to the required form according to::
  6314. a, b = (myclip_a - loc) / scale, (myclip_b - loc) / scale
  6315. %(example)s
  6316. """
  6317. def _argcheck(self, a, b):
  6318. return a < b
  6319. def _shape_info(self):
  6320. ia = _ShapeInfo("a", False, (-np.inf, np.inf), (True, False))
  6321. ib = _ShapeInfo("b", False, (-np.inf, np.inf), (False, True))
  6322. return [ia, ib]
  6323. def _fitstart(self, data):
  6324. # Reasonable, since support is [a, b]
  6325. return super()._fitstart(data, args=(np.min(data), np.max(data)))
  6326. def _get_support(self, a, b):
  6327. return a, b
  6328. def _pdf(self, x, a, b):
  6329. return np.exp(self._logpdf(x, a, b))
  6330. def _logpdf(self, x, a, b):
  6331. return _norm_logpdf(x) - _log_gauss_mass(a, b)
  6332. def _cdf(self, x, a, b):
  6333. return np.exp(self._logcdf(x, a, b))
  6334. def _logcdf(self, x, a, b):
  6335. x, a, b = np.broadcast_arrays(x, a, b)
  6336. logcdf = _log_gauss_mass(a, x) - _log_gauss_mass(a, b)
  6337. i = logcdf > -0.1 # avoid catastrophic cancellation
  6338. if np.any(i):
  6339. logcdf[i] = np.log1p(-np.exp(self._logsf(x[i], a[i], b[i])))
  6340. return logcdf
  6341. def _sf(self, x, a, b):
  6342. return np.exp(self._logsf(x, a, b))
  6343. def _logsf(self, x, a, b):
  6344. x, a, b = np.broadcast_arrays(x, a, b)
  6345. logsf = _log_gauss_mass(x, b) - _log_gauss_mass(a, b)
  6346. i = logsf > -0.1 # avoid catastrophic cancellation
  6347. if np.any(i):
  6348. logsf[i] = np.log1p(-np.exp(self._logcdf(x[i], a[i], b[i])))
  6349. return logsf
  6350. def _ppf(self, q, a, b):
  6351. q, a, b = np.broadcast_arrays(q, a, b)
  6352. case_left = a < 0
  6353. case_right = ~case_left
  6354. def ppf_left(q, a, b):
  6355. log_Phi_x = _log_sum(sc.log_ndtr(a),
  6356. np.log(q) + _log_gauss_mass(a, b))
  6357. return sc.ndtri_exp(log_Phi_x)
  6358. def ppf_right(q, a, b):
  6359. log_Phi_x = _log_sum(sc.log_ndtr(-b),
  6360. np.log1p(-q) + _log_gauss_mass(a, b))
  6361. return -sc.ndtri_exp(log_Phi_x)
  6362. out = np.empty_like(q)
  6363. q_left = q[case_left]
  6364. q_right = q[case_right]
  6365. if q_left.size:
  6366. out[case_left] = ppf_left(q_left, a[case_left], b[case_left])
  6367. if q_right.size:
  6368. out[case_right] = ppf_right(q_right, a[case_right], b[case_right])
  6369. return out
  6370. def _isf(self, q, a, b):
  6371. # Mostly copy-paste of _ppf, but I think this is simpler than combining
  6372. q, a, b = np.broadcast_arrays(q, a, b)
  6373. case_left = b < 0
  6374. case_right = ~case_left
  6375. def isf_left(q, a, b):
  6376. log_Phi_x = _log_diff(sc.log_ndtr(b),
  6377. np.log(q) + _log_gauss_mass(a, b))
  6378. return sc.ndtri_exp(np.real(log_Phi_x))
  6379. def isf_right(q, a, b):
  6380. log_Phi_x = _log_diff(sc.log_ndtr(-a),
  6381. np.log1p(-q) + _log_gauss_mass(a, b))
  6382. return -sc.ndtri_exp(np.real(log_Phi_x))
  6383. out = np.empty_like(q)
  6384. q_left = q[case_left]
  6385. q_right = q[case_right]
  6386. if q_left.size:
  6387. out[case_left] = isf_left(q_left, a[case_left], b[case_left])
  6388. if q_right.size:
  6389. out[case_right] = isf_right(q_right, a[case_right], b[case_right])
  6390. return out
  6391. def _munp(self, n, a, b):
  6392. def n_th_moment(n, a, b):
  6393. """
  6394. Returns n-th moment. Defined only if n >= 0.
  6395. Function cannot broadcast due to the loop over n
  6396. """
  6397. pA, pB = self._pdf([a, b], a, b)
  6398. probs = [pA, -pB]
  6399. moments = [0, 1]
  6400. for k in range(1, n+1):
  6401. # a or b might be infinite, and the corresponding pdf value
  6402. # is 0 in that case, but nan is returned for the
  6403. # multiplication. However, as b->infinity, pdf(b)*b**k -> 0.
  6404. # So it is safe to use _lazywhere to avoid the nan.
  6405. vals = _lazywhere(probs, [probs, [a, b]],
  6406. lambda x, y: x * y**(k-1), fillvalue=0)
  6407. mk = np.sum(vals) + (k-1) * moments[-2]
  6408. moments.append(mk)
  6409. return moments[-1]
  6410. return _lazywhere((n >= 0) & (a == a) & (b == b), (n, a, b),
  6411. np.vectorize(n_th_moment, otypes=[np.float64]),
  6412. np.nan)
  6413. def _stats(self, a, b, moments='mv'):
  6414. pA, pB = self.pdf(np.array([a, b]), a, b)
  6415. def _truncnorm_stats_scalar(a, b, pA, pB, moments):
  6416. m1 = pA - pB
  6417. mu = m1
  6418. # use _lazywhere to avoid nan (See detailed comment in _munp)
  6419. probs = [pA, -pB]
  6420. vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y,
  6421. fillvalue=0)
  6422. m2 = 1 + np.sum(vals)
  6423. vals = _lazywhere(probs, [probs, [a-mu, b-mu]], lambda x, y: x*y,
  6424. fillvalue=0)
  6425. # mu2 = m2 - mu**2, but not as numerically stable as:
  6426. # mu2 = (a-mu)*pA - (b-mu)*pB + 1
  6427. mu2 = 1 + np.sum(vals)
  6428. vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y**2,
  6429. fillvalue=0)
  6430. m3 = 2*m1 + np.sum(vals)
  6431. vals = _lazywhere(probs, [probs, [a, b]], lambda x, y: x*y**3,
  6432. fillvalue=0)
  6433. m4 = 3*m2 + np.sum(vals)
  6434. mu3 = m3 + m1 * (-3*m2 + 2*m1**2)
  6435. g1 = mu3 / np.power(mu2, 1.5)
  6436. mu4 = m4 + m1*(-4*m3 + 3*m1*(2*m2 - m1**2))
  6437. g2 = mu4 / mu2**2 - 3
  6438. return mu, mu2, g1, g2
  6439. _truncnorm_stats = np.vectorize(_truncnorm_stats_scalar,
  6440. excluded=('moments',))
  6441. return _truncnorm_stats(a, b, pA, pB, moments)
  6442. truncnorm = truncnorm_gen(name='truncnorm', momtype=1)
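# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# conversion from clip values in sample space to the standardized shape
# parameters, as described in the docstring above.
def _demo_truncnorm_clip_conversion(myclip_a=0.0, myclip_b=10.0,
                                    loc=5.0, scale=2.0):
    from scipy import stats
    a = (myclip_a - loc) / scale
    b = (myclip_b - loc) / scale
    return stats.truncnorm.cdf(6.0, a, b, loc=loc, scale=scale)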
  6443. class truncpareto_gen(rv_continuous):
  6444. r"""An upper truncated Pareto continuous random variable.
  6445. %(before_notes)s
  6446. See Also
  6447. --------
  6448. pareto : Pareto distribution
  6449. Notes
  6450. -----
  6451. The probability density function for `truncpareto` is:
  6452. .. math::
  6453. f(x, b, c) = \frac{b}{1 - c^{-b}} \frac{1}{x^{b+1}}
  6454. for :math:`b > 0`, :math:`c > 1` and :math:`1 \le x \le c`.
  6455. `truncpareto` takes `b` and `c` as shape parameters for :math:`b` and
  6456. :math:`c`.
  6457. Notice that the upper truncation value :math:`c` is defined in
  6458. standardized form so that random values of an unscaled, unshifted variable
  6459. are within the range ``[1, c]``.
  6460. If ``u_r`` is the upper bound to a scaled and/or shifted variable,
  6461. then ``c = (u_r - loc) / scale``. In other words, the support of the
  6462. distribution becomes ``(scale + loc) <= x <= (c*scale + loc)`` when
  6463. `scale` and/or `loc` are provided.
  6464. %(after_notes)s
  6465. References
  6466. ----------
  6467. .. [1] Burroughs, S. M., and Tebbens S. F.
  6468. "Upper-truncated power laws in natural systems."
  6469. Pure and Applied Geophysics 158.4 (2001): 741-757.
  6470. %(example)s
  6471. """
  6472. def _shape_info(self):
  6473. ib = _ShapeInfo("b", False, (0.0, np.inf), (False, False))
  6474. ic = _ShapeInfo("c", False, (1.0, np.inf), (False, False))
  6475. return [ib, ic]
  6476. def _argcheck(self, b, c):
  6477. return (b > 0) & (c > 1)
  6478. def _get_support(self, b, c):
  6479. return self.a, c
  6480. def _pdf(self, x, b, c):
  6481. return b * x**-(b+1) / (1 - c**-b)
  6482. def _logpdf(self, x, b, c):
  6483. return np.log(b) - np.log1p(-c**-b) - (b+1)*np.log(x)
  6484. def _cdf(self, x, b, c):
  6485. return (1 - x**-b) / (1 - c**-b)
  6486. def _logcdf(self, x, b, c):
  6487. return np.log1p(-x**-b) - np.log1p(-c**-b)
  6488. def _ppf(self, q, b, c):
  6489. return pow(1 - (1 - c**-b)*q, -1/b)
  6490. def _sf(self, x, b, c):
  6491. return (x**-b - c**-b) / (1 - c**-b)
  6492. def _logsf(self, x, b, c):
  6493. return np.log(x**-b - c**-b) - np.log1p(-c**-b)
  6494. def _isf(self, q, b, c):
  6495. return pow(c**-b + (1 - c**-b)*q, -1/b)
  6496. def _entropy(self, b, c):
  6497. return -(np.log(b/(1 - c**-b))
  6498. + (b+1)*(np.log(c)/(c**b - 1) - 1/b))
  6499. def _munp(self, n, b, c):
  6500. if n == b:
  6501. return b*np.log(c) / (1 - c**-b)
  6502. else:
  6503. return b / (b-n) * (c**b - c**n) / (c**b - 1)
  6504. def _fitstart(self, data):
  6505. b, loc, scale = pareto.fit(data)
  6506. c = (max(data) - loc)/scale
  6507. return b, c, loc, scale
  6508. truncpareto = truncpareto_gen(a=1.0, name='truncpareto')
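# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# standardized support is [1, c], so the CDF should reach one exactly at the
# truncation point c.
def _demo_truncpareto_upper_truncation(b=2.0, c=5.0):
    from scipy import stats
    return stats.truncpareto.cdf(c, b, c)  # expected: 1.0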
  6509. class tukeylambda_gen(rv_continuous):
  6510. r"""A Tukey-Lamdba continuous random variable.
  6511. %(before_notes)s
  6512. Notes
  6513. -----
  6514. A flexible distribution, able to represent and interpolate between the
  6515. following distributions:
6516. - Cauchy (:math:`\lambda = -1`)
6517. - logistic (:math:`\lambda = 0`)
6518. - approximately normal (:math:`\lambda = 0.14`)
6519. - uniform from -1 to 1 (:math:`\lambda = 1`)
6520. `tukeylambda` takes a real number :math:`\lambda` (denoted ``lam``
6521. in the implementation) as a shape parameter.
  6522. %(after_notes)s
  6523. %(example)s
  6524. """
  6525. def _argcheck(self, lam):
  6526. return np.isfinite(lam)
  6527. def _shape_info(self):
  6528. return [_ShapeInfo("lam", False, (-np.inf, np.inf), (False, False))]
  6529. def _pdf(self, x, lam):
  6530. Fx = np.asarray(sc.tklmbda(x, lam))
  6531. Px = Fx**(lam-1.0) + (np.asarray(1-Fx))**(lam-1.0)
  6532. Px = 1.0/np.asarray(Px)
  6533. return np.where((lam <= 0) | (abs(x) < 1.0/np.asarray(lam)), Px, 0.0)
  6534. def _cdf(self, x, lam):
  6535. return sc.tklmbda(x, lam)
  6536. def _ppf(self, q, lam):
  6537. return sc.boxcox(q, lam) - sc.boxcox1p(-q, lam)
  6538. def _stats(self, lam):
  6539. return 0, _tlvar(lam), 0, _tlkurt(lam)
  6540. def _entropy(self, lam):
  6541. def integ(p):
  6542. return np.log(pow(p, lam-1)+pow(1-p, lam-1))
  6543. return integrate.quad(integ, 0, 1)[0]
  6544. tukeylambda = tukeylambda_gen(name='tukeylambda')
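# Illustrative sketch (hypothetical helper, not part of scipy's API): two of
# the special cases listed above, lambda = 0 (logistic) and lambda = 1
# (uniform on [-1, 1]).
def _demo_tukeylambda_special_cases(x=0.3):
    import numpy as np
    from scipy import stats
    logistic_ok = np.isclose(stats.tukeylambda.cdf(x, 0.0),
                             stats.logistic.cdf(x))
    uniform_ok = np.isclose(stats.tukeylambda.cdf(x, 1.0),
                            stats.uniform.cdf(x, loc=-1, scale=2))
    return logistic_ok, uniform_ok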
  6545. class FitUniformFixedScaleDataError(FitDataError):
  6546. def __init__(self, ptp, fscale):
  6547. self.args = (
  6548. "Invalid values in `data`. Maximum likelihood estimation with "
  6549. "the uniform distribution and fixed scale requires that "
  6550. "data.ptp() <= fscale, but data.ptp() = %r and fscale = %r." %
  6551. (ptp, fscale),
  6552. )
  6553. class uniform_gen(rv_continuous):
  6554. r"""A uniform continuous random variable.
  6555. In the standard form, the distribution is uniform on ``[0, 1]``. Using
  6556. the parameters ``loc`` and ``scale``, one obtains the uniform distribution
  6557. on ``[loc, loc + scale]``.
  6558. %(before_notes)s
  6559. %(example)s
  6560. """
  6561. def _shape_info(self):
  6562. return []
  6563. def _rvs(self, size=None, random_state=None):
  6564. return random_state.uniform(0.0, 1.0, size)
  6565. def _pdf(self, x):
  6566. return 1.0*(x == x)
  6567. def _cdf(self, x):
  6568. return x
  6569. def _ppf(self, q):
  6570. return q
  6571. def _stats(self):
  6572. return 0.5, 1.0/12, 0, -1.2
  6573. def _entropy(self):
  6574. return 0.0
  6575. @_call_super_mom
  6576. def fit(self, data, *args, **kwds):
  6577. """
  6578. Maximum likelihood estimate for the location and scale parameters.
  6579. `uniform.fit` uses only the following parameters. Because exact
  6580. formulas are used, the parameters related to optimization that are
  6581. available in the `fit` method of other distributions are ignored
  6582. here. The only positional argument accepted is `data`.
  6583. Parameters
  6584. ----------
  6585. data : array_like
  6586. Data to use in calculating the maximum likelihood estimate.
  6587. floc : float, optional
  6588. Hold the location parameter fixed to the specified value.
  6589. fscale : float, optional
  6590. Hold the scale parameter fixed to the specified value.
  6591. Returns
  6592. -------
  6593. loc, scale : float
  6594. Maximum likelihood estimates for the location and scale.
  6595. Notes
  6596. -----
  6597. An error is raised if `floc` is given and any values in `data` are
  6598. less than `floc`, or if `fscale` is given and `fscale` is less
  6599. than ``data.max() - data.min()``. An error is also raised if both
  6600. `floc` and `fscale` are given.
  6601. Examples
  6602. --------
  6603. >>> import numpy as np
  6604. >>> from scipy.stats import uniform
  6605. We'll fit the uniform distribution to `x`:
  6606. >>> x = np.array([2, 2.5, 3.1, 9.5, 13.0])
  6607. For a uniform distribution MLE, the location is the minimum of the
  6608. data, and the scale is the maximum minus the minimum.
  6609. >>> loc, scale = uniform.fit(x)
  6610. >>> loc
  6611. 2.0
  6612. >>> scale
  6613. 11.0
  6614. If we know the data comes from a uniform distribution where the support
  6615. starts at 0, we can use `floc=0`:
  6616. >>> loc, scale = uniform.fit(x, floc=0)
  6617. >>> loc
  6618. 0.0
  6619. >>> scale
  6620. 13.0
  6621. Alternatively, if we know the length of the support is 12, we can use
  6622. `fscale=12`:
  6623. >>> loc, scale = uniform.fit(x, fscale=12)
  6624. >>> loc
  6625. 1.5
  6626. >>> scale
  6627. 12.0
  6628. In that last example, the support interval is [1.5, 13.5]. This
  6629. solution is not unique. For example, the distribution with ``loc=2``
  6630. and ``scale=12`` has the same likelihood as the one above. When
  6631. `fscale` is given and it is larger than ``data.max() - data.min()``,
  6632. the parameters returned by the `fit` method center the support over
  6633. the interval ``[data.min(), data.max()]``.
  6634. """
  6635. if len(args) > 0:
  6636. raise TypeError("Too many arguments.")
  6637. floc = kwds.pop('floc', None)
  6638. fscale = kwds.pop('fscale', None)
  6639. _remove_optimizer_parameters(kwds)
  6640. if floc is not None and fscale is not None:
  6641. # This check is for consistency with `rv_continuous.fit`.
  6642. raise ValueError("All parameters fixed. There is nothing to "
  6643. "optimize.")
  6644. data = np.asarray(data)
  6645. if not np.isfinite(data).all():
  6646. raise ValueError("The data contains non-finite values.")
  6647. # MLE for the uniform distribution
  6648. # --------------------------------
  6649. # The PDF is
  6650. #
  6651. # f(x, loc, scale) = {1/scale for loc <= x <= loc + scale
  6652. # {0 otherwise}
  6653. #
  6654. # The likelihood function is
  6655. # L(x, loc, scale) = (1/scale)**n
  6656. # where n is len(x), assuming loc <= x <= loc + scale for all x.
  6657. # The log-likelihood is
  6658. # l(x, loc, scale) = -n*log(scale)
  6659. # The log-likelihood is maximized by making scale as small as possible,
  6660. # while keeping loc <= x <= loc + scale. So if neither loc nor scale
  6661. # are fixed, the log-likelihood is maximized by choosing
  6662. # loc = x.min()
  6663. # scale = x.ptp()
  6664. # If loc is fixed, it must be less than or equal to x.min(), and then
  6665. # the scale is
  6666. # scale = x.max() - loc
  6667. # If scale is fixed, it must not be less than x.ptp(). If scale is
  6668. # greater than x.ptp(), the solution is not unique. Note that the
  6669. # likelihood does not depend on loc, except for the requirement that
  6670. # loc <= x <= loc + scale. All choices of loc for which
  6671. # x.max() - scale <= loc <= x.min()
  6672. # have the same log-likelihood. In this case, we choose loc such that
  6673. # the support is centered over the interval [data.min(), data.max()]:
6674. # loc = x.min() - 0.5*(scale - x.ptp())
  6675. if fscale is None:
  6676. # scale is not fixed.
  6677. if floc is None:
  6678. # loc is not fixed, scale is not fixed.
  6679. loc = data.min()
  6680. scale = data.ptp()
  6681. else:
  6682. # loc is fixed, scale is not fixed.
  6683. loc = floc
  6684. scale = data.max() - loc
  6685. if data.min() < loc:
  6686. raise FitDataError("uniform", lower=loc, upper=loc + scale)
  6687. else:
  6688. # loc is not fixed, scale is fixed.
  6689. ptp = data.ptp()
  6690. if ptp > fscale:
  6691. raise FitUniformFixedScaleDataError(ptp=ptp, fscale=fscale)
  6692. # If ptp < fscale, the ML estimate is not unique; see the comments
  6693. # above. We choose the distribution for which the support is
  6694. # centered over the interval [data.min(), data.max()].
  6695. loc = data.min() - 0.5*(fscale - ptp)
  6696. scale = fscale
  6697. # We expect the return values to be floating point, so ensure it
  6698. # by explicitly converting to float.
  6699. return float(loc), float(scale)
  6700. uniform = uniform_gen(a=0.0, b=1.0, name='uniform')
  6701. class vonmises_gen(rv_continuous):
  6702. r"""A Von Mises continuous random variable.
  6703. %(before_notes)s
  6704. Notes
  6705. -----
  6706. The probability density function for `vonmises` and `vonmises_line` is:
  6707. .. math::
  6708. f(x, \kappa) = \frac{ \exp(\kappa \cos(x)) }{ 2 \pi I_0(\kappa) }
  6709. for :math:`-\pi \le x \le \pi`, :math:`\kappa > 0`. :math:`I_0` is the
  6710. modified Bessel function of order zero (`scipy.special.i0`).
  6711. `vonmises` is a circular distribution which does not restrict the
  6712. distribution to a fixed interval. Currently, there is no circular
  6713. distribution framework in scipy. The ``cdf`` is implemented such that
  6714. ``cdf(x + 2*np.pi) == cdf(x) + 1``.
  6715. `vonmises_line` is the same distribution, defined on :math:`[-\pi, \pi]`
  6716. on the real line. This is a regular (i.e. non-circular) distribution.
  6717. `vonmises` and `vonmises_line` take ``kappa`` as a shape parameter.
  6718. %(after_notes)s
  6719. %(example)s
  6720. """
  6721. def _shape_info(self):
  6722. return [_ShapeInfo("kappa", False, (0, np.inf), (False, False))]
  6723. def _rvs(self, kappa, size=None, random_state=None):
  6724. return random_state.vonmises(0.0, kappa, size=size)
  6725. @inherit_docstring_from(rv_continuous)
  6726. def rvs(self, *args, **kwds):
  6727. rvs = super().rvs(*args, **kwds)
  6728. return np.mod(rvs + np.pi, 2*np.pi) - np.pi
  6729. def _pdf(self, x, kappa):
  6730. # vonmises.pdf(x, kappa) = exp(kappa * cos(x)) / (2*pi*I[0](kappa))
  6731. # = exp(kappa * (cos(x) - 1)) /
  6732. # (2*pi*exp(-kappa)*I[0](kappa))
  6733. # = exp(kappa * cosm1(x)) / (2*pi*i0e(kappa))
  6734. return np.exp(kappa*sc.cosm1(x)) / (2*np.pi*sc.i0e(kappa))
  6735. def _logpdf(self, x, kappa):
  6736. # vonmises.pdf(x, kappa) = exp(kappa * cosm1(x)) / (2*pi*i0e(kappa))
  6737. return kappa * sc.cosm1(x) - np.log(2*np.pi) - np.log(sc.i0e(kappa))
  6738. def _cdf(self, x, kappa):
  6739. return _stats.von_mises_cdf(kappa, x)
  6740. def _stats_skip(self, kappa):
  6741. return 0, None, 0, None
  6742. def _entropy(self, kappa):
  6743. # vonmises.entropy(kappa) = -kappa * I[1](kappa) / I[0](kappa) +
  6744. # log(2 * np.pi * I[0](kappa))
  6745. # = -kappa * I[1](kappa) * exp(-kappa) /
  6746. # (I[0](kappa) * exp(-kappa)) +
  6747. # log(2 * np.pi *
  6748. # I[0](kappa) * exp(-kappa) / exp(-kappa))
  6749. # = -kappa * sc.i1e(kappa) / sc.i0e(kappa) +
  6750. # log(2 * np.pi * i0e(kappa)) + kappa
  6751. return (-kappa * sc.i1e(kappa) / sc.i0e(kappa) +
  6752. np.log(2 * np.pi * sc.i0e(kappa)) + kappa)
  6753. @extend_notes_in_docstring(rv_continuous, notes="""\
  6754. The default limits of integration are endpoints of the interval
  6755. of width ``2*pi`` centered at `loc` (e.g. ``[-pi, pi]`` when
  6756. ``loc=0``).\n\n""")
  6757. def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
  6758. conditional=False, **kwds):
  6759. _a, _b = -np.pi, np.pi
  6760. if lb is None:
  6761. lb = loc + _a
  6762. if ub is None:
  6763. ub = loc + _b
  6764. return super().expect(func, args, loc,
  6765. scale, lb, ub, conditional, **kwds)
  6766. vonmises = vonmises_gen(name='vonmises')
  6767. vonmises_line = vonmises_gen(a=-np.pi, b=np.pi, name='vonmises_line')
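# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# Notes above state that the circular CDF satisfies
# cdf(x + 2*pi) == cdf(x) + 1; a one-point numerical check.
def _demo_vonmises_circular_cdf(x=0.5, kappa=2.0):
    import numpy as np
    from scipy import stats
    return np.isclose(stats.vonmises.cdf(x + 2*np.pi, kappa),
                      stats.vonmises.cdf(x, kappa) + 1.0)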
  6768. class wald_gen(invgauss_gen):
  6769. r"""A Wald continuous random variable.
  6770. %(before_notes)s
  6771. Notes
  6772. -----
  6773. The probability density function for `wald` is:
  6774. .. math::
  6775. f(x) = \frac{1}{\sqrt{2\pi x^3}} \exp(- \frac{ (x-1)^2 }{ 2x })
6776. for :math:`x \ge 0`.
  6777. `wald` is a special case of `invgauss` with ``mu=1``.
  6778. %(after_notes)s
  6779. %(example)s
  6780. """
  6781. _support_mask = rv_continuous._open_support_mask
  6782. def _shape_info(self):
  6783. return []
  6784. def _rvs(self, size=None, random_state=None):
  6785. return random_state.wald(1.0, 1.0, size=size)
  6786. def _pdf(self, x):
  6787. # wald.pdf(x) = 1/sqrt(2*pi*x**3) * exp(-(x-1)**2/(2*x))
  6788. return invgauss._pdf(x, 1.0)
  6789. def _cdf(self, x):
  6790. return invgauss._cdf(x, 1.0)
  6791. def _sf(self, x):
  6792. return invgauss._sf(x, 1.0)
  6793. def _ppf(self, x):
  6794. return invgauss._ppf(x, 1.0)
  6795. def _isf(self, x):
  6796. return invgauss._isf(x, 1.0)
  6797. def _logpdf(self, x):
  6798. return invgauss._logpdf(x, 1.0)
  6799. def _logcdf(self, x):
  6800. return invgauss._logcdf(x, 1.0)
  6801. def _logsf(self, x):
  6802. return invgauss._logsf(x, 1.0)
  6803. def _stats(self):
  6804. return 1.0, 1.0, 3.0, 15.0
  6805. wald = wald_gen(a=0.0, name="wald")
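# Illustrative sketch (hypothetical helper, not part of scipy's API): `wald`
# delegates to `invgauss` with mu = 1, as the methods above show.
def _demo_wald_is_invgauss_mu1(x=1.3):
    import numpy as np
    from scipy import stats
    return np.isclose(stats.wald.pdf(x), stats.invgauss.pdf(x, 1.0))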
  6806. class wrapcauchy_gen(rv_continuous):
  6807. r"""A wrapped Cauchy continuous random variable.
  6808. %(before_notes)s
  6809. Notes
  6810. -----
  6811. The probability density function for `wrapcauchy` is:
  6812. .. math::
  6813. f(x, c) = \frac{1-c^2}{2\pi (1+c^2 - 2c \cos(x))}
  6814. for :math:`0 \le x \le 2\pi`, :math:`0 < c < 1`.
  6815. `wrapcauchy` takes ``c`` as a shape parameter for :math:`c`.
  6816. %(after_notes)s
  6817. %(example)s
  6818. """
  6819. def _argcheck(self, c):
  6820. return (c > 0) & (c < 1)
  6821. def _shape_info(self):
  6822. return [_ShapeInfo("c", False, (0, 1), (False, False))]
  6823. def _pdf(self, x, c):
  6824. # wrapcauchy.pdf(x, c) = (1-c**2) / (2*pi*(1+c**2-2*c*cos(x)))
  6825. return (1.0-c*c)/(2*np.pi*(1+c*c-2*c*np.cos(x)))
  6826. def _cdf(self, x, c):
  6827. def f1(x, cr):
  6828. # CDF for 0 <= x < pi
  6829. return 1/np.pi * np.arctan(cr*np.tan(x/2))
  6830. def f2(x, cr):
  6831. # CDF for pi <= x <= 2*pi
  6832. return 1 - 1/np.pi * np.arctan(cr*np.tan((2*np.pi - x)/2))
  6833. cr = (1 + c)/(1 - c)
  6834. return _lazywhere(x < np.pi, (x, cr), f=f1, f2=f2)
  6835. def _ppf(self, q, c):
  6836. val = (1.0-c)/(1.0+c)
  6837. rcq = 2*np.arctan(val*np.tan(np.pi*q))
  6838. rcmq = 2*np.pi-2*np.arctan(val*np.tan(np.pi*(1-q)))
  6839. return np.where(q < 1.0/2, rcq, rcmq)
  6840. def _entropy(self, c):
  6841. return np.log(2*np.pi*(1-c*c))
  6842. def _fitstart(self, data):
  6843. # Use 0.5 as the initial guess of the shape parameter.
  6844. # For the location and scale, use the minimum and
  6845. # peak-to-peak/(2*pi), respectively.
  6846. return 0.5, np.min(data), np.ptp(data)/(2*np.pi)
  6847. wrapcauchy = wrapcauchy_gen(a=0.0, b=2*np.pi, name='wrapcauchy')
  6848. class gennorm_gen(rv_continuous):
  6849. r"""A generalized normal continuous random variable.
  6850. %(before_notes)s
  6851. See Also
  6852. --------
  6853. laplace : Laplace distribution
  6854. norm : normal distribution
  6855. Notes
  6856. -----
  6857. The probability density function for `gennorm` is [1]_:
  6858. .. math::
  6859. f(x, \beta) = \frac{\beta}{2 \Gamma(1/\beta)} \exp(-|x|^\beta),
  6860. where :math:`x` is a real number, :math:`\beta > 0` and
  6861. :math:`\Gamma` is the gamma function (`scipy.special.gamma`).
  6862. `gennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
  6863. For :math:`\beta = 1`, it is identical to a Laplace distribution.
  6864. For :math:`\beta = 2`, it is identical to a normal distribution
  6865. (with ``scale=1/sqrt(2)``).
  6866. References
  6867. ----------
  6868. .. [1] "Generalized normal distribution, Version 1",
  6869. https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
  6870. .. [2] Nardon, Martina, and Paolo Pianca. "Simulation techniques for
  6871. generalized Gaussian densities." Journal of Statistical
  6872. Computation and Simulation 79.11 (2009): 1317-1329
  6873. .. [3] Wicklin, Rick. "Simulate data from a generalized Gaussian
  6874. distribution" in The DO Loop blog, September 21, 2016,
  6875. https://blogs.sas.com/content/iml/2016/09/21/simulate-generalized-gaussian-sas.html
  6876. %(example)s
  6877. """
  6878. def _shape_info(self):
  6879. return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
  6880. def _pdf(self, x, beta):
  6881. return np.exp(self._logpdf(x, beta))
  6882. def _logpdf(self, x, beta):
  6883. return np.log(0.5*beta) - sc.gammaln(1.0/beta) - abs(x)**beta
  6884. def _cdf(self, x, beta):
  6885. c = 0.5 * np.sign(x)
  6886. # evaluating (.5 + c) first prevents numerical cancellation
  6887. return (0.5 + c) - c * sc.gammaincc(1.0/beta, abs(x)**beta)
  6888. def _ppf(self, x, beta):
  6889. c = np.sign(x - 0.5)
  6890. # evaluating (1. + c) first prevents numerical cancellation
  6891. return c * sc.gammainccinv(1.0/beta, (1.0 + c) - 2.0*c*x)**(1.0/beta)
  6892. def _sf(self, x, beta):
  6893. return self._cdf(-x, beta)
  6894. def _isf(self, x, beta):
  6895. return -self._ppf(x, beta)
  6896. def _stats(self, beta):
  6897. c1, c3, c5 = sc.gammaln([1.0/beta, 3.0/beta, 5.0/beta])
  6898. return 0., np.exp(c3 - c1), 0., np.exp(c5 + c1 - 2.0*c3) - 3.
  6899. def _entropy(self, beta):
  6900. return 1. / beta - np.log(.5 * beta) + sc.gammaln(1. / beta)
  6901. def _rvs(self, beta, size=None, random_state=None):
  6902. # see [2]_ for the algorithm
  6903. # see [3]_ for reference implementation in SAS
  6904. z = random_state.gamma(1/beta, size=size)
  6905. y = z ** (1/beta)
  6906. # convert y to array to ensure masking support
  6907. y = np.asarray(y)
  6908. mask = random_state.random(size=y.shape) < 0.5
  6909. y[mask] = -y[mask]
  6910. return y
  6911. gennorm = gennorm_gen(name='gennorm')
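# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# special cases noted above, beta = 1 (Laplace) and beta = 2 (normal with
# scale 1/sqrt(2)).
def _demo_gennorm_special_cases(x=0.8):
    import numpy as np
    from scipy import stats
    laplace_ok = np.isclose(stats.gennorm.pdf(x, 1), stats.laplace.pdf(x))
    norm_ok = np.isclose(stats.gennorm.pdf(x, 2),
                         stats.norm.pdf(x, scale=1/np.sqrt(2)))
    return laplace_ok, norm_ok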
  6912. class halfgennorm_gen(rv_continuous):
  6913. r"""The upper half of a generalized normal continuous random variable.
  6914. %(before_notes)s
  6915. See Also
  6916. --------
  6917. gennorm : generalized normal distribution
  6918. expon : exponential distribution
  6919. halfnorm : half normal distribution
  6920. Notes
  6921. -----
  6922. The probability density function for `halfgennorm` is:
  6923. .. math::
  6924. f(x, \beta) = \frac{\beta}{\Gamma(1/\beta)} \exp(-|x|^\beta)
  6925. for :math:`x, \beta > 0`. :math:`\Gamma` is the gamma function
  6926. (`scipy.special.gamma`).
  6927. `halfgennorm` takes ``beta`` as a shape parameter for :math:`\beta`.
  6928. For :math:`\beta = 1`, it is identical to an exponential distribution.
  6929. For :math:`\beta = 2`, it is identical to a half normal distribution
  6930. (with ``scale=1/sqrt(2)``).
  6931. References
  6932. ----------
  6933. .. [1] "Generalized normal distribution, Version 1",
  6934. https://en.wikipedia.org/wiki/Generalized_normal_distribution#Version_1
  6935. %(example)s
  6936. """
  6937. def _shape_info(self):
  6938. return [_ShapeInfo("beta", False, (0, np.inf), (False, False))]
  6939. def _pdf(self, x, beta):
6940. # halfgennorm.pdf(x, beta) = beta / gamma(1/beta) * exp(-|x|**beta)
  6943. return np.exp(self._logpdf(x, beta))
  6944. def _logpdf(self, x, beta):
  6945. return np.log(beta) - sc.gammaln(1.0/beta) - x**beta
  6946. def _cdf(self, x, beta):
  6947. return sc.gammainc(1.0/beta, x**beta)
  6948. def _ppf(self, x, beta):
  6949. return sc.gammaincinv(1.0/beta, x)**(1.0/beta)
  6950. def _sf(self, x, beta):
  6951. return sc.gammaincc(1.0/beta, x**beta)
  6952. def _isf(self, x, beta):
  6953. return sc.gammainccinv(1.0/beta, x)**(1.0/beta)
  6954. def _entropy(self, beta):
  6955. return 1.0/beta - np.log(beta) + sc.gammaln(1.0/beta)
  6956. halfgennorm = halfgennorm_gen(a=0, name='halfgennorm')
  6957. class crystalball_gen(rv_continuous):
  6958. r"""
  6959. Crystalball distribution
  6960. %(before_notes)s
  6961. Notes
  6962. -----
  6963. The probability density function for `crystalball` is:
  6964. .. math::
  6965. f(x, \beta, m) = \begin{cases}
  6966. N \exp(-x^2 / 2), &\text{for } x > -\beta\\
  6967. N A (B - x)^{-m} &\text{for } x \le -\beta
  6968. \end{cases}
  6969. where :math:`A = (m / |\beta|)^m \exp(-\beta^2 / 2)`,
  6970. :math:`B = m/|\beta| - |\beta|` and :math:`N` is a normalisation constant.
  6971. `crystalball` takes :math:`\beta > 0` and :math:`m > 1` as shape
  6972. parameters. :math:`\beta` defines the point where the pdf changes
  6973. from a power-law to a Gaussian distribution. :math:`m` is the power
  6974. of the power-law tail.
  6975. References
  6976. ----------
  6977. .. [1] "Crystal Ball Function",
  6978. https://en.wikipedia.org/wiki/Crystal_Ball_function
  6979. %(after_notes)s
  6980. .. versionadded:: 0.19.0
  6981. %(example)s
  6982. """
  6983. def _argcheck(self, beta, m):
  6984. """
  6985. Shape parameter bounds are m > 1 and beta > 0.
  6986. """
  6987. return (m > 1) & (beta > 0)
  6988. def _shape_info(self):
  6989. ibeta = _ShapeInfo("beta", False, (0, np.inf), (False, False))
  6990. im = _ShapeInfo("m", False, (1, np.inf), (False, False))
  6991. return [ibeta, im]
  6992. def _fitstart(self, data):
  6993. # Arbitrary, but the default m=1 is not valid
  6994. return super()._fitstart(data, args=(1, 1.5))
  6995. def _pdf(self, x, beta, m):
  6996. """
  6997. Return PDF of the crystalball function.
6998. crystalball.pdf(x, beta, m) = N * exp(-x**2 / 2), for x > -beta
6999. = N * A * (B - x)**(-m), for x <= -beta
  7003. """
  7004. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  7005. _norm_pdf_C * _norm_cdf(beta))
  7006. def rhs(x, beta, m):
  7007. return np.exp(-x**2 / 2)
  7008. def lhs(x, beta, m):
  7009. return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
  7010. (m/beta - beta - x)**(-m))
  7011. return N * _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)
  7012. def _logpdf(self, x, beta, m):
  7013. """
  7014. Return the log of the PDF of the crystalball function.
  7015. """
  7016. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  7017. _norm_pdf_C * _norm_cdf(beta))
  7018. def rhs(x, beta, m):
  7019. return -x**2/2
  7020. def lhs(x, beta, m):
  7021. return m*np.log(m/beta) - beta**2/2 - m*np.log(m/beta - beta - x)
  7022. return np.log(N) + _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)
  7023. def _cdf(self, x, beta, m):
  7024. """
  7025. Return CDF of the crystalball function
  7026. """
  7027. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  7028. _norm_pdf_C * _norm_cdf(beta))
  7029. def rhs(x, beta, m):
  7030. return ((m/beta) * np.exp(-beta**2 / 2.0) / (m-1) +
  7031. _norm_pdf_C * (_norm_cdf(x) - _norm_cdf(-beta)))
  7032. def lhs(x, beta, m):
  7033. return ((m/beta)**m * np.exp(-beta**2 / 2.0) *
  7034. (m/beta - beta - x)**(-m+1) / (m-1))
  7035. return N * _lazywhere(x > -beta, (x, beta, m), f=rhs, f2=lhs)
  7036. def _ppf(self, p, beta, m):
  7037. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  7038. _norm_pdf_C * _norm_cdf(beta))
  7039. pbeta = N * (m/beta) * np.exp(-beta**2/2) / (m - 1)
  7040. def ppf_less(p, beta, m):
  7041. eb2 = np.exp(-beta**2/2)
  7042. C = (m/beta) * eb2 / (m-1)
  7043. N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
  7044. return (m/beta - beta -
  7045. ((m - 1)*(m/beta)**(-m)/eb2*p/N)**(1/(1-m)))
  7046. def ppf_greater(p, beta, m):
  7047. eb2 = np.exp(-beta**2/2)
  7048. C = (m/beta) * eb2 / (m-1)
  7049. N = 1/(C + _norm_pdf_C * _norm_cdf(beta))
  7050. return _norm_ppf(_norm_cdf(-beta) + (1/_norm_pdf_C)*(p/N - C))
  7051. return _lazywhere(p < pbeta, (p, beta, m), f=ppf_less, f2=ppf_greater)
  7052. def _munp(self, n, beta, m):
  7053. """
  7054. Returns the n-th non-central moment of the crystalball function.
  7055. """
  7056. N = 1.0 / (m/beta / (m-1) * np.exp(-beta**2 / 2.0) +
  7057. _norm_pdf_C * _norm_cdf(beta))
  7058. def n_th_moment(n, beta, m):
  7059. """
  7060. Returns n-th moment. Defined only if n+1 < m
  7061. Function cannot broadcast due to the loop over n
  7062. """
  7063. A = (m/beta)**m * np.exp(-beta**2 / 2.0)
  7064. B = m/beta - beta
  7065. rhs = (2**((n-1)/2.0) * sc.gamma((n+1)/2) *
  7066. (1.0 + (-1)**n * sc.gammainc((n+1)/2, beta**2 / 2)))
  7067. lhs = np.zeros(rhs.shape)
  7068. for k in range(n + 1):
  7069. lhs += (sc.binom(n, k) * B**(n-k) * (-1)**k / (m - k - 1) *
  7070. (m/beta)**(-m + k + 1))
  7071. return A * lhs + rhs
  7072. return N * _lazywhere(n + 1 < m, (n, beta, m),
  7073. np.vectorize(n_th_moment, otypes=[np.float64]),
  7074. np.inf)
  7075. crystalball = crystalball_gen(name='crystalball', longname="A Crystalball Function")
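# Illustrative sketch (hypothetical helper, not part of scipy's API): the
# Gaussian core and the power-law tail described above join continuously at
# x = -beta; compare the density just to either side of that point.
def _demo_crystalball_continuity(beta=1.5, m=3.0, eps=1e-6):
    import numpy as np
    from scipy import stats
    left = stats.crystalball.pdf(-beta - eps, beta, m)
    right = stats.crystalball.pdf(-beta + eps, beta, m)
    return np.isclose(left, right, rtol=1e-4)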
  7076. def _argus_phi(chi):
  7077. """
  7078. Utility function for the argus distribution used in the pdf, sf and
  7079. moment calculation.
  7080. Note that for all x > 0:
  7081. gammainc(1.5, x**2/2) = 2 * (_norm_cdf(x) - x * _norm_pdf(x) - 0.5).
  7082. This can be verified directly by noting that the cdf of Gamma(1.5) can
  7083. be written as erf(sqrt(x)) - 2*sqrt(x)*exp(-x)/sqrt(Pi).
  7084. We use gammainc instead of the usual definition because it is more precise
  7085. for small chi.
  7086. """
  7087. return sc.gammainc(1.5, chi**2/2) / 2
  7088. class argus_gen(rv_continuous):
  7089. r"""
  7090. Argus distribution
  7091. %(before_notes)s
  7092. Notes
  7093. -----
  7094. The probability density function for `argus` is:
  7095. .. math::
  7096. f(x, \chi) = \frac{\chi^3}{\sqrt{2\pi} \Psi(\chi)} x \sqrt{1-x^2}
  7097. \exp(-\chi^2 (1 - x^2)/2)
  7098. for :math:`0 < x < 1` and :math:`\chi > 0`, where
  7099. .. math::
  7100. \Psi(\chi) = \Phi(\chi) - \chi \phi(\chi) - 1/2
  7101. with :math:`\Phi` and :math:`\phi` being the CDF and PDF of a standard
  7102. normal distribution, respectively.
7103. `argus` takes :math:`\chi` as a shape parameter.
  7104. %(after_notes)s
  7105. References
  7106. ----------
  7107. .. [1] "ARGUS distribution",
  7108. https://en.wikipedia.org/wiki/ARGUS_distribution
  7109. .. versionadded:: 0.19.0
  7110. %(example)s
  7111. """
  7112. def _shape_info(self):
  7113. return [_ShapeInfo("chi", False, (0, np.inf), (False, False))]
  7114. def _logpdf(self, x, chi):
  7115. # for x = 0 or 1, logpdf returns -np.inf
  7116. with np.errstate(divide='ignore'):
  7117. y = 1.0 - x*x
  7118. A = 3*np.log(chi) - _norm_pdf_logC - np.log(_argus_phi(chi))
  7119. return A + np.log(x) + 0.5*np.log1p(-x*x) - chi**2 * y / 2
  7120. def _pdf(self, x, chi):
  7121. return np.exp(self._logpdf(x, chi))
  7122. def _cdf(self, x, chi):
  7123. return 1.0 - self._sf(x, chi)
  7124. def _sf(self, x, chi):
  7125. return _argus_phi(chi * np.sqrt(1 - x**2)) / _argus_phi(chi)
  7126. def _rvs(self, chi, size=None, random_state=None):
  7127. chi = np.asarray(chi)
  7128. if chi.size == 1:
  7129. out = self._rvs_scalar(chi, numsamples=size,
  7130. random_state=random_state)
  7131. else:
  7132. shp, bc = _check_shape(chi.shape, size)
  7133. numsamples = int(np.prod(shp))
  7134. out = np.empty(size)
  7135. it = np.nditer([chi],
  7136. flags=['multi_index'],
  7137. op_flags=[['readonly']])
  7138. while not it.finished:
  7139. idx = tuple((it.multi_index[j] if not bc[j] else slice(None))
  7140. for j in range(-len(size), 0))
  7141. r = self._rvs_scalar(it[0], numsamples=numsamples,
  7142. random_state=random_state)
  7143. out[idx] = r.reshape(shp)
  7144. it.iternext()
  7145. if size == ():
  7146. out = out[()]
  7147. return out
  7148. def _rvs_scalar(self, chi, numsamples=None, random_state=None):
  7149. # if chi <= 1.8:
  7150. # use rejection method, see Devroye:
  7151. # Non-Uniform Random Variate Generation, 1986, section II.3.2.
  7152. # write: PDF f(x) = c * g(x) * h(x), where
  7153. # h is [0,1]-valued and g is a density
  7154. # we use two ways to write f
  7155. #
  7156. # Case 1:
  7157. # write g(x) = 3*x*sqrt(1-x**2), h(x) = exp(-chi**2 (1-x**2) / 2)
  7158. # If X has a distribution with density g its ppf G_inv is given by:
  7159. # G_inv(u) = np.sqrt(1 - u**(2/3))
  7160. #
  7161. # Case 2:
  7162. # g(x) = chi**2 * x * exp(-chi**2 * (1-x**2)/2) / (1 - exp(-chi**2 /2))
  7163. # h(x) = sqrt(1 - x**2), 0 <= x <= 1
  7164. # one can show that
  7165. # G_inv(u) = np.sqrt(2*np.log(u*(np.exp(chi**2/2)-1)+1))/chi
  7166. # = np.sqrt(1 + 2*np.log(np.exp(-chi**2/2)*(1-u)+u)/chi**2)
  7167. # the latter expression is used for precision with small chi
  7168. #
  7169. # In both cases, the inverse cdf of g can be written analytically, and
  7170. # we can apply the rejection method:
  7171. #
  7172. # REPEAT
  7173. # Generate U uniformly distributed on [0, 1]
  7174. # Generate X with density g (e.g. via inverse transform sampling:
  7175. # X = G_inv(V) with V uniformly distributed on [0, 1])
  7176. # UNTIL X <= h(X)
  7177. # RETURN X
  7178. #
  7179. # We use case 1 for chi <= 0.5 as it maintains precision for small chi
  7180. # and case 2 for 0.5 < chi <= 1.8 due to its speed for moderate chi.
  7181. #
  7182. # if chi > 1.8:
  7183. # use relation to the Gamma distribution: if X is ARGUS with parameter
7184. # chi, then Y = chi**2 * (1 - X**2) / 2 has density proportional to
7185. # sqrt(u) * exp(-u) on [0, chi**2 / 2], i.e. a Gamma(3/2) distribution
7186. # conditioned on [0, chi**2 / 2]. Therefore, to sample X from the
  7187. # ARGUS distribution, we sample Y from the gamma distribution, keeping
  7188. # only samples on [0, chi**2 / 2], and apply the inverse
  7189. # transformation X = (1 - 2*Y/chi**2)**(1/2). Since we only
  7190. # look at chi > 1.8, gamma(1.5).cdf(chi**2/2) is large enough such
  7191. # Y falls in the inteval [0, chi**2 / 2] with a high probability:
  7192. # stats.gamma(1.5).cdf(1.8**2/2) = 0.644...
  7193. #
  7194. # The points to switch between the different methods are determined
  7195. # by a comparison of the runtime of the different methods. However,
  7196. # the runtime is platform-dependent. The implemented values should
  7197. # ensure a good overall performance and are supported by an analysis
  7198. # of the rejection constants of different methods.
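        #
        # A quick numerical sanity check (illustrative only, not used by the
        # code) that the two expressions for G_inv in case 2 agree; chi = 1.0
        # and u = 0.3 are arbitrary and the values are rounded:
        #     np.sqrt(2*np.log(0.3*(np.exp(0.5) - 1) + 1)) / 1.0    # ~0.5964
        #     np.sqrt(1 + 2*np.log(np.exp(-0.5)*0.7 + 0.3) / 1.0)   # ~0.5964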
        size1d = tuple(np.atleast_1d(numsamples))
        N = int(np.prod(size1d))
        x = np.zeros(N)
        simulated = 0
        chi2 = chi * chi
        if chi <= 0.5:
            d = -chi2 / 2
            while simulated < N:
                k = N - simulated
                u = random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                z = v**(2/3)
                # acceptance condition: u <= h(G_inv(v)), which simplifies
                # to log(u) <= d * z
                accept = (np.log(u) <= d * z)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    # we still need to transform z=v**(2/3) to X = G_inv(v)
                    rvs = np.sqrt(1 - z[accept])
                    x[simulated:(simulated + num_accept)] = rvs
                    simulated += num_accept
        elif chi <= 1.8:
            echi = np.exp(-chi2 / 2)
            while simulated < N:
                k = N - simulated
                u = random_state.uniform(size=k)
                v = random_state.uniform(size=k)
                z = 2 * np.log(echi * (1 - v) + v) / chi2
                # as in case one, simplify u <= h(G_inv(v)) and then transform
                # z to the target distribution X = G_inv(v)
                accept = (u**2 + z <= 0)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    rvs = np.sqrt(1 + z[accept])
                    x[simulated:(simulated + num_accept)] = rvs
                    simulated += num_accept
        else:
            # conditional Gamma for chi > 1.8
            while simulated < N:
                k = N - simulated
                g = random_state.standard_gamma(1.5, size=k)
                accept = (g <= chi2 / 2)
                num_accept = np.sum(accept)
                if num_accept > 0:
                    x[simulated:(simulated + num_accept)] = g[accept]
                    simulated += num_accept
            x = np.sqrt(1 - 2 * x / chi2)

        return np.reshape(x, size1d)
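
    # Illustrative sketch (not part of the class): the conditional-Gamma
    # relation used above for chi > 1.8 can be checked directly against the
    # distribution's cdf. The seed, chi and sample size below are arbitrary.
    #
    #     import numpy as np
    #     from scipy import stats
    #     rng = np.random.default_rng(1234)
    #     chi = 2.5
    #     y = rng.standard_gamma(1.5, size=50000)
    #     y = y[y <= chi**2 / 2]               # condition on [0, chi**2 / 2]
    #     x = np.sqrt(1 - 2*y/chi**2)          # map to the ARGUS variable
    #     stats.kstest(x, stats.argus(chi).cdf)  # should not reject
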
    def _stats(self, chi):
        # need to ensure that dtype is float
        # otherwise the mask below does not work for integers
        chi = np.asarray(chi, dtype=float)
        phi = _argus_phi(chi)
        m = np.sqrt(np.pi/8) * chi * sc.ive(1, chi**2/4) / phi
        # compute second moment, use Taylor expansion for small chi (<= 0.1)
        mu2 = np.empty_like(chi)
        mask = chi > 0.1
        c = chi[mask]
        mu2[mask] = 1 - 3 / c**2 + c * _norm_pdf(c) / phi[mask]
        c = chi[~mask]
        coef = [-358/65690625, 0, -94/1010625, 0, 2/2625, 0, 6/175, 0, 0.4]
        mu2[~mask] = np.polyval(coef, c)
        return m, mu2 - m**2, None, None


argus = argus_gen(name='argus', longname="An Argus Function", a=0.0, b=1.0)
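
# Illustrative check (not part of the library) of the closed-form mean used in
# ``argus_gen._stats``; ``chi = 1.0`` is an arbitrary choice:
#
#     from scipy import integrate
#     chi = 1.0
#     mean_closed = argus(chi).mean()
#     mean_quad = integrate.quad(lambda t: t * argus(chi).pdf(t), 0, 1)[0]
#     # the two values should agree to many decimal places
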

class rv_histogram(rv_continuous):
    """
    Generates a distribution given by a histogram.
    This is useful to generate a template distribution from a binned
    datasample.

    As a subclass of the `rv_continuous` class, `rv_histogram` inherits from it
    a collection of generic methods (see `rv_continuous` for the full list),
    and implements them based on the properties of the provided binned
    datasample.

    Parameters
    ----------
    histogram : tuple of array_like
        Tuple containing two array_like objects.
        The first containing the content of n bins,
        the second containing the (n+1) bin boundaries.
        In particular, the return value of `numpy.histogram` is accepted.
    density : bool, optional
        If False, assumes the histogram is proportional to counts per bin;
        otherwise, assumes it is proportional to a density.
        For constant bin widths, these are equivalent, but the distinction
        is important when bin widths vary (see Notes).
        If None (default), sets ``density=True`` for backwards compatibility,
        but warns if the bin widths are variable. Set `density` explicitly
        to silence the warning.

        .. versionadded:: 1.10.0

    Notes
    -----
    When a histogram has unequal bin widths, there is a distinction between
    histograms that are proportional to counts per bin and histograms that are
    proportional to probability density over a bin. If `numpy.histogram` is
    called with its default ``density=False``, the resulting histogram is the
    number of counts per bin, so ``density=False`` should be passed to
    `rv_histogram`. If `numpy.histogram` is called with ``density=True``, the
    resulting histogram is in terms of probability density, so ``density=True``
    should be passed to `rv_histogram`. To avoid warnings, always pass
    ``density`` explicitly when the input histogram has unequal bin widths.
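
    For example (an illustrative sketch; the sample, seed, and bin edges below
    are arbitrary), the value of `density` should match the way the histogram
    was computed:

    >>> import numpy as np
    >>> from scipy.stats import rv_histogram
    >>> sample = np.random.default_rng(148).exponential(size=1000)
    >>> edges = [0., 0.5, 1.5, 3.5, 7.5]
    >>> counts, bins = np.histogram(sample, bins=edges)
    >>> dist = rv_histogram((counts, bins), density=False)
    >>> dens, bins = np.histogram(sample, bins=edges, density=True)
    >>> dist = rv_histogram((dens, bins), density=True)
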
    There are no additional shape parameters except for the loc and scale.
    The pdf is defined as a stepwise function from the provided histogram.
    The cdf is a piecewise linear interpolation of the cumulative bin
    probabilities, i.e. the integral of the stepwise pdf.

    .. versionadded:: 0.19.0

    Examples
    --------
    Create a scipy.stats distribution from a numpy histogram:

    >>> import scipy.stats
    >>> import numpy as np
    >>> data = scipy.stats.norm.rvs(size=100000, loc=0, scale=1.5, random_state=123)
    >>> hist = np.histogram(data, bins=100)
    >>> hist_dist = scipy.stats.rv_histogram(hist, density=False)

    The resulting object behaves like an ordinary scipy rv_continuous
    distribution:

    >>> hist_dist.pdf(1.0)
    0.20538577847618705
    >>> hist_dist.cdf(2.0)
    0.90818568543056499

    The PDF is zero above (below) the highest (lowest) bin of the histogram,
    defined by the max (min) of the original dataset:

    >>> hist_dist.pdf(np.max(data))
    0.0
    >>> hist_dist.cdf(np.max(data))
    1.0
    >>> hist_dist.pdf(np.min(data))
    7.7591907244498314e-05
    >>> hist_dist.cdf(np.min(data))
    0.0

    The PDF and CDF follow the histogram:

    >>> import matplotlib.pyplot as plt
    >>> X = np.linspace(-5.0, 5.0, 100)
    >>> fig, ax = plt.subplots()
    >>> ax.set_title("PDF from Template")
    >>> ax.hist(data, density=True, bins=100)
    >>> ax.plot(X, hist_dist.pdf(X), label='PDF')
    >>> ax.plot(X, hist_dist.cdf(X), label='CDF')
    >>> ax.legend()
    >>> fig.show()
    """
    _support_mask = rv_continuous._support_mask

    def __init__(self, histogram, *args, density=None, **kwargs):
        """
        Create a new distribution using the given histogram

        Parameters
        ----------
        histogram : tuple of array_like
            Tuple containing two array_like objects.
            The first containing the content of n bins,
            the second containing the (n+1) bin boundaries.
            In particular, the return value of np.histogram is accepted.
        density : bool, optional
            If False, assumes the histogram is proportional to counts per bin;
            otherwise, assumes it is proportional to a density.
            For constant bin widths, these are equivalent.
            If None (default), sets ``density=True`` for backward
            compatibility, but warns if the bin widths are variable. Set
            `density` explicitly to silence the warning.
        """
        self._histogram = histogram
        self._density = density
        if len(histogram) != 2:
            raise ValueError("Expected length 2 for parameter histogram")
        self._hpdf = np.asarray(histogram[0])
        self._hbins = np.asarray(histogram[1])
        if len(self._hpdf) + 1 != len(self._hbins):
            raise ValueError("Number of elements in histogram content "
                             "and histogram boundaries do not match, "
                             "expected n and n+1.")
        self._hbin_widths = self._hbins[1:] - self._hbins[:-1]
        bins_vary = not np.allclose(self._hbin_widths, self._hbin_widths[0])
        if density is None and bins_vary:
            message = ("Bin widths are not constant. Assuming `density=True`. "
                       "Specify `density` explicitly to silence this warning.")
            warnings.warn(message, RuntimeWarning, stacklevel=2)
            density = True
        elif not density:
            # convert counts per bin to an (unnormalized) density
            self._hpdf = self._hpdf / self._hbin_widths
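        # Normalize so that the stepwise pdf integrates to one, accumulate the
        # bin probabilities to get the cdf at the bin edges, and pad the pdf
        # with zeros so that lookups outside the support return 0.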
        self._hpdf = self._hpdf / float(np.sum(self._hpdf * self._hbin_widths))
        self._hcdf = np.cumsum(self._hpdf * self._hbin_widths)
        self._hpdf = np.hstack([0.0, self._hpdf, 0.0])
        self._hcdf = np.hstack([0.0, self._hcdf])
        # Set support
        kwargs['a'] = self.a = self._hbins[0]
        kwargs['b'] = self.b = self._hbins[-1]
        super().__init__(*args, **kwargs)

    def _pdf(self, x):
        """
        PDF of the histogram
        """
        return self._hpdf[np.searchsorted(self._hbins, x, side='right')]

    def _cdf(self, x):
        """
        CDF calculated from the histogram
        """
        return np.interp(x, self._hbins, self._hcdf)

    def _ppf(self, x):
        """
        Percentile function calculated from the histogram
        """
        return np.interp(x, self._hcdf, self._hbins)

    def _munp(self, n):
        """Compute the n-th non-central moment."""
        integrals = (self._hbins[1:]**(n+1) - self._hbins[:-1]**(n+1)) / (n+1)
        return np.sum(self._hpdf[1:-1] * integrals)

    def _entropy(self):
        """Compute entropy of distribution"""
        res = _lazywhere(self._hpdf[1:-1] > 0.0,
                         (self._hpdf[1:-1],),
                         np.log,
                         0.0)
        return -np.sum(self._hpdf[1:-1] * res * self._hbin_widths)

    def _updated_ctor_param(self):
        """
        Set the histogram as additional constructor argument
        """
        dct = super()._updated_ctor_param()
        dct['histogram'] = self._histogram
        dct['density'] = self._density
        return dct


class studentized_range_gen(rv_continuous):
    r"""A studentized range continuous random variable.

    %(before_notes)s

    See Also
    --------
    t: Student's t distribution

    Notes
    -----
    The probability density function for `studentized_range` is:

    .. math::

        f(x; k, \nu) = \frac{k(k-1)\nu^{\nu/2}}{\Gamma(\nu/2) 2^{\nu/2-1}}
                       \int_{0}^{\infty} \int_{-\infty}^{\infty}
                       s^{\nu} e^{-\nu s^2/2} \phi(z) \phi(sx + z)
                       [\Phi(sx + z) - \Phi(z)]^{k-2} \,dz \,ds

    for :math:`x \ge 0`, :math:`k > 1`, and :math:`\nu > 0`.

    `studentized_range` takes ``k`` for :math:`k` and ``df`` for :math:`\nu`
    as shape parameters.

    When :math:`\nu` exceeds 100,000, an asymptotic approximation (infinite
    degrees of freedom) is used to compute the cumulative distribution
    function [4]_ and probability density function.

    %(after_notes)s

    References
    ----------
    .. [1] "Studentized range distribution",
           https://en.wikipedia.org/wiki/Studentized_range_distribution
    .. [2] Batista, Ben Dêivide, et al. "Externally Studentized Normal Midrange
           Distribution." Ciência e Agrotecnologia, vol. 41, no. 4, 2017, pp.
           378-389., doi:10.1590/1413-70542017414047716.
    .. [3] Harter, H. Leon. "Tables of Range and Studentized Range." The Annals
           of Mathematical Statistics, vol. 31, no. 4, 1960, pp. 1122-1147.
           JSTOR, www.jstor.org/stable/2237810. Accessed 18 Feb. 2021.
    .. [4] Lund, R. E., and J. R. Lund. "Algorithm AS 190: Probabilities and
           Upper Quantiles for the Studentized Range." Journal of the Royal
           Statistical Society. Series C (Applied Statistics), vol. 32, no. 2,
           1983, pp. 204-210. JSTOR, www.jstor.org/stable/2347300. Accessed 18
           Feb. 2021.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import studentized_range
    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1, 1)

    Calculate the first four moments:

    >>> k, df = 3, 10
    >>> mean, var, skew, kurt = studentized_range.stats(k, df, moments='mvsk')

    Display the probability density function (``pdf``):

    >>> x = np.linspace(studentized_range.ppf(0.01, k, df),
    ...                 studentized_range.ppf(0.99, k, df), 100)
    >>> ax.plot(x, studentized_range.pdf(x, k, df),
    ...         'r-', lw=5, alpha=0.6, label='studentized_range pdf')

    Alternatively, the distribution object can be called (as a function)
    to fix the shape, location and scale parameters. This returns a "frozen"
    RV object holding the given parameters fixed.

    Freeze the distribution and display the frozen ``pdf``:

    >>> rv = studentized_range(k, df)
    >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

    Check accuracy of ``cdf`` and ``ppf``:

    >>> vals = studentized_range.ppf([0.001, 0.5, 0.999], k, df)
    >>> np.allclose([0.001, 0.5, 0.999], studentized_range.cdf(vals, k, df))
    True

    Rather than using ``studentized_range.rvs`` to generate random variates,
    which is very slow for this distribution, we can approximate the inverse
    CDF using an interpolator, and then perform inverse transform sampling
    with this approximate inverse CDF.

    This distribution has an infinite but thin right tail, so we focus our
    attention on the leftmost 99.9 percent.

    >>> a, b = studentized_range.ppf([0, .999], k, df)
    >>> a, b
    (0, 7.41058083802274)

    >>> from scipy.interpolate import interp1d
    >>> rng = np.random.default_rng()
    >>> xs = np.linspace(a, b, 50)
    >>> cdf = studentized_range.cdf(xs, k, df)
    >>> # Create an interpolant of the inverse CDF
    >>> ppf = interp1d(cdf, xs, fill_value='extrapolate')
    >>> # Perform inverse transform sampling using the interpolant
    >>> r = ppf(rng.uniform(size=1000))

    And compare the histogram:

    >>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    >>> ax.legend(loc='best', frameon=False)
    >>> plt.show()

    """
    def _argcheck(self, k, df):
        return (k > 1) & (df > 0)

    def _shape_info(self):
        ik = _ShapeInfo("k", False, (1, np.inf), (False, False))
        idf = _ShapeInfo("df", False, (0, np.inf), (False, False))
        return [ik, idf]

    def _fitstart(self, data):
        # Default is k=1, but that is not a valid value of the parameter.
        return super()._fitstart(data, args=(2, 1))

    def _munp(self, K, k, df):
        cython_symbol = '_studentized_range_moment'
        _a, _b = self._get_support()
        # all three of these arguments are used to create a numpy array, so
        # they must have the same shape.

        def _single_moment(K, k, df):
            log_const = _stats._studentized_range_pdf_logconst(k, df)
            arg = [K, k, df, log_const]
            usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)

            llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)

            ranges = [(-np.inf, np.inf), (0, np.inf), (_a, _b)]
            opts = dict(epsabs=1e-11, epsrel=1e-12)

            return integrate.nquad(llc, ranges=ranges, opts=opts)[0]

        ufunc = np.frompyfunc(_single_moment, 3, 1)
        return np.float64(ufunc(K, k, df))

    def _pdf(self, x, k, df):

        def _single_pdf(q, k, df):
            # The infinite form of the PDF is derived from the infinite
            # CDF.
            if df < 100000:
                cython_symbol = '_studentized_range_pdf'
                log_const = _stats._studentized_range_pdf_logconst(k, df)
                arg = [q, k, df, log_const]
                usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
                ranges = [(-np.inf, np.inf), (0, np.inf)]
            else:
                cython_symbol = '_studentized_range_pdf_asymptotic'
                arg = [q, k]
                usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
                ranges = [(-np.inf, np.inf)]

            llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
            opts = dict(epsabs=1e-11, epsrel=1e-12)
            return integrate.nquad(llc, ranges=ranges, opts=opts)[0]

        ufunc = np.frompyfunc(_single_pdf, 3, 1)
        return np.float64(ufunc(x, k, df))

    def _cdf(self, x, k, df):

        def _single_cdf(q, k, df):
            # "When the degrees of freedom V are infinite the probability
            # integral takes [on a] simpler form," and a single asymptotic
            # integral is evaluated rather than the standard double integral.
            # (Lund, Lund, page 205)
            if df < 100000:
                cython_symbol = '_studentized_range_cdf'
                log_const = _stats._studentized_range_cdf_logconst(k, df)
                arg = [q, k, df, log_const]
                usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
                ranges = [(-np.inf, np.inf), (0, np.inf)]
            else:
                cython_symbol = '_studentized_range_cdf_asymptotic'
                arg = [q, k]
                usr_data = np.array(arg, float).ctypes.data_as(ctypes.c_void_p)
                ranges = [(-np.inf, np.inf)]

            llc = LowLevelCallable.from_cython(_stats, cython_symbol, usr_data)
            opts = dict(epsabs=1e-11, epsrel=1e-12)
            return integrate.nquad(llc, ranges=ranges, opts=opts)[0]

        ufunc = np.frompyfunc(_single_cdf, 3, 1)

        # clip p-values to ensure they are in [0, 1].
        return np.clip(np.float64(ufunc(x, k, df)), 0, 1)


studentized_range = studentized_range_gen(name='studentized_range', a=0,
                                          b=np.inf)
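
# Illustrative use (not part of the library): the upper quantiles of the
# studentized range are the critical values of Tukey's HSD test, e.g. for
# k=3 groups and df=10 error degrees of freedom at the 5% level:
#
#     q_crit = studentized_range.ppf(0.95, 3, 10)
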
# Collect names of classes and objects in this module.
pairs = list(globals().copy().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_continuous)

__all__ = _distn_names + _distn_gen_names + ['rv_histogram']