_multivariate.py 185 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062406340644065406640674068406940704071407240734074407540764077407840794080408140824083408440854086408740884089409040914092409340944095409640974098409941004101410241034104410541064107410841094110411141124113411441154116411741184119412041214122412341244125412641274128412941304131413241334134413541364137413841394140414141424143414441454146414741484149415041514152415341544155415641574158415941604161416241634164416541664167416841694170417141724173417441754176417741784179418041814182418341844185418641874188418941904191419241934194419541964197419841994200420142024203420442054206420742084209421042114212421342144215421642174218421942204221422242234224422542264227422842294230423142324233423442354236423742384239424042414242424342444245424642474248424942504251425242534254425542564257425842594260426142624263426442654266426742684269427042714272427342744275427642774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049505050515052505350545055505650575058505950605061506250635064506550665067506850695070507150725073507450755076507750785079508050815082508350845085508650875088508950905091509250935094509550965097509850995100510151025103510451055106510751085109511051115112511351145115511651175118511951205121512251235124512551265127512851295130513151325133513451355136513751385139514051415142514351445145514651475148514951505151515251535154515551565157515851595160516151625163516451655166516751685169517051715172517351745175517651775178517951805181518251835184518551865187518851895190519151925193519451955196519751985199520052015202520352045205520652075208520952105211521252135214521552165217521852195220522152225223522452255226522752285229523052315232523352345235523652375238523952405241524252435244524552465247524852495250525152525253525452555256525752585259526052615262526352645265526652675268526952705271527252735274527552765277527852795280528152825283528452855286528752885289529052915292529352945295529652975298529953005301530253035304530553065307530853095310531153125313531453155316531753185319532053215322532353245325532653275328532953305331533253335334533553365337533853395340534153425343534453455346534753485349535053515352535353545355535653575358535953605361536253635364536553665367536853695370537153725373537453755376537753785379538053815382538353845385538653875388538953905391539253935394539553965397539853995400540154025403540454055406540754085409541054115412541354145415541654175418541954205421542254235424542554265427542854295430543154325433543454355436543754385439544054415442544354445445544654475448544954505451545254535454545554565457545854595460546154625463546454655466546754685469547054715472547354745475547654775478547954805481548254835484548554865487548854895490549154925493549454955496549754985499550055015502550355045505550655075508550955105511551255135514551555165517551855195520552155225523552455255526552755285529553055315532553355345535553655375538553955405541554255435544554555465547554855495550555155525553555455555556555755585559556055615562556355645565556655675568556955705571557255735574557555765577557855795580558155825583558455855586558755885589559055915592559355945595559655975598559956005601560256035604560556065607560856095610561156125613561456155616561756185619562056215622562356245625562656275628562956305631563256335634563556365637563856395640564156425643564456455646564756485649565056515652565356545655565656575658565956605661566256635664566556665667566856695670567156725673567456755676567756785679568056815682568356845685568656875688568956905691
  1. #
  2. # Author: Joris Vankerschaver 2013
  3. #
  4. import math
  5. import numpy as np
  6. from numpy import asarray_chkfinite, asarray
  7. from numpy.lib import NumpyVersion
  8. import scipy.linalg
  9. from scipy._lib import doccer
  10. from scipy.special import gammaln, psi, multigammaln, xlogy, entr, betaln
  11. from scipy._lib._util import check_random_state
  12. from scipy.linalg.blas import drot
  13. from scipy.linalg._misc import LinAlgError
  14. from scipy.linalg.lapack import get_lapack_funcs
  15. from ._discrete_distns import binom
  16. from . import _mvn, _covariance, _rcont
  17. __all__ = ['multivariate_normal',
  18. 'matrix_normal',
  19. 'dirichlet',
  20. 'wishart',
  21. 'invwishart',
  22. 'multinomial',
  23. 'special_ortho_group',
  24. 'ortho_group',
  25. 'random_correlation',
  26. 'unitary_group',
  27. 'multivariate_t',
  28. 'multivariate_hypergeom',
  29. 'random_table',
  30. 'uniform_direction']
  31. _LOG_2PI = np.log(2 * np.pi)
  32. _LOG_2 = np.log(2)
  33. _LOG_PI = np.log(np.pi)
  34. _doc_random_state = """\
  35. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  36. Used for drawing random variates.
  37. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  38. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  39. with seed.
  40. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  41. then that object is used.
  42. Default is `None`.
  43. """
  44. def _squeeze_output(out):
  45. """
  46. Remove single-dimensional entries from array and convert to scalar,
  47. if necessary.
  48. """
  49. out = out.squeeze()
  50. if out.ndim == 0:
  51. out = out[()]
  52. return out
  53. def _eigvalsh_to_eps(spectrum, cond=None, rcond=None):
  54. """Determine which eigenvalues are "small" given the spectrum.
  55. This is for compatibility across various linear algebra functions
  56. that should agree about whether or not a Hermitian matrix is numerically
  57. singular and what is its numerical matrix rank.
  58. This is designed to be compatible with scipy.linalg.pinvh.
  59. Parameters
  60. ----------
  61. spectrum : 1d ndarray
  62. Array of eigenvalues of a Hermitian matrix.
  63. cond, rcond : float, optional
  64. Cutoff for small eigenvalues.
  65. Singular values smaller than rcond * largest_eigenvalue are
  66. considered zero.
  67. If None or -1, suitable machine precision is used.
  68. Returns
  69. -------
  70. eps : float
  71. Magnitude cutoff for numerical negligibility.
  72. """
  73. if rcond is not None:
  74. cond = rcond
  75. if cond in [None, -1]:
  76. t = spectrum.dtype.char.lower()
  77. factor = {'f': 1E3, 'd': 1E6}
  78. cond = factor[t] * np.finfo(t).eps
  79. eps = cond * np.max(abs(spectrum))
  80. return eps
  81. def _pinv_1d(v, eps=1e-5):
  82. """A helper function for computing the pseudoinverse.
  83. Parameters
  84. ----------
  85. v : iterable of numbers
  86. This may be thought of as a vector of eigenvalues or singular values.
  87. eps : float
  88. Values with magnitude no greater than eps are considered negligible.
  89. Returns
  90. -------
  91. v_pinv : 1d float ndarray
  92. A vector of pseudo-inverted numbers.
  93. """
  94. return np.array([0 if abs(x) <= eps else 1/x for x in v], dtype=float)
  95. class _PSD:
  96. """
  97. Compute coordinated functions of a symmetric positive semidefinite matrix.
  98. This class addresses two issues. Firstly it allows the pseudoinverse,
  99. the logarithm of the pseudo-determinant, and the rank of the matrix
  100. to be computed using one call to eigh instead of three.
  101. Secondly it allows these functions to be computed in a way
  102. that gives mutually compatible results.
  103. All of the functions are computed with a common understanding as to
  104. which of the eigenvalues are to be considered negligibly small.
  105. The functions are designed to coordinate with scipy.linalg.pinvh()
  106. but not necessarily with np.linalg.det() or with np.linalg.matrix_rank().
  107. Parameters
  108. ----------
  109. M : array_like
  110. Symmetric positive semidefinite matrix (2-D).
  111. cond, rcond : float, optional
  112. Cutoff for small eigenvalues.
  113. Singular values smaller than rcond * largest_eigenvalue are
  114. considered zero.
  115. If None or -1, suitable machine precision is used.
  116. lower : bool, optional
  117. Whether the pertinent array data is taken from the lower
  118. or upper triangle of M. (Default: lower)
  119. check_finite : bool, optional
  120. Whether to check that the input matrices contain only finite
  121. numbers. Disabling may give a performance gain, but may result
  122. in problems (crashes, non-termination) if the inputs do contain
  123. infinities or NaNs.
  124. allow_singular : bool, optional
  125. Whether to allow a singular matrix. (Default: True)
  126. Notes
  127. -----
  128. The arguments are similar to those of scipy.linalg.pinvh().
  129. """
  130. def __init__(self, M, cond=None, rcond=None, lower=True,
  131. check_finite=True, allow_singular=True):
  132. self._M = np.asarray(M)
  133. # Compute the symmetric eigendecomposition.
  134. # Note that eigh takes care of array conversion, chkfinite,
  135. # and assertion that the matrix is square.
  136. s, u = scipy.linalg.eigh(M, lower=lower, check_finite=check_finite)
  137. eps = _eigvalsh_to_eps(s, cond, rcond)
  138. if np.min(s) < -eps:
  139. msg = "The input matrix must be symmetric positive semidefinite."
  140. raise ValueError(msg)
  141. d = s[s > eps]
  142. if len(d) < len(s) and not allow_singular:
  143. msg = ("When `allow_singular is False`, the input matrix must be "
  144. "symmetric positive definite.")
  145. raise np.linalg.LinAlgError(msg)
  146. s_pinv = _pinv_1d(s, eps)
  147. U = np.multiply(u, np.sqrt(s_pinv))
  148. # Save the eigenvector basis, and tolerance for testing support
  149. self.eps = 1e3*eps
  150. self.V = u[:, s <= eps]
  151. # Initialize the eagerly precomputed attributes.
  152. self.rank = len(d)
  153. self.U = U
  154. self.log_pdet = np.sum(np.log(d))
  155. # Initialize attributes to be lazily computed.
  156. self._pinv = None
  157. def _support_mask(self, x):
  158. """
  159. Check whether x lies in the support of the distribution.
  160. """
  161. residual = np.linalg.norm(x @ self.V, axis=-1)
  162. in_support = residual < self.eps
  163. return in_support
  164. @property
  165. def pinv(self):
  166. if self._pinv is None:
  167. self._pinv = np.dot(self.U, self.U.T)
  168. return self._pinv
  169. class multi_rv_generic:
  170. """
  171. Class which encapsulates common functionality between all multivariate
  172. distributions.
  173. """
  174. def __init__(self, seed=None):
  175. super().__init__()
  176. self._random_state = check_random_state(seed)
  177. @property
  178. def random_state(self):
  179. """ Get or set the Generator object for generating random variates.
  180. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  181. singleton is used.
  182. If `seed` is an int, a new ``RandomState`` instance is used,
  183. seeded with `seed`.
  184. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  185. that instance is used.
  186. """
  187. return self._random_state
  188. @random_state.setter
  189. def random_state(self, seed):
  190. self._random_state = check_random_state(seed)
  191. def _get_random_state(self, random_state):
  192. if random_state is not None:
  193. return check_random_state(random_state)
  194. else:
  195. return self._random_state
  196. class multi_rv_frozen:
  197. """
  198. Class which encapsulates common functionality between all frozen
  199. multivariate distributions.
  200. """
  201. @property
  202. def random_state(self):
  203. return self._dist._random_state
  204. @random_state.setter
  205. def random_state(self, seed):
  206. self._dist._random_state = check_random_state(seed)
  207. _mvn_doc_default_callparams = """\
  208. mean : array_like, default: ``[0]``
  209. Mean of the distribution.
  210. cov : array_like or `Covariance`, default: ``[1]``
  211. Symmetric positive (semi)definite covariance matrix of the distribution.
  212. allow_singular : bool, default: ``False``
  213. Whether to allow a singular covariance matrix. This is ignored if `cov` is
  214. a `Covariance` object.
  215. """
  216. _mvn_doc_callparams_note = """\
  217. Setting the parameter `mean` to `None` is equivalent to having `mean`
  218. be the zero-vector. The parameter `cov` can be a scalar, in which case
  219. the covariance matrix is the identity times that value, a vector of
  220. diagonal entries for the covariance matrix, a two-dimensional array_like,
  221. or a `Covariance` object.
  222. """
  223. _mvn_doc_frozen_callparams = ""
  224. _mvn_doc_frozen_callparams_note = """\
  225. See class definition for a detailed description of parameters."""
  226. mvn_docdict_params = {
  227. '_mvn_doc_default_callparams': _mvn_doc_default_callparams,
  228. '_mvn_doc_callparams_note': _mvn_doc_callparams_note,
  229. '_doc_random_state': _doc_random_state
  230. }
  231. mvn_docdict_noparams = {
  232. '_mvn_doc_default_callparams': _mvn_doc_frozen_callparams,
  233. '_mvn_doc_callparams_note': _mvn_doc_frozen_callparams_note,
  234. '_doc_random_state': _doc_random_state
  235. }
  236. class multivariate_normal_gen(multi_rv_generic):
  237. r"""A multivariate normal random variable.
  238. The `mean` keyword specifies the mean. The `cov` keyword specifies the
  239. covariance matrix.
  240. Methods
  241. -------
  242. pdf(x, mean=None, cov=1, allow_singular=False)
  243. Probability density function.
  244. logpdf(x, mean=None, cov=1, allow_singular=False)
  245. Log of the probability density function.
  246. cdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5, lower_limit=None) # noqa
  247. Cumulative distribution function.
  248. logcdf(x, mean=None, cov=1, allow_singular=False, maxpts=1000000*dim, abseps=1e-5, releps=1e-5)
  249. Log of the cumulative distribution function.
  250. rvs(mean=None, cov=1, size=1, random_state=None)
  251. Draw random samples from a multivariate normal distribution.
  252. entropy()
  253. Compute the differential entropy of the multivariate normal.
  254. Parameters
  255. ----------
  256. %(_mvn_doc_default_callparams)s
  257. %(_doc_random_state)s
  258. Notes
  259. -----
  260. %(_mvn_doc_callparams_note)s
  261. The covariance matrix `cov` may be an instance of a subclass of
  262. `Covariance`, e.g. `scipy.stats.CovViaPrecision`. If so, `allow_singular`
  263. is ignored.
  264. Otherwise, `cov` must be a symmetric positive semidefinite
  265. matrix when `allow_singular` is True; it must be (strictly) positive
  266. definite when `allow_singular` is False.
  267. Symmetry is not checked; only the lower triangular portion is used.
  268. The determinant and inverse of `cov` are computed
  269. as the pseudo-determinant and pseudo-inverse, respectively, so
  270. that `cov` does not need to have full rank.
  271. The probability density function for `multivariate_normal` is
  272. .. math::
  273. f(x) = \frac{1}{\sqrt{(2 \pi)^k \det \Sigma}}
  274. \exp\left( -\frac{1}{2} (x - \mu)^T \Sigma^{-1} (x - \mu) \right),
  275. where :math:`\mu` is the mean, :math:`\Sigma` the covariance matrix,
  276. :math:`k` the rank of :math:`\Sigma`. In case of singular :math:`\Sigma`,
  277. SciPy extends this definition according to [1]_.
  278. .. versionadded:: 0.14.0
  279. References
  280. ----------
  281. .. [1] Multivariate Normal Distribution - Degenerate Case, Wikipedia,
  282. https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case
  283. Examples
  284. --------
  285. >>> import numpy as np
  286. >>> import matplotlib.pyplot as plt
  287. >>> from scipy.stats import multivariate_normal
  288. >>> x = np.linspace(0, 5, 10, endpoint=False)
  289. >>> y = multivariate_normal.pdf(x, mean=2.5, cov=0.5); y
  290. array([ 0.00108914, 0.01033349, 0.05946514, 0.20755375, 0.43939129,
  291. 0.56418958, 0.43939129, 0.20755375, 0.05946514, 0.01033349])
  292. >>> fig1 = plt.figure()
  293. >>> ax = fig1.add_subplot(111)
  294. >>> ax.plot(x, y)
  295. >>> plt.show()
  296. Alternatively, the object may be called (as a function) to fix the mean
  297. and covariance parameters, returning a "frozen" multivariate normal
  298. random variable:
  299. >>> rv = multivariate_normal(mean=None, cov=1, allow_singular=False)
  300. >>> # Frozen object with the same methods but holding the given
  301. >>> # mean and covariance fixed.
  302. The input quantiles can be any shape of array, as long as the last
  303. axis labels the components. This allows us for instance to
  304. display the frozen pdf for a non-isotropic random variable in 2D as
  305. follows:
  306. >>> x, y = np.mgrid[-1:1:.01, -1:1:.01]
  307. >>> pos = np.dstack((x, y))
  308. >>> rv = multivariate_normal([0.5, -0.2], [[2.0, 0.3], [0.3, 0.5]])
  309. >>> fig2 = plt.figure()
  310. >>> ax2 = fig2.add_subplot(111)
  311. >>> ax2.contourf(x, y, rv.pdf(pos))
  312. """
  313. def __init__(self, seed=None):
  314. super().__init__(seed)
  315. self.__doc__ = doccer.docformat(self.__doc__, mvn_docdict_params)
  316. def __call__(self, mean=None, cov=1, allow_singular=False, seed=None):
  317. """Create a frozen multivariate normal distribution.
  318. See `multivariate_normal_frozen` for more information.
  319. """
  320. return multivariate_normal_frozen(mean, cov,
  321. allow_singular=allow_singular,
  322. seed=seed)
  323. def _process_parameters(self, mean, cov, allow_singular=True):
  324. """
  325. Infer dimensionality from mean or covariance matrix, ensure that
  326. mean and covariance are full vector resp. matrix.
  327. """
  328. if isinstance(cov, _covariance.Covariance):
  329. return self._process_parameters_Covariance(mean, cov)
  330. else:
  331. # Before `Covariance` classes were introduced,
  332. # `multivariate_normal` accepted plain arrays as `cov` and used the
  333. # following input validation. To avoid disturbing the behavior of
  334. # `multivariate_normal` when plain arrays are used, we use the
  335. # original input validation here.
  336. dim, mean, cov = self._process_parameters_psd(None, mean, cov)
  337. # After input validation, some methods then processed the arrays
  338. # with a `_PSD` object and used that to perform computation.
  339. # To avoid branching statements in each method depending on whether
  340. # `cov` is an array or `Covariance` object, we always process the
  341. # array with `_PSD`, and then use wrapper that satisfies the
  342. # `Covariance` interface, `CovViaPSD`.
  343. psd = _PSD(cov, allow_singular=allow_singular)
  344. cov_object = _covariance.CovViaPSD(psd)
  345. return dim, mean, cov_object
  346. def _process_parameters_Covariance(self, mean, cov):
  347. dim = cov.shape[-1]
  348. mean = np.array([0.]) if mean is None else mean
  349. message = (f"`cov` represents a covariance matrix in {dim} dimensions,"
  350. f"and so `mean` must be broadcastable to shape {(dim,)}")
  351. try:
  352. mean = np.broadcast_to(mean, dim)
  353. except ValueError as e:
  354. raise ValueError(message) from e
  355. return dim, mean, cov
  356. def _process_parameters_psd(self, dim, mean, cov):
  357. # Try to infer dimensionality
  358. if dim is None:
  359. if mean is None:
  360. if cov is None:
  361. dim = 1
  362. else:
  363. cov = np.asarray(cov, dtype=float)
  364. if cov.ndim < 2:
  365. dim = 1
  366. else:
  367. dim = cov.shape[0]
  368. else:
  369. mean = np.asarray(mean, dtype=float)
  370. dim = mean.size
  371. else:
  372. if not np.isscalar(dim):
  373. raise ValueError("Dimension of random variable must be "
  374. "a scalar.")
  375. # Check input sizes and return full arrays for mean and cov if
  376. # necessary
  377. if mean is None:
  378. mean = np.zeros(dim)
  379. mean = np.asarray(mean, dtype=float)
  380. if cov is None:
  381. cov = 1.0
  382. cov = np.asarray(cov, dtype=float)
  383. if dim == 1:
  384. mean = mean.reshape(1)
  385. cov = cov.reshape(1, 1)
  386. if mean.ndim != 1 or mean.shape[0] != dim:
  387. raise ValueError("Array 'mean' must be a vector of length %d." %
  388. dim)
  389. if cov.ndim == 0:
  390. cov = cov * np.eye(dim)
  391. elif cov.ndim == 1:
  392. cov = np.diag(cov)
  393. elif cov.ndim == 2 and cov.shape != (dim, dim):
  394. rows, cols = cov.shape
  395. if rows != cols:
  396. msg = ("Array 'cov' must be square if it is two dimensional,"
  397. " but cov.shape = %s." % str(cov.shape))
  398. else:
  399. msg = ("Dimension mismatch: array 'cov' is of shape %s,"
  400. " but 'mean' is a vector of length %d.")
  401. msg = msg % (str(cov.shape), len(mean))
  402. raise ValueError(msg)
  403. elif cov.ndim > 2:
  404. raise ValueError("Array 'cov' must be at most two-dimensional,"
  405. " but cov.ndim = %d" % cov.ndim)
  406. return dim, mean, cov
  407. def _process_quantiles(self, x, dim):
  408. """
  409. Adjust quantiles array so that last axis labels the components of
  410. each data point.
  411. """
  412. x = np.asarray(x, dtype=float)
  413. if x.ndim == 0:
  414. x = x[np.newaxis]
  415. elif x.ndim == 1:
  416. if dim == 1:
  417. x = x[:, np.newaxis]
  418. else:
  419. x = x[np.newaxis, :]
  420. return x
  421. def _logpdf(self, x, mean, cov_object):
  422. """Log of the multivariate normal probability density function.
  423. Parameters
  424. ----------
  425. x : ndarray
  426. Points at which to evaluate the log of the probability
  427. density function
  428. mean : ndarray
  429. Mean of the distribution
  430. cov_object : Covariance
  431. An object representing the Covariance matrix
  432. Notes
  433. -----
  434. As this function does no argument checking, it should not be
  435. called directly; use 'logpdf' instead.
  436. """
  437. log_det_cov, rank = cov_object.log_pdet, cov_object.rank
  438. dev = x - mean
  439. if dev.ndim > 1:
  440. log_det_cov = log_det_cov[..., np.newaxis]
  441. rank = rank[..., np.newaxis]
  442. maha = np.sum(np.square(cov_object.whiten(dev)), axis=-1)
  443. return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
  444. def logpdf(self, x, mean=None, cov=1, allow_singular=False):
  445. """Log of the multivariate normal probability density function.
  446. Parameters
  447. ----------
  448. x : array_like
  449. Quantiles, with the last axis of `x` denoting the components.
  450. %(_mvn_doc_default_callparams)s
  451. Returns
  452. -------
  453. pdf : ndarray or scalar
  454. Log of the probability density function evaluated at `x`
  455. Notes
  456. -----
  457. %(_mvn_doc_callparams_note)s
  458. """
  459. params = self._process_parameters(mean, cov, allow_singular)
  460. dim, mean, cov_object = params
  461. x = self._process_quantiles(x, dim)
  462. out = self._logpdf(x, mean, cov_object)
  463. if np.any(cov_object.rank < dim):
  464. out_of_bounds = ~cov_object._support_mask(x-mean)
  465. out[out_of_bounds] = -np.inf
  466. return _squeeze_output(out)
  467. def pdf(self, x, mean=None, cov=1, allow_singular=False):
  468. """Multivariate normal probability density function.
  469. Parameters
  470. ----------
  471. x : array_like
  472. Quantiles, with the last axis of `x` denoting the components.
  473. %(_mvn_doc_default_callparams)s
  474. Returns
  475. -------
  476. pdf : ndarray or scalar
  477. Probability density function evaluated at `x`
  478. Notes
  479. -----
  480. %(_mvn_doc_callparams_note)s
  481. """
  482. params = self._process_parameters(mean, cov, allow_singular)
  483. dim, mean, cov_object = params
  484. x = self._process_quantiles(x, dim)
  485. out = np.exp(self._logpdf(x, mean, cov_object))
  486. if np.any((cov_object.rank < dim)):
  487. out_of_bounds = ~cov_object._support_mask(x-mean)
  488. out[out_of_bounds] = 0.0
  489. return _squeeze_output(out)
  490. def _cdf(self, x, mean, cov, maxpts, abseps, releps, lower_limit):
  491. """Multivariate normal cumulative distribution function.
  492. Parameters
  493. ----------
  494. x : ndarray
  495. Points at which to evaluate the cumulative distribution function.
  496. mean : ndarray
  497. Mean of the distribution
  498. cov : array_like
  499. Covariance matrix of the distribution
  500. maxpts : integer
  501. The maximum number of points to use for integration
  502. abseps : float
  503. Absolute error tolerance
  504. releps : float
  505. Relative error tolerance
  506. lower_limit : array_like, optional
  507. Lower limit of integration of the cumulative distribution function.
  508. Default is negative infinity. Must be broadcastable with `x`.
  509. Notes
  510. -----
  511. As this function does no argument checking, it should not be
  512. called directly; use 'cdf' instead.
  513. .. versionadded:: 1.0.0
  514. """
  515. lower = (np.full(mean.shape, -np.inf)
  516. if lower_limit is None else lower_limit)
  517. # In 2d, _mvn.mvnun accepts input in which `lower` bound elements
  518. # are greater than `x`. Not so in other dimensions. Fix this by
  519. # ensuring that lower bounds are indeed lower when passed, then
  520. # set signs of resulting CDF manually.
  521. b, a = np.broadcast_arrays(x, lower)
  522. i_swap = b < a
  523. signs = (-1)**(i_swap.sum(axis=-1)) # odd # of swaps -> negative
  524. a, b = a.copy(), b.copy()
  525. a[i_swap], b[i_swap] = b[i_swap], a[i_swap]
  526. n = x.shape[-1]
  527. limits = np.concatenate((a, b), axis=-1)
  528. # mvnun expects 1-d arguments, so process points sequentially
  529. def func1d(limits):
  530. return _mvn.mvnun(limits[:n], limits[n:], mean, cov,
  531. maxpts, abseps, releps)[0]
  532. out = np.apply_along_axis(func1d, -1, limits) * signs
  533. return _squeeze_output(out)
  534. def logcdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
  535. abseps=1e-5, releps=1e-5, *, lower_limit=None):
  536. """Log of the multivariate normal cumulative distribution function.
  537. Parameters
  538. ----------
  539. x : array_like
  540. Quantiles, with the last axis of `x` denoting the components.
  541. %(_mvn_doc_default_callparams)s
  542. maxpts : integer, optional
  543. The maximum number of points to use for integration
  544. (default `1000000*dim`)
  545. abseps : float, optional
  546. Absolute error tolerance (default 1e-5)
  547. releps : float, optional
  548. Relative error tolerance (default 1e-5)
  549. lower_limit : array_like, optional
  550. Lower limit of integration of the cumulative distribution function.
  551. Default is negative infinity. Must be broadcastable with `x`.
  552. Returns
  553. -------
  554. cdf : ndarray or scalar
  555. Log of the cumulative distribution function evaluated at `x`
  556. Notes
  557. -----
  558. %(_mvn_doc_callparams_note)s
  559. .. versionadded:: 1.0.0
  560. """
  561. params = self._process_parameters(mean, cov, allow_singular)
  562. dim, mean, cov_object = params
  563. cov = cov_object.covariance
  564. x = self._process_quantiles(x, dim)
  565. if not maxpts:
  566. maxpts = 1000000 * dim
  567. cdf = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit)
  568. # the log of a negative real is complex, and cdf can be negative
  569. # if lower limit is greater than upper limit
  570. cdf = cdf + 0j if np.any(cdf < 0) else cdf
  571. out = np.log(cdf)
  572. return out
  573. def cdf(self, x, mean=None, cov=1, allow_singular=False, maxpts=None,
  574. abseps=1e-5, releps=1e-5, *, lower_limit=None):
  575. """Multivariate normal cumulative distribution function.
  576. Parameters
  577. ----------
  578. x : array_like
  579. Quantiles, with the last axis of `x` denoting the components.
  580. %(_mvn_doc_default_callparams)s
  581. maxpts : integer, optional
  582. The maximum number of points to use for integration
  583. (default `1000000*dim`)
  584. abseps : float, optional
  585. Absolute error tolerance (default 1e-5)
  586. releps : float, optional
  587. Relative error tolerance (default 1e-5)
  588. lower_limit : array_like, optional
  589. Lower limit of integration of the cumulative distribution function.
  590. Default is negative infinity. Must be broadcastable with `x`.
  591. Returns
  592. -------
  593. cdf : ndarray or scalar
  594. Cumulative distribution function evaluated at `x`
  595. Notes
  596. -----
  597. %(_mvn_doc_callparams_note)s
  598. .. versionadded:: 1.0.0
  599. """
  600. params = self._process_parameters(mean, cov, allow_singular)
  601. dim, mean, cov_object = params
  602. cov = cov_object.covariance
  603. x = self._process_quantiles(x, dim)
  604. if not maxpts:
  605. maxpts = 1000000 * dim
  606. out = self._cdf(x, mean, cov, maxpts, abseps, releps, lower_limit)
  607. return out
  608. def rvs(self, mean=None, cov=1, size=1, random_state=None):
  609. """Draw random samples from a multivariate normal distribution.
  610. Parameters
  611. ----------
  612. %(_mvn_doc_default_callparams)s
  613. size : integer, optional
  614. Number of samples to draw (default 1).
  615. %(_doc_random_state)s
  616. Returns
  617. -------
  618. rvs : ndarray or scalar
  619. Random variates of size (`size`, `N`), where `N` is the
  620. dimension of the random variable.
  621. Notes
  622. -----
  623. %(_mvn_doc_callparams_note)s
  624. """
  625. dim, mean, cov_object = self._process_parameters(mean, cov)
  626. random_state = self._get_random_state(random_state)
  627. if isinstance(cov_object, _covariance.CovViaPSD):
  628. cov = cov_object.covariance
  629. out = random_state.multivariate_normal(mean, cov, size)
  630. out = _squeeze_output(out)
  631. else:
  632. size = size or tuple()
  633. if not np.iterable(size):
  634. size = (size,)
  635. shape = tuple(size) + (cov_object.shape[-1],)
  636. x = random_state.normal(size=shape)
  637. out = mean + cov_object.colorize(x)
  638. return out
  639. def entropy(self, mean=None, cov=1):
  640. """Compute the differential entropy of the multivariate normal.
  641. Parameters
  642. ----------
  643. %(_mvn_doc_default_callparams)s
  644. Returns
  645. -------
  646. h : scalar
  647. Entropy of the multivariate normal distribution
  648. Notes
  649. -----
  650. %(_mvn_doc_callparams_note)s
  651. """
  652. dim, mean, cov_object = self._process_parameters(mean, cov)
  653. return 0.5 * (cov_object.rank * (_LOG_2PI + 1) + cov_object.log_pdet)
  654. multivariate_normal = multivariate_normal_gen()
  655. class multivariate_normal_frozen(multi_rv_frozen):
  656. def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,
  657. maxpts=None, abseps=1e-5, releps=1e-5):
  658. """Create a frozen multivariate normal distribution.
  659. Parameters
  660. ----------
  661. mean : array_like, default: ``[0]``
  662. Mean of the distribution.
  663. cov : array_like, default: ``[1]``
  664. Symmetric positive (semi)definite covariance matrix of the
  665. distribution.
  666. allow_singular : bool, default: ``False``
  667. Whether to allow a singular covariance matrix.
  668. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  669. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  670. singleton is used.
  671. If `seed` is an int, a new ``RandomState`` instance is used,
  672. seeded with `seed`.
  673. If `seed` is already a ``Generator`` or ``RandomState`` instance
  674. then that instance is used.
  675. maxpts : integer, optional
  676. The maximum number of points to use for integration of the
  677. cumulative distribution function (default `1000000*dim`)
  678. abseps : float, optional
  679. Absolute error tolerance for the cumulative distribution function
  680. (default 1e-5)
  681. releps : float, optional
  682. Relative error tolerance for the cumulative distribution function
  683. (default 1e-5)
  684. Examples
  685. --------
  686. When called with the default parameters, this will create a 1D random
  687. variable with mean 0 and covariance 1:
  688. >>> from scipy.stats import multivariate_normal
  689. >>> r = multivariate_normal()
  690. >>> r.mean
  691. array([ 0.])
  692. >>> r.cov
  693. array([[1.]])
  694. """
  695. self._dist = multivariate_normal_gen(seed)
  696. self.dim, self.mean, self.cov_object = (
  697. self._dist._process_parameters(mean, cov, allow_singular))
  698. self.allow_singular = allow_singular or self.cov_object._allow_singular
  699. if not maxpts:
  700. maxpts = 1000000 * self.dim
  701. self.maxpts = maxpts
  702. self.abseps = abseps
  703. self.releps = releps
  704. @property
  705. def cov(self):
  706. return self.cov_object.covariance
  707. def logpdf(self, x):
  708. x = self._dist._process_quantiles(x, self.dim)
  709. out = self._dist._logpdf(x, self.mean, self.cov_object)
  710. if np.any(self.cov_object.rank < self.dim):
  711. out_of_bounds = ~self.cov_object._support_mask(x-self.mean)
  712. out[out_of_bounds] = -np.inf
  713. return _squeeze_output(out)
  714. def pdf(self, x):
  715. return np.exp(self.logpdf(x))
  716. def logcdf(self, x, *, lower_limit=None):
  717. cdf = self.cdf(x, lower_limit=lower_limit)
  718. # the log of a negative real is complex, and cdf can be negative
  719. # if lower limit is greater than upper limit
  720. cdf = cdf + 0j if np.any(cdf < 0) else cdf
  721. out = np.log(cdf)
  722. return out
  723. def cdf(self, x, *, lower_limit=None):
  724. x = self._dist._process_quantiles(x, self.dim)
  725. out = self._dist._cdf(x, self.mean, self.cov_object.covariance,
  726. self.maxpts, self.abseps, self.releps,
  727. lower_limit)
  728. return _squeeze_output(out)
  729. def rvs(self, size=1, random_state=None):
  730. return self._dist.rvs(self.mean, self.cov_object, size, random_state)
  731. def entropy(self):
  732. """Computes the differential entropy of the multivariate normal.
  733. Returns
  734. -------
  735. h : scalar
  736. Entropy of the multivariate normal distribution
  737. """
  738. log_pdet = self.cov_object.log_pdet
  739. rank = self.cov_object.rank
  740. return 0.5 * (rank * (_LOG_2PI + 1) + log_pdet)
  741. # Set frozen generator docstrings from corresponding docstrings in
  742. # multivariate_normal_gen and fill in default strings in class docstrings
  743. for name in ['logpdf', 'pdf', 'logcdf', 'cdf', 'rvs']:
  744. method = multivariate_normal_gen.__dict__[name]
  745. method_frozen = multivariate_normal_frozen.__dict__[name]
  746. method_frozen.__doc__ = doccer.docformat(method.__doc__,
  747. mvn_docdict_noparams)
  748. method.__doc__ = doccer.docformat(method.__doc__, mvn_docdict_params)
  749. _matnorm_doc_default_callparams = """\
  750. mean : array_like, optional
  751. Mean of the distribution (default: `None`)
  752. rowcov : array_like, optional
  753. Among-row covariance matrix of the distribution (default: `1`)
  754. colcov : array_like, optional
  755. Among-column covariance matrix of the distribution (default: `1`)
  756. """
  757. _matnorm_doc_callparams_note = """\
  758. If `mean` is set to `None` then a matrix of zeros is used for the mean.
  759. The dimensions of this matrix are inferred from the shape of `rowcov` and
  760. `colcov`, if these are provided, or set to `1` if ambiguous.
  761. `rowcov` and `colcov` can be two-dimensional array_likes specifying the
  762. covariance matrices directly. Alternatively, a one-dimensional array will
  763. be be interpreted as the entries of a diagonal matrix, and a scalar or
  764. zero-dimensional array will be interpreted as this value times the
  765. identity matrix.
  766. """
  767. _matnorm_doc_frozen_callparams = ""
  768. _matnorm_doc_frozen_callparams_note = """\
  769. See class definition for a detailed description of parameters."""
  770. matnorm_docdict_params = {
  771. '_matnorm_doc_default_callparams': _matnorm_doc_default_callparams,
  772. '_matnorm_doc_callparams_note': _matnorm_doc_callparams_note,
  773. '_doc_random_state': _doc_random_state
  774. }
  775. matnorm_docdict_noparams = {
  776. '_matnorm_doc_default_callparams': _matnorm_doc_frozen_callparams,
  777. '_matnorm_doc_callparams_note': _matnorm_doc_frozen_callparams_note,
  778. '_doc_random_state': _doc_random_state
  779. }
  780. class matrix_normal_gen(multi_rv_generic):
  781. r"""A matrix normal random variable.
  782. The `mean` keyword specifies the mean. The `rowcov` keyword specifies the
  783. among-row covariance matrix. The 'colcov' keyword specifies the
  784. among-column covariance matrix.
  785. Methods
  786. -------
  787. pdf(X, mean=None, rowcov=1, colcov=1)
  788. Probability density function.
  789. logpdf(X, mean=None, rowcov=1, colcov=1)
  790. Log of the probability density function.
  791. rvs(mean=None, rowcov=1, colcov=1, size=1, random_state=None)
  792. Draw random samples.
  793. Parameters
  794. ----------
  795. %(_matnorm_doc_default_callparams)s
  796. %(_doc_random_state)s
  797. Notes
  798. -----
  799. %(_matnorm_doc_callparams_note)s
  800. The covariance matrices specified by `rowcov` and `colcov` must be
  801. (symmetric) positive definite. If the samples in `X` are
  802. :math:`m \times n`, then `rowcov` must be :math:`m \times m` and
  803. `colcov` must be :math:`n \times n`. `mean` must be the same shape as `X`.
  804. The probability density function for `matrix_normal` is
  805. .. math::
  806. f(X) = (2 \pi)^{-\frac{mn}{2}}|U|^{-\frac{n}{2}} |V|^{-\frac{m}{2}}
  807. \exp\left( -\frac{1}{2} \mathrm{Tr}\left[ U^{-1} (X-M) V^{-1}
  808. (X-M)^T \right] \right),
  809. where :math:`M` is the mean, :math:`U` the among-row covariance matrix,
  810. :math:`V` the among-column covariance matrix.
  811. The `allow_singular` behaviour of the `multivariate_normal`
  812. distribution is not currently supported. Covariance matrices must be
  813. full rank.
  814. The `matrix_normal` distribution is closely related to the
  815. `multivariate_normal` distribution. Specifically, :math:`\mathrm{Vec}(X)`
  816. (the vector formed by concatenating the columns of :math:`X`) has a
  817. multivariate normal distribution with mean :math:`\mathrm{Vec}(M)`
  818. and covariance :math:`V \otimes U` (where :math:`\otimes` is the Kronecker
  819. product). Sampling and pdf evaluation are
  820. :math:`\mathcal{O}(m^3 + n^3 + m^2 n + m n^2)` for the matrix normal, but
  821. :math:`\mathcal{O}(m^3 n^3)` for the equivalent multivariate normal,
  822. making this equivalent form algorithmically inefficient.
  823. .. versionadded:: 0.17.0
  824. Examples
  825. --------
  826. >>> import numpy as np
  827. >>> from scipy.stats import matrix_normal
  828. >>> M = np.arange(6).reshape(3,2); M
  829. array([[0, 1],
  830. [2, 3],
  831. [4, 5]])
  832. >>> U = np.diag([1,2,3]); U
  833. array([[1, 0, 0],
  834. [0, 2, 0],
  835. [0, 0, 3]])
  836. >>> V = 0.3*np.identity(2); V
  837. array([[ 0.3, 0. ],
  838. [ 0. , 0.3]])
  839. >>> X = M + 0.1; X
  840. array([[ 0.1, 1.1],
  841. [ 2.1, 3.1],
  842. [ 4.1, 5.1]])
  843. >>> matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)
  844. 0.023410202050005054
  845. >>> # Equivalent multivariate normal
  846. >>> from scipy.stats import multivariate_normal
  847. >>> vectorised_X = X.T.flatten()
  848. >>> equiv_mean = M.T.flatten()
  849. >>> equiv_cov = np.kron(V,U)
  850. >>> multivariate_normal.pdf(vectorised_X, mean=equiv_mean, cov=equiv_cov)
  851. 0.023410202050005054
  852. Alternatively, the object may be called (as a function) to fix the mean
  853. and covariance parameters, returning a "frozen" matrix normal
  854. random variable:
  855. >>> rv = matrix_normal(mean=None, rowcov=1, colcov=1)
  856. >>> # Frozen object with the same methods but holding the given
  857. >>> # mean and covariance fixed.
  858. """
  859. def __init__(self, seed=None):
  860. super().__init__(seed)
  861. self.__doc__ = doccer.docformat(self.__doc__, matnorm_docdict_params)
  862. def __call__(self, mean=None, rowcov=1, colcov=1, seed=None):
  863. """Create a frozen matrix normal distribution.
  864. See `matrix_normal_frozen` for more information.
  865. """
  866. return matrix_normal_frozen(mean, rowcov, colcov, seed=seed)
  867. def _process_parameters(self, mean, rowcov, colcov):
  868. """
  869. Infer dimensionality from mean or covariance matrices. Handle
  870. defaults. Ensure compatible dimensions.
  871. """
  872. # Process mean
  873. if mean is not None:
  874. mean = np.asarray(mean, dtype=float)
  875. meanshape = mean.shape
  876. if len(meanshape) != 2:
  877. raise ValueError("Array `mean` must be two dimensional.")
  878. if np.any(meanshape == 0):
  879. raise ValueError("Array `mean` has invalid shape.")
  880. # Process among-row covariance
  881. rowcov = np.asarray(rowcov, dtype=float)
  882. if rowcov.ndim == 0:
  883. if mean is not None:
  884. rowcov = rowcov * np.identity(meanshape[0])
  885. else:
  886. rowcov = rowcov * np.identity(1)
  887. elif rowcov.ndim == 1:
  888. rowcov = np.diag(rowcov)
  889. rowshape = rowcov.shape
  890. if len(rowshape) != 2:
  891. raise ValueError("`rowcov` must be a scalar or a 2D array.")
  892. if rowshape[0] != rowshape[1]:
  893. raise ValueError("Array `rowcov` must be square.")
  894. if rowshape[0] == 0:
  895. raise ValueError("Array `rowcov` has invalid shape.")
  896. numrows = rowshape[0]
  897. # Process among-column covariance
  898. colcov = np.asarray(colcov, dtype=float)
  899. if colcov.ndim == 0:
  900. if mean is not None:
  901. colcov = colcov * np.identity(meanshape[1])
  902. else:
  903. colcov = colcov * np.identity(1)
  904. elif colcov.ndim == 1:
  905. colcov = np.diag(colcov)
  906. colshape = colcov.shape
  907. if len(colshape) != 2:
  908. raise ValueError("`colcov` must be a scalar or a 2D array.")
  909. if colshape[0] != colshape[1]:
  910. raise ValueError("Array `colcov` must be square.")
  911. if colshape[0] == 0:
  912. raise ValueError("Array `colcov` has invalid shape.")
  913. numcols = colshape[0]
  914. # Ensure mean and covariances compatible
  915. if mean is not None:
  916. if meanshape[0] != numrows:
  917. raise ValueError("Arrays `mean` and `rowcov` must have the "
  918. "same number of rows.")
  919. if meanshape[1] != numcols:
  920. raise ValueError("Arrays `mean` and `colcov` must have the "
  921. "same number of columns.")
  922. else:
  923. mean = np.zeros((numrows, numcols))
  924. dims = (numrows, numcols)
  925. return dims, mean, rowcov, colcov
  926. def _process_quantiles(self, X, dims):
  927. """
  928. Adjust quantiles array so that last two axes labels the components of
  929. each data point.
  930. """
  931. X = np.asarray(X, dtype=float)
  932. if X.ndim == 2:
  933. X = X[np.newaxis, :]
  934. if X.shape[-2:] != dims:
  935. raise ValueError("The shape of array `X` is not compatible "
  936. "with the distribution parameters.")
  937. return X
  938. def _logpdf(self, dims, X, mean, row_prec_rt, log_det_rowcov,
  939. col_prec_rt, log_det_colcov):
  940. """Log of the matrix normal probability density function.
  941. Parameters
  942. ----------
  943. dims : tuple
  944. Dimensions of the matrix variates
  945. X : ndarray
  946. Points at which to evaluate the log of the probability
  947. density function
  948. mean : ndarray
  949. Mean of the distribution
  950. row_prec_rt : ndarray
  951. A decomposition such that np.dot(row_prec_rt, row_prec_rt.T)
  952. is the inverse of the among-row covariance matrix
  953. log_det_rowcov : float
  954. Logarithm of the determinant of the among-row covariance matrix
  955. col_prec_rt : ndarray
  956. A decomposition such that np.dot(col_prec_rt, col_prec_rt.T)
  957. is the inverse of the among-column covariance matrix
  958. log_det_colcov : float
  959. Logarithm of the determinant of the among-column covariance matrix
  960. Notes
  961. -----
  962. As this function does no argument checking, it should not be
  963. called directly; use 'logpdf' instead.
  964. """
  965. numrows, numcols = dims
  966. roll_dev = np.moveaxis(X-mean, -1, 0)
  967. scale_dev = np.tensordot(col_prec_rt.T,
  968. np.dot(roll_dev, row_prec_rt), 1)
  969. maha = np.sum(np.sum(np.square(scale_dev), axis=-1), axis=0)
  970. return -0.5 * (numrows*numcols*_LOG_2PI + numcols*log_det_rowcov
  971. + numrows*log_det_colcov + maha)
  972. def logpdf(self, X, mean=None, rowcov=1, colcov=1):
  973. """Log of the matrix normal probability density function.
  974. Parameters
  975. ----------
  976. X : array_like
  977. Quantiles, with the last two axes of `X` denoting the components.
  978. %(_matnorm_doc_default_callparams)s
  979. Returns
  980. -------
  981. logpdf : ndarray
  982. Log of the probability density function evaluated at `X`
  983. Notes
  984. -----
  985. %(_matnorm_doc_callparams_note)s
  986. """
  987. dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
  988. colcov)
  989. X = self._process_quantiles(X, dims)
  990. rowpsd = _PSD(rowcov, allow_singular=False)
  991. colpsd = _PSD(colcov, allow_singular=False)
  992. out = self._logpdf(dims, X, mean, rowpsd.U, rowpsd.log_pdet, colpsd.U,
  993. colpsd.log_pdet)
  994. return _squeeze_output(out)
  995. def pdf(self, X, mean=None, rowcov=1, colcov=1):
  996. """Matrix normal probability density function.
  997. Parameters
  998. ----------
  999. X : array_like
  1000. Quantiles, with the last two axes of `X` denoting the components.
  1001. %(_matnorm_doc_default_callparams)s
  1002. Returns
  1003. -------
  1004. pdf : ndarray
  1005. Probability density function evaluated at `X`
  1006. Notes
  1007. -----
  1008. %(_matnorm_doc_callparams_note)s
  1009. """
  1010. return np.exp(self.logpdf(X, mean, rowcov, colcov))
  1011. def rvs(self, mean=None, rowcov=1, colcov=1, size=1, random_state=None):
  1012. """Draw random samples from a matrix normal distribution.
  1013. Parameters
  1014. ----------
  1015. %(_matnorm_doc_default_callparams)s
  1016. size : integer, optional
  1017. Number of samples to draw (default 1).
  1018. %(_doc_random_state)s
  1019. Returns
  1020. -------
  1021. rvs : ndarray or scalar
  1022. Random variates of size (`size`, `dims`), where `dims` is the
  1023. dimension of the random matrices.
  1024. Notes
  1025. -----
  1026. %(_matnorm_doc_callparams_note)s
  1027. """
  1028. size = int(size)
  1029. dims, mean, rowcov, colcov = self._process_parameters(mean, rowcov,
  1030. colcov)
  1031. rowchol = scipy.linalg.cholesky(rowcov, lower=True)
  1032. colchol = scipy.linalg.cholesky(colcov, lower=True)
  1033. random_state = self._get_random_state(random_state)
  1034. # We aren't generating standard normal variates with size=(size,
  1035. # dims[0], dims[1]) directly to ensure random variates remain backwards
  1036. # compatible. See https://github.com/scipy/scipy/pull/12312 for more
  1037. # details.
  1038. std_norm = random_state.standard_normal(
  1039. size=(dims[1], size, dims[0])
  1040. ).transpose(1, 2, 0)
  1041. out = mean + np.einsum('jp,ipq,kq->ijk',
  1042. rowchol, std_norm, colchol,
  1043. optimize=True)
  1044. if size == 1:
  1045. out = out.reshape(mean.shape)
  1046. return out
  1047. matrix_normal = matrix_normal_gen()
  1048. class matrix_normal_frozen(multi_rv_frozen):
  1049. """
  1050. Create a frozen matrix normal distribution.
  1051. Parameters
  1052. ----------
  1053. %(_matnorm_doc_default_callparams)s
  1054. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  1055. If `seed` is `None` the `~np.random.RandomState` singleton is used.
  1056. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  1057. with seed.
  1058. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  1059. then that object is used.
  1060. Default is `None`.
  1061. Examples
  1062. --------
  1063. >>> import numpy as np
  1064. >>> from scipy.stats import matrix_normal
  1065. >>> distn = matrix_normal(mean=np.zeros((3,3)))
  1066. >>> X = distn.rvs(); X
  1067. array([[-0.02976962, 0.93339138, -0.09663178],
  1068. [ 0.67405524, 0.28250467, -0.93308929],
  1069. [-0.31144782, 0.74535536, 1.30412916]])
  1070. >>> distn.pdf(X)
  1071. 2.5160642368346784e-05
  1072. >>> distn.logpdf(X)
  1073. -10.590229595124615
  1074. """
  1075. def __init__(self, mean=None, rowcov=1, colcov=1, seed=None):
  1076. self._dist = matrix_normal_gen(seed)
  1077. self.dims, self.mean, self.rowcov, self.colcov = \
  1078. self._dist._process_parameters(mean, rowcov, colcov)
  1079. self.rowpsd = _PSD(self.rowcov, allow_singular=False)
  1080. self.colpsd = _PSD(self.colcov, allow_singular=False)
  1081. def logpdf(self, X):
  1082. X = self._dist._process_quantiles(X, self.dims)
  1083. out = self._dist._logpdf(self.dims, X, self.mean, self.rowpsd.U,
  1084. self.rowpsd.log_pdet, self.colpsd.U,
  1085. self.colpsd.log_pdet)
  1086. return _squeeze_output(out)
  1087. def pdf(self, X):
  1088. return np.exp(self.logpdf(X))
  1089. def rvs(self, size=1, random_state=None):
  1090. return self._dist.rvs(self.mean, self.rowcov, self.colcov, size,
  1091. random_state)
  1092. # Set frozen generator docstrings from corresponding docstrings in
  1093. # matrix_normal_gen and fill in default strings in class docstrings
  1094. for name in ['logpdf', 'pdf', 'rvs']:
  1095. method = matrix_normal_gen.__dict__[name]
  1096. method_frozen = matrix_normal_frozen.__dict__[name]
  1097. method_frozen.__doc__ = doccer.docformat(method.__doc__,
  1098. matnorm_docdict_noparams)
  1099. method.__doc__ = doccer.docformat(method.__doc__, matnorm_docdict_params)
  1100. _dirichlet_doc_default_callparams = """\
  1101. alpha : array_like
  1102. The concentration parameters. The number of entries determines the
  1103. dimensionality of the distribution.
  1104. """
  1105. _dirichlet_doc_frozen_callparams = ""
  1106. _dirichlet_doc_frozen_callparams_note = """\
  1107. See class definition for a detailed description of parameters."""
  1108. dirichlet_docdict_params = {
  1109. '_dirichlet_doc_default_callparams': _dirichlet_doc_default_callparams,
  1110. '_doc_random_state': _doc_random_state
  1111. }
  1112. dirichlet_docdict_noparams = {
  1113. '_dirichlet_doc_default_callparams': _dirichlet_doc_frozen_callparams,
  1114. '_doc_random_state': _doc_random_state
  1115. }
  1116. def _dirichlet_check_parameters(alpha):
  1117. alpha = np.asarray(alpha)
  1118. if np.min(alpha) <= 0:
  1119. raise ValueError("All parameters must be greater than 0")
  1120. elif alpha.ndim != 1:
  1121. raise ValueError("Parameter vector 'a' must be one dimensional, "
  1122. "but a.shape = %s." % (alpha.shape, ))
  1123. return alpha
  1124. def _dirichlet_check_input(alpha, x):
  1125. x = np.asarray(x)
  1126. if x.shape[0] + 1 != alpha.shape[0] and x.shape[0] != alpha.shape[0]:
  1127. raise ValueError("Vector 'x' must have either the same number "
  1128. "of entries as, or one entry fewer than, "
  1129. "parameter vector 'a', but alpha.shape = %s "
  1130. "and x.shape = %s." % (alpha.shape, x.shape))
  1131. if x.shape[0] != alpha.shape[0]:
  1132. xk = np.array([1 - np.sum(x, 0)])
  1133. if xk.ndim == 1:
  1134. x = np.append(x, xk)
  1135. elif xk.ndim == 2:
  1136. x = np.vstack((x, xk))
  1137. else:
  1138. raise ValueError("The input must be one dimensional or a two "
  1139. "dimensional matrix containing the entries.")
  1140. if np.min(x) < 0:
  1141. raise ValueError("Each entry in 'x' must be greater than or equal "
  1142. "to zero.")
  1143. if np.max(x) > 1:
  1144. raise ValueError("Each entry in 'x' must be smaller or equal one.")
  1145. # Check x_i > 0 or alpha_i > 1
  1146. xeq0 = (x == 0)
  1147. alphalt1 = (alpha < 1)
  1148. if x.shape != alpha.shape:
  1149. alphalt1 = np.repeat(alphalt1, x.shape[-1], axis=-1).reshape(x.shape)
  1150. chk = np.logical_and(xeq0, alphalt1)
  1151. if np.sum(chk):
  1152. raise ValueError("Each entry in 'x' must be greater than zero if its "
  1153. "alpha is less than one.")
  1154. if (np.abs(np.sum(x, 0) - 1.0) > 10e-10).any():
  1155. raise ValueError("The input vector 'x' must lie within the normal "
  1156. "simplex. but np.sum(x, 0) = %s." % np.sum(x, 0))
  1157. return x
  1158. def _lnB(alpha):
  1159. r"""Internal helper function to compute the log of the useful quotient.
  1160. .. math::
  1161. B(\alpha) = \frac{\prod_{i=1}{K}\Gamma(\alpha_i)}
  1162. {\Gamma\left(\sum_{i=1}^{K} \alpha_i \right)}
  1163. Parameters
  1164. ----------
  1165. %(_dirichlet_doc_default_callparams)s
  1166. Returns
  1167. -------
  1168. B : scalar
  1169. Helper quotient, internal use only
  1170. """
  1171. return np.sum(gammaln(alpha)) - gammaln(np.sum(alpha))
  1172. class dirichlet_gen(multi_rv_generic):
  1173. r"""A Dirichlet random variable.
  1174. The ``alpha`` keyword specifies the concentration parameters of the
  1175. distribution.
  1176. .. versionadded:: 0.15.0
  1177. Methods
  1178. -------
  1179. pdf(x, alpha)
  1180. Probability density function.
  1181. logpdf(x, alpha)
  1182. Log of the probability density function.
  1183. rvs(alpha, size=1, random_state=None)
  1184. Draw random samples from a Dirichlet distribution.
  1185. mean(alpha)
  1186. The mean of the Dirichlet distribution
  1187. var(alpha)
  1188. The variance of the Dirichlet distribution
  1189. entropy(alpha)
  1190. Compute the differential entropy of the Dirichlet distribution.
  1191. Parameters
  1192. ----------
  1193. %(_dirichlet_doc_default_callparams)s
  1194. %(_doc_random_state)s
  1195. Notes
  1196. -----
  1197. Each :math:`\alpha` entry must be positive. The distribution has only
  1198. support on the simplex defined by
  1199. .. math::
  1200. \sum_{i=1}^{K} x_i = 1
  1201. where :math:`0 < x_i < 1`.
  1202. If the quantiles don't lie within the simplex, a ValueError is raised.
  1203. The probability density function for `dirichlet` is
  1204. .. math::
  1205. f(x) = \frac{1}{\mathrm{B}(\boldsymbol\alpha)} \prod_{i=1}^K x_i^{\alpha_i - 1}
  1206. where
  1207. .. math::
  1208. \mathrm{B}(\boldsymbol\alpha) = \frac{\prod_{i=1}^K \Gamma(\alpha_i)}
  1209. {\Gamma\bigl(\sum_{i=1}^K \alpha_i\bigr)}
  1210. and :math:`\boldsymbol\alpha=(\alpha_1,\ldots,\alpha_K)`, the
  1211. concentration parameters and :math:`K` is the dimension of the space
  1212. where :math:`x` takes values.
  1213. Note that the `dirichlet` interface is somewhat inconsistent.
  1214. The array returned by the rvs function is transposed
  1215. with respect to the format expected by the pdf and logpdf.
  1216. Examples
  1217. --------
  1218. >>> import numpy as np
  1219. >>> from scipy.stats import dirichlet
  1220. Generate a dirichlet random variable
  1221. >>> quantiles = np.array([0.2, 0.2, 0.6]) # specify quantiles
  1222. >>> alpha = np.array([0.4, 5, 15]) # specify concentration parameters
  1223. >>> dirichlet.pdf(quantiles, alpha)
  1224. 0.2843831684937255
  1225. The same PDF but following a log scale
  1226. >>> dirichlet.logpdf(quantiles, alpha)
  1227. -1.2574327653159187
  1228. Once we specify the dirichlet distribution
  1229. we can then calculate quantities of interest
  1230. >>> dirichlet.mean(alpha) # get the mean of the distribution
  1231. array([0.01960784, 0.24509804, 0.73529412])
  1232. >>> dirichlet.var(alpha) # get variance
  1233. array([0.00089829, 0.00864603, 0.00909517])
  1234. >>> dirichlet.entropy(alpha) # calculate the differential entropy
  1235. -4.3280162474082715
  1236. We can also return random samples from the distribution
  1237. >>> dirichlet.rvs(alpha, size=1, random_state=1)
  1238. array([[0.00766178, 0.24670518, 0.74563305]])
  1239. >>> dirichlet.rvs(alpha, size=2, random_state=2)
  1240. array([[0.01639427, 0.1292273 , 0.85437844],
  1241. [0.00156917, 0.19033695, 0.80809388]])
  1242. Alternatively, the object may be called (as a function) to fix
  1243. concentration parameters, returning a "frozen" Dirichlet
  1244. random variable:
  1245. >>> rv = dirichlet(alpha)
  1246. >>> # Frozen object with the same methods but holding the given
  1247. >>> # concentration parameters fixed.
  1248. """
  1249. def __init__(self, seed=None):
  1250. super().__init__(seed)
  1251. self.__doc__ = doccer.docformat(self.__doc__, dirichlet_docdict_params)
  1252. def __call__(self, alpha, seed=None):
  1253. return dirichlet_frozen(alpha, seed=seed)
  1254. def _logpdf(self, x, alpha):
  1255. """Log of the Dirichlet probability density function.
  1256. Parameters
  1257. ----------
  1258. x : ndarray
  1259. Points at which to evaluate the log of the probability
  1260. density function
  1261. %(_dirichlet_doc_default_callparams)s
  1262. Notes
  1263. -----
  1264. As this function does no argument checking, it should not be
  1265. called directly; use 'logpdf' instead.
  1266. """
  1267. lnB = _lnB(alpha)
  1268. return - lnB + np.sum((xlogy(alpha - 1, x.T)).T, 0)
  1269. def logpdf(self, x, alpha):
  1270. """Log of the Dirichlet probability density function.
  1271. Parameters
  1272. ----------
  1273. x : array_like
  1274. Quantiles, with the last axis of `x` denoting the components.
  1275. %(_dirichlet_doc_default_callparams)s
  1276. Returns
  1277. -------
  1278. pdf : ndarray or scalar
  1279. Log of the probability density function evaluated at `x`.
  1280. """
  1281. alpha = _dirichlet_check_parameters(alpha)
  1282. x = _dirichlet_check_input(alpha, x)
  1283. out = self._logpdf(x, alpha)
  1284. return _squeeze_output(out)
  1285. def pdf(self, x, alpha):
  1286. """The Dirichlet probability density function.
  1287. Parameters
  1288. ----------
  1289. x : array_like
  1290. Quantiles, with the last axis of `x` denoting the components.
  1291. %(_dirichlet_doc_default_callparams)s
  1292. Returns
  1293. -------
  1294. pdf : ndarray or scalar
  1295. The probability density function evaluated at `x`.
  1296. """
  1297. alpha = _dirichlet_check_parameters(alpha)
  1298. x = _dirichlet_check_input(alpha, x)
  1299. out = np.exp(self._logpdf(x, alpha))
  1300. return _squeeze_output(out)
  1301. def mean(self, alpha):
  1302. """Mean of the Dirichlet distribution.
  1303. Parameters
  1304. ----------
  1305. %(_dirichlet_doc_default_callparams)s
  1306. Returns
  1307. -------
  1308. mu : ndarray or scalar
  1309. Mean of the Dirichlet distribution.
  1310. """
  1311. alpha = _dirichlet_check_parameters(alpha)
  1312. out = alpha / (np.sum(alpha))
  1313. return _squeeze_output(out)
  1314. def var(self, alpha):
  1315. """Variance of the Dirichlet distribution.
  1316. Parameters
  1317. ----------
  1318. %(_dirichlet_doc_default_callparams)s
  1319. Returns
  1320. -------
  1321. v : ndarray or scalar
  1322. Variance of the Dirichlet distribution.
  1323. """
  1324. alpha = _dirichlet_check_parameters(alpha)
  1325. alpha0 = np.sum(alpha)
  1326. out = (alpha * (alpha0 - alpha)) / ((alpha0 * alpha0) * (alpha0 + 1))
  1327. return _squeeze_output(out)
  1328. def entropy(self, alpha):
  1329. """
  1330. Differential entropy of the Dirichlet distribution.
  1331. Parameters
  1332. ----------
  1333. %(_dirichlet_doc_default_callparams)s
  1334. Returns
  1335. -------
  1336. h : scalar
  1337. Entropy of the Dirichlet distribution
  1338. """
  1339. alpha = _dirichlet_check_parameters(alpha)
  1340. alpha0 = np.sum(alpha)
  1341. lnB = _lnB(alpha)
  1342. K = alpha.shape[0]
  1343. out = lnB + (alpha0 - K) * scipy.special.psi(alpha0) - np.sum(
  1344. (alpha - 1) * scipy.special.psi(alpha))
  1345. return _squeeze_output(out)
  1346. def rvs(self, alpha, size=1, random_state=None):
  1347. """
  1348. Draw random samples from a Dirichlet distribution.
  1349. Parameters
  1350. ----------
  1351. %(_dirichlet_doc_default_callparams)s
  1352. size : int, optional
  1353. Number of samples to draw (default 1).
  1354. %(_doc_random_state)s
  1355. Returns
  1356. -------
  1357. rvs : ndarray or scalar
  1358. Random variates of size (`size`, `N`), where `N` is the
  1359. dimension of the random variable.
  1360. """
  1361. alpha = _dirichlet_check_parameters(alpha)
  1362. random_state = self._get_random_state(random_state)
  1363. return random_state.dirichlet(alpha, size=size)
  1364. dirichlet = dirichlet_gen()
  1365. class dirichlet_frozen(multi_rv_frozen):
  1366. def __init__(self, alpha, seed=None):
  1367. self.alpha = _dirichlet_check_parameters(alpha)
  1368. self._dist = dirichlet_gen(seed)
  1369. def logpdf(self, x):
  1370. return self._dist.logpdf(x, self.alpha)
  1371. def pdf(self, x):
  1372. return self._dist.pdf(x, self.alpha)
  1373. def mean(self):
  1374. return self._dist.mean(self.alpha)
  1375. def var(self):
  1376. return self._dist.var(self.alpha)
  1377. def entropy(self):
  1378. return self._dist.entropy(self.alpha)
  1379. def rvs(self, size=1, random_state=None):
  1380. return self._dist.rvs(self.alpha, size, random_state)
  1381. # Set frozen generator docstrings from corresponding docstrings in
  1382. # multivariate_normal_gen and fill in default strings in class docstrings
  1383. for name in ['logpdf', 'pdf', 'rvs', 'mean', 'var', 'entropy']:
  1384. method = dirichlet_gen.__dict__[name]
  1385. method_frozen = dirichlet_frozen.__dict__[name]
  1386. method_frozen.__doc__ = doccer.docformat(
  1387. method.__doc__, dirichlet_docdict_noparams)
  1388. method.__doc__ = doccer.docformat(method.__doc__, dirichlet_docdict_params)
  1389. _wishart_doc_default_callparams = """\
  1390. df : int
  1391. Degrees of freedom, must be greater than or equal to dimension of the
  1392. scale matrix
  1393. scale : array_like
  1394. Symmetric positive definite scale matrix of the distribution
  1395. """
  1396. _wishart_doc_callparams_note = ""
  1397. _wishart_doc_frozen_callparams = ""
  1398. _wishart_doc_frozen_callparams_note = """\
  1399. See class definition for a detailed description of parameters."""
  1400. wishart_docdict_params = {
  1401. '_doc_default_callparams': _wishart_doc_default_callparams,
  1402. '_doc_callparams_note': _wishart_doc_callparams_note,
  1403. '_doc_random_state': _doc_random_state
  1404. }
  1405. wishart_docdict_noparams = {
  1406. '_doc_default_callparams': _wishart_doc_frozen_callparams,
  1407. '_doc_callparams_note': _wishart_doc_frozen_callparams_note,
  1408. '_doc_random_state': _doc_random_state
  1409. }
  1410. class wishart_gen(multi_rv_generic):
  1411. r"""A Wishart random variable.
  1412. The `df` keyword specifies the degrees of freedom. The `scale` keyword
  1413. specifies the scale matrix, which must be symmetric and positive definite.
  1414. In this context, the scale matrix is often interpreted in terms of a
  1415. multivariate normal precision matrix (the inverse of the covariance
  1416. matrix). These arguments must satisfy the relationship
  1417. ``df > scale.ndim - 1``, but see notes on using the `rvs` method with
  1418. ``df < scale.ndim``.
  1419. Methods
  1420. -------
  1421. pdf(x, df, scale)
  1422. Probability density function.
  1423. logpdf(x, df, scale)
  1424. Log of the probability density function.
  1425. rvs(df, scale, size=1, random_state=None)
  1426. Draw random samples from a Wishart distribution.
  1427. entropy()
  1428. Compute the differential entropy of the Wishart distribution.
  1429. Parameters
  1430. ----------
  1431. %(_doc_default_callparams)s
  1432. %(_doc_random_state)s
  1433. Raises
  1434. ------
  1435. scipy.linalg.LinAlgError
  1436. If the scale matrix `scale` is not positive definite.
  1437. See Also
  1438. --------
  1439. invwishart, chi2
  1440. Notes
  1441. -----
  1442. %(_doc_callparams_note)s
  1443. The scale matrix `scale` must be a symmetric positive definite
  1444. matrix. Singular matrices, including the symmetric positive semi-definite
  1445. case, are not supported. Symmetry is not checked; only the lower triangular
  1446. portion is used.
  1447. The Wishart distribution is often denoted
  1448. .. math::
  1449. W_p(\nu, \Sigma)
  1450. where :math:`\nu` is the degrees of freedom and :math:`\Sigma` is the
  1451. :math:`p \times p` scale matrix.
  1452. The probability density function for `wishart` has support over positive
  1453. definite matrices :math:`S`; if :math:`S \sim W_p(\nu, \Sigma)`, then
  1454. its PDF is given by:
  1455. .. math::
  1456. f(S) = \frac{|S|^{\frac{\nu - p - 1}{2}}}{2^{ \frac{\nu p}{2} }
  1457. |\Sigma|^\frac{\nu}{2} \Gamma_p \left ( \frac{\nu}{2} \right )}
  1458. \exp\left( -tr(\Sigma^{-1} S) / 2 \right)
  1459. If :math:`S \sim W_p(\nu, \Sigma)` (Wishart) then
  1460. :math:`S^{-1} \sim W_p^{-1}(\nu, \Sigma^{-1})` (inverse Wishart).
  1461. If the scale matrix is 1-dimensional and equal to one, then the Wishart
  1462. distribution :math:`W_1(\nu, 1)` collapses to the :math:`\chi^2(\nu)`
  1463. distribution.
  1464. The algorithm [2]_ implemented by the `rvs` method may
  1465. produce numerically singular matrices with :math:`p - 1 < \nu < p`; the
  1466. user may wish to check for this condition and generate replacement samples
  1467. as necessary.
  1468. .. versionadded:: 0.16.0
  1469. References
  1470. ----------
  1471. .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
  1472. Wiley, 1983.
  1473. .. [2] W.B. Smith and R.R. Hocking, "Algorithm AS 53: Wishart Variate
  1474. Generator", Applied Statistics, vol. 21, pp. 341-345, 1972.
  1475. Examples
  1476. --------
  1477. >>> import numpy as np
  1478. >>> import matplotlib.pyplot as plt
  1479. >>> from scipy.stats import wishart, chi2
  1480. >>> x = np.linspace(1e-5, 8, 100)
  1481. >>> w = wishart.pdf(x, df=3, scale=1); w[:5]
  1482. array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
  1483. >>> c = chi2.pdf(x, 3); c[:5]
  1484. array([ 0.00126156, 0.10892176, 0.14793434, 0.17400548, 0.1929669 ])
  1485. >>> plt.plot(x, w)
  1486. >>> plt.show()
  1487. The input quantiles can be any shape of array, as long as the last
  1488. axis labels the components.
  1489. Alternatively, the object may be called (as a function) to fix the degrees
  1490. of freedom and scale parameters, returning a "frozen" Wishart random
  1491. variable:
  1492. >>> rv = wishart(df=1, scale=1)
  1493. >>> # Frozen object with the same methods but holding the given
  1494. >>> # degrees of freedom and scale fixed.
  1495. """
  1496. def __init__(self, seed=None):
  1497. super().__init__(seed)
  1498. self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
  1499. def __call__(self, df=None, scale=None, seed=None):
  1500. """Create a frozen Wishart distribution.
  1501. See `wishart_frozen` for more information.
  1502. """
  1503. return wishart_frozen(df, scale, seed)
  1504. def _process_parameters(self, df, scale):
  1505. if scale is None:
  1506. scale = 1.0
  1507. scale = np.asarray(scale, dtype=float)
  1508. if scale.ndim == 0:
  1509. scale = scale[np.newaxis, np.newaxis]
  1510. elif scale.ndim == 1:
  1511. scale = np.diag(scale)
  1512. elif scale.ndim == 2 and not scale.shape[0] == scale.shape[1]:
  1513. raise ValueError("Array 'scale' must be square if it is two"
  1514. " dimensional, but scale.scale = %s."
  1515. % str(scale.shape))
  1516. elif scale.ndim > 2:
  1517. raise ValueError("Array 'scale' must be at most two-dimensional,"
  1518. " but scale.ndim = %d" % scale.ndim)
  1519. dim = scale.shape[0]
  1520. if df is None:
  1521. df = dim
  1522. elif not np.isscalar(df):
  1523. raise ValueError("Degrees of freedom must be a scalar.")
  1524. elif df <= dim - 1:
  1525. raise ValueError("Degrees of freedom must be greater than the "
  1526. "dimension of scale matrix minus 1.")
  1527. return dim, df, scale
  1528. def _process_quantiles(self, x, dim):
  1529. """
  1530. Adjust quantiles array so that last axis labels the components of
  1531. each data point.
  1532. """
  1533. x = np.asarray(x, dtype=float)
  1534. if x.ndim == 0:
  1535. x = x * np.eye(dim)[:, :, np.newaxis]
  1536. if x.ndim == 1:
  1537. if dim == 1:
  1538. x = x[np.newaxis, np.newaxis, :]
  1539. else:
  1540. x = np.diag(x)[:, :, np.newaxis]
  1541. elif x.ndim == 2:
  1542. if not x.shape[0] == x.shape[1]:
  1543. raise ValueError("Quantiles must be square if they are two"
  1544. " dimensional, but x.shape = %s."
  1545. % str(x.shape))
  1546. x = x[:, :, np.newaxis]
  1547. elif x.ndim == 3:
  1548. if not x.shape[0] == x.shape[1]:
  1549. raise ValueError("Quantiles must be square in the first two"
  1550. " dimensions if they are three dimensional"
  1551. ", but x.shape = %s." % str(x.shape))
  1552. elif x.ndim > 3:
  1553. raise ValueError("Quantiles must be at most two-dimensional with"
  1554. " an additional dimension for multiple"
  1555. "components, but x.ndim = %d" % x.ndim)
  1556. # Now we have 3-dim array; should have shape [dim, dim, *]
  1557. if not x.shape[0:2] == (dim, dim):
  1558. raise ValueError('Quantiles have incompatible dimensions: should'
  1559. ' be %s, got %s.' % ((dim, dim), x.shape[0:2]))
  1560. return x
  1561. def _process_size(self, size):
  1562. size = np.asarray(size)
  1563. if size.ndim == 0:
  1564. size = size[np.newaxis]
  1565. elif size.ndim > 1:
  1566. raise ValueError('Size must be an integer or tuple of integers;'
  1567. ' thus must have dimension <= 1.'
  1568. ' Got size.ndim = %s' % str(tuple(size)))
  1569. n = size.prod()
  1570. shape = tuple(size)
  1571. return n, shape
  1572. def _logpdf(self, x, dim, df, scale, log_det_scale, C):
  1573. """Log of the Wishart probability density function.
  1574. Parameters
  1575. ----------
  1576. x : ndarray
  1577. Points at which to evaluate the log of the probability
  1578. density function
  1579. dim : int
  1580. Dimension of the scale matrix
  1581. df : int
  1582. Degrees of freedom
  1583. scale : ndarray
  1584. Scale matrix
  1585. log_det_scale : float
  1586. Logarithm of the determinant of the scale matrix
  1587. C : ndarray
  1588. Cholesky factorization of the scale matrix, lower triagular.
  1589. Notes
  1590. -----
  1591. As this function does no argument checking, it should not be
  1592. called directly; use 'logpdf' instead.
  1593. """
  1594. # log determinant of x
  1595. # Note: x has components along the last axis, so that x.T has
  1596. # components alone the 0-th axis. Then since det(A) = det(A'), this
  1597. # gives us a 1-dim vector of determinants
  1598. # Retrieve tr(scale^{-1} x)
  1599. log_det_x = np.empty(x.shape[-1])
  1600. scale_inv_x = np.empty(x.shape)
  1601. tr_scale_inv_x = np.empty(x.shape[-1])
  1602. for i in range(x.shape[-1]):
  1603. _, log_det_x[i] = self._cholesky_logdet(x[:, :, i])
  1604. scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])
  1605. tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()
  1606. # Log PDF
  1607. out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -
  1608. (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +
  1609. multigammaln(0.5*df, dim)))
  1610. return out
  1611. def logpdf(self, x, df, scale):
  1612. """Log of the Wishart probability density function.
  1613. Parameters
  1614. ----------
  1615. x : array_like
  1616. Quantiles, with the last axis of `x` denoting the components.
  1617. Each quantile must be a symmetric positive definite matrix.
  1618. %(_doc_default_callparams)s
  1619. Returns
  1620. -------
  1621. pdf : ndarray
  1622. Log of the probability density function evaluated at `x`
  1623. Notes
  1624. -----
  1625. %(_doc_callparams_note)s
  1626. """
  1627. dim, df, scale = self._process_parameters(df, scale)
  1628. x = self._process_quantiles(x, dim)
  1629. # Cholesky decomposition of scale, get log(det(scale))
  1630. C, log_det_scale = self._cholesky_logdet(scale)
  1631. out = self._logpdf(x, dim, df, scale, log_det_scale, C)
  1632. return _squeeze_output(out)
  1633. def pdf(self, x, df, scale):
  1634. """Wishart probability density function.
  1635. Parameters
  1636. ----------
  1637. x : array_like
  1638. Quantiles, with the last axis of `x` denoting the components.
  1639. Each quantile must be a symmetric positive definite matrix.
  1640. %(_doc_default_callparams)s
  1641. Returns
  1642. -------
  1643. pdf : ndarray
  1644. Probability density function evaluated at `x`
  1645. Notes
  1646. -----
  1647. %(_doc_callparams_note)s
  1648. """
  1649. return np.exp(self.logpdf(x, df, scale))
  1650. def _mean(self, dim, df, scale):
  1651. """Mean of the Wishart distribution.
  1652. Parameters
  1653. ----------
  1654. dim : int
  1655. Dimension of the scale matrix
  1656. %(_doc_default_callparams)s
  1657. Notes
  1658. -----
  1659. As this function does no argument checking, it should not be
  1660. called directly; use 'mean' instead.
  1661. """
  1662. return df * scale
  1663. def mean(self, df, scale):
  1664. """Mean of the Wishart distribution.
  1665. Parameters
  1666. ----------
  1667. %(_doc_default_callparams)s
  1668. Returns
  1669. -------
  1670. mean : float
  1671. The mean of the distribution
  1672. """
  1673. dim, df, scale = self._process_parameters(df, scale)
  1674. out = self._mean(dim, df, scale)
  1675. return _squeeze_output(out)
  1676. def _mode(self, dim, df, scale):
  1677. """Mode of the Wishart distribution.
  1678. Parameters
  1679. ----------
  1680. dim : int
  1681. Dimension of the scale matrix
  1682. %(_doc_default_callparams)s
  1683. Notes
  1684. -----
  1685. As this function does no argument checking, it should not be
  1686. called directly; use 'mode' instead.
  1687. """
  1688. if df >= dim + 1:
  1689. out = (df-dim-1) * scale
  1690. else:
  1691. out = None
  1692. return out
  1693. def mode(self, df, scale):
  1694. """Mode of the Wishart distribution
  1695. Only valid if the degrees of freedom are greater than the dimension of
  1696. the scale matrix.
  1697. Parameters
  1698. ----------
  1699. %(_doc_default_callparams)s
  1700. Returns
  1701. -------
  1702. mode : float or None
  1703. The Mode of the distribution
  1704. """
  1705. dim, df, scale = self._process_parameters(df, scale)
  1706. out = self._mode(dim, df, scale)
  1707. return _squeeze_output(out) if out is not None else out
  1708. def _var(self, dim, df, scale):
  1709. """Variance of the Wishart distribution.
  1710. Parameters
  1711. ----------
  1712. dim : int
  1713. Dimension of the scale matrix
  1714. %(_doc_default_callparams)s
  1715. Notes
  1716. -----
  1717. As this function does no argument checking, it should not be
  1718. called directly; use 'var' instead.
  1719. """
  1720. var = scale**2
  1721. diag = scale.diagonal() # 1 x dim array
  1722. var += np.outer(diag, diag)
  1723. var *= df
  1724. return var
  1725. def var(self, df, scale):
  1726. """Variance of the Wishart distribution.
  1727. Parameters
  1728. ----------
  1729. %(_doc_default_callparams)s
  1730. Returns
  1731. -------
  1732. var : float
  1733. The variance of the distribution
  1734. """
  1735. dim, df, scale = self._process_parameters(df, scale)
  1736. out = self._var(dim, df, scale)
  1737. return _squeeze_output(out)
  1738. def _standard_rvs(self, n, shape, dim, df, random_state):
  1739. """
  1740. Parameters
  1741. ----------
  1742. n : integer
  1743. Number of variates to generate
  1744. shape : iterable
  1745. Shape of the variates to generate
  1746. dim : int
  1747. Dimension of the scale matrix
  1748. df : int
  1749. Degrees of freedom
  1750. random_state : {None, int, `numpy.random.Generator`,
  1751. `numpy.random.RandomState`}, optional
  1752. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  1753. singleton is used.
  1754. If `seed` is an int, a new ``RandomState`` instance is used,
  1755. seeded with `seed`.
  1756. If `seed` is already a ``Generator`` or ``RandomState`` instance
  1757. then that instance is used.
  1758. Notes
  1759. -----
  1760. As this function does no argument checking, it should not be
  1761. called directly; use 'rvs' instead.
  1762. """
  1763. # Random normal variates for off-diagonal elements
  1764. n_tril = dim * (dim-1) // 2
  1765. covariances = random_state.normal(
  1766. size=n*n_tril).reshape(shape+(n_tril,))
  1767. # Random chi-square variates for diagonal elements
  1768. variances = (np.r_[[random_state.chisquare(df-(i+1)+1, size=n)**0.5
  1769. for i in range(dim)]].reshape((dim,) +
  1770. shape[::-1]).T)
  1771. # Create the A matri(ces) - lower triangular
  1772. A = np.zeros(shape + (dim, dim))
  1773. # Input the covariances
  1774. size_idx = tuple([slice(None, None, None)]*len(shape))
  1775. tril_idx = np.tril_indices(dim, k=-1)
  1776. A[size_idx + tril_idx] = covariances
  1777. # Input the variances
  1778. diag_idx = np.diag_indices(dim)
  1779. A[size_idx + diag_idx] = variances
  1780. return A
  1781. def _rvs(self, n, shape, dim, df, C, random_state):
  1782. """Draw random samples from a Wishart distribution.
  1783. Parameters
  1784. ----------
  1785. n : integer
  1786. Number of variates to generate
  1787. shape : iterable
  1788. Shape of the variates to generate
  1789. dim : int
  1790. Dimension of the scale matrix
  1791. df : int
  1792. Degrees of freedom
  1793. C : ndarray
  1794. Cholesky factorization of the scale matrix, lower triangular.
  1795. %(_doc_random_state)s
  1796. Notes
  1797. -----
  1798. As this function does no argument checking, it should not be
  1799. called directly; use 'rvs' instead.
  1800. """
  1801. random_state = self._get_random_state(random_state)
  1802. # Calculate the matrices A, which are actually lower triangular
  1803. # Cholesky factorizations of a matrix B such that B ~ W(df, I)
  1804. A = self._standard_rvs(n, shape, dim, df, random_state)
  1805. # Calculate SA = C A A' C', where SA ~ W(df, scale)
  1806. # Note: this is the product of a (lower) (lower) (lower)' (lower)'
  1807. # or, denoting B = AA', it is C B C' where C is the lower
  1808. # triangular Cholesky factorization of the scale matrix.
  1809. # this appears to conflict with the instructions in [1]_, which
  1810. # suggest that it should be D' B D where D is the lower
  1811. # triangular factorization of the scale matrix. However, it is
  1812. # meant to refer to the Bartlett (1933) representation of a
  1813. # Wishart random variate as L A A' L' where L is lower triangular
  1814. # so it appears that understanding D' to be upper triangular
  1815. # is either a typo in or misreading of [1]_.
  1816. for index in np.ndindex(shape):
  1817. CA = np.dot(C, A[index])
  1818. A[index] = np.dot(CA, CA.T)
  1819. return A
  1820. def rvs(self, df, scale, size=1, random_state=None):
  1821. """Draw random samples from a Wishart distribution.
  1822. Parameters
  1823. ----------
  1824. %(_doc_default_callparams)s
  1825. size : integer or iterable of integers, optional
  1826. Number of samples to draw (default 1).
  1827. %(_doc_random_state)s
  1828. Returns
  1829. -------
  1830. rvs : ndarray
  1831. Random variates of shape (`size`) + (`dim`, `dim), where `dim` is
  1832. the dimension of the scale matrix.
  1833. Notes
  1834. -----
  1835. %(_doc_callparams_note)s
  1836. """
  1837. n, shape = self._process_size(size)
  1838. dim, df, scale = self._process_parameters(df, scale)
  1839. # Cholesky decomposition of scale
  1840. C = scipy.linalg.cholesky(scale, lower=True)
  1841. out = self._rvs(n, shape, dim, df, C, random_state)
  1842. return _squeeze_output(out)
  1843. def _entropy(self, dim, df, log_det_scale):
  1844. """Compute the differential entropy of the Wishart.
  1845. Parameters
  1846. ----------
  1847. dim : int
  1848. Dimension of the scale matrix
  1849. df : int
  1850. Degrees of freedom
  1851. log_det_scale : float
  1852. Logarithm of the determinant of the scale matrix
  1853. Notes
  1854. -----
  1855. As this function does no argument checking, it should not be
  1856. called directly; use 'entropy' instead.
  1857. """
  1858. return (
  1859. 0.5 * (dim+1) * log_det_scale +
  1860. 0.5 * dim * (dim+1) * _LOG_2 +
  1861. multigammaln(0.5*df, dim) -
  1862. 0.5 * (df - dim - 1) * np.sum(
  1863. [psi(0.5*(df + 1 - (i+1))) for i in range(dim)]
  1864. ) +
  1865. 0.5 * df * dim
  1866. )
  1867. def entropy(self, df, scale):
  1868. """Compute the differential entropy of the Wishart.
  1869. Parameters
  1870. ----------
  1871. %(_doc_default_callparams)s
  1872. Returns
  1873. -------
  1874. h : scalar
  1875. Entropy of the Wishart distribution
  1876. Notes
  1877. -----
  1878. %(_doc_callparams_note)s
  1879. """
  1880. dim, df, scale = self._process_parameters(df, scale)
  1881. _, log_det_scale = self._cholesky_logdet(scale)
  1882. return self._entropy(dim, df, log_det_scale)
  1883. def _cholesky_logdet(self, scale):
  1884. """Compute Cholesky decomposition and determine (log(det(scale)).
  1885. Parameters
  1886. ----------
  1887. scale : ndarray
  1888. Scale matrix.
  1889. Returns
  1890. -------
  1891. c_decomp : ndarray
  1892. The Cholesky decomposition of `scale`.
  1893. logdet : scalar
  1894. The log of the determinant of `scale`.
  1895. Notes
  1896. -----
  1897. This computation of ``logdet`` is equivalent to
  1898. ``np.linalg.slogdet(scale)``. It is ~2x faster though.
  1899. """
  1900. c_decomp = scipy.linalg.cholesky(scale, lower=True)
  1901. logdet = 2 * np.sum(np.log(c_decomp.diagonal()))
  1902. return c_decomp, logdet
  1903. wishart = wishart_gen()
  1904. class wishart_frozen(multi_rv_frozen):
  1905. """Create a frozen Wishart distribution.
  1906. Parameters
  1907. ----------
  1908. df : array_like
  1909. Degrees of freedom of the distribution
  1910. scale : array_like
  1911. Scale matrix of the distribution
  1912. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  1913. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  1914. singleton is used.
  1915. If `seed` is an int, a new ``RandomState`` instance is used,
  1916. seeded with `seed`.
  1917. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  1918. that instance is used.
  1919. """
  1920. def __init__(self, df, scale, seed=None):
  1921. self._dist = wishart_gen(seed)
  1922. self.dim, self.df, self.scale = self._dist._process_parameters(
  1923. df, scale)
  1924. self.C, self.log_det_scale = self._dist._cholesky_logdet(self.scale)
  1925. def logpdf(self, x):
  1926. x = self._dist._process_quantiles(x, self.dim)
  1927. out = self._dist._logpdf(x, self.dim, self.df, self.scale,
  1928. self.log_det_scale, self.C)
  1929. return _squeeze_output(out)
  1930. def pdf(self, x):
  1931. return np.exp(self.logpdf(x))
  1932. def mean(self):
  1933. out = self._dist._mean(self.dim, self.df, self.scale)
  1934. return _squeeze_output(out)
  1935. def mode(self):
  1936. out = self._dist._mode(self.dim, self.df, self.scale)
  1937. return _squeeze_output(out) if out is not None else out
  1938. def var(self):
  1939. out = self._dist._var(self.dim, self.df, self.scale)
  1940. return _squeeze_output(out)
  1941. def rvs(self, size=1, random_state=None):
  1942. n, shape = self._dist._process_size(size)
  1943. out = self._dist._rvs(n, shape, self.dim, self.df,
  1944. self.C, random_state)
  1945. return _squeeze_output(out)
  1946. def entropy(self):
  1947. return self._dist._entropy(self.dim, self.df, self.log_det_scale)
  1948. # Set frozen generator docstrings from corresponding docstrings in
  1949. # Wishart and fill in default strings in class docstrings
  1950. for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs', 'entropy']:
  1951. method = wishart_gen.__dict__[name]
  1952. method_frozen = wishart_frozen.__dict__[name]
  1953. method_frozen.__doc__ = doccer.docformat(
  1954. method.__doc__, wishart_docdict_noparams)
  1955. method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
  1956. def _cho_inv_batch(a, check_finite=True):
  1957. """
  1958. Invert the matrices a_i, using a Cholesky factorization of A, where
  1959. a_i resides in the last two dimensions of a and the other indices describe
  1960. the index i.
  1961. Overwrites the data in a.
  1962. Parameters
  1963. ----------
  1964. a : array
  1965. Array of matrices to invert, where the matrices themselves are stored
  1966. in the last two dimensions.
  1967. check_finite : bool, optional
  1968. Whether to check that the input matrices contain only finite numbers.
  1969. Disabling may give a performance gain, but may result in problems
  1970. (crashes, non-termination) if the inputs do contain infinities or NaNs.
  1971. Returns
  1972. -------
  1973. x : array
  1974. Array of inverses of the matrices ``a_i``.
  1975. See Also
  1976. --------
  1977. scipy.linalg.cholesky : Cholesky factorization of a matrix
  1978. """
  1979. if check_finite:
  1980. a1 = asarray_chkfinite(a)
  1981. else:
  1982. a1 = asarray(a)
  1983. if len(a1.shape) < 2 or a1.shape[-2] != a1.shape[-1]:
  1984. raise ValueError('expected square matrix in last two dimensions')
  1985. potrf, potri = get_lapack_funcs(('potrf', 'potri'), (a1,))
  1986. triu_rows, triu_cols = np.triu_indices(a.shape[-2], k=1)
  1987. for index in np.ndindex(a1.shape[:-2]):
  1988. # Cholesky decomposition
  1989. a1[index], info = potrf(a1[index], lower=True, overwrite_a=False,
  1990. clean=False)
  1991. if info > 0:
  1992. raise LinAlgError("%d-th leading minor not positive definite"
  1993. % info)
  1994. if info < 0:
  1995. raise ValueError('illegal value in %d-th argument of internal'
  1996. ' potrf' % -info)
  1997. # Inversion
  1998. a1[index], info = potri(a1[index], lower=True, overwrite_c=False)
  1999. if info > 0:
  2000. raise LinAlgError("the inverse could not be computed")
  2001. if info < 0:
  2002. raise ValueError('illegal value in %d-th argument of internal'
  2003. ' potrf' % -info)
  2004. # Make symmetric (dpotri only fills in the lower triangle)
  2005. a1[index][triu_rows, triu_cols] = a1[index][triu_cols, triu_rows]
  2006. return a1
  2007. class invwishart_gen(wishart_gen):
  2008. r"""An inverse Wishart random variable.
  2009. The `df` keyword specifies the degrees of freedom. The `scale` keyword
  2010. specifies the scale matrix, which must be symmetric and positive definite.
  2011. In this context, the scale matrix is often interpreted in terms of a
  2012. multivariate normal covariance matrix.
  2013. Methods
  2014. -------
  2015. pdf(x, df, scale)
  2016. Probability density function.
  2017. logpdf(x, df, scale)
  2018. Log of the probability density function.
  2019. rvs(df, scale, size=1, random_state=None)
  2020. Draw random samples from an inverse Wishart distribution.
  2021. Parameters
  2022. ----------
  2023. %(_doc_default_callparams)s
  2024. %(_doc_random_state)s
  2025. Raises
  2026. ------
  2027. scipy.linalg.LinAlgError
  2028. If the scale matrix `scale` is not positive definite.
  2029. See Also
  2030. --------
  2031. wishart
  2032. Notes
  2033. -----
  2034. %(_doc_callparams_note)s
  2035. The scale matrix `scale` must be a symmetric positive definite
  2036. matrix. Singular matrices, including the symmetric positive semi-definite
  2037. case, are not supported. Symmetry is not checked; only the lower triangular
  2038. portion is used.
  2039. The inverse Wishart distribution is often denoted
  2040. .. math::
  2041. W_p^{-1}(\nu, \Psi)
  2042. where :math:`\nu` is the degrees of freedom and :math:`\Psi` is the
  2043. :math:`p \times p` scale matrix.
  2044. The probability density function for `invwishart` has support over positive
  2045. definite matrices :math:`S`; if :math:`S \sim W^{-1}_p(\nu, \Sigma)`,
  2046. then its PDF is given by:
  2047. .. math::
  2048. f(S) = \frac{|\Sigma|^\frac{\nu}{2}}{2^{ \frac{\nu p}{2} }
  2049. |S|^{\frac{\nu + p + 1}{2}} \Gamma_p \left(\frac{\nu}{2} \right)}
  2050. \exp\left( -tr(\Sigma S^{-1}) / 2 \right)
  2051. If :math:`S \sim W_p^{-1}(\nu, \Psi)` (inverse Wishart) then
  2052. :math:`S^{-1} \sim W_p(\nu, \Psi^{-1})` (Wishart).
  2053. If the scale matrix is 1-dimensional and equal to one, then the inverse
  2054. Wishart distribution :math:`W_1(\nu, 1)` collapses to the
  2055. inverse Gamma distribution with parameters shape = :math:`\frac{\nu}{2}`
  2056. and scale = :math:`\frac{1}{2}`.
  2057. .. versionadded:: 0.16.0
  2058. References
  2059. ----------
  2060. .. [1] M.L. Eaton, "Multivariate Statistics: A Vector Space Approach",
  2061. Wiley, 1983.
  2062. .. [2] M.C. Jones, "Generating Inverse Wishart Matrices", Communications
  2063. in Statistics - Simulation and Computation, vol. 14.2, pp.511-514,
  2064. 1985.
  2065. Examples
  2066. --------
  2067. >>> import numpy as np
  2068. >>> import matplotlib.pyplot as plt
  2069. >>> from scipy.stats import invwishart, invgamma
  2070. >>> x = np.linspace(0.01, 1, 100)
  2071. >>> iw = invwishart.pdf(x, df=6, scale=1)
  2072. >>> iw[:3]
  2073. array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
  2074. >>> ig = invgamma.pdf(x, 6/2., scale=1./2)
  2075. >>> ig[:3]
  2076. array([ 1.20546865e-15, 5.42497807e-06, 4.45813929e-03])
  2077. >>> plt.plot(x, iw)
  2078. >>> plt.show()
  2079. The input quantiles can be any shape of array, as long as the last
  2080. axis labels the components.
  2081. Alternatively, the object may be called (as a function) to fix the degrees
  2082. of freedom and scale parameters, returning a "frozen" inverse Wishart
  2083. random variable:
  2084. >>> rv = invwishart(df=1, scale=1)
  2085. >>> # Frozen object with the same methods but holding the given
  2086. >>> # degrees of freedom and scale fixed.
  2087. """
  2088. def __init__(self, seed=None):
  2089. super().__init__(seed)
  2090. self.__doc__ = doccer.docformat(self.__doc__, wishart_docdict_params)
  2091. def __call__(self, df=None, scale=None, seed=None):
  2092. """Create a frozen inverse Wishart distribution.
  2093. See `invwishart_frozen` for more information.
  2094. """
  2095. return invwishart_frozen(df, scale, seed)
  2096. def _logpdf(self, x, dim, df, scale, log_det_scale):
  2097. """Log of the inverse Wishart probability density function.
  2098. Parameters
  2099. ----------
  2100. x : ndarray
  2101. Points at which to evaluate the log of the probability
  2102. density function.
  2103. dim : int
  2104. Dimension of the scale matrix
  2105. df : int
  2106. Degrees of freedom
  2107. scale : ndarray
  2108. Scale matrix
  2109. log_det_scale : float
  2110. Logarithm of the determinant of the scale matrix
  2111. Notes
  2112. -----
  2113. As this function does no argument checking, it should not be
  2114. called directly; use 'logpdf' instead.
  2115. """
  2116. log_det_x = np.empty(x.shape[-1])
  2117. x_inv = np.copy(x).T
  2118. if dim > 1:
  2119. _cho_inv_batch(x_inv) # works in-place
  2120. else:
  2121. x_inv = 1./x_inv
  2122. tr_scale_x_inv = np.empty(x.shape[-1])
  2123. for i in range(x.shape[-1]):
  2124. C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True)
  2125. log_det_x[i] = 2 * np.sum(np.log(C.diagonal()))
  2126. tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace()
  2127. # Log PDF
  2128. out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -
  2129. (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -
  2130. multigammaln(0.5*df, dim))
  2131. return out
  2132. def logpdf(self, x, df, scale):
  2133. """Log of the inverse Wishart probability density function.
  2134. Parameters
  2135. ----------
  2136. x : array_like
  2137. Quantiles, with the last axis of `x` denoting the components.
  2138. Each quantile must be a symmetric positive definite matrix.
  2139. %(_doc_default_callparams)s
  2140. Returns
  2141. -------
  2142. pdf : ndarray
  2143. Log of the probability density function evaluated at `x`
  2144. Notes
  2145. -----
  2146. %(_doc_callparams_note)s
  2147. """
  2148. dim, df, scale = self._process_parameters(df, scale)
  2149. x = self._process_quantiles(x, dim)
  2150. _, log_det_scale = self._cholesky_logdet(scale)
  2151. out = self._logpdf(x, dim, df, scale, log_det_scale)
  2152. return _squeeze_output(out)
  2153. def pdf(self, x, df, scale):
  2154. """Inverse Wishart probability density function.
  2155. Parameters
  2156. ----------
  2157. x : array_like
  2158. Quantiles, with the last axis of `x` denoting the components.
  2159. Each quantile must be a symmetric positive definite matrix.
  2160. %(_doc_default_callparams)s
  2161. Returns
  2162. -------
  2163. pdf : ndarray
  2164. Probability density function evaluated at `x`
  2165. Notes
  2166. -----
  2167. %(_doc_callparams_note)s
  2168. """
  2169. return np.exp(self.logpdf(x, df, scale))
  2170. def _mean(self, dim, df, scale):
  2171. """Mean of the inverse Wishart distribution.
  2172. Parameters
  2173. ----------
  2174. dim : int
  2175. Dimension of the scale matrix
  2176. %(_doc_default_callparams)s
  2177. Notes
  2178. -----
  2179. As this function does no argument checking, it should not be
  2180. called directly; use 'mean' instead.
  2181. """
  2182. if df > dim + 1:
  2183. out = scale / (df - dim - 1)
  2184. else:
  2185. out = None
  2186. return out
  2187. def mean(self, df, scale):
  2188. """Mean of the inverse Wishart distribution.
  2189. Only valid if the degrees of freedom are greater than the dimension of
  2190. the scale matrix plus one.
  2191. Parameters
  2192. ----------
  2193. %(_doc_default_callparams)s
  2194. Returns
  2195. -------
  2196. mean : float or None
  2197. The mean of the distribution
  2198. """
  2199. dim, df, scale = self._process_parameters(df, scale)
  2200. out = self._mean(dim, df, scale)
  2201. return _squeeze_output(out) if out is not None else out
  2202. def _mode(self, dim, df, scale):
  2203. """Mode of the inverse Wishart distribution.
  2204. Parameters
  2205. ----------
  2206. dim : int
  2207. Dimension of the scale matrix
  2208. %(_doc_default_callparams)s
  2209. Notes
  2210. -----
  2211. As this function does no argument checking, it should not be
  2212. called directly; use 'mode' instead.
  2213. """
  2214. return scale / (df + dim + 1)
  2215. def mode(self, df, scale):
  2216. """Mode of the inverse Wishart distribution.
  2217. Parameters
  2218. ----------
  2219. %(_doc_default_callparams)s
  2220. Returns
  2221. -------
  2222. mode : float
  2223. The Mode of the distribution
  2224. """
  2225. dim, df, scale = self._process_parameters(df, scale)
  2226. out = self._mode(dim, df, scale)
  2227. return _squeeze_output(out)
  2228. def _var(self, dim, df, scale):
  2229. """Variance of the inverse Wishart distribution.
  2230. Parameters
  2231. ----------
  2232. dim : int
  2233. Dimension of the scale matrix
  2234. %(_doc_default_callparams)s
  2235. Notes
  2236. -----
  2237. As this function does no argument checking, it should not be
  2238. called directly; use 'var' instead.
  2239. """
  2240. if df > dim + 3:
  2241. var = (df - dim + 1) * scale**2
  2242. diag = scale.diagonal() # 1 x dim array
  2243. var += (df - dim - 1) * np.outer(diag, diag)
  2244. var /= (df - dim) * (df - dim - 1)**2 * (df - dim - 3)
  2245. else:
  2246. var = None
  2247. return var
  2248. def var(self, df, scale):
  2249. """Variance of the inverse Wishart distribution.
  2250. Only valid if the degrees of freedom are greater than the dimension of
  2251. the scale matrix plus three.
  2252. Parameters
  2253. ----------
  2254. %(_doc_default_callparams)s
  2255. Returns
  2256. -------
  2257. var : float
  2258. The variance of the distribution
  2259. """
  2260. dim, df, scale = self._process_parameters(df, scale)
  2261. out = self._var(dim, df, scale)
  2262. return _squeeze_output(out) if out is not None else out
  2263. def _rvs(self, n, shape, dim, df, C, random_state):
  2264. """Draw random samples from an inverse Wishart distribution.
  2265. Parameters
  2266. ----------
  2267. n : integer
  2268. Number of variates to generate
  2269. shape : iterable
  2270. Shape of the variates to generate
  2271. dim : int
  2272. Dimension of the scale matrix
  2273. df : int
  2274. Degrees of freedom
  2275. C : ndarray
  2276. Cholesky factorization of the scale matrix, lower triagular.
  2277. %(_doc_random_state)s
  2278. Notes
  2279. -----
  2280. As this function does no argument checking, it should not be
  2281. called directly; use 'rvs' instead.
  2282. """
  2283. random_state = self._get_random_state(random_state)
  2284. # Get random draws A such that A ~ W(df, I)
  2285. A = super()._standard_rvs(n, shape, dim, df, random_state)
  2286. # Calculate SA = (CA)'^{-1} (CA)^{-1} ~ iW(df, scale)
  2287. eye = np.eye(dim)
  2288. trtrs = get_lapack_funcs(('trtrs'), (A,))
  2289. for index in np.ndindex(A.shape[:-2]):
  2290. # Calculate CA
  2291. CA = np.dot(C, A[index])
  2292. # Get (C A)^{-1} via triangular solver
  2293. if dim > 1:
  2294. CA, info = trtrs(CA, eye, lower=True)
  2295. if info > 0:
  2296. raise LinAlgError("Singular matrix.")
  2297. if info < 0:
  2298. raise ValueError('Illegal value in %d-th argument of'
  2299. ' internal trtrs' % -info)
  2300. else:
  2301. CA = 1. / CA
  2302. # Get SA
  2303. A[index] = np.dot(CA.T, CA)
  2304. return A
  2305. def rvs(self, df, scale, size=1, random_state=None):
  2306. """Draw random samples from an inverse Wishart distribution.
  2307. Parameters
  2308. ----------
  2309. %(_doc_default_callparams)s
  2310. size : integer or iterable of integers, optional
  2311. Number of samples to draw (default 1).
  2312. %(_doc_random_state)s
  2313. Returns
  2314. -------
  2315. rvs : ndarray
  2316. Random variates of shape (`size`) + (`dim`, `dim), where `dim` is
  2317. the dimension of the scale matrix.
  2318. Notes
  2319. -----
  2320. %(_doc_callparams_note)s
  2321. """
  2322. n, shape = self._process_size(size)
  2323. dim, df, scale = self._process_parameters(df, scale)
  2324. # Invert the scale
  2325. eye = np.eye(dim)
  2326. L, lower = scipy.linalg.cho_factor(scale, lower=True)
  2327. inv_scale = scipy.linalg.cho_solve((L, lower), eye)
  2328. # Cholesky decomposition of inverted scale
  2329. C = scipy.linalg.cholesky(inv_scale, lower=True)
  2330. out = self._rvs(n, shape, dim, df, C, random_state)
  2331. return _squeeze_output(out)
  2332. def entropy(self):
  2333. # Need to find reference for inverse Wishart entropy
  2334. raise AttributeError
  2335. invwishart = invwishart_gen()
  2336. class invwishart_frozen(multi_rv_frozen):
  2337. def __init__(self, df, scale, seed=None):
  2338. """Create a frozen inverse Wishart distribution.
  2339. Parameters
  2340. ----------
  2341. df : array_like
  2342. Degrees of freedom of the distribution
  2343. scale : array_like
  2344. Scale matrix of the distribution
  2345. seed : {None, int, `numpy.random.Generator`}, optional
  2346. If `seed` is None the `numpy.random.Generator` singleton is used.
  2347. If `seed` is an int, a new ``Generator`` instance is used,
  2348. seeded with `seed`.
  2349. If `seed` is already a ``Generator`` instance then that instance is
  2350. used.
  2351. """
  2352. self._dist = invwishart_gen(seed)
  2353. self.dim, self.df, self.scale = self._dist._process_parameters(
  2354. df, scale
  2355. )
  2356. # Get the determinant via Cholesky factorization
  2357. C, lower = scipy.linalg.cho_factor(self.scale, lower=True)
  2358. self.log_det_scale = 2 * np.sum(np.log(C.diagonal()))
  2359. # Get the inverse using the Cholesky factorization
  2360. eye = np.eye(self.dim)
  2361. self.inv_scale = scipy.linalg.cho_solve((C, lower), eye)
  2362. # Get the Cholesky factorization of the inverse scale
  2363. self.C = scipy.linalg.cholesky(self.inv_scale, lower=True)
  2364. def logpdf(self, x):
  2365. x = self._dist._process_quantiles(x, self.dim)
  2366. out = self._dist._logpdf(x, self.dim, self.df, self.scale,
  2367. self.log_det_scale)
  2368. return _squeeze_output(out)
  2369. def pdf(self, x):
  2370. return np.exp(self.logpdf(x))
  2371. def mean(self):
  2372. out = self._dist._mean(self.dim, self.df, self.scale)
  2373. return _squeeze_output(out) if out is not None else out
  2374. def mode(self):
  2375. out = self._dist._mode(self.dim, self.df, self.scale)
  2376. return _squeeze_output(out)
  2377. def var(self):
  2378. out = self._dist._var(self.dim, self.df, self.scale)
  2379. return _squeeze_output(out) if out is not None else out
  2380. def rvs(self, size=1, random_state=None):
  2381. n, shape = self._dist._process_size(size)
  2382. out = self._dist._rvs(n, shape, self.dim, self.df,
  2383. self.C, random_state)
  2384. return _squeeze_output(out)
  2385. def entropy(self):
  2386. # Need to find reference for inverse Wishart entropy
  2387. raise AttributeError
  2388. # Set frozen generator docstrings from corresponding docstrings in
  2389. # inverse Wishart and fill in default strings in class docstrings
  2390. for name in ['logpdf', 'pdf', 'mean', 'mode', 'var', 'rvs']:
  2391. method = invwishart_gen.__dict__[name]
  2392. method_frozen = wishart_frozen.__dict__[name]
  2393. method_frozen.__doc__ = doccer.docformat(
  2394. method.__doc__, wishart_docdict_noparams)
  2395. method.__doc__ = doccer.docformat(method.__doc__, wishart_docdict_params)
  2396. _multinomial_doc_default_callparams = """\
  2397. n : int
  2398. Number of trials
  2399. p : array_like
  2400. Probability of a trial falling into each category; should sum to 1
  2401. """
  2402. _multinomial_doc_callparams_note = """\
  2403. `n` should be a positive integer. Each element of `p` should be in the
  2404. interval :math:`[0,1]` and the elements should sum to 1. If they do not sum to
  2405. 1, the last element of the `p` array is not used and is replaced with the
  2406. remaining probability left over from the earlier elements.
  2407. """
  2408. _multinomial_doc_frozen_callparams = ""
  2409. _multinomial_doc_frozen_callparams_note = """\
  2410. See class definition for a detailed description of parameters."""
  2411. multinomial_docdict_params = {
  2412. '_doc_default_callparams': _multinomial_doc_default_callparams,
  2413. '_doc_callparams_note': _multinomial_doc_callparams_note,
  2414. '_doc_random_state': _doc_random_state
  2415. }
  2416. multinomial_docdict_noparams = {
  2417. '_doc_default_callparams': _multinomial_doc_frozen_callparams,
  2418. '_doc_callparams_note': _multinomial_doc_frozen_callparams_note,
  2419. '_doc_random_state': _doc_random_state
  2420. }
  2421. class multinomial_gen(multi_rv_generic):
  2422. r"""A multinomial random variable.
  2423. Methods
  2424. -------
  2425. pmf(x, n, p)
  2426. Probability mass function.
  2427. logpmf(x, n, p)
  2428. Log of the probability mass function.
  2429. rvs(n, p, size=1, random_state=None)
  2430. Draw random samples from a multinomial distribution.
  2431. entropy(n, p)
  2432. Compute the entropy of the multinomial distribution.
  2433. cov(n, p)
  2434. Compute the covariance matrix of the multinomial distribution.
  2435. Parameters
  2436. ----------
  2437. %(_doc_default_callparams)s
  2438. %(_doc_random_state)s
  2439. Notes
  2440. -----
  2441. %(_doc_callparams_note)s
  2442. The probability mass function for `multinomial` is
  2443. .. math::
  2444. f(x) = \frac{n!}{x_1! \cdots x_k!} p_1^{x_1} \cdots p_k^{x_k},
  2445. supported on :math:`x=(x_1, \ldots, x_k)` where each :math:`x_i` is a
  2446. nonnegative integer and their sum is :math:`n`.
  2447. .. versionadded:: 0.19.0
  2448. Examples
  2449. --------
  2450. >>> from scipy.stats import multinomial
  2451. >>> rv = multinomial(8, [0.3, 0.2, 0.5])
  2452. >>> rv.pmf([1, 3, 4])
  2453. 0.042000000000000072
  2454. The multinomial distribution for :math:`k=2` is identical to the
  2455. corresponding binomial distribution (tiny numerical differences
  2456. notwithstanding):
  2457. >>> from scipy.stats import binom
  2458. >>> multinomial.pmf([3, 4], n=7, p=[0.4, 0.6])
  2459. 0.29030399999999973
  2460. >>> binom.pmf(3, 7, 0.4)
  2461. 0.29030400000000012
  2462. The functions ``pmf``, ``logpmf``, ``entropy``, and ``cov`` support
  2463. broadcasting, under the convention that the vector parameters (``x`` and
  2464. ``p``) are interpreted as if each row along the last axis is a single
  2465. object. For instance:
  2466. >>> multinomial.pmf([[3, 4], [3, 5]], n=[7, 8], p=[.3, .7])
  2467. array([0.2268945, 0.25412184])
  2468. Here, ``x.shape == (2, 2)``, ``n.shape == (2,)``, and ``p.shape == (2,)``,
  2469. but following the rules mentioned above they behave as if the rows
  2470. ``[3, 4]`` and ``[3, 5]`` in ``x`` and ``[.3, .7]`` in ``p`` were a single
  2471. object, and as if we had ``x.shape = (2,)``, ``n.shape = (2,)``, and
  2472. ``p.shape = ()``. To obtain the individual elements without broadcasting,
  2473. we would do this:
  2474. >>> multinomial.pmf([3, 4], n=7, p=[.3, .7])
  2475. 0.2268945
  2476. >>> multinomial.pmf([3, 5], 8, p=[.3, .7])
  2477. 0.25412184
  2478. This broadcasting also works for ``cov``, where the output objects are
  2479. square matrices of size ``p.shape[-1]``. For example:
  2480. >>> multinomial.cov([4, 5], [[.3, .7], [.4, .6]])
  2481. array([[[ 0.84, -0.84],
  2482. [-0.84, 0.84]],
  2483. [[ 1.2 , -1.2 ],
  2484. [-1.2 , 1.2 ]]])
  2485. In this example, ``n.shape == (2,)`` and ``p.shape == (2, 2)``, and
  2486. following the rules above, these broadcast as if ``p.shape == (2,)``.
  2487. Thus the result should also be of shape ``(2,)``, but since each output is
  2488. a :math:`2 \times 2` matrix, the result in fact has shape ``(2, 2, 2)``,
  2489. where ``result[0]`` is equal to ``multinomial.cov(n=4, p=[.3, .7])`` and
  2490. ``result[1]`` is equal to ``multinomial.cov(n=5, p=[.4, .6])``.
  2491. Alternatively, the object may be called (as a function) to fix the `n` and
  2492. `p` parameters, returning a "frozen" multinomial random variable:
  2493. >>> rv = multinomial(n=7, p=[.3, .7])
  2494. >>> # Frozen object with the same methods but holding the given
  2495. >>> # degrees of freedom and scale fixed.
  2496. See also
  2497. --------
  2498. scipy.stats.binom : The binomial distribution.
  2499. numpy.random.Generator.multinomial : Sampling from the multinomial distribution.
  2500. scipy.stats.multivariate_hypergeom :
  2501. The multivariate hypergeometric distribution.
  2502. """ # noqa: E501
  2503. def __init__(self, seed=None):
  2504. super().__init__(seed)
  2505. self.__doc__ = \
  2506. doccer.docformat(self.__doc__, multinomial_docdict_params)
  2507. def __call__(self, n, p, seed=None):
  2508. """Create a frozen multinomial distribution.
  2509. See `multinomial_frozen` for more information.
  2510. """
  2511. return multinomial_frozen(n, p, seed)
  2512. def _process_parameters(self, n, p, eps=1e-15):
  2513. """Returns: n_, p_, npcond.
  2514. n_ and p_ are arrays of the correct shape; npcond is a boolean array
  2515. flagging values out of the domain.
  2516. """
  2517. p = np.array(p, dtype=np.float64, copy=True)
  2518. p_adjusted = 1. - p[..., :-1].sum(axis=-1)
  2519. i_adjusted = np.abs(p_adjusted) > eps
  2520. p[i_adjusted, -1] = p_adjusted[i_adjusted]
  2521. # true for bad p
  2522. pcond = np.any(p < 0, axis=-1)
  2523. pcond |= np.any(p > 1, axis=-1)
  2524. n = np.array(n, dtype=np.int_, copy=True)
  2525. # true for bad n
  2526. ncond = n <= 0
  2527. return n, p, ncond | pcond
  2528. def _process_quantiles(self, x, n, p):
  2529. """Returns: x_, xcond.
  2530. x_ is an int array; xcond is a boolean array flagging values out of the
  2531. domain.
  2532. """
  2533. xx = np.asarray(x, dtype=np.int_)
  2534. if xx.ndim == 0:
  2535. raise ValueError("x must be an array.")
  2536. if xx.size != 0 and not xx.shape[-1] == p.shape[-1]:
  2537. raise ValueError("Size of each quantile should be size of p: "
  2538. "received %d, but expected %d." %
  2539. (xx.shape[-1], p.shape[-1]))
  2540. # true for x out of the domain
  2541. cond = np.any(xx != x, axis=-1)
  2542. cond |= np.any(xx < 0, axis=-1)
  2543. cond = cond | (np.sum(xx, axis=-1) != n)
  2544. return xx, cond
  2545. def _checkresult(self, result, cond, bad_value):
  2546. result = np.asarray(result)
  2547. if cond.ndim != 0:
  2548. result[cond] = bad_value
  2549. elif cond:
  2550. if result.ndim == 0:
  2551. return bad_value
  2552. result[...] = bad_value
  2553. return result
  2554. def _logpmf(self, x, n, p):
  2555. return gammaln(n+1) + np.sum(xlogy(x, p) - gammaln(x+1), axis=-1)
  2556. def logpmf(self, x, n, p):
  2557. """Log of the Multinomial probability mass function.
  2558. Parameters
  2559. ----------
  2560. x : array_like
  2561. Quantiles, with the last axis of `x` denoting the components.
  2562. %(_doc_default_callparams)s
  2563. Returns
  2564. -------
  2565. logpmf : ndarray or scalar
  2566. Log of the probability mass function evaluated at `x`
  2567. Notes
  2568. -----
  2569. %(_doc_callparams_note)s
  2570. """
  2571. n, p, npcond = self._process_parameters(n, p)
  2572. x, xcond = self._process_quantiles(x, n, p)
  2573. result = self._logpmf(x, n, p)
  2574. # replace values for which x was out of the domain; broadcast
  2575. # xcond to the right shape
  2576. xcond_ = xcond | np.zeros(npcond.shape, dtype=np.bool_)
  2577. result = self._checkresult(result, xcond_, np.NINF)
  2578. # replace values bad for n or p; broadcast npcond to the right shape
  2579. npcond_ = npcond | np.zeros(xcond.shape, dtype=np.bool_)
  2580. return self._checkresult(result, npcond_, np.NAN)
  2581. def pmf(self, x, n, p):
  2582. """Multinomial probability mass function.
  2583. Parameters
  2584. ----------
  2585. x : array_like
  2586. Quantiles, with the last axis of `x` denoting the components.
  2587. %(_doc_default_callparams)s
  2588. Returns
  2589. -------
  2590. pmf : ndarray or scalar
  2591. Probability density function evaluated at `x`
  2592. Notes
  2593. -----
  2594. %(_doc_callparams_note)s
  2595. """
  2596. return np.exp(self.logpmf(x, n, p))
  2597. def mean(self, n, p):
  2598. """Mean of the Multinomial distribution.
  2599. Parameters
  2600. ----------
  2601. %(_doc_default_callparams)s
  2602. Returns
  2603. -------
  2604. mean : float
  2605. The mean of the distribution
  2606. """
  2607. n, p, npcond = self._process_parameters(n, p)
  2608. result = n[..., np.newaxis]*p
  2609. return self._checkresult(result, npcond, np.NAN)
  2610. def cov(self, n, p):
  2611. """Covariance matrix of the multinomial distribution.
  2612. Parameters
  2613. ----------
  2614. %(_doc_default_callparams)s
  2615. Returns
  2616. -------
  2617. cov : ndarray
  2618. The covariance matrix of the distribution
  2619. """
  2620. n, p, npcond = self._process_parameters(n, p)
  2621. nn = n[..., np.newaxis, np.newaxis]
  2622. result = nn * np.einsum('...j,...k->...jk', -p, p)
  2623. # change the diagonal
  2624. for i in range(p.shape[-1]):
  2625. result[..., i, i] += n*p[..., i]
  2626. return self._checkresult(result, npcond, np.nan)
  2627. def entropy(self, n, p):
  2628. r"""Compute the entropy of the multinomial distribution.
  2629. The entropy is computed using this expression:
  2630. .. math::
  2631. f(x) = - \log n! - n\sum_{i=1}^k p_i \log p_i +
  2632. \sum_{i=1}^k \sum_{x=0}^n \binom n x p_i^x(1-p_i)^{n-x} \log x!
  2633. Parameters
  2634. ----------
  2635. %(_doc_default_callparams)s
  2636. Returns
  2637. -------
  2638. h : scalar
  2639. Entropy of the Multinomial distribution
  2640. Notes
  2641. -----
  2642. %(_doc_callparams_note)s
  2643. """
  2644. n, p, npcond = self._process_parameters(n, p)
  2645. x = np.r_[1:np.max(n)+1]
  2646. term1 = n*np.sum(entr(p), axis=-1)
  2647. term1 -= gammaln(n+1)
  2648. n = n[..., np.newaxis]
  2649. new_axes_needed = max(p.ndim, n.ndim) - x.ndim + 1
  2650. x.shape += (1,)*new_axes_needed
  2651. term2 = np.sum(binom.pmf(x, n, p)*gammaln(x+1),
  2652. axis=(-1, -1-new_axes_needed))
  2653. return self._checkresult(term1 + term2, npcond, np.nan)
  2654. def rvs(self, n, p, size=None, random_state=None):
  2655. """Draw random samples from a Multinomial distribution.
  2656. Parameters
  2657. ----------
  2658. %(_doc_default_callparams)s
  2659. size : integer or iterable of integers, optional
  2660. Number of samples to draw (default 1).
  2661. %(_doc_random_state)s
  2662. Returns
  2663. -------
  2664. rvs : ndarray or scalar
  2665. Random variates of shape (`size`, `len(p)`)
  2666. Notes
  2667. -----
  2668. %(_doc_callparams_note)s
  2669. """
  2670. n, p, npcond = self._process_parameters(n, p)
  2671. random_state = self._get_random_state(random_state)
  2672. return random_state.multinomial(n, p, size)
  2673. multinomial = multinomial_gen()
  2674. class multinomial_frozen(multi_rv_frozen):
  2675. r"""Create a frozen Multinomial distribution.
  2676. Parameters
  2677. ----------
  2678. n : int
  2679. number of trials
  2680. p: array_like
  2681. probability of a trial falling into each category; should sum to 1
  2682. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  2683. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2684. singleton is used.
  2685. If `seed` is an int, a new ``RandomState`` instance is used,
  2686. seeded with `seed`.
  2687. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  2688. that instance is used.
  2689. """
  2690. def __init__(self, n, p, seed=None):
  2691. self._dist = multinomial_gen(seed)
  2692. self.n, self.p, self.npcond = self._dist._process_parameters(n, p)
  2693. # monkey patch self._dist
  2694. def _process_parameters(n, p):
  2695. return self.n, self.p, self.npcond
  2696. self._dist._process_parameters = _process_parameters
  2697. def logpmf(self, x):
  2698. return self._dist.logpmf(x, self.n, self.p)
  2699. def pmf(self, x):
  2700. return self._dist.pmf(x, self.n, self.p)
  2701. def mean(self):
  2702. return self._dist.mean(self.n, self.p)
  2703. def cov(self):
  2704. return self._dist.cov(self.n, self.p)
  2705. def entropy(self):
  2706. return self._dist.entropy(self.n, self.p)
  2707. def rvs(self, size=1, random_state=None):
  2708. return self._dist.rvs(self.n, self.p, size, random_state)
  2709. # Set frozen generator docstrings from corresponding docstrings in
  2710. # multinomial and fill in default strings in class docstrings
  2711. for name in ['logpmf', 'pmf', 'mean', 'cov', 'rvs']:
  2712. method = multinomial_gen.__dict__[name]
  2713. method_frozen = multinomial_frozen.__dict__[name]
  2714. method_frozen.__doc__ = doccer.docformat(
  2715. method.__doc__, multinomial_docdict_noparams)
  2716. method.__doc__ = doccer.docformat(method.__doc__,
  2717. multinomial_docdict_params)
  2718. class special_ortho_group_gen(multi_rv_generic):
  2719. r"""A Special Orthogonal matrix (SO(N)) random variable.
  2720. Return a random rotation matrix, drawn from the Haar distribution
  2721. (the only uniform distribution on SO(N)) with a determinant of +1.
  2722. The `dim` keyword specifies the dimension N.
  2723. Methods
  2724. -------
  2725. rvs(dim=None, size=1, random_state=None)
  2726. Draw random samples from SO(N).
  2727. Parameters
  2728. ----------
  2729. dim : scalar
  2730. Dimension of matrices
  2731. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  2732. Used for drawing random variates.
  2733. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  2734. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  2735. with seed.
  2736. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  2737. then that object is used.
  2738. Default is `None`.
  2739. Notes
  2740. -----
  2741. This class is wrapping the random_rot code from the MDP Toolkit,
  2742. https://github.com/mdp-toolkit/mdp-toolkit
  2743. Return a random rotation matrix, drawn from the Haar distribution
  2744. (the only uniform distribution on SO(N)).
  2745. The algorithm is described in the paper
  2746. Stewart, G.W., "The efficient generation of random orthogonal
  2747. matrices with an application to condition estimators", SIAM Journal
  2748. on Numerical Analysis, 17(3), pp. 403-409, 1980.
  2749. For more information see
  2750. https://en.wikipedia.org/wiki/Orthogonal_matrix#Randomization
  2751. See also the similar `ortho_group`. For a random rotation in three
  2752. dimensions, see `scipy.spatial.transform.Rotation.random`.
  2753. Examples
  2754. --------
  2755. >>> import numpy as np
  2756. >>> from scipy.stats import special_ortho_group
  2757. >>> x = special_ortho_group.rvs(3)
  2758. >>> np.dot(x, x.T)
  2759. array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
  2760. [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
  2761. [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
  2762. >>> import scipy.linalg
  2763. >>> scipy.linalg.det(x)
  2764. 1.0
  2765. This generates one random matrix from SO(3). It is orthogonal and
  2766. has a determinant of 1.
  2767. Alternatively, the object may be called (as a function) to fix the `dim`
  2768. parameter, returning a "frozen" special_ortho_group random variable:
  2769. >>> rv = special_ortho_group(5)
  2770. >>> # Frozen object with the same methods but holding the
  2771. >>> # dimension parameter fixed.
  2772. See Also
  2773. --------
  2774. ortho_group, scipy.spatial.transform.Rotation.random
  2775. """
  2776. def __init__(self, seed=None):
  2777. super().__init__(seed)
  2778. self.__doc__ = doccer.docformat(self.__doc__)
  2779. def __call__(self, dim=None, seed=None):
  2780. """Create a frozen SO(N) distribution.
  2781. See `special_ortho_group_frozen` for more information.
  2782. """
  2783. return special_ortho_group_frozen(dim, seed=seed)
  2784. def _process_parameters(self, dim):
  2785. """Dimension N must be specified; it cannot be inferred."""
  2786. if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
  2787. raise ValueError("""Dimension of rotation must be specified,
  2788. and must be a scalar greater than 1.""")
  2789. return dim
  2790. def rvs(self, dim, size=1, random_state=None):
  2791. """Draw random samples from SO(N).
  2792. Parameters
  2793. ----------
  2794. dim : integer
  2795. Dimension of rotation space (N).
  2796. size : integer, optional
  2797. Number of samples to draw (default 1).
  2798. Returns
  2799. -------
  2800. rvs : ndarray or scalar
  2801. Random size N-dimensional matrices, dimension (size, dim, dim)
  2802. """
  2803. random_state = self._get_random_state(random_state)
  2804. size = int(size)
  2805. size = (size,) if size > 1 else ()
  2806. dim = self._process_parameters(dim)
  2807. # H represents a (dim, dim) matrix, while D represents the diagonal of
  2808. # a (dim, dim) diagonal matrix. The algorithm that follows is
  2809. # broadcasted on the leading shape in `size` to vectorize along
  2810. # samples.
  2811. H = np.empty(size + (dim, dim))
  2812. H[..., :, :] = np.eye(dim)
  2813. D = np.empty(size + (dim,))
  2814. for n in range(dim-1):
  2815. # x is a vector with length dim-n, xrow and xcol are views of it as
  2816. # a row vector and column vector respectively. It's important they
  2817. # are views and not copies because we are going to modify x
  2818. # in-place.
  2819. x = random_state.normal(size=size + (dim-n,))
  2820. xrow = x[..., None, :]
  2821. xcol = x[..., :, None]
  2822. # This is the squared norm of x, without vectorization it would be
  2823. # dot(x, x), to have proper broadcasting we use matmul and squeeze
  2824. # out (convert to scalar) the resulting 1x1 matrix
  2825. norm2 = np.matmul(xrow, xcol).squeeze((-2, -1))
  2826. x0 = x[..., 0].copy()
  2827. D[..., n] = np.where(x0 != 0, np.sign(x0), 1)
  2828. x[..., 0] += D[..., n]*np.sqrt(norm2)
  2829. # In renormalizing x we have to append an additional axis with
  2830. # [..., None] to broadcast the scalar against the vector x
  2831. x /= np.sqrt((norm2 - x0**2 + x[..., 0]**2) / 2.)[..., None]
  2832. # Householder transformation, without vectorization the RHS can be
  2833. # written as outer(H @ x, x) (apart from the slicing)
  2834. H[..., :, n:] -= np.matmul(H[..., :, n:], xcol) * xrow
  2835. D[..., -1] = (-1)**(dim-1)*D[..., :-1].prod(axis=-1)
  2836. # Without vectorization this could be written as H = diag(D) @ H,
  2837. # left-multiplication by a diagonal matrix amounts to multiplying each
  2838. # row of H by an element of the diagonal, so we add a dummy axis for
  2839. # the column index
  2840. H *= D[..., :, None]
  2841. return H
  2842. special_ortho_group = special_ortho_group_gen()
  2843. class special_ortho_group_frozen(multi_rv_frozen):
  2844. def __init__(self, dim=None, seed=None):
  2845. """Create a frozen SO(N) distribution.
  2846. Parameters
  2847. ----------
  2848. dim : scalar
  2849. Dimension of matrices
  2850. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  2851. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2852. singleton is used.
  2853. If `seed` is an int, a new ``RandomState`` instance is used,
  2854. seeded with `seed`.
  2855. If `seed` is already a ``Generator`` or ``RandomState`` instance
  2856. then that instance is used.
  2857. Examples
  2858. --------
  2859. >>> from scipy.stats import special_ortho_group
  2860. >>> g = special_ortho_group(5)
  2861. >>> x = g.rvs()
  2862. """
  2863. self._dist = special_ortho_group_gen(seed)
  2864. self.dim = self._dist._process_parameters(dim)
  2865. def rvs(self, size=1, random_state=None):
  2866. return self._dist.rvs(self.dim, size, random_state)
  2867. class ortho_group_gen(multi_rv_generic):
  2868. r"""An Orthogonal matrix (O(N)) random variable.
  2869. Return a random orthogonal matrix, drawn from the O(N) Haar
  2870. distribution (the only uniform distribution on O(N)).
  2871. The `dim` keyword specifies the dimension N.
  2872. Methods
  2873. -------
  2874. rvs(dim=None, size=1, random_state=None)
  2875. Draw random samples from O(N).
  2876. Parameters
  2877. ----------
  2878. dim : scalar
  2879. Dimension of matrices
  2880. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  2881. Used for drawing random variates.
  2882. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  2883. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  2884. with seed.
  2885. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  2886. then that object is used.
  2887. Default is `None`.
  2888. Notes
  2889. -----
  2890. This class is closely related to `special_ortho_group`.
  2891. Some care is taken to avoid numerical error, as per the paper by Mezzadri.
  2892. References
  2893. ----------
  2894. .. [1] F. Mezzadri, "How to generate random matrices from the classical
  2895. compact groups", :arXiv:`math-ph/0609050v2`.
  2896. Examples
  2897. --------
  2898. >>> import numpy as np
  2899. >>> from scipy.stats import ortho_group
  2900. >>> x = ortho_group.rvs(3)
  2901. >>> np.dot(x, x.T)
  2902. array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
  2903. [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
  2904. [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
  2905. >>> import scipy.linalg
  2906. >>> np.fabs(scipy.linalg.det(x))
  2907. 1.0
  2908. This generates one random matrix from O(3). It is orthogonal and
  2909. has a determinant of +1 or -1.
  2910. Alternatively, the object may be called (as a function) to fix the `dim`
  2911. parameter, returning a "frozen" ortho_group random variable:
  2912. >>> rv = ortho_group(5)
  2913. >>> # Frozen object with the same methods but holding the
  2914. >>> # dimension parameter fixed.
  2915. See Also
  2916. --------
  2917. special_ortho_group
  2918. """
  2919. def __init__(self, seed=None):
  2920. super().__init__(seed)
  2921. self.__doc__ = doccer.docformat(self.__doc__)
  2922. def __call__(self, dim=None, seed=None):
  2923. """Create a frozen O(N) distribution.
  2924. See `ortho_group_frozen` for more information.
  2925. """
  2926. return ortho_group_frozen(dim, seed=seed)
  2927. def _process_parameters(self, dim):
  2928. """Dimension N must be specified; it cannot be inferred."""
  2929. if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
  2930. raise ValueError("Dimension of rotation must be specified,"
  2931. "and must be a scalar greater than 1.")
  2932. return dim
  2933. def rvs(self, dim, size=1, random_state=None):
  2934. """Draw random samples from O(N).
  2935. Parameters
  2936. ----------
  2937. dim : integer
  2938. Dimension of rotation space (N).
  2939. size : integer, optional
  2940. Number of samples to draw (default 1).
  2941. Returns
  2942. -------
  2943. rvs : ndarray or scalar
  2944. Random size N-dimensional matrices, dimension (size, dim, dim)
  2945. """
  2946. random_state = self._get_random_state(random_state)
  2947. size = int(size)
  2948. if size > 1 and NumpyVersion(np.__version__) < '1.22.0':
  2949. return np.array([self.rvs(dim, size=1, random_state=random_state)
  2950. for i in range(size)])
  2951. dim = self._process_parameters(dim)
  2952. size = (size,) if size > 1 else ()
  2953. z = random_state.normal(size=size + (dim, dim))
  2954. q, r = np.linalg.qr(z)
  2955. # The last two dimensions are the rows and columns of R matrices.
  2956. # Extract the diagonals. Note that this eliminates a dimension.
  2957. d = r.diagonal(offset=0, axis1=-2, axis2=-1)
  2958. # Add back a dimension for proper broadcasting: we're dividing
  2959. # each row of each R matrix by the diagonal of the R matrix.
  2960. q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
  2961. return q
  2962. ortho_group = ortho_group_gen()
  2963. class ortho_group_frozen(multi_rv_frozen):
  2964. def __init__(self, dim=None, seed=None):
  2965. """Create a frozen O(N) distribution.
  2966. Parameters
  2967. ----------
  2968. dim : scalar
  2969. Dimension of matrices
  2970. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  2971. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2972. singleton is used.
  2973. If `seed` is an int, a new ``RandomState`` instance is used,
  2974. seeded with `seed`.
  2975. If `seed` is already a ``Generator`` or ``RandomState`` instance
  2976. then that instance is used.
  2977. Examples
  2978. --------
  2979. >>> from scipy.stats import ortho_group
  2980. >>> g = ortho_group(5)
  2981. >>> x = g.rvs()
  2982. """
  2983. self._dist = ortho_group_gen(seed)
  2984. self.dim = self._dist._process_parameters(dim)
  2985. def rvs(self, size=1, random_state=None):
  2986. return self._dist.rvs(self.dim, size, random_state)
  2987. class random_correlation_gen(multi_rv_generic):
  2988. r"""A random correlation matrix.
  2989. Return a random correlation matrix, given a vector of eigenvalues.
  2990. The `eigs` keyword specifies the eigenvalues of the correlation matrix,
  2991. and implies the dimension.
  2992. Methods
  2993. -------
  2994. rvs(eigs=None, random_state=None)
  2995. Draw random correlation matrices, all with eigenvalues eigs.
  2996. Parameters
  2997. ----------
  2998. eigs : 1d ndarray
  2999. Eigenvalues of correlation matrix
  3000. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3001. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3002. singleton is used.
  3003. If `seed` is an int, a new ``RandomState`` instance is used,
  3004. seeded with `seed`.
  3005. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3006. then that instance is used.
  3007. tol : float, optional
  3008. Tolerance for input parameter checks
  3009. diag_tol : float, optional
  3010. Tolerance for deviation of the diagonal of the resulting
  3011. matrix. Default: 1e-7
  3012. Raises
  3013. ------
  3014. RuntimeError
  3015. Floating point error prevented generating a valid correlation
  3016. matrix.
  3017. Returns
  3018. -------
  3019. rvs : ndarray or scalar
  3020. Random size N-dimensional matrices, dimension (size, dim, dim),
  3021. each having eigenvalues eigs.
  3022. Notes
  3023. -----
  3024. Generates a random correlation matrix following a numerically stable
  3025. algorithm spelled out by Davies & Higham. This algorithm uses a single O(N)
  3026. similarity transformation to construct a symmetric positive semi-definite
  3027. matrix, and applies a series of Givens rotations to scale it to have ones
  3028. on the diagonal.
  3029. References
  3030. ----------
  3031. .. [1] Davies, Philip I; Higham, Nicholas J; "Numerically stable generation
  3032. of correlation matrices and their factors", BIT 2000, Vol. 40,
  3033. No. 4, pp. 640 651
  3034. Examples
  3035. --------
  3036. >>> import numpy as np
  3037. >>> from scipy.stats import random_correlation
  3038. >>> rng = np.random.default_rng()
  3039. >>> x = random_correlation.rvs((.5, .8, 1.2, 1.5), random_state=rng)
  3040. >>> x
  3041. array([[ 1. , -0.02423399, 0.03130519, 0.4946965 ],
  3042. [-0.02423399, 1. , 0.20334736, 0.04039817],
  3043. [ 0.03130519, 0.20334736, 1. , 0.02694275],
  3044. [ 0.4946965 , 0.04039817, 0.02694275, 1. ]])
  3045. >>> import scipy.linalg
  3046. >>> e, v = scipy.linalg.eigh(x)
  3047. >>> e
  3048. array([ 0.5, 0.8, 1.2, 1.5])
  3049. """
  3050. def __init__(self, seed=None):
  3051. super().__init__(seed)
  3052. self.__doc__ = doccer.docformat(self.__doc__)
  3053. def __call__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
  3054. """Create a frozen random correlation matrix.
  3055. See `random_correlation_frozen` for more information.
  3056. """
  3057. return random_correlation_frozen(eigs, seed=seed, tol=tol,
  3058. diag_tol=diag_tol)
  3059. def _process_parameters(self, eigs, tol):
  3060. eigs = np.asarray(eigs, dtype=float)
  3061. dim = eigs.size
  3062. if eigs.ndim != 1 or eigs.shape[0] != dim or dim <= 1:
  3063. raise ValueError("Array 'eigs' must be a vector of length "
  3064. "greater than 1.")
  3065. if np.fabs(np.sum(eigs) - dim) > tol:
  3066. raise ValueError("Sum of eigenvalues must equal dimensionality.")
  3067. for x in eigs:
  3068. if x < -tol:
  3069. raise ValueError("All eigenvalues must be non-negative.")
  3070. return dim, eigs
  3071. def _givens_to_1(self, aii, ajj, aij):
  3072. """Computes a 2x2 Givens matrix to put 1's on the diagonal.
  3073. The input matrix is a 2x2 symmetric matrix M = [ aii aij ; aij ajj ].
  3074. The output matrix g is a 2x2 anti-symmetric matrix of the form
  3075. [ c s ; -s c ]; the elements c and s are returned.
  3076. Applying the output matrix to the input matrix (as b=g.T M g)
  3077. results in a matrix with bii=1, provided tr(M) - det(M) >= 1
  3078. and floating point issues do not occur. Otherwise, some other
  3079. valid rotation is returned. When tr(M)==2, also bjj=1.
  3080. """
  3081. aiid = aii - 1.
  3082. ajjd = ajj - 1.
  3083. if ajjd == 0:
  3084. # ajj==1, so swap aii and ajj to avoid division by zero
  3085. return 0., 1.
  3086. dd = math.sqrt(max(aij**2 - aiid*ajjd, 0))
  3087. # The choice of t should be chosen to avoid cancellation [1]
  3088. t = (aij + math.copysign(dd, aij)) / ajjd
  3089. c = 1. / math.sqrt(1. + t*t)
  3090. if c == 0:
  3091. # Underflow
  3092. s = 1.0
  3093. else:
  3094. s = c*t
  3095. return c, s
  3096. def _to_corr(self, m):
  3097. """
  3098. Given a psd matrix m, rotate to put one's on the diagonal, turning it
  3099. into a correlation matrix. This also requires the trace equal the
  3100. dimensionality. Note: modifies input matrix
  3101. """
  3102. # Check requirements for in-place Givens
  3103. if not (m.flags.c_contiguous and m.dtype == np.float64 and
  3104. m.shape[0] == m.shape[1]):
  3105. raise ValueError()
  3106. d = m.shape[0]
  3107. for i in range(d-1):
  3108. if m[i, i] == 1:
  3109. continue
  3110. elif m[i, i] > 1:
  3111. for j in range(i+1, d):
  3112. if m[j, j] < 1:
  3113. break
  3114. else:
  3115. for j in range(i+1, d):
  3116. if m[j, j] > 1:
  3117. break
  3118. c, s = self._givens_to_1(m[i, i], m[j, j], m[i, j])
  3119. # Use BLAS to apply Givens rotations in-place. Equivalent to:
  3120. # g = np.eye(d)
  3121. # g[i, i] = g[j,j] = c
  3122. # g[j, i] = -s; g[i, j] = s
  3123. # m = np.dot(g.T, np.dot(m, g))
  3124. mv = m.ravel()
  3125. drot(mv, mv, c, -s, n=d,
  3126. offx=i*d, incx=1, offy=j*d, incy=1,
  3127. overwrite_x=True, overwrite_y=True)
  3128. drot(mv, mv, c, -s, n=d,
  3129. offx=i, incx=d, offy=j, incy=d,
  3130. overwrite_x=True, overwrite_y=True)
  3131. return m
  3132. def rvs(self, eigs, random_state=None, tol=1e-13, diag_tol=1e-7):
  3133. """Draw random correlation matrices.
  3134. Parameters
  3135. ----------
  3136. eigs : 1d ndarray
  3137. Eigenvalues of correlation matrix
  3138. tol : float, optional
  3139. Tolerance for input parameter checks
  3140. diag_tol : float, optional
  3141. Tolerance for deviation of the diagonal of the resulting
  3142. matrix. Default: 1e-7
  3143. Raises
  3144. ------
  3145. RuntimeError
  3146. Floating point error prevented generating a valid correlation
  3147. matrix.
  3148. Returns
  3149. -------
  3150. rvs : ndarray or scalar
  3151. Random size N-dimensional matrices, dimension (size, dim, dim),
  3152. each having eigenvalues eigs.
  3153. """
  3154. dim, eigs = self._process_parameters(eigs, tol=tol)
  3155. random_state = self._get_random_state(random_state)
  3156. m = ortho_group.rvs(dim, random_state=random_state)
  3157. m = np.dot(np.dot(m, np.diag(eigs)), m.T) # Set the trace of m
  3158. m = self._to_corr(m) # Carefully rotate to unit diagonal
  3159. # Check diagonal
  3160. if abs(m.diagonal() - 1).max() > diag_tol:
  3161. raise RuntimeError("Failed to generate a valid correlation matrix")
  3162. return m
  3163. random_correlation = random_correlation_gen()
  3164. class random_correlation_frozen(multi_rv_frozen):
  3165. def __init__(self, eigs, seed=None, tol=1e-13, diag_tol=1e-7):
  3166. """Create a frozen random correlation matrix distribution.
  3167. Parameters
  3168. ----------
  3169. eigs : 1d ndarray
  3170. Eigenvalues of correlation matrix
  3171. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3172. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3173. singleton is used.
  3174. If `seed` is an int, a new ``RandomState`` instance is used,
  3175. seeded with `seed`.
  3176. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3177. then that instance is used.
  3178. tol : float, optional
  3179. Tolerance for input parameter checks
  3180. diag_tol : float, optional
  3181. Tolerance for deviation of the diagonal of the resulting
  3182. matrix. Default: 1e-7
  3183. Raises
  3184. ------
  3185. RuntimeError
  3186. Floating point error prevented generating a valid correlation
  3187. matrix.
  3188. Returns
  3189. -------
  3190. rvs : ndarray or scalar
  3191. Random size N-dimensional matrices, dimension (size, dim, dim),
  3192. each having eigenvalues eigs.
  3193. """
  3194. self._dist = random_correlation_gen(seed)
  3195. self.tol = tol
  3196. self.diag_tol = diag_tol
  3197. _, self.eigs = self._dist._process_parameters(eigs, tol=self.tol)
  3198. def rvs(self, random_state=None):
  3199. return self._dist.rvs(self.eigs, random_state=random_state,
  3200. tol=self.tol, diag_tol=self.diag_tol)
  3201. class unitary_group_gen(multi_rv_generic):
  3202. r"""A matrix-valued U(N) random variable.
  3203. Return a random unitary matrix.
  3204. The `dim` keyword specifies the dimension N.
  3205. Methods
  3206. -------
  3207. rvs(dim=None, size=1, random_state=None)
  3208. Draw random samples from U(N).
  3209. Parameters
  3210. ----------
  3211. dim : scalar
  3212. Dimension of matrices
  3213. seed : {None, int, np.random.RandomState, np.random.Generator}, optional
  3214. Used for drawing random variates.
  3215. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  3216. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  3217. with seed.
  3218. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  3219. then that object is used.
  3220. Default is `None`.
  3221. Notes
  3222. -----
  3223. This class is similar to `ortho_group`.
  3224. References
  3225. ----------
  3226. .. [1] F. Mezzadri, "How to generate random matrices from the classical
  3227. compact groups", :arXiv:`math-ph/0609050v2`.
  3228. Examples
  3229. --------
  3230. >>> import numpy as np
  3231. >>> from scipy.stats import unitary_group
  3232. >>> x = unitary_group.rvs(3)
  3233. >>> np.dot(x, x.conj().T)
  3234. array([[ 1.00000000e+00, 1.13231364e-17, -2.86852790e-16],
  3235. [ 1.13231364e-17, 1.00000000e+00, -1.46845020e-16],
  3236. [ -2.86852790e-16, -1.46845020e-16, 1.00000000e+00]])
  3237. This generates one random matrix from U(3). The dot product confirms that
  3238. it is unitary up to machine precision.
  3239. Alternatively, the object may be called (as a function) to fix the `dim`
  3240. parameter, return a "frozen" unitary_group random variable:
  3241. >>> rv = unitary_group(5)
  3242. See Also
  3243. --------
  3244. ortho_group
  3245. """
  3246. def __init__(self, seed=None):
  3247. super().__init__(seed)
  3248. self.__doc__ = doccer.docformat(self.__doc__)
  3249. def __call__(self, dim=None, seed=None):
  3250. """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
  3251. See `unitary_group_frozen` for more information.
  3252. """
  3253. return unitary_group_frozen(dim, seed=seed)
  3254. def _process_parameters(self, dim):
  3255. """Dimension N must be specified; it cannot be inferred."""
  3256. if dim is None or not np.isscalar(dim) or dim <= 1 or dim != int(dim):
  3257. raise ValueError("Dimension of rotation must be specified,"
  3258. "and must be a scalar greater than 1.")
  3259. return dim
  3260. def rvs(self, dim, size=1, random_state=None):
  3261. """Draw random samples from U(N).
  3262. Parameters
  3263. ----------
  3264. dim : integer
  3265. Dimension of space (N).
  3266. size : integer, optional
  3267. Number of samples to draw (default 1).
  3268. Returns
  3269. -------
  3270. rvs : ndarray or scalar
  3271. Random size N-dimensional matrices, dimension (size, dim, dim)
  3272. """
  3273. random_state = self._get_random_state(random_state)
  3274. size = int(size)
  3275. if size > 1 and NumpyVersion(np.__version__) < '1.22.0':
  3276. return np.array([self.rvs(dim, size=1, random_state=random_state)
  3277. for i in range(size)])
  3278. dim = self._process_parameters(dim)
  3279. size = (size,) if size > 1 else ()
  3280. z = 1/math.sqrt(2)*(random_state.normal(size=size + (dim, dim)) +
  3281. 1j*random_state.normal(size=size + (dim, dim)))
  3282. q, r = np.linalg.qr(z)
  3283. # The last two dimensions are the rows and columns of R matrices.
  3284. # Extract the diagonals. Note that this eliminates a dimension.
  3285. d = r.diagonal(offset=0, axis1=-2, axis2=-1)
  3286. # Add back a dimension for proper broadcasting: we're dividing
  3287. # each row of each R matrix by the diagonal of the R matrix.
  3288. q *= (d/abs(d))[..., np.newaxis, :] # to broadcast properly
  3289. return q
  3290. unitary_group = unitary_group_gen()
  3291. class unitary_group_frozen(multi_rv_frozen):
  3292. def __init__(self, dim=None, seed=None):
  3293. """Create a frozen (U(N)) n-dimensional unitary matrix distribution.
  3294. Parameters
  3295. ----------
  3296. dim : scalar
  3297. Dimension of matrices
  3298. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  3299. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  3300. singleton is used.
  3301. If `seed` is an int, a new ``RandomState`` instance is used,
  3302. seeded with `seed`.
  3303. If `seed` is already a ``Generator`` or ``RandomState`` instance
  3304. then that instance is used.
  3305. Examples
  3306. --------
  3307. >>> from scipy.stats import unitary_group
  3308. >>> x = unitary_group(3)
  3309. >>> x.rvs()
  3310. """
  3311. self._dist = unitary_group_gen(seed)
  3312. self.dim = self._dist._process_parameters(dim)
  3313. def rvs(self, size=1, random_state=None):
  3314. return self._dist.rvs(self.dim, size, random_state)
  3315. _mvt_doc_default_callparams = """\
  3316. loc : array_like, optional
  3317. Location of the distribution. (default ``0``)
  3318. shape : array_like, optional
  3319. Positive semidefinite matrix of the distribution. (default ``1``)
  3320. df : float, optional
  3321. Degrees of freedom of the distribution; must be greater than zero.
  3322. If ``np.inf`` then results are multivariate normal. The default is ``1``.
  3323. allow_singular : bool, optional
  3324. Whether to allow a singular matrix. (default ``False``)
  3325. """
  3326. _mvt_doc_callparams_note = """\
  3327. Setting the parameter `loc` to ``None`` is equivalent to having `loc`
  3328. be the zero-vector. The parameter `shape` can be a scalar, in which case
  3329. the shape matrix is the identity times that value, a vector of
  3330. diagonal entries for the shape matrix, or a two-dimensional array_like.
  3331. """
  3332. _mvt_doc_frozen_callparams_note = """\
  3333. See class definition for a detailed description of parameters."""
  3334. mvt_docdict_params = {
  3335. '_mvt_doc_default_callparams': _mvt_doc_default_callparams,
  3336. '_mvt_doc_callparams_note': _mvt_doc_callparams_note,
  3337. '_doc_random_state': _doc_random_state
  3338. }
  3339. mvt_docdict_noparams = {
  3340. '_mvt_doc_default_callparams': "",
  3341. '_mvt_doc_callparams_note': _mvt_doc_frozen_callparams_note,
  3342. '_doc_random_state': _doc_random_state
  3343. }
  3344. class multivariate_t_gen(multi_rv_generic):
  3345. r"""A multivariate t-distributed random variable.
  3346. The `loc` parameter specifies the location. The `shape` parameter specifies
  3347. the positive semidefinite shape matrix. The `df` parameter specifies the
  3348. degrees of freedom.
  3349. In addition to calling the methods below, the object itself may be called
  3350. as a function to fix the location, shape matrix, and degrees of freedom
  3351. parameters, returning a "frozen" multivariate t-distribution random.
  3352. Methods
  3353. -------
  3354. pdf(x, loc=None, shape=1, df=1, allow_singular=False)
  3355. Probability density function.
  3356. logpdf(x, loc=None, shape=1, df=1, allow_singular=False)
  3357. Log of the probability density function.
  3358. rvs(loc=None, shape=1, df=1, size=1, random_state=None)
  3359. Draw random samples from a multivariate t-distribution.
  3360. Parameters
  3361. ----------
  3362. %(_mvt_doc_default_callparams)s
  3363. %(_doc_random_state)s
  3364. Notes
  3365. -----
  3366. %(_mvt_doc_callparams_note)s
  3367. The matrix `shape` must be a (symmetric) positive semidefinite matrix. The
  3368. determinant and inverse of `shape` are computed as the pseudo-determinant
  3369. and pseudo-inverse, respectively, so that `shape` does not need to have
  3370. full rank.
  3371. The probability density function for `multivariate_t` is
  3372. .. math::
  3373. f(x) = \frac{\Gamma(\nu + p)/2}{\Gamma(\nu/2)\nu^{p/2}\pi^{p/2}|\Sigma|^{1/2}}
  3374. \left[1 + \frac{1}{\nu} (\mathbf{x} - \boldsymbol{\mu})^{\top}
  3375. \boldsymbol{\Sigma}^{-1}
  3376. (\mathbf{x} - \boldsymbol{\mu}) \right]^{-(\nu + p)/2},
  3377. where :math:`p` is the dimension of :math:`\mathbf{x}`,
  3378. :math:`\boldsymbol{\mu}` is the :math:`p`-dimensional location,
  3379. :math:`\boldsymbol{\Sigma}` the :math:`p \times p`-dimensional shape
  3380. matrix, and :math:`\nu` is the degrees of freedom.
  3381. .. versionadded:: 1.6.0
  3382. Examples
  3383. --------
  3384. The object may be called (as a function) to fix the `loc`, `shape`,
  3385. `df`, and `allow_singular` parameters, returning a "frozen"
  3386. multivariate_t random variable:
  3387. >>> import numpy as np
  3388. >>> from scipy.stats import multivariate_t
  3389. >>> rv = multivariate_t([1.0, -0.5], [[2.1, 0.3], [0.3, 1.5]], df=2)
  3390. >>> # Frozen object with the same methods but holding the given location,
  3391. >>> # scale, and degrees of freedom fixed.
  3392. Create a contour plot of the PDF.
  3393. >>> import matplotlib.pyplot as plt
  3394. >>> x, y = np.mgrid[-1:3:.01, -2:1.5:.01]
  3395. >>> pos = np.dstack((x, y))
  3396. >>> fig, ax = plt.subplots(1, 1)
  3397. >>> ax.set_aspect('equal')
  3398. >>> plt.contourf(x, y, rv.pdf(pos))
  3399. """
  3400. def __init__(self, seed=None):
  3401. """Initialize a multivariate t-distributed random variable.
  3402. Parameters
  3403. ----------
  3404. seed : Random state.
  3405. """
  3406. super().__init__(seed)
  3407. self.__doc__ = doccer.docformat(self.__doc__, mvt_docdict_params)
  3408. self._random_state = check_random_state(seed)
  3409. def __call__(self, loc=None, shape=1, df=1, allow_singular=False,
  3410. seed=None):
  3411. """Create a frozen multivariate t-distribution.
  3412. See `multivariate_t_frozen` for parameters.
  3413. """
  3414. if df == np.inf:
  3415. return multivariate_normal_frozen(mean=loc, cov=shape,
  3416. allow_singular=allow_singular,
  3417. seed=seed)
  3418. return multivariate_t_frozen(loc=loc, shape=shape, df=df,
  3419. allow_singular=allow_singular, seed=seed)
  3420. def pdf(self, x, loc=None, shape=1, df=1, allow_singular=False):
  3421. """Multivariate t-distribution probability density function.
  3422. Parameters
  3423. ----------
  3424. x : array_like
  3425. Points at which to evaluate the probability density function.
  3426. %(_mvt_doc_default_callparams)s
  3427. Returns
  3428. -------
  3429. pdf : Probability density function evaluated at `x`.
  3430. Examples
  3431. --------
  3432. >>> from scipy.stats import multivariate_t
  3433. >>> x = [0.4, 5]
  3434. >>> loc = [0, 1]
  3435. >>> shape = [[1, 0.1], [0.1, 1]]
  3436. >>> df = 7
  3437. >>> multivariate_t.pdf(x, loc, shape, df)
  3438. array([0.00075713])
  3439. """
  3440. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  3441. x = self._process_quantiles(x, dim)
  3442. shape_info = _PSD(shape, allow_singular=allow_singular)
  3443. logpdf = self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df,
  3444. dim, shape_info.rank)
  3445. return np.exp(logpdf)
  3446. def logpdf(self, x, loc=None, shape=1, df=1):
  3447. """Log of the multivariate t-distribution probability density function.
  3448. Parameters
  3449. ----------
  3450. x : array_like
  3451. Points at which to evaluate the log of the probability density
  3452. function.
  3453. %(_mvt_doc_default_callparams)s
  3454. Returns
  3455. -------
  3456. logpdf : Log of the probability density function evaluated at `x`.
  3457. Examples
  3458. --------
  3459. >>> from scipy.stats import multivariate_t
  3460. >>> x = [0.4, 5]
  3461. >>> loc = [0, 1]
  3462. >>> shape = [[1, 0.1], [0.1, 1]]
  3463. >>> df = 7
  3464. >>> multivariate_t.logpdf(x, loc, shape, df)
  3465. array([-7.1859802])
  3466. See Also
  3467. --------
  3468. pdf : Probability density function.
  3469. """
  3470. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  3471. x = self._process_quantiles(x, dim)
  3472. shape_info = _PSD(shape)
  3473. return self._logpdf(x, loc, shape_info.U, shape_info.log_pdet, df, dim,
  3474. shape_info.rank)
  3475. def _logpdf(self, x, loc, prec_U, log_pdet, df, dim, rank):
  3476. """Utility method `pdf`, `logpdf` for parameters.
  3477. Parameters
  3478. ----------
  3479. x : ndarray
  3480. Points at which to evaluate the log of the probability density
  3481. function.
  3482. loc : ndarray
  3483. Location of the distribution.
  3484. prec_U : ndarray
  3485. A decomposition such that `np.dot(prec_U, prec_U.T)` is the inverse
  3486. of the shape matrix.
  3487. log_pdet : float
  3488. Logarithm of the determinant of the shape matrix.
  3489. df : float
  3490. Degrees of freedom of the distribution.
  3491. dim : int
  3492. Dimension of the quantiles x.
  3493. rank : int
  3494. Rank of the shape matrix.
  3495. Notes
  3496. -----
  3497. As this function does no argument checking, it should not be called
  3498. directly; use 'logpdf' instead.
  3499. """
  3500. if df == np.inf:
  3501. return multivariate_normal._logpdf(x, loc, prec_U, log_pdet, rank)
  3502. dev = x - loc
  3503. maha = np.square(np.dot(dev, prec_U)).sum(axis=-1)
  3504. t = 0.5 * (df + dim)
  3505. A = gammaln(t)
  3506. B = gammaln(0.5 * df)
  3507. C = dim/2. * np.log(df * np.pi)
  3508. D = 0.5 * log_pdet
  3509. E = -t * np.log(1 + (1./df) * maha)
  3510. return _squeeze_output(A - B - C - D + E)
  3511. def rvs(self, loc=None, shape=1, df=1, size=1, random_state=None):
  3512. """Draw random samples from a multivariate t-distribution.
  3513. Parameters
  3514. ----------
  3515. %(_mvt_doc_default_callparams)s
  3516. size : integer, optional
  3517. Number of samples to draw (default 1).
  3518. %(_doc_random_state)s
  3519. Returns
  3520. -------
  3521. rvs : ndarray or scalar
  3522. Random variates of size (`size`, `P`), where `P` is the
  3523. dimension of the random variable.
  3524. Examples
  3525. --------
  3526. >>> from scipy.stats import multivariate_t
  3527. >>> x = [0.4, 5]
  3528. >>> loc = [0, 1]
  3529. >>> shape = [[1, 0.1], [0.1, 1]]
  3530. >>> df = 7
  3531. >>> multivariate_t.rvs(loc, shape, df)
  3532. array([[0.93477495, 3.00408716]])
  3533. """
  3534. # For implementation details, see equation (3):
  3535. #
  3536. # Hofert, "On Sampling from the Multivariatet Distribution", 2013
  3537. # http://rjournal.github.io/archive/2013-2/hofert.pdf
  3538. #
  3539. dim, loc, shape, df = self._process_parameters(loc, shape, df)
  3540. if random_state is not None:
  3541. rng = check_random_state(random_state)
  3542. else:
  3543. rng = self._random_state
  3544. if np.isinf(df):
  3545. x = np.ones(size)
  3546. else:
  3547. x = rng.chisquare(df, size=size) / df
  3548. z = rng.multivariate_normal(np.zeros(dim), shape, size=size)
  3549. samples = loc + z / np.sqrt(x)[..., None]
  3550. return _squeeze_output(samples)
  3551. def _process_quantiles(self, x, dim):
  3552. """
  3553. Adjust quantiles array so that last axis labels the components of
  3554. each data point.
  3555. """
  3556. x = np.asarray(x, dtype=float)
  3557. if x.ndim == 0:
  3558. x = x[np.newaxis]
  3559. elif x.ndim == 1:
  3560. if dim == 1:
  3561. x = x[:, np.newaxis]
  3562. else:
  3563. x = x[np.newaxis, :]
  3564. return x
  3565. def _process_parameters(self, loc, shape, df):
  3566. """
  3567. Infer dimensionality from location array and shape matrix, handle
  3568. defaults, and ensure compatible dimensions.
  3569. """
  3570. if loc is None and shape is None:
  3571. loc = np.asarray(0, dtype=float)
  3572. shape = np.asarray(1, dtype=float)
  3573. dim = 1
  3574. elif loc is None:
  3575. shape = np.asarray(shape, dtype=float)
  3576. if shape.ndim < 2:
  3577. dim = 1
  3578. else:
  3579. dim = shape.shape[0]
  3580. loc = np.zeros(dim)
  3581. elif shape is None:
  3582. loc = np.asarray(loc, dtype=float)
  3583. dim = loc.size
  3584. shape = np.eye(dim)
  3585. else:
  3586. shape = np.asarray(shape, dtype=float)
  3587. loc = np.asarray(loc, dtype=float)
  3588. dim = loc.size
  3589. if dim == 1:
  3590. loc = loc.reshape(1)
  3591. shape = shape.reshape(1, 1)
  3592. if loc.ndim != 1 or loc.shape[0] != dim:
  3593. raise ValueError("Array 'loc' must be a vector of length %d." %
  3594. dim)
  3595. if shape.ndim == 0:
  3596. shape = shape * np.eye(dim)
  3597. elif shape.ndim == 1:
  3598. shape = np.diag(shape)
  3599. elif shape.ndim == 2 and shape.shape != (dim, dim):
  3600. rows, cols = shape.shape
  3601. if rows != cols:
  3602. msg = ("Array 'cov' must be square if it is two dimensional,"
  3603. " but cov.shape = %s." % str(shape.shape))
  3604. else:
  3605. msg = ("Dimension mismatch: array 'cov' is of shape %s,"
  3606. " but 'loc' is a vector of length %d.")
  3607. msg = msg % (str(shape.shape), len(loc))
  3608. raise ValueError(msg)
  3609. elif shape.ndim > 2:
  3610. raise ValueError("Array 'cov' must be at most two-dimensional,"
  3611. " but cov.ndim = %d" % shape.ndim)
  3612. # Process degrees of freedom.
  3613. if df is None:
  3614. df = 1
  3615. elif df <= 0:
  3616. raise ValueError("'df' must be greater than zero.")
  3617. elif np.isnan(df):
  3618. raise ValueError("'df' is 'nan' but must be greater than zero or 'np.inf'.")
  3619. return dim, loc, shape, df
  3620. class multivariate_t_frozen(multi_rv_frozen):
  3621. def __init__(self, loc=None, shape=1, df=1, allow_singular=False,
  3622. seed=None):
  3623. """Create a frozen multivariate t distribution.
  3624. Parameters
  3625. ----------
  3626. %(_mvt_doc_default_callparams)s
  3627. Examples
  3628. --------
  3629. >>> import numpy as np
  3630. >>> loc = np.zeros(3)
  3631. >>> shape = np.eye(3)
  3632. >>> df = 10
  3633. >>> dist = multivariate_t(loc, shape, df)
  3634. >>> dist.rvs()
  3635. array([[ 0.81412036, -1.53612361, 0.42199647]])
  3636. >>> dist.pdf([1, 1, 1])
  3637. array([0.01237803])
  3638. """
  3639. self._dist = multivariate_t_gen(seed)
  3640. dim, loc, shape, df = self._dist._process_parameters(loc, shape, df)
  3641. self.dim, self.loc, self.shape, self.df = dim, loc, shape, df
  3642. self.shape_info = _PSD(shape, allow_singular=allow_singular)
  3643. def logpdf(self, x):
  3644. x = self._dist._process_quantiles(x, self.dim)
  3645. U = self.shape_info.U
  3646. log_pdet = self.shape_info.log_pdet
  3647. return self._dist._logpdf(x, self.loc, U, log_pdet, self.df, self.dim,
  3648. self.shape_info.rank)
  3649. def pdf(self, x):
  3650. return np.exp(self.logpdf(x))
  3651. def rvs(self, size=1, random_state=None):
  3652. return self._dist.rvs(loc=self.loc,
  3653. shape=self.shape,
  3654. df=self.df,
  3655. size=size,
  3656. random_state=random_state)
  3657. multivariate_t = multivariate_t_gen()
  3658. # Set frozen generator docstrings from corresponding docstrings in
  3659. # matrix_normal_gen and fill in default strings in class docstrings
  3660. for name in ['logpdf', 'pdf', 'rvs']:
  3661. method = multivariate_t_gen.__dict__[name]
  3662. method_frozen = multivariate_t_frozen.__dict__[name]
  3663. method_frozen.__doc__ = doccer.docformat(method.__doc__,
  3664. mvt_docdict_noparams)
  3665. method.__doc__ = doccer.docformat(method.__doc__, mvt_docdict_params)
  3666. _mhg_doc_default_callparams = """\
  3667. m : array_like
  3668. The number of each type of object in the population.
  3669. That is, :math:`m[i]` is the number of objects of
  3670. type :math:`i`.
  3671. n : array_like
  3672. The number of samples taken from the population.
  3673. """
  3674. _mhg_doc_callparams_note = """\
  3675. `m` must be an array of positive integers. If the quantile
  3676. :math:`i` contains values out of the range :math:`[0, m_i]`
  3677. where :math:`m_i` is the number of objects of type :math:`i`
  3678. in the population or if the parameters are inconsistent with one
  3679. another (e.g. ``x.sum() != n``), methods return the appropriate
  3680. value (e.g. ``0`` for ``pmf``). If `m` or `n` contain negative
  3681. values, the result will contain ``nan`` there.
  3682. """
  3683. _mhg_doc_frozen_callparams = ""
  3684. _mhg_doc_frozen_callparams_note = """\
  3685. See class definition for a detailed description of parameters."""
  3686. mhg_docdict_params = {
  3687. '_doc_default_callparams': _mhg_doc_default_callparams,
  3688. '_doc_callparams_note': _mhg_doc_callparams_note,
  3689. '_doc_random_state': _doc_random_state
  3690. }
  3691. mhg_docdict_noparams = {
  3692. '_doc_default_callparams': _mhg_doc_frozen_callparams,
  3693. '_doc_callparams_note': _mhg_doc_frozen_callparams_note,
  3694. '_doc_random_state': _doc_random_state
  3695. }
  3696. class multivariate_hypergeom_gen(multi_rv_generic):
  3697. r"""A multivariate hypergeometric random variable.
  3698. Methods
  3699. -------
  3700. pmf(x, m, n)
  3701. Probability mass function.
  3702. logpmf(x, m, n)
  3703. Log of the probability mass function.
  3704. rvs(m, n, size=1, random_state=None)
  3705. Draw random samples from a multivariate hypergeometric
  3706. distribution.
  3707. mean(m, n)
  3708. Mean of the multivariate hypergeometric distribution.
  3709. var(m, n)
  3710. Variance of the multivariate hypergeometric distribution.
  3711. cov(m, n)
  3712. Compute the covariance matrix of the multivariate
  3713. hypergeometric distribution.
  3714. Parameters
  3715. ----------
  3716. %(_doc_default_callparams)s
  3717. %(_doc_random_state)s
  3718. Notes
  3719. -----
  3720. %(_doc_callparams_note)s
  3721. The probability mass function for `multivariate_hypergeom` is
  3722. .. math::
  3723. P(X_1 = x_1, X_2 = x_2, \ldots, X_k = x_k) = \frac{\binom{m_1}{x_1}
  3724. \binom{m_2}{x_2} \cdots \binom{m_k}{x_k}}{\binom{M}{n}}, \\ \quad
  3725. (x_1, x_2, \ldots, x_k) \in \mathbb{N}^k \text{ with }
  3726. \sum_{i=1}^k x_i = n
  3727. where :math:`m_i` are the number of objects of type :math:`i`, :math:`M`
  3728. is the total number of objects in the population (sum of all the
  3729. :math:`m_i`), and :math:`n` is the size of the sample to be taken
  3730. from the population.
  3731. .. versionadded:: 1.6.0
  3732. Examples
  3733. --------
  3734. To evaluate the probability mass function of the multivariate
  3735. hypergeometric distribution, with a dichotomous population of size
  3736. :math:`10` and :math:`20`, at a sample of size :math:`12` with
  3737. :math:`8` objects of the first type and :math:`4` objects of the
  3738. second type, use:
  3739. >>> from scipy.stats import multivariate_hypergeom
  3740. >>> multivariate_hypergeom.pmf(x=[8, 4], m=[10, 20], n=12)
  3741. 0.0025207176631464523
  3742. The `multivariate_hypergeom` distribution is identical to the
  3743. corresponding `hypergeom` distribution (tiny numerical differences
  3744. notwithstanding) when only two types (good and bad) of objects
  3745. are present in the population as in the example above. Consider
  3746. another example for a comparison with the hypergeometric distribution:
  3747. >>> from scipy.stats import hypergeom
  3748. >>> multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4)
  3749. 0.4395604395604395
  3750. >>> hypergeom.pmf(k=3, M=15, n=4, N=10)
  3751. 0.43956043956044005
  3752. The functions ``pmf``, ``logpmf``, ``mean``, ``var``, ``cov``, and ``rvs``
  3753. support broadcasting, under the convention that the vector parameters
  3754. (``x``, ``m``, and ``n``) are interpreted as if each row along the last
  3755. axis is a single object. For instance, we can combine the previous two
  3756. calls to `multivariate_hypergeom` as
  3757. >>> multivariate_hypergeom.pmf(x=[[8, 4], [3, 1]], m=[[10, 20], [10, 5]],
  3758. ... n=[12, 4])
  3759. array([0.00252072, 0.43956044])
  3760. This broadcasting also works for ``cov``, where the output objects are
  3761. square matrices of size ``m.shape[-1]``. For example:
  3762. >>> multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12])
  3763. array([[[ 1.05, -1.05],
  3764. [-1.05, 1.05]],
  3765. [[ 1.56, -1.56],
  3766. [-1.56, 1.56]]])
  3767. That is, ``result[0]`` is equal to
  3768. ``multivariate_hypergeom.cov(m=[7, 9], n=8)`` and ``result[1]`` is equal
  3769. to ``multivariate_hypergeom.cov(m=[10, 15], n=12)``.
  3770. Alternatively, the object may be called (as a function) to fix the `m`
  3771. and `n` parameters, returning a "frozen" multivariate hypergeometric
  3772. random variable.
  3773. >>> rv = multivariate_hypergeom(m=[10, 20], n=12)
  3774. >>> rv.pmf(x=[8, 4])
  3775. 0.0025207176631464523
  3776. See Also
  3777. --------
  3778. scipy.stats.hypergeom : The hypergeometric distribution.
  3779. scipy.stats.multinomial : The multinomial distribution.
  3780. References
  3781. ----------
  3782. .. [1] The Multivariate Hypergeometric Distribution,
  3783. http://www.randomservices.org/random/urn/MultiHypergeometric.html
  3784. .. [2] Thomas J. Sargent and John Stachurski, 2020,
  3785. Multivariate Hypergeometric Distribution
  3786. https://python.quantecon.org/_downloads/pdf/multi_hyper.pdf
  3787. """
  3788. def __init__(self, seed=None):
  3789. super().__init__(seed)
  3790. self.__doc__ = doccer.docformat(self.__doc__, mhg_docdict_params)
  3791. def __call__(self, m, n, seed=None):
  3792. """Create a frozen multivariate_hypergeom distribution.
  3793. See `multivariate_hypergeom_frozen` for more information.
  3794. """
  3795. return multivariate_hypergeom_frozen(m, n, seed=seed)
  3796. def _process_parameters(self, m, n):
  3797. m = np.asarray(m)
  3798. n = np.asarray(n)
  3799. if m.size == 0:
  3800. m = m.astype(int)
  3801. if n.size == 0:
  3802. n = n.astype(int)
  3803. if not np.issubdtype(m.dtype, np.integer):
  3804. raise TypeError("'m' must an array of integers.")
  3805. if not np.issubdtype(n.dtype, np.integer):
  3806. raise TypeError("'n' must an array of integers.")
  3807. if m.ndim == 0:
  3808. raise ValueError("'m' must be an array with"
  3809. " at least one dimension.")
  3810. # check for empty arrays
  3811. if m.size != 0:
  3812. n = n[..., np.newaxis]
  3813. m, n = np.broadcast_arrays(m, n)
  3814. # check for empty arrays
  3815. if m.size != 0:
  3816. n = n[..., 0]
  3817. mcond = m < 0
  3818. M = m.sum(axis=-1)
  3819. ncond = (n < 0) | (n > M)
  3820. return M, m, n, mcond, ncond, np.any(mcond, axis=-1) | ncond
  3821. def _process_quantiles(self, x, M, m, n):
  3822. x = np.asarray(x)
  3823. if not np.issubdtype(x.dtype, np.integer):
  3824. raise TypeError("'x' must an array of integers.")
  3825. if x.ndim == 0:
  3826. raise ValueError("'x' must be an array with"
  3827. " at least one dimension.")
  3828. if not x.shape[-1] == m.shape[-1]:
  3829. raise ValueError(f"Size of each quantile must be size of 'm': "
  3830. f"received {x.shape[-1]}, "
  3831. f"but expected {m.shape[-1]}.")
  3832. # check for empty arrays
  3833. if m.size != 0:
  3834. n = n[..., np.newaxis]
  3835. M = M[..., np.newaxis]
  3836. x, m, n, M = np.broadcast_arrays(x, m, n, M)
  3837. # check for empty arrays
  3838. if m.size != 0:
  3839. n, M = n[..., 0], M[..., 0]
  3840. xcond = (x < 0) | (x > m)
  3841. return (x, M, m, n, xcond,
  3842. np.any(xcond, axis=-1) | (x.sum(axis=-1) != n))
  3843. def _checkresult(self, result, cond, bad_value):
  3844. result = np.asarray(result)
  3845. if cond.ndim != 0:
  3846. result[cond] = bad_value
  3847. elif cond:
  3848. return bad_value
  3849. if result.ndim == 0:
  3850. return result[()]
  3851. return result
  3852. def _logpmf(self, x, M, m, n, mxcond, ncond):
  3853. # This equation of the pmf comes from the relation,
  3854. # n combine r = beta(n+1, 1) / beta(r+1, n-r+1)
  3855. num = np.zeros_like(m, dtype=np.float_)
  3856. den = np.zeros_like(n, dtype=np.float_)
  3857. m, x = m[~mxcond], x[~mxcond]
  3858. M, n = M[~ncond], n[~ncond]
  3859. num[~mxcond] = (betaln(m+1, 1) - betaln(x+1, m-x+1))
  3860. den[~ncond] = (betaln(M+1, 1) - betaln(n+1, M-n+1))
  3861. num[mxcond] = np.nan
  3862. den[ncond] = np.nan
  3863. num = num.sum(axis=-1)
  3864. return num - den
  3865. def logpmf(self, x, m, n):
  3866. """Log of the multivariate hypergeometric probability mass function.
  3867. Parameters
  3868. ----------
  3869. x : array_like
  3870. Quantiles, with the last axis of `x` denoting the components.
  3871. %(_doc_default_callparams)s
  3872. Returns
  3873. -------
  3874. logpmf : ndarray or scalar
  3875. Log of the probability mass function evaluated at `x`
  3876. Notes
  3877. -----
  3878. %(_doc_callparams_note)s
  3879. """
  3880. M, m, n, mcond, ncond, mncond = self._process_parameters(m, n)
  3881. (x, M, m, n, xcond,
  3882. xcond_reduced) = self._process_quantiles(x, M, m, n)
  3883. mxcond = mcond | xcond
  3884. ncond = ncond | np.zeros(n.shape, dtype=np.bool_)
  3885. result = self._logpmf(x, M, m, n, mxcond, ncond)
  3886. # replace values for which x was out of the domain; broadcast
  3887. # xcond to the right shape
  3888. xcond_ = xcond_reduced | np.zeros(mncond.shape, dtype=np.bool_)
  3889. result = self._checkresult(result, xcond_, np.NINF)
  3890. # replace values bad for n or m; broadcast
  3891. # mncond to the right shape
  3892. mncond_ = mncond | np.zeros(xcond_reduced.shape, dtype=np.bool_)
  3893. return self._checkresult(result, mncond_, np.nan)
  3894. def pmf(self, x, m, n):
  3895. """Multivariate hypergeometric probability mass function.
  3896. Parameters
  3897. ----------
  3898. x : array_like
  3899. Quantiles, with the last axis of `x` denoting the components.
  3900. %(_doc_default_callparams)s
  3901. Returns
  3902. -------
  3903. pmf : ndarray or scalar
  3904. Probability density function evaluated at `x`
  3905. Notes
  3906. -----
  3907. %(_doc_callparams_note)s
  3908. """
  3909. out = np.exp(self.logpmf(x, m, n))
  3910. return out
  3911. def mean(self, m, n):
  3912. """Mean of the multivariate hypergeometric distribution.
  3913. Parameters
  3914. ----------
  3915. %(_doc_default_callparams)s
  3916. Returns
  3917. -------
  3918. mean : array_like or scalar
  3919. The mean of the distribution
  3920. """
  3921. M, m, n, _, _, mncond = self._process_parameters(m, n)
  3922. # check for empty arrays
  3923. if m.size != 0:
  3924. M, n = M[..., np.newaxis], n[..., np.newaxis]
  3925. cond = (M == 0)
  3926. M = np.ma.masked_array(M, mask=cond)
  3927. mu = n*(m/M)
  3928. if m.size != 0:
  3929. mncond = (mncond[..., np.newaxis] |
  3930. np.zeros(mu.shape, dtype=np.bool_))
  3931. return self._checkresult(mu, mncond, np.nan)
  3932. def var(self, m, n):
  3933. """Variance of the multivariate hypergeometric distribution.
  3934. Parameters
  3935. ----------
  3936. %(_doc_default_callparams)s
  3937. Returns
  3938. -------
  3939. array_like
  3940. The variances of the components of the distribution. This is
  3941. the diagonal of the covariance matrix of the distribution
  3942. """
  3943. M, m, n, _, _, mncond = self._process_parameters(m, n)
  3944. # check for empty arrays
  3945. if m.size != 0:
  3946. M, n = M[..., np.newaxis], n[..., np.newaxis]
  3947. cond = (M == 0) & (M-1 == 0)
  3948. M = np.ma.masked_array(M, mask=cond)
  3949. output = n * m/M * (M-m)/M * (M-n)/(M-1)
  3950. if m.size != 0:
  3951. mncond = (mncond[..., np.newaxis] |
  3952. np.zeros(output.shape, dtype=np.bool_))
  3953. return self._checkresult(output, mncond, np.nan)
  3954. def cov(self, m, n):
  3955. """Covariance matrix of the multivariate hypergeometric distribution.
  3956. Parameters
  3957. ----------
  3958. %(_doc_default_callparams)s
  3959. Returns
  3960. -------
  3961. cov : array_like
  3962. The covariance matrix of the distribution
  3963. """
  3964. # see [1]_ for the formula and [2]_ for implementation
  3965. # cov( x_i,x_j ) = -n * (M-n)/(M-1) * (K_i*K_j) / (M**2)
  3966. M, m, n, _, _, mncond = self._process_parameters(m, n)
  3967. # check for empty arrays
  3968. if m.size != 0:
  3969. M = M[..., np.newaxis, np.newaxis]
  3970. n = n[..., np.newaxis, np.newaxis]
  3971. cond = (M == 0) & (M-1 == 0)
  3972. M = np.ma.masked_array(M, mask=cond)
  3973. output = (-n * (M-n)/(M-1) *
  3974. np.einsum("...i,...j->...ij", m, m) / (M**2))
  3975. # check for empty arrays
  3976. if m.size != 0:
  3977. M, n = M[..., 0, 0], n[..., 0, 0]
  3978. cond = cond[..., 0, 0]
  3979. dim = m.shape[-1]
  3980. # diagonal entries need to be computed differently
  3981. for i in range(dim):
  3982. output[..., i, i] = (n * (M-n) * m[..., i]*(M-m[..., i]))
  3983. output[..., i, i] = output[..., i, i] / (M-1)
  3984. output[..., i, i] = output[..., i, i] / (M**2)
  3985. if m.size != 0:
  3986. mncond = (mncond[..., np.newaxis, np.newaxis] |
  3987. np.zeros(output.shape, dtype=np.bool_))
  3988. return self._checkresult(output, mncond, np.nan)
  3989. def rvs(self, m, n, size=None, random_state=None):
  3990. """Draw random samples from a multivariate hypergeometric distribution.
  3991. Parameters
  3992. ----------
  3993. %(_doc_default_callparams)s
  3994. size : integer or iterable of integers, optional
  3995. Number of samples to draw. Default is ``None``, in which case a
  3996. single variate is returned as an array with shape ``m.shape``.
  3997. %(_doc_random_state)s
  3998. Returns
  3999. -------
  4000. rvs : array_like
  4001. Random variates of shape ``size`` or ``m.shape``
  4002. (if ``size=None``).
  4003. Notes
  4004. -----
  4005. %(_doc_callparams_note)s
  4006. Also note that NumPy's `multivariate_hypergeometric` sampler is not
  4007. used as it doesn't support broadcasting.
  4008. """
  4009. M, m, n, _, _, _ = self._process_parameters(m, n)
  4010. random_state = self._get_random_state(random_state)
  4011. if size is not None and isinstance(size, int):
  4012. size = (size, )
  4013. if size is None:
  4014. rvs = np.empty(m.shape, dtype=m.dtype)
  4015. else:
  4016. rvs = np.empty(size + (m.shape[-1], ), dtype=m.dtype)
  4017. rem = M
  4018. # This sampler has been taken from numpy gh-13794
  4019. # https://github.com/numpy/numpy/pull/13794
  4020. for c in range(m.shape[-1] - 1):
  4021. rem = rem - m[..., c]
  4022. n0mask = n == 0
  4023. rvs[..., c] = (~n0mask *
  4024. random_state.hypergeometric(m[..., c],
  4025. rem + n0mask,
  4026. n + n0mask,
  4027. size=size))
  4028. n = n - rvs[..., c]
  4029. rvs[..., m.shape[-1] - 1] = n
  4030. return rvs
  4031. multivariate_hypergeom = multivariate_hypergeom_gen()
  4032. class multivariate_hypergeom_frozen(multi_rv_frozen):
  4033. def __init__(self, m, n, seed=None):
  4034. self._dist = multivariate_hypergeom_gen(seed)
  4035. (self.M, self.m, self.n,
  4036. self.mcond, self.ncond,
  4037. self.mncond) = self._dist._process_parameters(m, n)
  4038. # monkey patch self._dist
  4039. def _process_parameters(m, n):
  4040. return (self.M, self.m, self.n,
  4041. self.mcond, self.ncond,
  4042. self.mncond)
  4043. self._dist._process_parameters = _process_parameters
  4044. def logpmf(self, x):
  4045. return self._dist.logpmf(x, self.m, self.n)
  4046. def pmf(self, x):
  4047. return self._dist.pmf(x, self.m, self.n)
  4048. def mean(self):
  4049. return self._dist.mean(self.m, self.n)
  4050. def var(self):
  4051. return self._dist.var(self.m, self.n)
  4052. def cov(self):
  4053. return self._dist.cov(self.m, self.n)
  4054. def rvs(self, size=1, random_state=None):
  4055. return self._dist.rvs(self.m, self.n,
  4056. size=size,
  4057. random_state=random_state)
  4058. # Set frozen generator docstrings from corresponding docstrings in
  4059. # multivariate_hypergeom and fill in default strings in class docstrings
  4060. for name in ['logpmf', 'pmf', 'mean', 'var', 'cov', 'rvs']:
  4061. method = multivariate_hypergeom_gen.__dict__[name]
  4062. method_frozen = multivariate_hypergeom_frozen.__dict__[name]
  4063. method_frozen.__doc__ = doccer.docformat(
  4064. method.__doc__, mhg_docdict_noparams)
  4065. method.__doc__ = doccer.docformat(method.__doc__,
  4066. mhg_docdict_params)
  4067. class random_table_gen(multi_rv_generic):
  4068. r"""Contingency tables from independent samples with fixed marginal sums.
  4069. This is the distribution of random tables with given row and column vector
  4070. sums. This distribution represents the set of random tables under the null
  4071. hypothesis that rows and columns are independent. It is used in hypothesis
  4072. tests of independence.
  4073. Because of assumed independence, the expected frequency of each table
  4074. element can be computed from the row and column sums, so that the
  4075. distribution is completely determined by these two vectors.
  4076. Methods
  4077. -------
  4078. logpmf(x)
  4079. Log-probability of table `x` to occur in the distribution.
  4080. pmf(x)
  4081. Probability of table `x` to occur in the distribution.
  4082. mean(row, col)
  4083. Mean table.
  4084. rvs(row, col, size=None, method=None, random_state=None)
  4085. Draw random tables with given row and column vector sums.
  4086. Parameters
  4087. ----------
  4088. %(_doc_row_col)s
  4089. %(_doc_random_state)s
  4090. Notes
  4091. -----
  4092. %(_doc_row_col_note)s
  4093. Random elements from the distribution are generated either with Boyett's
  4094. [1]_ or Patefield's algorithm [2]_. Boyett's algorithm has
  4095. O(N) time and space complexity, where N is the total sum of entries in the
  4096. table. Patefield's algorithm has O(K x log(N)) time complexity, where K is
  4097. the number of cells in the table and requires only a small constant work
  4098. space. By default, the `rvs` method selects the fastest algorithm based on
  4099. the input, but you can specify the algorithm with the keyword `method`.
  4100. Allowed values are "boyett" and "patefield".
  4101. .. versionadded:: 1.10.0
  4102. Examples
  4103. --------
  4104. >>> from scipy.stats import random_table
  4105. >>> row = [1, 5]
  4106. >>> col = [2, 3, 1]
  4107. >>> random_table.mean(row, col)
  4108. array([[0.33333333, 0.5 , 0.16666667],
  4109. [1.66666667, 2.5 , 0.83333333]])
  4110. Alternatively, the object may be called (as a function) to fix the row
  4111. and column vector sums, returning a "frozen" distribution.
  4112. >>> dist = random_table(row, col)
  4113. >>> dist.rvs(random_state=123)
  4114. array([[1., 0., 0.],
  4115. [1., 3., 1.]])
  4116. References
  4117. ----------
  4118. .. [1] J. Boyett, AS 144 Appl. Statist. 28 (1979) 329-332
  4119. .. [2] W.M. Patefield, AS 159 Appl. Statist. 30 (1981) 91-97
  4120. """
  4121. def __init__(self, seed=None):
  4122. super().__init__(seed)
  4123. def __call__(self, row, col, *, seed=None):
  4124. """Create a frozen distribution of tables with given marginals.
  4125. See `random_table_frozen` for more information.
  4126. """
  4127. return random_table_frozen(row, col, seed=seed)
  4128. def logpmf(self, x, row, col):
  4129. """Log-probability of table to occur in the distribution.
  4130. Parameters
  4131. ----------
  4132. %(_doc_x)s
  4133. %(_doc_row_col)s
  4134. Returns
  4135. -------
  4136. logpmf : ndarray or scalar
  4137. Log of the probability mass function evaluated at `x`.
  4138. Notes
  4139. -----
  4140. %(_doc_row_col_note)s
  4141. If row and column marginals of `x` do not match `row` and `col`,
  4142. negative infinity is returned.
  4143. Examples
  4144. --------
  4145. >>> from scipy.stats import random_table
  4146. >>> import numpy as np
  4147. >>> x = [[1, 5, 1], [2, 3, 1]]
  4148. >>> row = np.sum(x, axis=1)
  4149. >>> col = np.sum(x, axis=0)
  4150. >>> random_table.logpmf(x, row, col)
  4151. -1.6306401200847027
  4152. Alternatively, the object may be called (as a function) to fix the row
  4153. and column vector sums, returning a "frozen" distribution.
  4154. >>> d = random_table(row, col)
  4155. >>> d.logpmf(x)
  4156. -1.6306401200847027
  4157. """
  4158. r, c, n = self._process_parameters(row, col)
  4159. x = np.asarray(x)
  4160. if x.ndim < 2:
  4161. raise ValueError("`x` must be at least two-dimensional")
  4162. dtype_is_int = np.issubdtype(x.dtype, np.integer)
  4163. with np.errstate(invalid='ignore'):
  4164. if not dtype_is_int and not np.all(x.astype(int) == x):
  4165. raise ValueError("`x` must contain only integral values")
  4166. # x does not contain NaN if we arrive here
  4167. if np.any(x < 0):
  4168. raise ValueError("`x` must contain only non-negative values")
  4169. r2 = np.sum(x, axis=-1)
  4170. c2 = np.sum(x, axis=-2)
  4171. if r2.shape[-1] != len(r):
  4172. raise ValueError("shape of `x` must agree with `row`")
  4173. if c2.shape[-1] != len(c):
  4174. raise ValueError("shape of `x` must agree with `col`")
  4175. res = np.empty(x.shape[:-2])
  4176. mask = np.all(r2 == r, axis=-1) & np.all(c2 == c, axis=-1)
  4177. def lnfac(x):
  4178. return gammaln(x + 1)
  4179. res[mask] = (np.sum(lnfac(r), axis=-1) + np.sum(lnfac(c), axis=-1)
  4180. - lnfac(n) - np.sum(lnfac(x[mask]), axis=(-1, -2)))
  4181. res[~mask] = -np.inf
  4182. return res[()]
  4183. def pmf(self, x, row, col):
  4184. """Probability of table to occur in the distribution.
  4185. Parameters
  4186. ----------
  4187. %(_doc_x)s
  4188. %(_doc_row_col)s
  4189. Returns
  4190. -------
  4191. pmf : ndarray or scalar
  4192. Probability mass function evaluated at `x`.
  4193. Notes
  4194. -----
  4195. %(_doc_row_col_note)s
  4196. If row and column marginals of `x` do not match `row` and `col`,
  4197. zero is returned.
  4198. Examples
  4199. --------
  4200. >>> from scipy.stats import random_table
  4201. >>> import numpy as np
  4202. >>> x = [[1, 5, 1], [2, 3, 1]]
  4203. >>> row = np.sum(x, axis=1)
  4204. >>> col = np.sum(x, axis=0)
  4205. >>> random_table.pmf(x, row, col)
  4206. 0.19580419580419592
  4207. Alternatively, the object may be called (as a function) to fix the row
  4208. and column vector sums, returning a "frozen" distribution.
  4209. >>> d = random_table(row, col)
  4210. >>> d.pmf(x)
  4211. 0.19580419580419592
  4212. """
  4213. return np.exp(self.logpmf(x, row, col))
  4214. def mean(self, row, col):
  4215. """Mean of distribution of conditional tables.
  4216. %(_doc_mean_params)s
  4217. Returns
  4218. -------
  4219. mean: ndarray
  4220. Mean of the distribution.
  4221. Notes
  4222. -----
  4223. %(_doc_row_col_note)s
  4224. Examples
  4225. --------
  4226. >>> from scipy.stats import random_table
  4227. >>> row = [1, 5]
  4228. >>> col = [2, 3, 1]
  4229. >>> random_table.mean(row, col)
  4230. array([[0.33333333, 0.5 , 0.16666667],
  4231. [1.66666667, 2.5 , 0.83333333]])
  4232. Alternatively, the object may be called (as a function) to fix the row
  4233. and column vector sums, returning a "frozen" distribution.
  4234. >>> d = random_table(row, col)
  4235. >>> d.mean()
  4236. array([[0.33333333, 0.5 , 0.16666667],
  4237. [1.66666667, 2.5 , 0.83333333]])
  4238. """
  4239. r, c, n = self._process_parameters(row, col)
  4240. return np.outer(r, c) / n
  4241. def rvs(self, row, col, *, size=None, method=None, random_state=None):
  4242. """Draw random tables with fixed column and row marginals.
  4243. Parameters
  4244. ----------
  4245. %(_doc_row_col)s
  4246. size : integer, optional
  4247. Number of samples to draw (default 1).
  4248. method : str, optional
  4249. Which method to use, "boyett" or "patefield". If None (default),
  4250. selects the fastest method for this input.
  4251. %(_doc_random_state)s
  4252. Returns
  4253. -------
  4254. rvs : ndarray
  4255. Random 2D tables of shape (`size`, `len(row)`, `len(col)`).
  4256. Notes
  4257. -----
  4258. %(_doc_row_col_note)s
  4259. Examples
  4260. --------
  4261. >>> from scipy.stats import random_table
  4262. >>> row = [1, 5]
  4263. >>> col = [2, 3, 1]
  4264. >>> random_table.rvs(row, col, random_state=123)
  4265. array([[1., 0., 0.],
  4266. [1., 3., 1.]])
  4267. Alternatively, the object may be called (as a function) to fix the row
  4268. and column vector sums, returning a "frozen" distribution.
  4269. >>> d = random_table(row, col)
  4270. >>> d.rvs(random_state=123)
  4271. array([[1., 0., 0.],
  4272. [1., 3., 1.]])
  4273. """
  4274. r, c, n = self._process_parameters(row, col)
  4275. size, shape = self._process_size_shape(size, r, c)
  4276. random_state = self._get_random_state(random_state)
  4277. meth = self._process_rvs_method(method, r, c, n)
  4278. return meth(r, c, n, size, random_state).reshape(shape)
  4279. @staticmethod
  4280. def _process_parameters(row, col):
  4281. """
  4282. Check that row and column vectors are one-dimensional, that they do
  4283. not contain negative or non-integer entries, and that the sums over
  4284. both vectors are equal.
  4285. """
  4286. r = np.array(row, dtype=np.int64, copy=True)
  4287. c = np.array(col, dtype=np.int64, copy=True)
  4288. if np.ndim(r) != 1:
  4289. raise ValueError("`row` must be one-dimensional")
  4290. if np.ndim(c) != 1:
  4291. raise ValueError("`col` must be one-dimensional")
  4292. if np.any(r < 0):
  4293. raise ValueError("each element of `row` must be non-negative")
  4294. if np.any(c < 0):
  4295. raise ValueError("each element of `col` must be non-negative")
  4296. n = np.sum(r)
  4297. if n != np.sum(c):
  4298. raise ValueError("sums over `row` and `col` must be equal")
  4299. if not np.all(r == np.asarray(row)):
  4300. raise ValueError("each element of `row` must be an integer")
  4301. if not np.all(c == np.asarray(col)):
  4302. raise ValueError("each element of `col` must be an integer")
  4303. return r, c, n
  4304. @staticmethod
  4305. def _process_size_shape(size, r, c):
  4306. """
  4307. Compute the number of samples to be drawn and the shape of the output
  4308. """
  4309. shape = (len(r), len(c))
  4310. if size is None:
  4311. return 1, shape
  4312. size = np.atleast_1d(size)
  4313. if not np.issubdtype(size.dtype, np.integer) or np.any(size < 0):
  4314. raise ValueError("`size` must be a non-negative integer or `None`")
  4315. return np.prod(size), tuple(size) + shape
  4316. @classmethod
  4317. def _process_rvs_method(cls, method, r, c, n):
  4318. known_methods = {
  4319. None: cls._rvs_select(r, c, n),
  4320. "boyett": cls._rvs_boyett,
  4321. "patefield": cls._rvs_patefield,
  4322. }
  4323. try:
  4324. return known_methods[method]
  4325. except KeyError:
  4326. raise ValueError(f"'{method}' not recognized, "
  4327. f"must be one of {set(known_methods)}")
  4328. @classmethod
  4329. def _rvs_select(cls, r, c, n):
  4330. fac = 1.0 # benchmarks show that this value is about 1
  4331. k = len(r) * len(c) # number of cells
  4332. # n + 1 guards against failure if n == 0
  4333. if n > fac * np.log(n + 1) * k:
  4334. return cls._rvs_patefield
  4335. return cls._rvs_boyett
  4336. @staticmethod
  4337. def _rvs_boyett(row, col, ntot, size, random_state):
  4338. return _rcont.rvs_rcont1(row, col, ntot, size, random_state)
  4339. @staticmethod
  4340. def _rvs_patefield(row, col, ntot, size, random_state):
  4341. return _rcont.rvs_rcont2(row, col, ntot, size, random_state)
  4342. random_table = random_table_gen()
  4343. class random_table_frozen(multi_rv_frozen):
  4344. def __init__(self, row, col, *, seed=None):
  4345. self._dist = random_table_gen(seed)
  4346. self._params = self._dist._process_parameters(row, col)
  4347. # monkey patch self._dist
  4348. def _process_parameters(r, c):
  4349. return self._params
  4350. self._dist._process_parameters = _process_parameters
  4351. def logpmf(self, x):
  4352. return self._dist.logpmf(x, None, None)
  4353. def pmf(self, x):
  4354. return self._dist.pmf(x, None, None)
  4355. def mean(self):
  4356. return self._dist.mean(None, None)
  4357. def rvs(self, size=None, method=None, random_state=None):
  4358. # optimisations are possible here
  4359. return self._dist.rvs(None, None, size=size, method=method,
  4360. random_state=random_state)
  4361. _ctab_doc_row_col = """\
  4362. row : array_like
  4363. Sum of table entries in each row.
  4364. col : array_like
  4365. Sum of table entries in each column."""
  4366. _ctab_doc_x = """\
  4367. x : array-like
  4368. Two-dimensional table of non-negative integers, or a
  4369. multi-dimensional array with the last two dimensions
  4370. corresponding with the tables."""
  4371. _ctab_doc_row_col_note = """\
  4372. The row and column vectors must be one-dimensional, not empty,
  4373. and each sum up to the same value. They cannot contain negative
  4374. or noninteger entries."""
  4375. _ctab_doc_mean_params = f"""
  4376. Parameters
  4377. ----------
  4378. {_ctab_doc_row_col}"""
  4379. _ctab_doc_row_col_note_frozen = """\
  4380. See class definition for a detailed description of parameters."""
  4381. _ctab_docdict = {
  4382. "_doc_random_state": _doc_random_state,
  4383. "_doc_row_col": _ctab_doc_row_col,
  4384. "_doc_x": _ctab_doc_x,
  4385. "_doc_mean_params": _ctab_doc_mean_params,
  4386. "_doc_row_col_note": _ctab_doc_row_col_note,
  4387. }
  4388. _ctab_docdict_frozen = _ctab_docdict.copy()
  4389. _ctab_docdict_frozen.update({
  4390. "_doc_row_col": "",
  4391. "_doc_mean_params": "",
  4392. "_doc_row_col_note": _ctab_doc_row_col_note_frozen,
  4393. })
  4394. def _docfill(obj, docdict, template=None):
  4395. obj.__doc__ = doccer.docformat(template or obj.__doc__, docdict)
  4396. # Set frozen generator docstrings from corresponding docstrings in
  4397. # random_table and fill in default strings in class docstrings
  4398. _docfill(random_table_gen, _ctab_docdict)
  4399. for name in ['logpmf', 'pmf', 'mean', 'rvs']:
  4400. method = random_table_gen.__dict__[name]
  4401. method_frozen = random_table_frozen.__dict__[name]
  4402. _docfill(method_frozen, _ctab_docdict_frozen, method.__doc__)
  4403. _docfill(method, _ctab_docdict)
  4404. class uniform_direction_gen(multi_rv_generic):
  4405. r"""A vector-valued uniform direction.
  4406. Return a random direction (unit vector). The `dim` keyword specifies
  4407. the dimensionality of the space.
  4408. Methods
  4409. -------
  4410. rvs(dim=None, size=1, random_state=None)
  4411. Draw random directions.
  4412. Parameters
  4413. ----------
  4414. dim : scalar
  4415. Dimension of directions.
  4416. seed : {None, int, `numpy.random.Generator`,
  4417. `numpy.random.RandomState`}, optional
  4418. Used for drawing random variates.
  4419. If `seed` is `None`, the `~np.random.RandomState` singleton is used.
  4420. If `seed` is an int, a new ``RandomState`` instance is used, seeded
  4421. with seed.
  4422. If `seed` is already a ``RandomState`` or ``Generator`` instance,
  4423. then that object is used.
  4424. Default is `None`.
  4425. Notes
  4426. -----
  4427. This distribution generates unit vectors uniformly distributed on
  4428. the surface of a hypersphere. These can be interpreted as random
  4429. directions.
  4430. For example, if `dim` is 3, 3D vectors from the surface of :math:`S^2`
  4431. will be sampled.
  4432. References
  4433. ----------
  4434. .. [1] Marsaglia, G. (1972). "Choosing a Point from the Surface of a
  4435. Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
  4436. Examples
  4437. --------
  4438. >>> import numpy as np
  4439. >>> from scipy.stats import uniform_direction
  4440. >>> x = uniform_direction.rvs(3)
  4441. >>> np.linalg.norm(x)
  4442. 1.
  4443. This generates one random direction, a vector on the surface of
  4444. :math:`S^2`.
  4445. Alternatively, the object may be called (as a function) to return a frozen
  4446. distribution with fixed `dim` parameter. Here,
  4447. we create a `uniform_direction` with ``dim=3`` and draw 5 observations.
  4448. The samples are then arranged in an array of shape 5x3.
  4449. >>> rng = np.random.default_rng()
  4450. >>> uniform_sphere_dist = uniform_direction(3)
  4451. >>> unit_vectors = uniform_sphere_dist.rvs(5, random_state=rng)
  4452. >>> unit_vectors
  4453. array([[ 0.56688642, -0.1332634 , -0.81294566],
  4454. [-0.427126 , -0.74779278, 0.50830044],
  4455. [ 0.3793989 , 0.92346629, 0.05715323],
  4456. [ 0.36428383, -0.92449076, -0.11231259],
  4457. [-0.27733285, 0.94410968, -0.17816678]])
  4458. """
  4459. def __init__(self, seed=None):
  4460. super().__init__(seed)
  4461. self.__doc__ = doccer.docformat(self.__doc__)
  4462. def __call__(self, dim=None, seed=None):
  4463. """Create a frozen n-dimensional uniform direction distribution.
  4464. See `uniform_direction` for more information.
  4465. """
  4466. return uniform_direction_frozen(dim, seed=seed)
  4467. def _process_parameters(self, dim):
  4468. """Dimension N must be specified; it cannot be inferred."""
  4469. if dim is None or not np.isscalar(dim) or dim < 1 or dim != int(dim):
  4470. raise ValueError("Dimension of vector must be specified, "
  4471. "and must be an integer greater than 0.")
  4472. return int(dim)
  4473. def rvs(self, dim, size=None, random_state=None):
  4474. """Draw random samples from S(N-1).
  4475. Parameters
  4476. ----------
  4477. dim : integer
  4478. Dimension of space (N).
  4479. size : int or tuple of ints, optional
  4480. Given a shape of, for example, (m,n,k), m*n*k samples are
  4481. generated, and packed in an m-by-n-by-k arrangement.
  4482. Because each sample is N-dimensional, the output shape
  4483. is (m,n,k,N). If no shape is specified, a single (N-D)
  4484. sample is returned.
  4485. random_state : {None, int, `numpy.random.Generator`,
  4486. `numpy.random.RandomState`}, optional
  4487. Pseudorandom number generator state used to generate resamples.
  4488. If `random_state` is ``None`` (or `np.random`), the
  4489. `numpy.random.RandomState` singleton is used.
  4490. If `random_state` is an int, a new ``RandomState`` instance is
  4491. used, seeded with `random_state`.
  4492. If `random_state` is already a ``Generator`` or ``RandomState``
  4493. instance then that instance is used.
  4494. Returns
  4495. -------
  4496. rvs : ndarray
  4497. Random direction vectors
  4498. """
  4499. random_state = self._get_random_state(random_state)
  4500. if size is None:
  4501. size = np.array([], dtype=int)
  4502. size = np.atleast_1d(size)
  4503. dim = self._process_parameters(dim)
  4504. samples = _sample_uniform_direction(dim, size, random_state)
  4505. return samples
  4506. uniform_direction = uniform_direction_gen()
  4507. class uniform_direction_frozen(multi_rv_frozen):
  4508. def __init__(self, dim=None, seed=None):
  4509. """Create a frozen n-dimensional uniform direction distribution.
  4510. Parameters
  4511. ----------
  4512. dim : int
  4513. Dimension of matrices
  4514. seed : {None, int, `numpy.random.Generator`,
  4515. `numpy.random.RandomState`}, optional
  4516. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  4517. singleton is used.
  4518. If `seed` is an int, a new ``RandomState`` instance is used,
  4519. seeded with `seed`.
  4520. If `seed` is already a ``Generator`` or ``RandomState`` instance
  4521. then that instance is used.
  4522. Examples
  4523. --------
  4524. >>> from scipy.stats import uniform_direction
  4525. >>> x = uniform_direction(3)
  4526. >>> x.rvs()
  4527. """
  4528. self._dist = uniform_direction_gen(seed)
  4529. self.dim = self._dist._process_parameters(dim)
  4530. def rvs(self, size=None, random_state=None):
  4531. return self._dist.rvs(self.dim, size, random_state)
  4532. def _sample_uniform_direction(dim, size, random_state):
  4533. """
  4534. Private method to generate uniform directions
  4535. Reference: Marsaglia, G. (1972). "Choosing a Point from the Surface of a
  4536. Sphere". Annals of Mathematical Statistics. 43 (2): 645-646.
  4537. """
  4538. samples_shape = np.append(size, dim)
  4539. samples = random_state.standard_normal(samples_shape)
  4540. samples /= np.linalg.norm(samples, axis=-1, keepdims=True)
  4541. return samples