_distn_infrastructure.py 146 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763777377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044404540464047404840494050405140524053405440554056405740584059406040614062
  1. #
  2. # Author: Travis Oliphant 2002-2011 with contributions from
  3. # SciPy Developers 2004-2011
  4. #
  5. from scipy._lib._util import getfullargspec_no_self as _getfullargspec
  6. import sys
  7. import keyword
  8. import re
  9. import types
  10. import warnings
  11. from itertools import zip_longest
  12. from scipy._lib import doccer
  13. from ._distr_params import distcont, distdiscrete
  14. from scipy._lib._util import check_random_state
  15. from scipy.special import comb, entr
  16. # for root finding for continuous distribution ppf, and max likelihood
  17. # estimation
  18. from scipy import optimize
  19. # for functions of continuous distributions (e.g. moments, entropy, cdf)
  20. from scipy import integrate
  21. # to approximate the pdf of a continuous distribution given its cdf
  22. from scipy._lib._finite_differences import _derivative
  23. # for scipy.stats.entropy. Attempts to import just that function or file
  24. # have cause import problems
  25. from scipy import stats
  26. from numpy import (arange, putmask, ravel, ones, shape, ndarray, zeros, floor,
  27. logical_and, log, sqrt, place, argmax, vectorize, asarray,
  28. nan, inf, isinf, NINF, empty)
  29. import numpy as np
  30. from ._constants import _XMAX
  31. from scipy.stats._warnings_errors import FitError
  32. # These are the docstring parts used for substitution in specific
  33. # distribution docstrings
  34. docheaders = {'methods': """\nMethods\n-------\n""",
  35. 'notes': """\nNotes\n-----\n""",
  36. 'examples': """\nExamples\n--------\n"""}
  37. _doc_rvs = """\
  38. rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)
  39. Random variates.
  40. """
  41. _doc_pdf = """\
  42. pdf(x, %(shapes)s, loc=0, scale=1)
  43. Probability density function.
  44. """
  45. _doc_logpdf = """\
  46. logpdf(x, %(shapes)s, loc=0, scale=1)
  47. Log of the probability density function.
  48. """
  49. _doc_pmf = """\
  50. pmf(k, %(shapes)s, loc=0, scale=1)
  51. Probability mass function.
  52. """
  53. _doc_logpmf = """\
  54. logpmf(k, %(shapes)s, loc=0, scale=1)
  55. Log of the probability mass function.
  56. """
  57. _doc_cdf = """\
  58. cdf(x, %(shapes)s, loc=0, scale=1)
  59. Cumulative distribution function.
  60. """
  61. _doc_logcdf = """\
  62. logcdf(x, %(shapes)s, loc=0, scale=1)
  63. Log of the cumulative distribution function.
  64. """
  65. _doc_sf = """\
  66. sf(x, %(shapes)s, loc=0, scale=1)
  67. Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
  68. """
  69. _doc_logsf = """\
  70. logsf(x, %(shapes)s, loc=0, scale=1)
  71. Log of the survival function.
  72. """
  73. _doc_ppf = """\
  74. ppf(q, %(shapes)s, loc=0, scale=1)
  75. Percent point function (inverse of ``cdf`` --- percentiles).
  76. """
  77. _doc_isf = """\
  78. isf(q, %(shapes)s, loc=0, scale=1)
  79. Inverse survival function (inverse of ``sf``).
  80. """
  81. _doc_moment = """\
  82. moment(order, %(shapes)s, loc=0, scale=1)
  83. Non-central moment of the specified order.
  84. """
  85. _doc_stats = """\
  86. stats(%(shapes)s, loc=0, scale=1, moments='mv')
  87. Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
  88. """
  89. _doc_entropy = """\
  90. entropy(%(shapes)s, loc=0, scale=1)
  91. (Differential) entropy of the RV.
  92. """
  93. _doc_fit = """\
  94. fit(data)
  95. Parameter estimates for generic data.
  96. See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
  97. keyword arguments.
  98. """
  99. _doc_expect = """\
  100. expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
  101. Expected value of a function (of one argument) with respect to the distribution.
  102. """
  103. _doc_expect_discrete = """\
  104. expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)
  105. Expected value of a function (of one argument) with respect to the distribution.
  106. """
  107. _doc_median = """\
  108. median(%(shapes)s, loc=0, scale=1)
  109. Median of the distribution.
  110. """
  111. _doc_mean = """\
  112. mean(%(shapes)s, loc=0, scale=1)
  113. Mean of the distribution.
  114. """
  115. _doc_var = """\
  116. var(%(shapes)s, loc=0, scale=1)
  117. Variance of the distribution.
  118. """
  119. _doc_std = """\
  120. std(%(shapes)s, loc=0, scale=1)
  121. Standard deviation of the distribution.
  122. """
  123. _doc_interval = """\
  124. interval(confidence, %(shapes)s, loc=0, scale=1)
  125. Confidence interval with equal areas around the median.
  126. """
  127. _doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
  128. _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
  129. _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
  130. _doc_stats, _doc_entropy, _doc_fit,
  131. _doc_expect, _doc_median,
  132. _doc_mean, _doc_var, _doc_std, _doc_interval])
  133. _doc_default_longsummary = """\
  134. As an instance of the `rv_continuous` class, `%(name)s` object inherits from it
  135. a collection of generic methods (see below for the full list),
  136. and completes them with details specific for this particular distribution.
  137. """
  138. _doc_default_frozen_note = """
  139. Alternatively, the object may be called (as a function) to fix the shape,
  140. location, and scale parameters returning a "frozen" continuous RV object:
  141. rv = %(name)s(%(shapes)s, loc=0, scale=1)
  142. - Frozen RV object with the same methods but holding the given shape,
  143. location, and scale fixed.
  144. """
  145. _doc_default_example = """\
  146. Examples
  147. --------
  148. >>> import numpy as np
  149. >>> from scipy.stats import %(name)s
  150. >>> import matplotlib.pyplot as plt
  151. >>> fig, ax = plt.subplots(1, 1)
  152. Calculate the first four moments:
  153. %(set_vals_stmt)s
  154. >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
  155. Display the probability density function (``pdf``):
  156. >>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
  157. ... %(name)s.ppf(0.99, %(shapes)s), 100)
  158. >>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
  159. ... 'r-', lw=5, alpha=0.6, label='%(name)s pdf')
  160. Alternatively, the distribution object can be called (as a function)
  161. to fix the shape, location and scale parameters. This returns a "frozen"
  162. RV object holding the given parameters fixed.
  163. Freeze the distribution and display the frozen ``pdf``:
  164. >>> rv = %(name)s(%(shapes)s)
  165. >>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
  166. Check accuracy of ``cdf`` and ``ppf``:
  167. >>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
  168. >>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
  169. True
  170. Generate random numbers:
  171. >>> r = %(name)s.rvs(%(shapes)s, size=1000)
  172. And compare the histogram:
  173. >>> ax.hist(r, density=True, bins='auto', histtype='stepfilled', alpha=0.2)
  174. >>> ax.set_xlim([x[0], x[-1]])
  175. >>> ax.legend(loc='best', frameon=False)
  176. >>> plt.show()
  177. """
  178. _doc_default_locscale = """\
  179. The probability density above is defined in the "standardized" form. To shift
  180. and/or scale the distribution use the ``loc`` and ``scale`` parameters.
  181. Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically
  182. equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with
  183. ``y = (x - loc) / scale``. Note that shifting the location of a distribution
  184. does not make it a "noncentral" distribution; noncentral generalizations of
  185. some distributions are available in separate classes.
  186. """
  187. _doc_default = ''.join([_doc_default_longsummary,
  188. _doc_allmethods,
  189. '\n',
  190. _doc_default_example])
  191. _doc_default_before_notes = ''.join([_doc_default_longsummary,
  192. _doc_allmethods])
  193. docdict = {
  194. 'rvs': _doc_rvs,
  195. 'pdf': _doc_pdf,
  196. 'logpdf': _doc_logpdf,
  197. 'cdf': _doc_cdf,
  198. 'logcdf': _doc_logcdf,
  199. 'sf': _doc_sf,
  200. 'logsf': _doc_logsf,
  201. 'ppf': _doc_ppf,
  202. 'isf': _doc_isf,
  203. 'stats': _doc_stats,
  204. 'entropy': _doc_entropy,
  205. 'fit': _doc_fit,
  206. 'moment': _doc_moment,
  207. 'expect': _doc_expect,
  208. 'interval': _doc_interval,
  209. 'mean': _doc_mean,
  210. 'std': _doc_std,
  211. 'var': _doc_var,
  212. 'median': _doc_median,
  213. 'allmethods': _doc_allmethods,
  214. 'longsummary': _doc_default_longsummary,
  215. 'frozennote': _doc_default_frozen_note,
  216. 'example': _doc_default_example,
  217. 'default': _doc_default,
  218. 'before_notes': _doc_default_before_notes,
  219. 'after_notes': _doc_default_locscale
  220. }
  221. # Reuse common content between continuous and discrete docs, change some
  222. # minor bits.
  223. docdict_discrete = docdict.copy()
  224. docdict_discrete['pmf'] = _doc_pmf
  225. docdict_discrete['logpmf'] = _doc_logpmf
  226. docdict_discrete['expect'] = _doc_expect_discrete
  227. _doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
  228. 'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',
  229. 'mean', 'var', 'std', 'interval']
  230. for obj in _doc_disc_methods:
  231. docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')
  232. _doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']
  233. for obj in _doc_disc_methods_err_varname:
  234. docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')
  235. docdict_discrete.pop('pdf')
  236. docdict_discrete.pop('logpdf')
  237. _doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])
  238. docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods
  239. docdict_discrete['longsummary'] = _doc_default_longsummary.replace(
  240. 'rv_continuous', 'rv_discrete')
  241. _doc_default_frozen_note = """
  242. Alternatively, the object may be called (as a function) to fix the shape and
  243. location parameters returning a "frozen" discrete RV object:
  244. rv = %(name)s(%(shapes)s, loc=0)
  245. - Frozen RV object with the same methods but holding the given shape and
  246. location fixed.
  247. """
  248. docdict_discrete['frozennote'] = _doc_default_frozen_note
  249. _doc_default_discrete_example = """\
  250. Examples
  251. --------
  252. >>> import numpy as np
  253. >>> from scipy.stats import %(name)s
  254. >>> import matplotlib.pyplot as plt
  255. >>> fig, ax = plt.subplots(1, 1)
  256. Calculate the first four moments:
  257. %(set_vals_stmt)s
  258. >>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')
  259. Display the probability mass function (``pmf``):
  260. >>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
  261. ... %(name)s.ppf(0.99, %(shapes)s))
  262. >>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
  263. >>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)
  264. Alternatively, the distribution object can be called (as a function)
  265. to fix the shape and location. This returns a "frozen" RV object holding
  266. the given parameters fixed.
  267. Freeze the distribution and display the frozen ``pmf``:
  268. >>> rv = %(name)s(%(shapes)s)
  269. >>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
  270. ... label='frozen pmf')
  271. >>> ax.legend(loc='best', frameon=False)
  272. >>> plt.show()
  273. Check accuracy of ``cdf`` and ``ppf``:
  274. >>> prob = %(name)s.cdf(x, %(shapes)s)
  275. >>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
  276. True
  277. Generate random numbers:
  278. >>> r = %(name)s.rvs(%(shapes)s, size=1000)
  279. """
  280. _doc_default_discrete_locscale = """\
  281. The probability mass function above is defined in the "standardized" form.
  282. To shift distribution use the ``loc`` parameter.
  283. Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically
  284. equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.
  285. """
  286. docdict_discrete['example'] = _doc_default_discrete_example
  287. docdict_discrete['after_notes'] = _doc_default_discrete_locscale
  288. _doc_default_before_notes = ''.join([docdict_discrete['longsummary'],
  289. docdict_discrete['allmethods']])
  290. docdict_discrete['before_notes'] = _doc_default_before_notes
  291. _doc_default_disc = ''.join([docdict_discrete['longsummary'],
  292. docdict_discrete['allmethods'],
  293. docdict_discrete['frozennote'],
  294. docdict_discrete['example']])
  295. docdict_discrete['default'] = _doc_default_disc
  296. # clean up all the separate docstring elements, we do not need them anymore
  297. for obj in [s for s in dir() if s.startswith('_doc_')]:
  298. exec('del ' + obj)
  299. del obj
  300. def _moment(data, n, mu=None):
  301. if mu is None:
  302. mu = data.mean()
  303. return ((data - mu)**n).mean()
  304. def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):
  305. if (n == 0):
  306. return 1.0
  307. elif (n == 1):
  308. if mu is None:
  309. val = moment_func(1, *args)
  310. else:
  311. val = mu
  312. elif (n == 2):
  313. if mu2 is None or mu is None:
  314. val = moment_func(2, *args)
  315. else:
  316. val = mu2 + mu*mu
  317. elif (n == 3):
  318. if g1 is None or mu2 is None or mu is None:
  319. val = moment_func(3, *args)
  320. else:
  321. mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment
  322. val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment
  323. elif (n == 4):
  324. if g1 is None or g2 is None or mu2 is None or mu is None:
  325. val = moment_func(4, *args)
  326. else:
  327. mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
  328. mu3 = g1*np.power(mu2, 1.5) # 3rd central moment
  329. val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu
  330. else:
  331. val = moment_func(n, *args)
  332. return val
  333. def _skew(data):
  334. """
  335. skew is third central moment / variance**(1.5)
  336. """
  337. data = np.ravel(data)
  338. mu = data.mean()
  339. m2 = ((data - mu)**2).mean()
  340. m3 = ((data - mu)**3).mean()
  341. return m3 / np.power(m2, 1.5)
  342. def _kurtosis(data):
  343. """kurtosis is fourth central moment / variance**2 - 3."""
  344. data = np.ravel(data)
  345. mu = data.mean()
  346. m2 = ((data - mu)**2).mean()
  347. m4 = ((data - mu)**4).mean()
  348. return m4 / m2**2 - 3
  349. def _fit_determine_optimizer(optimizer):
  350. if not callable(optimizer) and isinstance(optimizer, str):
  351. if not optimizer.startswith('fmin_'):
  352. optimizer = "fmin_"+optimizer
  353. if optimizer == 'fmin_':
  354. optimizer = 'fmin'
  355. try:
  356. optimizer = getattr(optimize, optimizer)
  357. except AttributeError as e:
  358. raise ValueError("%s is not a valid optimizer" % optimizer) from e
  359. return optimizer
  360. # Frozen RV class
  361. class rv_frozen:
  362. def __init__(self, dist, *args, **kwds):
  363. self.args = args
  364. self.kwds = kwds
  365. # create a new instance
  366. self.dist = dist.__class__(**dist._updated_ctor_param())
  367. shapes, _, _ = self.dist._parse_args(*args, **kwds)
  368. self.a, self.b = self.dist._get_support(*shapes)
  369. @property
  370. def random_state(self):
  371. return self.dist._random_state
  372. @random_state.setter
  373. def random_state(self, seed):
  374. self.dist._random_state = check_random_state(seed)
  375. def cdf(self, x):
  376. return self.dist.cdf(x, *self.args, **self.kwds)
  377. def logcdf(self, x):
  378. return self.dist.logcdf(x, *self.args, **self.kwds)
  379. def ppf(self, q):
  380. return self.dist.ppf(q, *self.args, **self.kwds)
  381. def isf(self, q):
  382. return self.dist.isf(q, *self.args, **self.kwds)
  383. def rvs(self, size=None, random_state=None):
  384. kwds = self.kwds.copy()
  385. kwds.update({'size': size, 'random_state': random_state})
  386. return self.dist.rvs(*self.args, **kwds)
  387. def sf(self, x):
  388. return self.dist.sf(x, *self.args, **self.kwds)
  389. def logsf(self, x):
  390. return self.dist.logsf(x, *self.args, **self.kwds)
  391. def stats(self, moments='mv'):
  392. kwds = self.kwds.copy()
  393. kwds.update({'moments': moments})
  394. return self.dist.stats(*self.args, **kwds)
  395. def median(self):
  396. return self.dist.median(*self.args, **self.kwds)
  397. def mean(self):
  398. return self.dist.mean(*self.args, **self.kwds)
  399. def var(self):
  400. return self.dist.var(*self.args, **self.kwds)
  401. def std(self):
  402. return self.dist.std(*self.args, **self.kwds)
  403. def moment(self, order=None, **kwds):
  404. return self.dist.moment(order, *self.args, **self.kwds, **kwds)
  405. def entropy(self):
  406. return self.dist.entropy(*self.args, **self.kwds)
  407. def interval(self, confidence=None, **kwds):
  408. return self.dist.interval(confidence, *self.args, **self.kwds, **kwds)
  409. def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
  410. # expect method only accepts shape parameters as positional args
  411. # hence convert self.args, self.kwds, also loc/scale
  412. # See the .expect method docstrings for the meaning of
  413. # other parameters.
  414. a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
  415. if isinstance(self.dist, rv_discrete):
  416. return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)
  417. else:
  418. return self.dist.expect(func, a, loc, scale, lb, ub,
  419. conditional, **kwds)
  420. def support(self):
  421. return self.dist.support(*self.args, **self.kwds)
  422. class rv_discrete_frozen(rv_frozen):
  423. def pmf(self, k):
  424. return self.dist.pmf(k, *self.args, **self.kwds)
  425. def logpmf(self, k): # No error
  426. return self.dist.logpmf(k, *self.args, **self.kwds)
  427. class rv_continuous_frozen(rv_frozen):
  428. def pdf(self, x):
  429. return self.dist.pdf(x, *self.args, **self.kwds)
  430. def logpdf(self, x):
  431. return self.dist.logpdf(x, *self.args, **self.kwds)
  432. def argsreduce(cond, *args):
  433. """Clean arguments to:
  434. 1. Ensure all arguments are iterable (arrays of dimension at least one
  435. 2. If cond != True and size > 1, ravel(args[i]) where ravel(condition) is
  436. True, in 1D.
  437. Return list of processed arguments.
  438. Examples
  439. --------
  440. >>> import numpy as np
  441. >>> rng = np.random.default_rng()
  442. >>> A = rng.random((4, 5))
  443. >>> B = 2
  444. >>> C = rng.random((1, 5))
  445. >>> cond = np.ones(A.shape)
  446. >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
  447. >>> A1.shape
  448. (4, 5)
  449. >>> B1.shape
  450. (1,)
  451. >>> C1.shape
  452. (1, 5)
  453. >>> cond[2,:] = 0
  454. >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
  455. >>> A1.shape
  456. (15,)
  457. >>> B1.shape
  458. (1,)
  459. >>> C1.shape
  460. (15,)
  461. """
  462. # some distributions assume arguments are iterable.
  463. newargs = np.atleast_1d(*args)
  464. # np.atleast_1d returns an array if only one argument, or a list of arrays
  465. # if more than one argument.
  466. if not isinstance(newargs, list):
  467. newargs = [newargs, ]
  468. if np.all(cond):
  469. # broadcast arrays with cond
  470. *newargs, cond = np.broadcast_arrays(*newargs, cond)
  471. return [arg.ravel() for arg in newargs]
  472. s = cond.shape
  473. # np.extract returns flattened arrays, which are not broadcastable together
  474. # unless they are either the same size or size == 1.
  475. return [(arg if np.size(arg) == 1
  476. else np.extract(cond, np.broadcast_to(arg, s)))
  477. for arg in newargs]
  478. parse_arg_template = """
  479. def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):
  480. return (%(shape_arg_str)s), %(locscale_out)s
  481. def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):
  482. return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)
  483. def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):
  484. return (%(shape_arg_str)s), %(locscale_out)s, moments
  485. """
  486. class rv_generic:
  487. """Class which encapsulates common functionality between rv_discrete
  488. and rv_continuous.
  489. """
  490. def __init__(self, seed=None):
  491. super().__init__()
  492. # figure out if _stats signature has 'moments' keyword
  493. sig = _getfullargspec(self._stats)
  494. self._stats_has_moments = ((sig.varkw is not None) or
  495. ('moments' in sig.args) or
  496. ('moments' in sig.kwonlyargs))
  497. self._random_state = check_random_state(seed)
  498. @property
  499. def random_state(self):
  500. """Get or set the generator object for generating random variates.
  501. If `random_state` is None (or `np.random`), the
  502. `numpy.random.RandomState` singleton is used.
  503. If `random_state` is an int, a new ``RandomState`` instance is used,
  504. seeded with `random_state`.
  505. If `random_state` is already a ``Generator`` or ``RandomState``
  506. instance, that instance is used.
  507. """
  508. return self._random_state
  509. @random_state.setter
  510. def random_state(self, seed):
  511. self._random_state = check_random_state(seed)
  512. def __setstate__(self, state):
  513. try:
  514. self.__dict__.update(state)
  515. # attaches the dynamically created methods on each instance.
  516. # if a subclass overrides rv_generic.__setstate__, or implements
  517. # it's own _attach_methods, then it must make sure that
  518. # _attach_argparser_methods is called.
  519. self._attach_methods()
  520. except ValueError:
  521. # reconstitute an old pickle scipy<1.6, that contains
  522. # (_ctor_param, random_state) as state
  523. self._ctor_param = state[0]
  524. self._random_state = state[1]
  525. self.__init__()
  526. def _attach_methods(self):
  527. """Attaches dynamically created methods to the rv_* instance.
  528. This method must be overridden by subclasses, and must itself call
  529. _attach_argparser_methods. This method is called in __init__ in
  530. subclasses, and in __setstate__
  531. """
  532. raise NotImplementedError
  533. def _attach_argparser_methods(self):
  534. """
  535. Generates the argument-parsing functions dynamically and attaches
  536. them to the instance.
  537. Should be called from `_attach_methods`, typically in __init__ and
  538. during unpickling (__setstate__)
  539. """
  540. ns = {}
  541. exec(self._parse_arg_template, ns)
  542. # NB: attach to the instance, not class
  543. for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:
  544. setattr(self, name, types.MethodType(ns[name], self))
  545. def _construct_argparser(
  546. self, meths_to_inspect, locscale_in, locscale_out):
  547. """Construct the parser string for the shape arguments.
  548. This method should be called in __init__ of a class for each
  549. distribution. It creates the `_parse_arg_template` attribute that is
  550. then used by `_attach_argparser_methods` to dynamically create and
  551. attach the `_parse_args`, `_parse_args_stats`, `_parse_args_rvs`
  552. methods to the instance.
  553. If self.shapes is a non-empty string, interprets it as a
  554. comma-separated list of shape parameters.
  555. Otherwise inspects the call signatures of `meths_to_inspect`
  556. and constructs the argument-parsing functions from these.
  557. In this case also sets `shapes` and `numargs`.
  558. """
  559. if self.shapes:
  560. # sanitize the user-supplied shapes
  561. if not isinstance(self.shapes, str):
  562. raise TypeError('shapes must be a string.')
  563. shapes = self.shapes.replace(',', ' ').split()
  564. for field in shapes:
  565. if keyword.iskeyword(field):
  566. raise SyntaxError('keywords cannot be used as shapes.')
  567. if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):
  568. raise SyntaxError(
  569. 'shapes must be valid python identifiers')
  570. else:
  571. # find out the call signatures (_pdf, _cdf etc), deduce shape
  572. # arguments. Generic methods only have 'self, x', any further args
  573. # are shapes.
  574. shapes_list = []
  575. for meth in meths_to_inspect:
  576. shapes_args = _getfullargspec(meth) # NB does not contain self
  577. args = shapes_args.args[1:] # peel off 'x', too
  578. if args:
  579. shapes_list.append(args)
  580. # *args or **kwargs are not allowed w/automatic shapes
  581. if shapes_args.varargs is not None:
  582. raise TypeError(
  583. '*args are not allowed w/out explicit shapes')
  584. if shapes_args.varkw is not None:
  585. raise TypeError(
  586. '**kwds are not allowed w/out explicit shapes')
  587. if shapes_args.kwonlyargs:
  588. raise TypeError(
  589. 'kwonly args are not allowed w/out explicit shapes')
  590. if shapes_args.defaults is not None:
  591. raise TypeError('defaults are not allowed for shapes')
  592. if shapes_list:
  593. shapes = shapes_list[0]
  594. # make sure the signatures are consistent
  595. for item in shapes_list:
  596. if item != shapes:
  597. raise TypeError('Shape arguments are inconsistent.')
  598. else:
  599. shapes = []
  600. # have the arguments, construct the method from template
  601. shapes_str = ', '.join(shapes) + ', ' if shapes else '' # NB: not None
  602. dct = dict(shape_arg_str=shapes_str,
  603. locscale_in=locscale_in,
  604. locscale_out=locscale_out,
  605. )
  606. # this string is used by _attach_argparser_methods
  607. self._parse_arg_template = parse_arg_template % dct
  608. self.shapes = ', '.join(shapes) if shapes else None
  609. if not hasattr(self, 'numargs'):
  610. # allows more general subclassing with *args
  611. self.numargs = len(shapes)
  612. def _construct_doc(self, docdict, shapes_vals=None):
  613. """Construct the instance docstring with string substitutions."""
  614. tempdict = docdict.copy()
  615. tempdict['name'] = self.name or 'distname'
  616. tempdict['shapes'] = self.shapes or ''
  617. if shapes_vals is None:
  618. shapes_vals = ()
  619. vals = ', '.join('%.3g' % val for val in shapes_vals)
  620. tempdict['vals'] = vals
  621. tempdict['shapes_'] = self.shapes or ''
  622. if self.shapes and self.numargs == 1:
  623. tempdict['shapes_'] += ','
  624. if self.shapes:
  625. tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals)
  626. else:
  627. tempdict['set_vals_stmt'] = ''
  628. if self.shapes is None:
  629. # remove shapes from call parameters if there are none
  630. for item in ['default', 'before_notes']:
  631. tempdict[item] = tempdict[item].replace(
  632. "\n%(shapes)s : array_like\n shape parameters", "")
  633. for i in range(2):
  634. if self.shapes is None:
  635. # necessary because we use %(shapes)s in two forms (w w/o ", ")
  636. self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
  637. try:
  638. self.__doc__ = doccer.docformat(self.__doc__, tempdict)
  639. except TypeError as e:
  640. raise Exception("Unable to construct docstring for "
  641. "distribution \"%s\": %s" %
  642. (self.name, repr(e))) from e
  643. # correct for empty shapes
  644. self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')
  645. def _construct_default_doc(self, longname=None, extradoc=None,
  646. docdict=None, discrete='continuous'):
  647. """Construct instance docstring from the default template."""
  648. if longname is None:
  649. longname = 'A'
  650. if extradoc is None:
  651. extradoc = ''
  652. if extradoc.startswith('\n\n'):
  653. extradoc = extradoc[2:]
  654. self.__doc__ = ''.join(['%s %s random variable.' % (longname, discrete),
  655. '\n\n%(before_notes)s\n', docheaders['notes'],
  656. extradoc, '\n%(example)s'])
  657. self._construct_doc(docdict)
  658. def freeze(self, *args, **kwds):
  659. """Freeze the distribution for the given arguments.
  660. Parameters
  661. ----------
  662. arg1, arg2, arg3,... : array_like
  663. The shape parameter(s) for the distribution. Should include all
  664. the non-optional arguments, may include ``loc`` and ``scale``.
  665. Returns
  666. -------
  667. rv_frozen : rv_frozen instance
  668. The frozen distribution.
  669. """
  670. if isinstance(self, rv_continuous):
  671. return rv_continuous_frozen(self, *args, **kwds)
  672. else:
  673. return rv_discrete_frozen(self, *args, **kwds)
  674. def __call__(self, *args, **kwds):
  675. return self.freeze(*args, **kwds)
  676. __call__.__doc__ = freeze.__doc__
  677. # The actual calculation functions (no basic checking need be done)
  678. # If these are defined, the others won't be looked at.
  679. # Otherwise, the other set can be defined.
  680. def _stats(self, *args, **kwds):
  681. return None, None, None, None
  682. # Noncentral moments (also known as the moment about the origin).
  683. # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime".
  684. # The primed mu is a widely used notation for the noncentral moment.
  685. def _munp(self, n, *args):
  686. # Silence floating point warnings from integration.
  687. with np.errstate(all='ignore'):
  688. vals = self.generic_moment(n, *args)
  689. return vals
  690. def _argcheck_rvs(self, *args, **kwargs):
  691. # Handle broadcasting and size validation of the rvs method.
  692. # Subclasses should not have to override this method.
  693. # The rule is that if `size` is not None, then `size` gives the
  694. # shape of the result (integer values of `size` are treated as
  695. # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)
  696. #
  697. # `args` is expected to contain the shape parameters (if any), the
  698. # location and the scale in a flat tuple (e.g. if there are two
  699. # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).
  700. # The only keyword argument expected is 'size'.
  701. size = kwargs.get('size', None)
  702. all_bcast = np.broadcast_arrays(*args)
  703. def squeeze_left(a):
  704. while a.ndim > 0 and a.shape[0] == 1:
  705. a = a[0]
  706. return a
  707. # Eliminate trivial leading dimensions. In the convention
  708. # used by numpy's random variate generators, trivial leading
  709. # dimensions are effectively ignored. In other words, when `size`
  710. # is given, trivial leading dimensions of the broadcast parameters
  711. # in excess of the number of dimensions in size are ignored, e.g.
  712. # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)
  713. # array([ 1.00104267, 3.00422496, 4.99799278])
  714. # If `size` is not given, the exact broadcast shape is preserved:
  715. # >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])
  716. # array([[[[ 1.00862899, 3.00061431, 4.99867122]]]])
  717. #
  718. all_bcast = [squeeze_left(a) for a in all_bcast]
  719. bcast_shape = all_bcast[0].shape
  720. bcast_ndim = all_bcast[0].ndim
  721. if size is None:
  722. size_ = bcast_shape
  723. else:
  724. size_ = tuple(np.atleast_1d(size))
  725. # Check compatibility of size_ with the broadcast shape of all
  726. # the parameters. This check is intended to be consistent with
  727. # how the numpy random variate generators (e.g. np.random.normal,
  728. # np.random.beta) handle their arguments. The rule is that, if size
  729. # is given, it determines the shape of the output. Broadcasting
  730. # can't change the output size.
  731. # This is the standard broadcasting convention of extending the
  732. # shape with fewer dimensions with enough dimensions of length 1
  733. # so that the two shapes have the same number of dimensions.
  734. ndiff = bcast_ndim - len(size_)
  735. if ndiff < 0:
  736. bcast_shape = (1,)*(-ndiff) + bcast_shape
  737. elif ndiff > 0:
  738. size_ = (1,)*ndiff + size_
  739. # This compatibility test is not standard. In "regular" broadcasting,
  740. # two shapes are compatible if for each dimension, the lengths are the
  741. # same or one of the lengths is 1. Here, the length of a dimension in
  742. # size_ must not be less than the corresponding length in bcast_shape.
  743. ok = all([bcdim == 1 or bcdim == szdim
  744. for (bcdim, szdim) in zip(bcast_shape, size_)])
  745. if not ok:
  746. raise ValueError("size does not match the broadcast shape of "
  747. "the parameters. %s, %s, %s" % (size, size_,
  748. bcast_shape))
  749. param_bcast = all_bcast[:-2]
  750. loc_bcast = all_bcast[-2]
  751. scale_bcast = all_bcast[-1]
  752. return param_bcast, loc_bcast, scale_bcast, size_
  753. # These are the methods you must define (standard form functions)
  754. # NB: generic _pdf, _logpdf, _cdf are different for
  755. # rv_continuous and rv_discrete hence are defined in there
  756. def _argcheck(self, *args):
  757. """Default check for correct values on args and keywords.
  758. Returns condition array of 1's where arguments are correct and
  759. 0's where they are not.
  760. """
  761. cond = 1
  762. for arg in args:
  763. cond = logical_and(cond, (asarray(arg) > 0))
  764. return cond
  765. def _get_support(self, *args, **kwargs):
  766. """Return the support of the (unscaled, unshifted) distribution.
  767. *Must* be overridden by distributions which have support dependent
  768. upon the shape parameters of the distribution. Any such override
  769. *must not* set or change any of the class members, as these members
  770. are shared amongst all instances of the distribution.
  771. Parameters
  772. ----------
  773. arg1, arg2, ... : array_like
  774. The shape parameter(s) for the distribution (see docstring of the
  775. instance object for more information).
  776. Returns
  777. -------
  778. a, b : numeric (float, or int or +/-np.inf)
  779. end-points of the distribution's support for the specified
  780. shape parameters.
  781. """
  782. return self.a, self.b
  783. def _support_mask(self, x, *args):
  784. a, b = self._get_support(*args)
  785. with np.errstate(invalid='ignore'):
  786. return (a <= x) & (x <= b)
  787. def _open_support_mask(self, x, *args):
  788. a, b = self._get_support(*args)
  789. with np.errstate(invalid='ignore'):
  790. return (a < x) & (x < b)
  791. def _rvs(self, *args, size=None, random_state=None):
  792. # This method must handle size being a tuple, and it must
  793. # properly broadcast *args and size. size might be
  794. # an empty tuple, which means a scalar random variate is to be
  795. # generated.
  796. # Use basic inverse cdf algorithm for RV generation as default.
  797. U = random_state.uniform(size=size)
  798. Y = self._ppf(U, *args)
  799. return Y
  800. def _logcdf(self, x, *args):
  801. with np.errstate(divide='ignore'):
  802. return log(self._cdf(x, *args))
  803. def _sf(self, x, *args):
  804. return 1.0-self._cdf(x, *args)
  805. def _logsf(self, x, *args):
  806. with np.errstate(divide='ignore'):
  807. return log(self._sf(x, *args))
  808. def _ppf(self, q, *args):
  809. return self._ppfvec(q, *args)
  810. def _isf(self, q, *args):
  811. return self._ppf(1.0-q, *args) # use correct _ppf for subclasses
  812. # These are actually called, and should not be overwritten if you
  813. # want to keep error checking.
  814. def rvs(self, *args, **kwds):
  815. """Random variates of given type.
  816. Parameters
  817. ----------
  818. arg1, arg2, arg3,... : array_like
  819. The shape parameter(s) for the distribution (see docstring of the
  820. instance object for more information).
  821. loc : array_like, optional
  822. Location parameter (default=0).
  823. scale : array_like, optional
  824. Scale parameter (default=1).
  825. size : int or tuple of ints, optional
  826. Defining number of random variates (default is 1).
  827. random_state : {None, int, `numpy.random.Generator`,
  828. `numpy.random.RandomState`}, optional
  829. If `random_state` is None (or `np.random`), the
  830. `numpy.random.RandomState` singleton is used.
  831. If `random_state` is an int, a new ``RandomState`` instance is
  832. used, seeded with `random_state`.
  833. If `random_state` is already a ``Generator`` or ``RandomState``
  834. instance, that instance is used.
  835. Returns
  836. -------
  837. rvs : ndarray or scalar
  838. Random variates of given `size`.
  839. """
  840. discrete = kwds.pop('discrete', None)
  841. rndm = kwds.pop('random_state', None)
  842. args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
  843. cond = logical_and(self._argcheck(*args), (scale >= 0))
  844. if not np.all(cond):
  845. message = ("Domain error in arguments. The `scale` parameter must "
  846. "be positive for all distributions, and many "
  847. "distributions have restrictions on shape parameters. "
  848. f"Please see the `scipy.stats.{self.name}` "
  849. "documentation for details.")
  850. raise ValueError(message)
  851. if np.all(scale == 0):
  852. return loc*ones(size, 'd')
  853. # extra gymnastics needed for a custom random_state
  854. if rndm is not None:
  855. random_state_saved = self._random_state
  856. random_state = check_random_state(rndm)
  857. else:
  858. random_state = self._random_state
  859. vals = self._rvs(*args, size=size, random_state=random_state)
  860. vals = vals * scale + loc
  861. # do not forget to restore the _random_state
  862. if rndm is not None:
  863. self._random_state = random_state_saved
  864. # Cast to int if discrete
  865. if discrete and not isinstance(self, rv_sample):
  866. if size == ():
  867. vals = int(vals)
  868. else:
  869. vals = vals.astype(np.int64)
  870. return vals
  871. def stats(self, *args, **kwds):
  872. """Some statistics of the given RV.
  873. Parameters
  874. ----------
  875. arg1, arg2, arg3,... : array_like
  876. The shape parameter(s) for the distribution (see docstring of the
  877. instance object for more information)
  878. loc : array_like, optional
  879. location parameter (default=0)
  880. scale : array_like, optional (continuous RVs only)
  881. scale parameter (default=1)
  882. moments : str, optional
  883. composed of letters ['mvsk'] defining which moments to compute:
  884. 'm' = mean,
  885. 'v' = variance,
  886. 's' = (Fisher's) skew,
  887. 'k' = (Fisher's) kurtosis.
  888. (default is 'mv')
  889. Returns
  890. -------
  891. stats : sequence
  892. of requested moments.
  893. """
  894. args, loc, scale, moments = self._parse_args_stats(*args, **kwds)
  895. # scale = 1 by construction for discrete RVs
  896. loc, scale = map(asarray, (loc, scale))
  897. args = tuple(map(asarray, args))
  898. cond = self._argcheck(*args) & (scale > 0) & (loc == loc)
  899. output = []
  900. default = np.full(shape(cond), fill_value=self.badvalue)
  901. # Use only entries that are valid in calculation
  902. if np.any(cond):
  903. goodargs = argsreduce(cond, *(args+(scale, loc)))
  904. scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
  905. if self._stats_has_moments:
  906. mu, mu2, g1, g2 = self._stats(*goodargs,
  907. **{'moments': moments})
  908. else:
  909. mu, mu2, g1, g2 = self._stats(*goodargs)
  910. if 'm' in moments:
  911. if mu is None:
  912. mu = self._munp(1, *goodargs)
  913. out0 = default.copy()
  914. place(out0, cond, mu * scale + loc)
  915. output.append(out0)
  916. if 'v' in moments:
  917. if mu2 is None:
  918. mu2p = self._munp(2, *goodargs)
  919. if mu is None:
  920. mu = self._munp(1, *goodargs)
  921. # if mean is inf then var is also inf
  922. with np.errstate(invalid='ignore'):
  923. mu2 = np.where(~np.isinf(mu), mu2p - mu**2, np.inf)
  924. out0 = default.copy()
  925. place(out0, cond, mu2 * scale * scale)
  926. output.append(out0)
  927. if 's' in moments:
  928. if g1 is None:
  929. mu3p = self._munp(3, *goodargs)
  930. if mu is None:
  931. mu = self._munp(1, *goodargs)
  932. if mu2 is None:
  933. mu2p = self._munp(2, *goodargs)
  934. mu2 = mu2p - mu * mu
  935. with np.errstate(invalid='ignore'):
  936. mu3 = (-mu*mu - 3*mu2)*mu + mu3p
  937. g1 = mu3 / np.power(mu2, 1.5)
  938. out0 = default.copy()
  939. place(out0, cond, g1)
  940. output.append(out0)
  941. if 'k' in moments:
  942. if g2 is None:
  943. mu4p = self._munp(4, *goodargs)
  944. if mu is None:
  945. mu = self._munp(1, *goodargs)
  946. if mu2 is None:
  947. mu2p = self._munp(2, *goodargs)
  948. mu2 = mu2p - mu * mu
  949. if g1 is None:
  950. mu3 = None
  951. else:
  952. # (mu2**1.5) breaks down for nan and inf
  953. mu3 = g1 * np.power(mu2, 1.5)
  954. if mu3 is None:
  955. mu3p = self._munp(3, *goodargs)
  956. with np.errstate(invalid='ignore'):
  957. mu3 = (-mu * mu - 3 * mu2) * mu + mu3p
  958. with np.errstate(invalid='ignore'):
  959. mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p
  960. g2 = mu4 / mu2**2.0 - 3.0
  961. out0 = default.copy()
  962. place(out0, cond, g2)
  963. output.append(out0)
  964. else: # no valid args
  965. output = [default.copy() for _ in moments]
  966. output = [out[()] for out in output]
  967. if len(output) == 1:
  968. return output[0]
  969. else:
  970. return tuple(output)
  971. def entropy(self, *args, **kwds):
  972. """Differential entropy of the RV.
  973. Parameters
  974. ----------
  975. arg1, arg2, arg3,... : array_like
  976. The shape parameter(s) for the distribution (see docstring of the
  977. instance object for more information).
  978. loc : array_like, optional
  979. Location parameter (default=0).
  980. scale : array_like, optional (continuous distributions only).
  981. Scale parameter (default=1).
  982. Notes
  983. -----
  984. Entropy is defined base `e`:
  985. >>> import numpy as np
  986. >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5)))
  987. >>> np.allclose(drv.entropy(), np.log(2.0))
  988. True
  989. """
  990. args, loc, scale = self._parse_args(*args, **kwds)
  991. # NB: for discrete distributions scale=1 by construction in _parse_args
  992. loc, scale = map(asarray, (loc, scale))
  993. args = tuple(map(asarray, args))
  994. cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
  995. output = zeros(shape(cond0), 'd')
  996. place(output, (1-cond0), self.badvalue)
  997. goodargs = argsreduce(cond0, scale, *args)
  998. goodscale = goodargs[0]
  999. goodargs = goodargs[1:]
  1000. place(output, cond0, self.vecentropy(*goodargs) + log(goodscale))
  1001. return output[()]
  1002. def moment(self, order=None, *args, **kwds):
  1003. """non-central moment of distribution of specified order.
  1004. .. deprecated:: 1.9.0
  1005. Parameter `n` is replaced by parameter `order` to avoid name
  1006. collisions with the shape parameter `n` of several distributions.
  1007. Parameter `n` will be removed in SciPy 1.11.0.
  1008. Parameters
  1009. ----------
  1010. order : int, order >= 1
  1011. Order of moment.
  1012. arg1, arg2, arg3,... : float
  1013. The shape parameter(s) for the distribution (see docstring of the
  1014. instance object for more information).
  1015. loc : array_like, optional
  1016. location parameter (default=0)
  1017. scale : array_like, optional
  1018. scale parameter (default=1)
  1019. """
  1020. # This function was originally written with parameter `n`, but `n`
  1021. # is also the name of many distribution shape parameters.
  1022. # This block allows the function to accept both `n` and its
  1023. # replacement `order` during a deprecation period; it can be removed
  1024. # in the second release after 1.9.0.
  1025. # The logic to provide a DeprecationWarning only when `n` is passed
  1026. # as a keyword, accept the new keyword `order`, and otherwise be
  1027. # backward-compatible deserves explanation. We need to look out for
  1028. # the following:
  1029. # * Does the distribution have a shape named `n`?
  1030. # * Is `order` provided? It doesn't matter whether it is provided as a
  1031. # positional or keyword argument; it will be used as the order of the
  1032. # moment rather than a distribution shape parameter because:
  1033. # - The first positional argument of `moment` has always been the
  1034. # order of the moment.
  1035. # - The keyword `order` is new, so it's unambiguous that it refers to
  1036. # the order of the moment.
  1037. # * Is `n` provided as a keyword argument? It _does_ matter whether it
  1038. # is provided as a positional or keyword argument.
  1039. # - The first positional argument of `moment` has always been the
  1040. # order of moment, but
  1041. # - if `n` is provided as a keyword argument, its meaning depends
  1042. # on whether the distribution accepts `n` as a shape parameter.
  1043. has_shape_n = (self.shapes is not None
  1044. and "n" in (self.shapes.split(", ")))
  1045. got_order = order is not None
  1046. got_keyword_n = kwds.get("n", None) is not None
  1047. # These lead to the following cases.
  1048. # Case A: If the distribution _does_ accept `n` as a shape
  1049. # 1. If both `order` and `n` are provided, this is now OK:
  1050. # it is unambiguous that `order` is the order of the moment and `n`
  1051. # is the shape parameter. Previously, this would have caused an
  1052. # error because `n` was provided both as a keyword argument and
  1053. # as the first positional argument. I don't think it is credible for
  1054. # users to rely on this error in their code, though, so I don't see
  1055. # this as a backward compatibility break.
  1056. # 2. If only `n` is provided (as a keyword argument), this would have
  1057. # been an error in the past because `n` would have been treated as
  1058. # the order of the moment while the shape parameter would be
  1059. # missing. It is still the same type of error, but for a different
  1060. # reason: now, `n` is treated as the shape parameter while the
  1061. # order of the moment is missing.
  1062. # 3. If only `order` is provided, no special treament is needed.
  1063. # Clearly this value is intended to be the order of the moment,
  1064. # and the rest of the function will determine whether `n` is
  1065. # available as a shape parameter in `args`.
  1066. # 4. If neither `n` nor `order` is provided, this would have been an
  1067. # error (order of the moment is not provided) and it is still an
  1068. # error for the same reason.
  1069. # Case B: the distribution does _not_ accept `n` as a shape
  1070. # 1. If both `order` and `n` are provided, this was an error, and it
  1071. # still is an error: two values for same parameter.
  1072. # 2. If only `n` is provided (as a keyword argument), this was OK and
  1073. # is still OK, but there shold now be a `DeprecationWarning`. The
  1074. # value of `n` should be removed from `kwds` and stored in `order`.
  1075. # 3. If only `order` is provided, there was no problem before providing
  1076. # only the first argument of `moment`, and there is no problem with
  1077. # that now.
  1078. # 4. If neither `n` nor `order` is provided, this would have been an
  1079. # error (order of the moment is not provided), and it is still an
  1080. # error for the same reason.
  1081. if not got_order and ((not got_keyword_n) # A4 and B4
  1082. or (got_keyword_n and has_shape_n)): # A2
  1083. message = ("moment() missing 1 required "
  1084. "positional argument: `order`")
  1085. raise TypeError(message)
  1086. if got_keyword_n and not has_shape_n:
  1087. if got_order: # B1
  1088. # this will change to "moment got unexpected argument n"
  1089. message = "moment() got multiple values for first argument"
  1090. raise TypeError(message)
  1091. else: # B2
  1092. message = ("Use of keyword argument 'n' for method 'moment is"
  1093. " deprecated and will be removed in SciPy 1.11.0. "
  1094. "Use first positional argument or keyword argument"
  1095. " 'order' instead.")
  1096. order = kwds.pop("n")
  1097. warnings.warn(message, DeprecationWarning, stacklevel=2)
  1098. n = order
  1099. # No special treatment of A1, A3, or B3 is needed because the order
  1100. # of the moment is now in variable `n` and the shape parameter, if
  1101. # needed, will be fished out of `args` or `kwds` by _parse_args
  1102. # A3 might still cause an error if the shape parameter called `n`
  1103. # is not found in `args`.
  1104. shapes, loc, scale = self._parse_args(*args, **kwds)
  1105. args = np.broadcast_arrays(*(*shapes, loc, scale))
  1106. *shapes, loc, scale = args
  1107. i0 = np.logical_and(self._argcheck(*shapes), scale > 0)
  1108. i1 = np.logical_and(i0, loc == 0)
  1109. i2 = np.logical_and(i0, loc != 0)
  1110. args = argsreduce(i0, *shapes, loc, scale)
  1111. *shapes, loc, scale = args
  1112. if (floor(n) != n):
  1113. raise ValueError("Moment must be an integer.")
  1114. if (n < 0):
  1115. raise ValueError("Moment must be positive.")
  1116. mu, mu2, g1, g2 = None, None, None, None
  1117. if (n > 0) and (n < 5):
  1118. if self._stats_has_moments:
  1119. mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'mvsk'}[n]}
  1120. else:
  1121. mdict = {}
  1122. mu, mu2, g1, g2 = self._stats(*shapes, **mdict)
  1123. val = np.empty(loc.shape) # val needs to be indexed by loc
  1124. val[...] = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, shapes)
  1125. # Convert to transformed X = L + S*Y
  1126. # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n)
  1127. result = zeros(i0.shape)
  1128. place(result, ~i0, self.badvalue)
  1129. if i1.any():
  1130. res1 = scale[loc == 0]**n * val[loc == 0]
  1131. place(result, i1, res1)
  1132. if i2.any():
  1133. mom = [mu, mu2, g1, g2]
  1134. arrs = [i for i in mom if i is not None]
  1135. idx = [i for i in range(4) if mom[i] is not None]
  1136. if any(idx):
  1137. arrs = argsreduce(loc != 0, *arrs)
  1138. j = 0
  1139. for i in idx:
  1140. mom[i] = arrs[j]
  1141. j += 1
  1142. mu, mu2, g1, g2 = mom
  1143. args = argsreduce(loc != 0, *shapes, loc, scale, val)
  1144. *shapes, loc, scale, val = args
  1145. res2 = zeros(loc.shape, dtype='d')
  1146. fac = scale / loc
  1147. for k in range(n):
  1148. valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp,
  1149. shapes)
  1150. res2 += comb(n, k, exact=True)*fac**k * valk
  1151. res2 += fac**n * val
  1152. res2 *= loc**n
  1153. place(result, i2, res2)
  1154. return result[()]
  1155. def median(self, *args, **kwds):
  1156. """Median of the distribution.
  1157. Parameters
  1158. ----------
  1159. arg1, arg2, arg3,... : array_like
  1160. The shape parameter(s) for the distribution (see docstring of the
  1161. instance object for more information)
  1162. loc : array_like, optional
  1163. Location parameter, Default is 0.
  1164. scale : array_like, optional
  1165. Scale parameter, Default is 1.
  1166. Returns
  1167. -------
  1168. median : float
  1169. The median of the distribution.
  1170. See Also
  1171. --------
  1172. rv_discrete.ppf
  1173. Inverse of the CDF
  1174. """
  1175. return self.ppf(0.5, *args, **kwds)
  1176. def mean(self, *args, **kwds):
  1177. """Mean of the distribution.
  1178. Parameters
  1179. ----------
  1180. arg1, arg2, arg3,... : array_like
  1181. The shape parameter(s) for the distribution (see docstring of the
  1182. instance object for more information)
  1183. loc : array_like, optional
  1184. location parameter (default=0)
  1185. scale : array_like, optional
  1186. scale parameter (default=1)
  1187. Returns
  1188. -------
  1189. mean : float
  1190. the mean of the distribution
  1191. """
  1192. kwds['moments'] = 'm'
  1193. res = self.stats(*args, **kwds)
  1194. if isinstance(res, ndarray) and res.ndim == 0:
  1195. return res[()]
  1196. return res
  1197. def var(self, *args, **kwds):
  1198. """Variance of the distribution.
  1199. Parameters
  1200. ----------
  1201. arg1, arg2, arg3,... : array_like
  1202. The shape parameter(s) for the distribution (see docstring of the
  1203. instance object for more information)
  1204. loc : array_like, optional
  1205. location parameter (default=0)
  1206. scale : array_like, optional
  1207. scale parameter (default=1)
  1208. Returns
  1209. -------
  1210. var : float
  1211. the variance of the distribution
  1212. """
  1213. kwds['moments'] = 'v'
  1214. res = self.stats(*args, **kwds)
  1215. if isinstance(res, ndarray) and res.ndim == 0:
  1216. return res[()]
  1217. return res
  1218. def std(self, *args, **kwds):
  1219. """Standard deviation of the distribution.
  1220. Parameters
  1221. ----------
  1222. arg1, arg2, arg3,... : array_like
  1223. The shape parameter(s) for the distribution (see docstring of the
  1224. instance object for more information)
  1225. loc : array_like, optional
  1226. location parameter (default=0)
  1227. scale : array_like, optional
  1228. scale parameter (default=1)
  1229. Returns
  1230. -------
  1231. std : float
  1232. standard deviation of the distribution
  1233. """
  1234. kwds['moments'] = 'v'
  1235. res = sqrt(self.stats(*args, **kwds))
  1236. return res
  1237. def interval(self, confidence=None, *args, **kwds):
  1238. """Confidence interval with equal areas around the median.
  1239. .. deprecated:: 1.9.0
  1240. Parameter `alpha` is replaced by parameter `confidence` to avoid
  1241. name collisions with the shape parameter `alpha` of some
  1242. distributions. Parameter `alpha` will be removed in SciPy 1.11.0.
  1243. Parameters
  1244. ----------
  1245. confidence : array_like of float
  1246. Probability that an rv will be drawn from the returned range.
  1247. Each value should be in the range [0, 1].
  1248. arg1, arg2, ... : array_like
  1249. The shape parameter(s) for the distribution (see docstring of the
  1250. instance object for more information).
  1251. loc : array_like, optional
  1252. location parameter, Default is 0.
  1253. scale : array_like, optional
  1254. scale parameter, Default is 1.
  1255. Returns
  1256. -------
  1257. a, b : ndarray of float
  1258. end-points of range that contain ``100 * alpha %`` of the rv's
  1259. possible values.
  1260. Notes
  1261. -----
  1262. This is implemented as ``ppf([p_tail, 1-p_tail])``, where
  1263. ``ppf`` is the inverse cumulative distribution function and
  1264. ``p_tail = (1-confidence)/2``. Suppose ``[c, d]`` is the support of a
  1265. discrete distribution; then ``ppf([0, 1]) == (c-1, d)``. Therefore,
  1266. when ``confidence=1`` and the distribution is discrete, the left end
  1267. of the interval will be beyond the support of the distribution.
  1268. For discrete distributions, the interval will limit the probability
  1269. in each tail to be less than or equal to ``p_tail`` (usually
  1270. strictly less).
  1271. """
  1272. # This function was originally written with parameter `alpha`, but
  1273. # `alpha` is also the name of a shape parameter of two distributions.
  1274. # This block allows the function to accept both `alpha` and its
  1275. # replacement `confidence` during a deprecation period; it can be
  1276. # removed in the second release after 1.9.0.
  1277. # See description of logic in `moment` method.
  1278. has_shape_alpha = (self.shapes is not None
  1279. and "alpha" in (self.shapes.split(", ")))
  1280. got_confidence = confidence is not None
  1281. got_keyword_alpha = kwds.get("alpha", None) is not None
  1282. if not got_confidence and ((not got_keyword_alpha)
  1283. or (got_keyword_alpha and has_shape_alpha)):
  1284. message = ("interval() missing 1 required positional argument: "
  1285. "`confidence`")
  1286. raise TypeError(message)
  1287. if got_keyword_alpha and not has_shape_alpha:
  1288. if got_confidence:
  1289. # this will change to "interval got unexpected argument alpha"
  1290. message = "interval() got multiple values for first argument"
  1291. raise TypeError(message)
  1292. else:
  1293. message = ("Use of keyword argument 'alpha' for method "
  1294. "'interval' is deprecated and wil be removed in "
  1295. "SciPy 1.11.0. Use first positional argument or "
  1296. "keyword argument 'confidence' instead.")
  1297. confidence = kwds.pop("alpha")
  1298. warnings.warn(message, DeprecationWarning, stacklevel=2)
  1299. alpha = confidence
  1300. alpha = asarray(alpha)
  1301. if np.any((alpha > 1) | (alpha < 0)):
  1302. raise ValueError("alpha must be between 0 and 1 inclusive")
  1303. q1 = (1.0-alpha)/2
  1304. q2 = (1.0+alpha)/2
  1305. a = self.ppf(q1, *args, **kwds)
  1306. b = self.ppf(q2, *args, **kwds)
  1307. return a, b
  1308. def support(self, *args, **kwargs):
  1309. """Support of the distribution.
  1310. Parameters
  1311. ----------
  1312. arg1, arg2, ... : array_like
  1313. The shape parameter(s) for the distribution (see docstring of the
  1314. instance object for more information).
  1315. loc : array_like, optional
  1316. location parameter, Default is 0.
  1317. scale : array_like, optional
  1318. scale parameter, Default is 1.
  1319. Returns
  1320. -------
  1321. a, b : array_like
  1322. end-points of the distribution's support.
  1323. """
  1324. args, loc, scale = self._parse_args(*args, **kwargs)
  1325. arrs = np.broadcast_arrays(*args, loc, scale)
  1326. args, loc, scale = arrs[:-2], arrs[-2], arrs[-1]
  1327. cond = self._argcheck(*args) & (scale > 0)
  1328. _a, _b = self._get_support(*args)
  1329. if cond.all():
  1330. return _a * scale + loc, _b * scale + loc
  1331. elif cond.ndim == 0:
  1332. return self.badvalue, self.badvalue
  1333. # promote bounds to at least float to fill in the badvalue
  1334. _a, _b = np.asarray(_a).astype('d'), np.asarray(_b).astype('d')
  1335. out_a, out_b = _a * scale + loc, _b * scale + loc
  1336. place(out_a, 1-cond, self.badvalue)
  1337. place(out_b, 1-cond, self.badvalue)
  1338. return out_a, out_b
  1339. def nnlf(self, theta, x):
  1340. """Negative loglikelihood function.
  1341. Notes
  1342. -----
  1343. This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the
  1344. parameters (including loc and scale).
  1345. """
  1346. loc, scale, args = self._unpack_loc_scale(theta)
  1347. if not self._argcheck(*args) or scale <= 0:
  1348. return inf
  1349. x = (asarray(x)-loc) / scale
  1350. n_log_scale = len(x) * log(scale)
  1351. if np.any(~self._support_mask(x, *args)):
  1352. return inf
  1353. return self._nnlf(x, *args) + n_log_scale
  1354. def _nnlf(self, x, *args):
  1355. return -np.sum(self._logpxf(x, *args), axis=0)
  1356. def _nlff_and_penalty(self, x, args, log_fitfun):
  1357. # negative log fit function
  1358. cond0 = ~self._support_mask(x, *args)
  1359. n_bad = np.count_nonzero(cond0, axis=0)
  1360. if n_bad > 0:
  1361. x = argsreduce(~cond0, x)[0]
  1362. logff = log_fitfun(x, *args)
  1363. finite_logff = np.isfinite(logff)
  1364. n_bad += np.sum(~finite_logff, axis=0)
  1365. if n_bad > 0:
  1366. penalty = n_bad * log(_XMAX) * 100
  1367. return -np.sum(logff[finite_logff], axis=0) + penalty
  1368. return -np.sum(logff, axis=0)
  1369. def _penalized_nnlf(self, theta, x):
  1370. """Penalized negative loglikelihood function.
  1371. i.e., - sum (log pdf(x, theta), axis=0) + penalty
  1372. where theta are the parameters (including loc and scale)
  1373. """
  1374. loc, scale, args = self._unpack_loc_scale(theta)
  1375. if not self._argcheck(*args) or scale <= 0:
  1376. return inf
  1377. x = asarray((x-loc) / scale)
  1378. n_log_scale = len(x) * log(scale)
  1379. return self._nlff_and_penalty(x, args, self._logpxf) + n_log_scale
  1380. def _penalized_nlpsf(self, theta, x):
  1381. """Penalized negative log product spacing function.
  1382. i.e., - sum (log (diff (cdf (x, theta))), axis=0) + penalty
  1383. where theta are the parameters (including loc and scale)
  1384. Follows reference [1] of scipy.stats.fit
  1385. """
  1386. loc, scale, args = self._unpack_loc_scale(theta)
  1387. if not self._argcheck(*args) or scale <= 0:
  1388. return inf
  1389. x = (np.sort(x) - loc)/scale
  1390. def log_psf(x, *args):
  1391. x, lj = np.unique(x, return_counts=True) # fast for sorted x
  1392. cdf_data = self._cdf(x, *args) if x.size else []
  1393. if not (x.size and 1 - cdf_data[-1] <= 0):
  1394. cdf = np.concatenate(([0], cdf_data, [1]))
  1395. lj = np.concatenate((lj, [1]))
  1396. else:
  1397. cdf = np.concatenate(([0], cdf_data))
  1398. # here we could use logcdf w/ logsumexp trick to take differences,
  1399. # but in the context of the method, it seems unlikely to matter
  1400. return lj * np.log(np.diff(cdf) / lj)
  1401. return self._nlff_and_penalty(x, args, log_psf)
  1402. class _ShapeInfo:
  1403. def __init__(self, name, integrality=False, domain=(-np.inf, np.inf),
  1404. inclusive=(True, True)):
  1405. self.name = name
  1406. self.integrality = integrality
  1407. domain = list(domain)
  1408. if np.isfinite(domain[0]) and not inclusive[0]:
  1409. domain[0] = np.nextafter(domain[0], np.inf)
  1410. if np.isfinite(domain[1]) and not inclusive[1]:
  1411. domain[1] = np.nextafter(domain[1], -np.inf)
  1412. self.domain = domain
  1413. def _get_fixed_fit_value(kwds, names):
  1414. """
  1415. Given names such as `['f0', 'fa', 'fix_a']`, check that there is
  1416. at most one non-None value in `kwds` associaed with those names.
  1417. Return that value, or None if none of the names occur in `kwds`.
  1418. As a side effect, all occurrences of those names in `kwds` are
  1419. removed.
  1420. """
  1421. vals = [(name, kwds.pop(name)) for name in names if name in kwds]
  1422. if len(vals) > 1:
  1423. repeated = [name for name, val in vals]
  1424. raise ValueError("fit method got multiple keyword arguments to "
  1425. "specify the same fixed parameter: " +
  1426. ', '.join(repeated))
  1427. return vals[0][1] if vals else None
  1428. # continuous random variables: implement maybe later
  1429. #
  1430. # hf --- Hazard Function (PDF / SF)
  1431. # chf --- Cumulative hazard function (-log(SF))
  1432. # psf --- Probability sparsity function (reciprocal of the pdf) in
  1433. # units of percent-point-function (as a function of q).
  1434. # Also, the derivative of the percent-point function.
  1435. class rv_continuous(rv_generic):
  1436. """A generic continuous random variable class meant for subclassing.
  1437. `rv_continuous` is a base class to construct specific distribution classes
  1438. and instances for continuous random variables. It cannot be used
  1439. directly as a distribution.
  1440. Parameters
  1441. ----------
  1442. momtype : int, optional
  1443. The type of generic moment calculation to use: 0 for pdf, 1 (default)
  1444. for ppf.
  1445. a : float, optional
  1446. Lower bound of the support of the distribution, default is minus
  1447. infinity.
  1448. b : float, optional
  1449. Upper bound of the support of the distribution, default is plus
  1450. infinity.
  1451. xtol : float, optional
  1452. The tolerance for fixed point calculation for generic ppf.
  1453. badvalue : float, optional
  1454. The value in a result arrays that indicates a value that for which
  1455. some argument restriction is violated, default is np.nan.
  1456. name : str, optional
  1457. The name of the instance. This string is used to construct the default
  1458. example for distributions.
  1459. longname : str, optional
  1460. This string is used as part of the first line of the docstring returned
  1461. when a subclass has no docstring of its own. Note: `longname` exists
  1462. for backwards compatibility, do not use for new subclasses.
  1463. shapes : str, optional
  1464. The shape of the distribution. For example ``"m, n"`` for a
  1465. distribution that takes two integers as the two shape arguments for all
  1466. its methods. If not provided, shape parameters will be inferred from
  1467. the signature of the private methods, ``_pdf`` and ``_cdf`` of the
  1468. instance.
  1469. extradoc : str, optional, deprecated
  1470. This string is used as the last part of the docstring returned when a
  1471. subclass has no docstring of its own. Note: `extradoc` exists for
  1472. backwards compatibility and will be removed in SciPy 1.11.0, do not
  1473. use for new subclasses.
  1474. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  1475. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  1476. singleton is used.
  1477. If `seed` is an int, a new ``RandomState`` instance is used,
  1478. seeded with `seed`.
  1479. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  1480. that instance is used.
  1481. Methods
  1482. -------
  1483. rvs
  1484. pdf
  1485. logpdf
  1486. cdf
  1487. logcdf
  1488. sf
  1489. logsf
  1490. ppf
  1491. isf
  1492. moment
  1493. stats
  1494. entropy
  1495. expect
  1496. median
  1497. mean
  1498. std
  1499. var
  1500. interval
  1501. __call__
  1502. fit
  1503. fit_loc_scale
  1504. nnlf
  1505. support
  1506. Notes
  1507. -----
  1508. Public methods of an instance of a distribution class (e.g., ``pdf``,
  1509. ``cdf``) check their arguments and pass valid arguments to private,
  1510. computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid
  1511. if it is within the support of the distribution.
  1512. Whether a shape parameter is valid is decided by an ``_argcheck`` method
  1513. (which defaults to checking that its arguments are strictly positive.)
  1514. **Subclassing**
  1515. New random variables can be defined by subclassing the `rv_continuous` class
  1516. and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized
  1517. to location 0 and scale 1).
  1518. If positive argument checking is not correct for your RV
  1519. then you will also need to re-define the ``_argcheck`` method.
  1520. For most of the scipy.stats distributions, the support interval doesn't
  1521. depend on the shape parameters. ``x`` being in the support interval is
  1522. equivalent to ``self.a <= x <= self.b``. If either of the endpoints of
  1523. the support do depend on the shape parameters, then
  1524. i) the distribution must implement the ``_get_support`` method; and
  1525. ii) those dependent endpoints must be omitted from the distribution's
  1526. call to the ``rv_continuous`` initializer.
  1527. Correct, but potentially slow defaults exist for the remaining
  1528. methods but for speed and/or accuracy you can over-ride::
  1529. _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf
  1530. The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``,
  1531. applied to a uniform random variate. In order to generate random variates
  1532. efficiently, either the default ``_ppf`` needs to be overwritten (e.g.
  1533. if the inverse cdf can expressed in an explicit form) or a sampling
  1534. method needs to be implemented in a custom ``_rvs`` method.
  1535. If possible, you should override ``_isf``, ``_sf`` or ``_logsf``.
  1536. The main reason would be to improve numerical accuracy: for example,
  1537. the survival function ``_sf`` is computed as ``1 - _cdf`` which can
  1538. result in loss of precision if ``_cdf(x)`` is close to one.
  1539. **Methods that can be overwritten by subclasses**
  1540. ::
  1541. _rvs
  1542. _pdf
  1543. _cdf
  1544. _sf
  1545. _ppf
  1546. _isf
  1547. _stats
  1548. _munp
  1549. _entropy
  1550. _argcheck
  1551. _get_support
  1552. There are additional (internal and private) generic methods that can
  1553. be useful for cross-checking and for debugging, but might work in all
  1554. cases when directly called.
  1555. A note on ``shapes``: subclasses need not specify them explicitly. In this
  1556. case, `shapes` will be automatically deduced from the signatures of the
  1557. overridden methods (`pdf`, `cdf` etc).
  1558. If, for some reason, you prefer to avoid relying on introspection, you can
  1559. specify ``shapes`` explicitly as an argument to the instance constructor.
  1560. **Frozen Distributions**
  1561. Normally, you must provide shape parameters (and, optionally, location and
  1562. scale parameters to each call of a method of a distribution.
  1563. Alternatively, the object may be called (as a function) to fix the shape,
  1564. location, and scale parameters returning a "frozen" continuous RV object:
  1565. rv = generic(<shape(s)>, loc=0, scale=1)
  1566. `rv_frozen` object with the same methods but holding the given shape,
  1567. location, and scale fixed
  1568. **Statistics**
  1569. Statistics are computed using numerical integration by default.
  1570. For speed you can redefine this using ``_stats``:
  1571. - take shape parameters and return mu, mu2, g1, g2
  1572. - If you can't compute one of these, return it as None
  1573. - Can also be defined with a keyword argument ``moments``, which is a
  1574. string composed of "m", "v", "s", and/or "k".
  1575. Only the components appearing in string should be computed and
  1576. returned in the order "m", "v", "s", or "k" with missing values
  1577. returned as None.
  1578. Alternatively, you can override ``_munp``, which takes ``n`` and shape
  1579. parameters and returns the n-th non-central moment of the distribution.
  1580. Examples
  1581. --------
  1582. To create a new Gaussian distribution, we would do the following:
  1583. >>> from scipy.stats import rv_continuous
  1584. >>> class gaussian_gen(rv_continuous):
  1585. ... "Gaussian distribution"
  1586. ... def _pdf(self, x):
  1587. ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi)
  1588. >>> gaussian = gaussian_gen(name='gaussian')
  1589. ``scipy.stats`` distributions are *instances*, so here we subclass
  1590. `rv_continuous` and create an instance. With this, we now have
  1591. a fully functional distribution with all relevant methods automagically
  1592. generated by the framework.
  1593. Note that above we defined a standard normal distribution, with zero mean
  1594. and unit variance. Shifting and scaling of the distribution can be done
  1595. by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)``
  1596. essentially computes ``y = (x - loc) / scale`` and
  1597. ``gaussian._pdf(y) / scale``.
  1598. """
  1599. def __init__(self, momtype=1, a=None, b=None, xtol=1e-14,
  1600. badvalue=None, name=None, longname=None,
  1601. shapes=None, extradoc=None, seed=None):
  1602. super().__init__(seed)
  1603. if extradoc is not None:
  1604. warnings.warn("extradoc is deprecated and will be removed in "
  1605. "SciPy 1.11.0", DeprecationWarning)
  1606. # save the ctor parameters, cf generic freeze
  1607. self._ctor_param = dict(
  1608. momtype=momtype, a=a, b=b, xtol=xtol,
  1609. badvalue=badvalue, name=name, longname=longname,
  1610. shapes=shapes, extradoc=extradoc, seed=seed)
  1611. if badvalue is None:
  1612. badvalue = nan
  1613. if name is None:
  1614. name = 'Distribution'
  1615. self.badvalue = badvalue
  1616. self.name = name
  1617. self.a = a
  1618. self.b = b
  1619. if a is None:
  1620. self.a = -inf
  1621. if b is None:
  1622. self.b = inf
  1623. self.xtol = xtol
  1624. self.moment_type = momtype
  1625. self.shapes = shapes
  1626. self.extradoc = extradoc
  1627. self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],
  1628. locscale_in='loc=0, scale=1',
  1629. locscale_out='loc, scale')
  1630. self._attach_methods()
  1631. if longname is None:
  1632. if name[0] in ['aeiouAEIOU']:
  1633. hstr = "An "
  1634. else:
  1635. hstr = "A "
  1636. longname = hstr + name
  1637. if sys.flags.optimize < 2:
  1638. # Skip adding docstrings if interpreter is run with -OO
  1639. if self.__doc__ is None:
  1640. self._construct_default_doc(longname=longname,
  1641. extradoc=extradoc,
  1642. docdict=docdict,
  1643. discrete='continuous')
  1644. else:
  1645. dct = dict(distcont)
  1646. self._construct_doc(docdict, dct.get(self.name))
  1647. def __getstate__(self):
  1648. dct = self.__dict__.copy()
  1649. # these methods will be remade in __setstate__
  1650. # _random_state attribute is taken care of by rv_generic
  1651. attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
  1652. "_cdfvec", "_ppfvec", "vecentropy", "generic_moment"]
  1653. [dct.pop(attr, None) for attr in attrs]
  1654. return dct
  1655. def _attach_methods(self):
  1656. """
  1657. Attaches dynamically created methods to the rv_continuous instance.
  1658. """
  1659. # _attach_methods is responsible for calling _attach_argparser_methods
  1660. self._attach_argparser_methods()
  1661. # nin correction
  1662. self._ppfvec = vectorize(self._ppf_single, otypes='d')
  1663. self._ppfvec.nin = self.numargs + 1
  1664. self.vecentropy = vectorize(self._entropy, otypes='d')
  1665. self._cdfvec = vectorize(self._cdf_single, otypes='d')
  1666. self._cdfvec.nin = self.numargs + 1
  1667. if self.moment_type == 0:
  1668. self.generic_moment = vectorize(self._mom0_sc, otypes='d')
  1669. else:
  1670. self.generic_moment = vectorize(self._mom1_sc, otypes='d')
  1671. # Because of the *args argument of _mom0_sc, vectorize cannot count the
  1672. # number of arguments correctly.
  1673. self.generic_moment.nin = self.numargs + 1
  1674. def _updated_ctor_param(self):
  1675. """Return the current version of _ctor_param, possibly updated by user.
  1676. Used by freezing.
  1677. Keep this in sync with the signature of __init__.
  1678. """
  1679. dct = self._ctor_param.copy()
  1680. dct['a'] = self.a
  1681. dct['b'] = self.b
  1682. dct['xtol'] = self.xtol
  1683. dct['badvalue'] = self.badvalue
  1684. dct['name'] = self.name
  1685. dct['shapes'] = self.shapes
  1686. dct['extradoc'] = self.extradoc
  1687. return dct
  1688. def _ppf_to_solve(self, x, q, *args):
  1689. return self.cdf(*(x, )+args)-q
  1690. def _ppf_single(self, q, *args):
  1691. factor = 10.
  1692. left, right = self._get_support(*args)
  1693. if np.isinf(left):
  1694. left = min(-factor, right)
  1695. while self._ppf_to_solve(left, q, *args) > 0.:
  1696. left, right = left * factor, left
  1697. # left is now such that cdf(left) <= q
  1698. # if right has changed, then cdf(right) > q
  1699. if np.isinf(right):
  1700. right = max(factor, left)
  1701. while self._ppf_to_solve(right, q, *args) < 0.:
  1702. left, right = right, right * factor
  1703. # right is now such that cdf(right) >= q
  1704. return optimize.brentq(self._ppf_to_solve,
  1705. left, right, args=(q,)+args, xtol=self.xtol)
  1706. # moment from definition
  1707. def _mom_integ0(self, x, m, *args):
  1708. return x**m * self.pdf(x, *args)
  1709. def _mom0_sc(self, m, *args):
  1710. _a, _b = self._get_support(*args)
  1711. return integrate.quad(self._mom_integ0, _a, _b,
  1712. args=(m,)+args)[0]
  1713. # moment calculated using ppf
  1714. def _mom_integ1(self, q, m, *args):
  1715. return (self.ppf(q, *args))**m
  1716. def _mom1_sc(self, m, *args):
  1717. return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
  1718. def _pdf(self, x, *args):
  1719. return _derivative(self._cdf, x, dx=1e-5, args=args, order=5)
  1720. # Could also define any of these
  1721. def _logpdf(self, x, *args):
  1722. p = self._pdf(x, *args)
  1723. with np.errstate(divide='ignore'):
  1724. return log(p)
  1725. def _logpxf(self, x, *args):
  1726. # continuous distributions have PDF, discrete have PMF, but sometimes
  1727. # the distinction doesn't matter. This lets us use `_logpxf` for both
  1728. # discrete and continuous distributions.
  1729. return self._logpdf(x, *args)
  1730. def _cdf_single(self, x, *args):
  1731. _a, _b = self._get_support(*args)
  1732. return integrate.quad(self._pdf, _a, x, args=args)[0]
  1733. def _cdf(self, x, *args):
  1734. return self._cdfvec(x, *args)
  1735. # generic _argcheck, _logcdf, _sf, _logsf, _ppf, _isf, _rvs are defined
  1736. # in rv_generic
  1737. def pdf(self, x, *args, **kwds):
  1738. """Probability density function at x of the given RV.
  1739. Parameters
  1740. ----------
  1741. x : array_like
  1742. quantiles
  1743. arg1, arg2, arg3,... : array_like
  1744. The shape parameter(s) for the distribution (see docstring of the
  1745. instance object for more information)
  1746. loc : array_like, optional
  1747. location parameter (default=0)
  1748. scale : array_like, optional
  1749. scale parameter (default=1)
  1750. Returns
  1751. -------
  1752. pdf : ndarray
  1753. Probability density function evaluated at x
  1754. """
  1755. args, loc, scale = self._parse_args(*args, **kwds)
  1756. x, loc, scale = map(asarray, (x, loc, scale))
  1757. args = tuple(map(asarray, args))
  1758. dtyp = np.promote_types(x.dtype, np.float64)
  1759. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1760. cond0 = self._argcheck(*args) & (scale > 0)
  1761. cond1 = self._support_mask(x, *args) & (scale > 0)
  1762. cond = cond0 & cond1
  1763. output = zeros(shape(cond), dtyp)
  1764. putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
  1765. if np.any(cond):
  1766. goodargs = argsreduce(cond, *((x,)+args+(scale,)))
  1767. scale, goodargs = goodargs[-1], goodargs[:-1]
  1768. place(output, cond, self._pdf(*goodargs) / scale)
  1769. if output.ndim == 0:
  1770. return output[()]
  1771. return output
  1772. def logpdf(self, x, *args, **kwds):
  1773. """Log of the probability density function at x of the given RV.
  1774. This uses a more numerically accurate calculation if available.
  1775. Parameters
  1776. ----------
  1777. x : array_like
  1778. quantiles
  1779. arg1, arg2, arg3,... : array_like
  1780. The shape parameter(s) for the distribution (see docstring of the
  1781. instance object for more information)
  1782. loc : array_like, optional
  1783. location parameter (default=0)
  1784. scale : array_like, optional
  1785. scale parameter (default=1)
  1786. Returns
  1787. -------
  1788. logpdf : array_like
  1789. Log of the probability density function evaluated at x
  1790. """
  1791. args, loc, scale = self._parse_args(*args, **kwds)
  1792. x, loc, scale = map(asarray, (x, loc, scale))
  1793. args = tuple(map(asarray, args))
  1794. dtyp = np.promote_types(x.dtype, np.float64)
  1795. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1796. cond0 = self._argcheck(*args) & (scale > 0)
  1797. cond1 = self._support_mask(x, *args) & (scale > 0)
  1798. cond = cond0 & cond1
  1799. output = empty(shape(cond), dtyp)
  1800. output.fill(NINF)
  1801. putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
  1802. if np.any(cond):
  1803. goodargs = argsreduce(cond, *((x,)+args+(scale,)))
  1804. scale, goodargs = goodargs[-1], goodargs[:-1]
  1805. place(output, cond, self._logpdf(*goodargs) - log(scale))
  1806. if output.ndim == 0:
  1807. return output[()]
  1808. return output
  1809. def cdf(self, x, *args, **kwds):
  1810. """
  1811. Cumulative distribution function of the given RV.
  1812. Parameters
  1813. ----------
  1814. x : array_like
  1815. quantiles
  1816. arg1, arg2, arg3,... : array_like
  1817. The shape parameter(s) for the distribution (see docstring of the
  1818. instance object for more information)
  1819. loc : array_like, optional
  1820. location parameter (default=0)
  1821. scale : array_like, optional
  1822. scale parameter (default=1)
  1823. Returns
  1824. -------
  1825. cdf : ndarray
  1826. Cumulative distribution function evaluated at `x`
  1827. """
  1828. args, loc, scale = self._parse_args(*args, **kwds)
  1829. x, loc, scale = map(asarray, (x, loc, scale))
  1830. args = tuple(map(asarray, args))
  1831. _a, _b = self._get_support(*args)
  1832. dtyp = np.promote_types(x.dtype, np.float64)
  1833. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1834. cond0 = self._argcheck(*args) & (scale > 0)
  1835. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1836. cond2 = (x >= np.asarray(_b)) & cond0
  1837. cond = cond0 & cond1
  1838. output = zeros(shape(cond), dtyp)
  1839. place(output, (1-cond0)+np.isnan(x), self.badvalue)
  1840. place(output, cond2, 1.0)
  1841. if np.any(cond): # call only if at least 1 entry
  1842. goodargs = argsreduce(cond, *((x,)+args))
  1843. place(output, cond, self._cdf(*goodargs))
  1844. if output.ndim == 0:
  1845. return output[()]
  1846. return output
  1847. def logcdf(self, x, *args, **kwds):
  1848. """Log of the cumulative distribution function at x of the given RV.
  1849. Parameters
  1850. ----------
  1851. x : array_like
  1852. quantiles
  1853. arg1, arg2, arg3,... : array_like
  1854. The shape parameter(s) for the distribution (see docstring of the
  1855. instance object for more information)
  1856. loc : array_like, optional
  1857. location parameter (default=0)
  1858. scale : array_like, optional
  1859. scale parameter (default=1)
  1860. Returns
  1861. -------
  1862. logcdf : array_like
  1863. Log of the cumulative distribution function evaluated at x
  1864. """
  1865. args, loc, scale = self._parse_args(*args, **kwds)
  1866. x, loc, scale = map(asarray, (x, loc, scale))
  1867. args = tuple(map(asarray, args))
  1868. _a, _b = self._get_support(*args)
  1869. dtyp = np.promote_types(x.dtype, np.float64)
  1870. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1871. cond0 = self._argcheck(*args) & (scale > 0)
  1872. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1873. cond2 = (x >= _b) & cond0
  1874. cond = cond0 & cond1
  1875. output = empty(shape(cond), dtyp)
  1876. output.fill(NINF)
  1877. place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue)
  1878. place(output, cond2, 0.0)
  1879. if np.any(cond): # call only if at least 1 entry
  1880. goodargs = argsreduce(cond, *((x,)+args))
  1881. place(output, cond, self._logcdf(*goodargs))
  1882. if output.ndim == 0:
  1883. return output[()]
  1884. return output
  1885. def sf(self, x, *args, **kwds):
  1886. """Survival function (1 - `cdf`) at x of the given RV.
  1887. Parameters
  1888. ----------
  1889. x : array_like
  1890. quantiles
  1891. arg1, arg2, arg3,... : array_like
  1892. The shape parameter(s) for the distribution (see docstring of the
  1893. instance object for more information)
  1894. loc : array_like, optional
  1895. location parameter (default=0)
  1896. scale : array_like, optional
  1897. scale parameter (default=1)
  1898. Returns
  1899. -------
  1900. sf : array_like
  1901. Survival function evaluated at x
  1902. """
  1903. args, loc, scale = self._parse_args(*args, **kwds)
  1904. x, loc, scale = map(asarray, (x, loc, scale))
  1905. args = tuple(map(asarray, args))
  1906. _a, _b = self._get_support(*args)
  1907. dtyp = np.promote_types(x.dtype, np.float64)
  1908. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1909. cond0 = self._argcheck(*args) & (scale > 0)
  1910. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1911. cond2 = cond0 & (x <= _a)
  1912. cond = cond0 & cond1
  1913. output = zeros(shape(cond), dtyp)
  1914. place(output, (1-cond0)+np.isnan(x), self.badvalue)
  1915. place(output, cond2, 1.0)
  1916. if np.any(cond):
  1917. goodargs = argsreduce(cond, *((x,)+args))
  1918. place(output, cond, self._sf(*goodargs))
  1919. if output.ndim == 0:
  1920. return output[()]
  1921. return output
  1922. def logsf(self, x, *args, **kwds):
  1923. """Log of the survival function of the given RV.
  1924. Returns the log of the "survival function," defined as (1 - `cdf`),
  1925. evaluated at `x`.
  1926. Parameters
  1927. ----------
  1928. x : array_like
  1929. quantiles
  1930. arg1, arg2, arg3,... : array_like
  1931. The shape parameter(s) for the distribution (see docstring of the
  1932. instance object for more information)
  1933. loc : array_like, optional
  1934. location parameter (default=0)
  1935. scale : array_like, optional
  1936. scale parameter (default=1)
  1937. Returns
  1938. -------
  1939. logsf : ndarray
  1940. Log of the survival function evaluated at `x`.
  1941. """
  1942. args, loc, scale = self._parse_args(*args, **kwds)
  1943. x, loc, scale = map(asarray, (x, loc, scale))
  1944. args = tuple(map(asarray, args))
  1945. _a, _b = self._get_support(*args)
  1946. dtyp = np.promote_types(x.dtype, np.float64)
  1947. x = np.asarray((x - loc)/scale, dtype=dtyp)
  1948. cond0 = self._argcheck(*args) & (scale > 0)
  1949. cond1 = self._open_support_mask(x, *args) & (scale > 0)
  1950. cond2 = cond0 & (x <= _a)
  1951. cond = cond0 & cond1
  1952. output = empty(shape(cond), dtyp)
  1953. output.fill(NINF)
  1954. place(output, (1-cond0)+np.isnan(x), self.badvalue)
  1955. place(output, cond2, 0.0)
  1956. if np.any(cond):
  1957. goodargs = argsreduce(cond, *((x,)+args))
  1958. place(output, cond, self._logsf(*goodargs))
  1959. if output.ndim == 0:
  1960. return output[()]
  1961. return output
  1962. def ppf(self, q, *args, **kwds):
  1963. """Percent point function (inverse of `cdf`) at q of the given RV.
  1964. Parameters
  1965. ----------
  1966. q : array_like
  1967. lower tail probability
  1968. arg1, arg2, arg3,... : array_like
  1969. The shape parameter(s) for the distribution (see docstring of the
  1970. instance object for more information)
  1971. loc : array_like, optional
  1972. location parameter (default=0)
  1973. scale : array_like, optional
  1974. scale parameter (default=1)
  1975. Returns
  1976. -------
  1977. x : array_like
  1978. quantile corresponding to the lower tail probability q.
  1979. """
  1980. args, loc, scale = self._parse_args(*args, **kwds)
  1981. q, loc, scale = map(asarray, (q, loc, scale))
  1982. args = tuple(map(asarray, args))
  1983. _a, _b = self._get_support(*args)
  1984. cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
  1985. cond1 = (0 < q) & (q < 1)
  1986. cond2 = cond0 & (q == 0)
  1987. cond3 = cond0 & (q == 1)
  1988. cond = cond0 & cond1
  1989. output = np.full(shape(cond), fill_value=self.badvalue)
  1990. lower_bound = _a * scale + loc
  1991. upper_bound = _b * scale + loc
  1992. place(output, cond2, argsreduce(cond2, lower_bound)[0])
  1993. place(output, cond3, argsreduce(cond3, upper_bound)[0])
  1994. if np.any(cond): # call only if at least 1 entry
  1995. goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
  1996. scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
  1997. place(output, cond, self._ppf(*goodargs) * scale + loc)
  1998. if output.ndim == 0:
  1999. return output[()]
  2000. return output
  2001. def isf(self, q, *args, **kwds):
  2002. """Inverse survival function (inverse of `sf`) at q of the given RV.
  2003. Parameters
  2004. ----------
  2005. q : array_like
  2006. upper tail probability
  2007. arg1, arg2, arg3,... : array_like
  2008. The shape parameter(s) for the distribution (see docstring of the
  2009. instance object for more information)
  2010. loc : array_like, optional
  2011. location parameter (default=0)
  2012. scale : array_like, optional
  2013. scale parameter (default=1)
  2014. Returns
  2015. -------
  2016. x : ndarray or scalar
  2017. Quantile corresponding to the upper tail probability q.
  2018. """
  2019. args, loc, scale = self._parse_args(*args, **kwds)
  2020. q, loc, scale = map(asarray, (q, loc, scale))
  2021. args = tuple(map(asarray, args))
  2022. _a, _b = self._get_support(*args)
  2023. cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
  2024. cond1 = (0 < q) & (q < 1)
  2025. cond2 = cond0 & (q == 1)
  2026. cond3 = cond0 & (q == 0)
  2027. cond = cond0 & cond1
  2028. output = np.full(shape(cond), fill_value=self.badvalue)
  2029. lower_bound = _a * scale + loc
  2030. upper_bound = _b * scale + loc
  2031. place(output, cond2, argsreduce(cond2, lower_bound)[0])
  2032. place(output, cond3, argsreduce(cond3, upper_bound)[0])
  2033. if np.any(cond):
  2034. goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
  2035. scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
  2036. place(output, cond, self._isf(*goodargs) * scale + loc)
  2037. if output.ndim == 0:
  2038. return output[()]
  2039. return output
  2040. def _unpack_loc_scale(self, theta):
  2041. try:
  2042. loc = theta[-2]
  2043. scale = theta[-1]
  2044. args = tuple(theta[:-2])
  2045. except IndexError as e:
  2046. raise ValueError("Not enough input arguments.") from e
  2047. return loc, scale, args
  2048. def _fitstart(self, data, args=None):
  2049. """Starting point for fit (shape arguments + loc + scale)."""
  2050. if args is None:
  2051. args = (1.0,)*self.numargs
  2052. loc, scale = self._fit_loc_scale_support(data, *args)
  2053. return args + (loc, scale)
  2054. def _reduce_func(self, args, kwds, data=None):
  2055. """
  2056. Return the (possibly reduced) function to optimize in order to find MLE
  2057. estimates for the .fit method.
  2058. """
  2059. # Convert fixed shape parameters to the standard numeric form: e.g. for
  2060. # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
  2061. # for `f0`, `fa` or 'fix_a'. The following converts the latter two
  2062. # into the first (numeric) form.
  2063. shapes = []
  2064. if self.shapes:
  2065. shapes = self.shapes.replace(',', ' ').split()
  2066. for j, s in enumerate(shapes):
  2067. key = 'f' + str(j)
  2068. names = [key, 'f' + s, 'fix_' + s]
  2069. val = _get_fixed_fit_value(kwds, names)
  2070. if val is not None:
  2071. kwds[key] = val
  2072. args = list(args)
  2073. Nargs = len(args)
  2074. fixedn = []
  2075. names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
  2076. x0 = []
  2077. for n, key in enumerate(names):
  2078. if key in kwds:
  2079. fixedn.append(n)
  2080. args[n] = kwds.pop(key)
  2081. else:
  2082. x0.append(args[n])
  2083. methods = {"mle", "mm"}
  2084. method = kwds.pop('method', "mle").lower()
  2085. if method == "mm":
  2086. n_params = len(shapes) + 2 - len(fixedn)
  2087. exponents = (np.arange(1, n_params+1))[:, np.newaxis]
  2088. data_moments = np.sum(data[None, :]**exponents/len(data), axis=1)
  2089. def objective(theta, x):
  2090. return self._moment_error(theta, x, data_moments)
  2091. elif method == "mle":
  2092. objective = self._penalized_nnlf
  2093. else:
  2094. raise ValueError("Method '{0}' not available; must be one of {1}"
  2095. .format(method, methods))
  2096. if len(fixedn) == 0:
  2097. func = objective
  2098. restore = None
  2099. else:
  2100. if len(fixedn) == Nargs:
  2101. raise ValueError(
  2102. "All parameters fixed. There is nothing to optimize.")
  2103. def restore(args, theta):
  2104. # Replace with theta for all numbers not in fixedn
  2105. # This allows the non-fixed values to vary, but
  2106. # we still call self.nnlf with all parameters.
  2107. i = 0
  2108. for n in range(Nargs):
  2109. if n not in fixedn:
  2110. args[n] = theta[i]
  2111. i += 1
  2112. return args
  2113. def func(theta, x):
  2114. newtheta = restore(args[:], theta)
  2115. return objective(newtheta, x)
  2116. return x0, func, restore, args
  2117. def _moment_error(self, theta, x, data_moments):
  2118. loc, scale, args = self._unpack_loc_scale(theta)
  2119. if not self._argcheck(*args) or scale <= 0:
  2120. return inf
  2121. dist_moments = np.array([self.moment(i+1, *args, loc=loc, scale=scale)
  2122. for i in range(len(data_moments))])
  2123. if np.any(np.isnan(dist_moments)):
  2124. raise ValueError("Method of moments encountered a non-finite "
  2125. "distribution moment and cannot continue. "
  2126. "Consider trying method='MLE'.")
  2127. return (((data_moments - dist_moments) /
  2128. np.maximum(np.abs(data_moments), 1e-8))**2).sum()
  2129. def fit(self, data, *args, **kwds):
  2130. """
  2131. Return estimates of shape (if applicable), location, and scale
  2132. parameters from data. The default estimation method is Maximum
  2133. Likelihood Estimation (MLE), but Method of Moments (MM)
  2134. is also available.
  2135. Starting estimates for
  2136. the fit are given by input arguments; for any arguments not provided
  2137. with starting estimates, ``self._fitstart(data)`` is called to generate
  2138. such.
  2139. One can hold some parameters fixed to specific values by passing in
  2140. keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
  2141. and ``floc`` and ``fscale`` (for location and scale parameters,
  2142. respectively).
  2143. Parameters
  2144. ----------
  2145. data : array_like
  2146. Data to use in estimating the distribution parameters.
  2147. arg1, arg2, arg3,... : floats, optional
  2148. Starting value(s) for any shape-characterizing arguments (those not
  2149. provided will be determined by a call to ``_fitstart(data)``).
  2150. No default value.
  2151. **kwds : floats, optional
  2152. - `loc`: initial guess of the distribution's location parameter.
  2153. - `scale`: initial guess of the distribution's scale parameter.
  2154. Special keyword arguments are recognized as holding certain
  2155. parameters fixed:
  2156. - f0...fn : hold respective shape parameters fixed.
  2157. Alternatively, shape parameters to fix can be specified by name.
  2158. For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a``
  2159. are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
  2160. equivalent to ``f1``.
  2161. - floc : hold location parameter fixed to specified value.
  2162. - fscale : hold scale parameter fixed to specified value.
  2163. - optimizer : The optimizer to use.
  2164. The optimizer must take ``func``,
  2165. and starting position as the first two arguments,
  2166. plus ``args`` (for extra arguments to pass to the
  2167. function to be optimized) and ``disp=0`` to suppress
  2168. output as keyword arguments.
  2169. - method : The method to use. The default is "MLE" (Maximum
  2170. Likelihood Estimate); "MM" (Method of Moments)
  2171. is also available.
  2172. Raises
  2173. ------
  2174. TypeError, ValueError
  2175. If an input is invalid
  2176. `~scipy.stats.FitError`
  2177. If fitting fails or the fit produced would be invalid
  2178. Returns
  2179. -------
  2180. parameter_tuple : tuple of floats
  2181. Estimates for any shape parameters (if applicable),
  2182. followed by those for location and scale.
  2183. For most random variables, shape statistics
  2184. will be returned, but there are exceptions (e.g. ``norm``).
  2185. Notes
  2186. -----
  2187. With ``method="MLE"`` (default), the fit is computed by minimizing
  2188. the negative log-likelihood function. A large, finite penalty
  2189. (rather than infinite negative log-likelihood) is applied for
  2190. observations beyond the support of the distribution.
  2191. With ``method="MM"``, the fit is computed by minimizing the L2 norm
  2192. of the relative errors between the first *k* raw (about zero) data
  2193. moments and the corresponding distribution moments, where *k* is the
  2194. number of non-fixed parameters.
  2195. More precisely, the objective function is::
  2196. (((data_moments - dist_moments)
  2197. / np.maximum(np.abs(data_moments), 1e-8))**2).sum()
  2198. where the constant ``1e-8`` avoids division by zero in case of
  2199. vanishing data moments. Typically, this error norm can be reduced to
  2200. zero.
  2201. Note that the standard method of moments can produce parameters for
  2202. which some data are outside the support of the fitted distribution;
  2203. this implementation does nothing to prevent this.
  2204. For either method,
  2205. the returned answer is not guaranteed to be globally optimal; it
  2206. may only be locally optimal, or the optimization may fail altogether.
  2207. If the data contain any of ``np.nan``, ``np.inf``, or ``-np.inf``,
  2208. the `fit` method will raise a ``RuntimeError``.
  2209. Examples
  2210. --------
  2211. Generate some data to fit: draw random variates from the `beta`
  2212. distribution
  2213. >>> from scipy.stats import beta
  2214. >>> a, b = 1., 2.
  2215. >>> x = beta.rvs(a, b, size=1000)
  2216. Now we can fit all four parameters (``a``, ``b``, ``loc``
  2217. and ``scale``):
  2218. >>> a1, b1, loc1, scale1 = beta.fit(x)
  2219. We can also use some prior knowledge about the dataset: let's keep
  2220. ``loc`` and ``scale`` fixed:
  2221. >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)
  2222. >>> loc1, scale1
  2223. (0, 1)
  2224. We can also keep shape parameters fixed by using ``f``-keywords. To
  2225. keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
  2226. equivalently, ``fa=1``:
  2227. >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)
  2228. >>> a1
  2229. 1
  2230. Not all distributions return estimates for the shape parameters.
  2231. ``norm`` for example just returns estimates for location and scale:
  2232. >>> from scipy.stats import norm
  2233. >>> x = norm.rvs(a, b, size=1000, random_state=123)
  2234. >>> loc1, scale1 = norm.fit(x)
  2235. >>> loc1, scale1
  2236. (0.92087172783841631, 2.0015750750324668)
  2237. """
  2238. data = np.asarray(data)
  2239. method = kwds.get('method', "mle").lower()
  2240. # memory for method of moments
  2241. Narg = len(args)
  2242. if Narg > self.numargs:
  2243. raise TypeError("Too many input arguments.")
  2244. if not np.isfinite(data).all():
  2245. raise ValueError("The data contains non-finite values.")
  2246. start = [None]*2
  2247. if (Narg < self.numargs) or not ('loc' in kwds and
  2248. 'scale' in kwds):
  2249. # get distribution specific starting locations
  2250. start = self._fitstart(data)
  2251. args += start[Narg:-2]
  2252. loc = kwds.pop('loc', start[-2])
  2253. scale = kwds.pop('scale', start[-1])
  2254. args += (loc, scale)
  2255. x0, func, restore, args = self._reduce_func(args, kwds, data=data)
  2256. optimizer = kwds.pop('optimizer', optimize.fmin)
  2257. # convert string to function in scipy.optimize
  2258. optimizer = _fit_determine_optimizer(optimizer)
  2259. # by now kwds must be empty, since everybody took what they needed
  2260. if kwds:
  2261. raise TypeError("Unknown arguments: %s." % kwds)
  2262. # In some cases, method of moments can be done with fsolve/root
  2263. # instead of an optimizer, but sometimes no solution exists,
  2264. # especially when the user fixes parameters. Minimizing the sum
  2265. # of squares of the error generalizes to these cases.
  2266. vals = optimizer(func, x0, args=(ravel(data),), disp=0)
  2267. obj = func(vals, data)
  2268. if restore is not None:
  2269. vals = restore(args, vals)
  2270. vals = tuple(vals)
  2271. loc, scale, shapes = self._unpack_loc_scale(vals)
  2272. if not (np.all(self._argcheck(*shapes)) and scale > 0):
  2273. raise FitError("Optimization converged to parameters that are "
  2274. "outside the range allowed by the distribution.")
  2275. if method == 'mm':
  2276. if not np.isfinite(obj):
  2277. raise FitError("Optimization failed: either a data moment "
  2278. "or fitted distribution moment is "
  2279. "non-finite.")
  2280. return vals
  2281. def _fit_loc_scale_support(self, data, *args):
  2282. """Estimate loc and scale parameters from data accounting for support.
  2283. Parameters
  2284. ----------
  2285. data : array_like
  2286. Data to fit.
  2287. arg1, arg2, arg3,... : array_like
  2288. The shape parameter(s) for the distribution (see docstring of the
  2289. instance object for more information).
  2290. Returns
  2291. -------
  2292. Lhat : float
  2293. Estimated location parameter for the data.
  2294. Shat : float
  2295. Estimated scale parameter for the data.
  2296. """
  2297. data = np.asarray(data)
  2298. # Estimate location and scale according to the method of moments.
  2299. loc_hat, scale_hat = self.fit_loc_scale(data, *args)
  2300. # Compute the support according to the shape parameters.
  2301. self._argcheck(*args)
  2302. _a, _b = self._get_support(*args)
  2303. a, b = _a, _b
  2304. support_width = b - a
  2305. # If the support is empty then return the moment-based estimates.
  2306. if support_width <= 0:
  2307. return loc_hat, scale_hat
  2308. # Compute the proposed support according to the loc and scale
  2309. # estimates.
  2310. a_hat = loc_hat + a * scale_hat
  2311. b_hat = loc_hat + b * scale_hat
  2312. # Use the moment-based estimates if they are compatible with the data.
  2313. data_a = np.min(data)
  2314. data_b = np.max(data)
  2315. if a_hat < data_a and data_b < b_hat:
  2316. return loc_hat, scale_hat
  2317. # Otherwise find other estimates that are compatible with the data.
  2318. data_width = data_b - data_a
  2319. rel_margin = 0.1
  2320. margin = data_width * rel_margin
  2321. # For a finite interval, both the location and scale
  2322. # should have interesting values.
  2323. if support_width < np.inf:
  2324. loc_hat = (data_a - a) - margin
  2325. scale_hat = (data_width + 2 * margin) / support_width
  2326. return loc_hat, scale_hat
  2327. # For a one-sided interval, use only an interesting location parameter.
  2328. if a > -np.inf:
  2329. return (data_a - a) - margin, 1
  2330. elif b < np.inf:
  2331. return (data_b - b) + margin, 1
  2332. else:
  2333. raise RuntimeError
  2334. def fit_loc_scale(self, data, *args):
  2335. """
  2336. Estimate loc and scale parameters from data using 1st and 2nd moments.
  2337. Parameters
  2338. ----------
  2339. data : array_like
  2340. Data to fit.
  2341. arg1, arg2, arg3,... : array_like
  2342. The shape parameter(s) for the distribution (see docstring of the
  2343. instance object for more information).
  2344. Returns
  2345. -------
  2346. Lhat : float
  2347. Estimated location parameter for the data.
  2348. Shat : float
  2349. Estimated scale parameter for the data.
  2350. """
  2351. mu, mu2 = self.stats(*args, **{'moments': 'mv'})
  2352. tmp = asarray(data)
  2353. muhat = tmp.mean()
  2354. mu2hat = tmp.var()
  2355. Shat = sqrt(mu2hat / mu2)
  2356. Lhat = muhat - Shat*mu
  2357. if not np.isfinite(Lhat):
  2358. Lhat = 0
  2359. if not (np.isfinite(Shat) and (0 < Shat)):
  2360. Shat = 1
  2361. return Lhat, Shat
  2362. def _entropy(self, *args):
  2363. def integ(x):
  2364. val = self._pdf(x, *args)
  2365. return entr(val)
  2366. # upper limit is often inf, so suppress warnings when integrating
  2367. _a, _b = self._get_support(*args)
  2368. with np.errstate(over='ignore'):
  2369. h = integrate.quad(integ, _a, _b)[0]
  2370. if not np.isnan(h):
  2371. return h
  2372. else:
  2373. # try with different limits if integration problems
  2374. low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
  2375. if np.isinf(_b):
  2376. upper = upp
  2377. else:
  2378. upper = _b
  2379. if np.isinf(_a):
  2380. lower = low
  2381. else:
  2382. lower = _a
  2383. return integrate.quad(integ, lower, upper)[0]
  2384. def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
  2385. conditional=False, **kwds):
  2386. """Calculate expected value of a function with respect to the
  2387. distribution by numerical integration.
  2388. The expected value of a function ``f(x)`` with respect to a
  2389. distribution ``dist`` is defined as::
  2390. ub
  2391. E[f(x)] = Integral(f(x) * dist.pdf(x)),
  2392. lb
  2393. where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)``
  2394. distribution. If the bounds ``lb`` and ``ub`` correspond to the
  2395. support of the distribution, e.g. ``[-inf, inf]`` in the default
  2396. case, then the integral is the unrestricted expectation of ``f(x)``.
  2397. Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0``
  2398. outside a finite interval in which case the expectation is
  2399. calculated within the finite range ``[lb, ub]``.
  2400. Parameters
  2401. ----------
  2402. func : callable, optional
  2403. Function for which integral is calculated. Takes only one argument.
  2404. The default is the identity mapping f(x) = x.
  2405. args : tuple, optional
  2406. Shape parameters of the distribution.
  2407. loc : float, optional
  2408. Location parameter (default=0).
  2409. scale : float, optional
  2410. Scale parameter (default=1).
  2411. lb, ub : scalar, optional
  2412. Lower and upper bound for integration. Default is set to the
  2413. support of the distribution.
  2414. conditional : bool, optional
  2415. If True, the integral is corrected by the conditional probability
  2416. of the integration interval. The return value is the expectation
  2417. of the function, conditional on being in the given interval.
  2418. Default is False.
  2419. Additional keyword arguments are passed to the integration routine.
  2420. Returns
  2421. -------
  2422. expect : float
  2423. The calculated expected value.
  2424. Notes
  2425. -----
  2426. The integration behavior of this function is inherited from
  2427. `scipy.integrate.quad`. Neither this function nor
  2428. `scipy.integrate.quad` can verify whether the integral exists or is
  2429. finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and
  2430. ``cauchy(0).expect()`` returns ``0.0``.
  2431. Likewise, the accuracy of results is not verified by the function.
  2432. `scipy.integrate.quad` is typically reliable for integrals that are
  2433. numerically favorable, but it is not guaranteed to converge
  2434. to a correct value for all possible intervals and integrands. This
  2435. function is provided for convenience; for critical applications,
  2436. check results against other integration methods.
  2437. The function is not vectorized.
  2438. Examples
  2439. --------
  2440. To understand the effect of the bounds of integration consider
  2441. >>> from scipy.stats import expon
  2442. >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0)
  2443. 0.6321205588285578
  2444. This is close to
  2445. >>> expon(1).cdf(2.0) - expon(1).cdf(0.0)
  2446. 0.6321205588285577
  2447. If ``conditional=True``
  2448. >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True)
  2449. 1.0000000000000002
  2450. The slight deviation from 1 is due to numerical integration.
  2451. The integrand can be treated as a complex-valued function
  2452. by passing ``complex_func=True`` to `scipy.integrate.quad` .
  2453. >>> import numpy as np
  2454. >>> from scipy.stats import vonmises
  2455. >>> res = vonmises(loc=2, kappa=1).expect(lambda x: np.exp(1j*x),
  2456. ... complex_func=True)
  2457. >>> res
  2458. (-0.18576377217422957+0.40590124735052263j)
  2459. >>> np.angle(res) # location of the (circular) distribution
  2460. 2.0
  2461. """
  2462. lockwds = {'loc': loc,
  2463. 'scale': scale}
  2464. self._argcheck(*args)
  2465. _a, _b = self._get_support(*args)
  2466. if func is None:
  2467. def fun(x, *args):
  2468. return x * self.pdf(x, *args, **lockwds)
  2469. else:
  2470. def fun(x, *args):
  2471. return func(x) * self.pdf(x, *args, **lockwds)
  2472. if lb is None:
  2473. lb = loc + _a * scale
  2474. if ub is None:
  2475. ub = loc + _b * scale
  2476. cdf_bounds = self.cdf([lb, ub], *args, **lockwds)
  2477. invfac = cdf_bounds[1] - cdf_bounds[0]
  2478. kwds['args'] = args
  2479. # split interval to help integrator w/ infinite support; see gh-8928
  2480. alpha = 0.05 # split body from tails at probability mass `alpha`
  2481. inner_bounds = np.array([alpha, 1-alpha])
  2482. cdf_inner_bounds = cdf_bounds[0] + invfac * inner_bounds
  2483. c, d = loc + self._ppf(cdf_inner_bounds, *args) * scale
  2484. # Do not silence warnings from integration.
  2485. lbc = integrate.quad(fun, lb, c, **kwds)[0]
  2486. cd = integrate.quad(fun, c, d, **kwds)[0]
  2487. dub = integrate.quad(fun, d, ub, **kwds)[0]
  2488. vals = (lbc + cd + dub)
  2489. if conditional:
  2490. vals /= invfac
  2491. return np.array(vals)[()] # make it a numpy scalar like other methods
  2492. def _param_info(self):
  2493. shape_info = self._shape_info()
  2494. loc_info = _ShapeInfo("loc", False, (-np.inf, np.inf), (False, False))
  2495. scale_info = _ShapeInfo("scale", False, (0, np.inf), (False, False))
  2496. param_info = shape_info + [loc_info, scale_info]
  2497. return param_info
  2498. # Helpers for the discrete distributions
  2499. def _drv2_moment(self, n, *args):
  2500. """Non-central moment of discrete distribution."""
  2501. def fun(x):
  2502. return np.power(x, n) * self._pmf(x, *args)
  2503. _a, _b = self._get_support(*args)
  2504. return _expect(fun, _a, _b, self.ppf(0.5, *args), self.inc)
  2505. def _drv2_ppfsingle(self, q, *args): # Use basic bisection algorithm
  2506. _a, _b = self._get_support(*args)
  2507. b = _b
  2508. a = _a
  2509. if isinf(b): # Be sure ending point is > q
  2510. b = int(max(100*q, 10))
  2511. while 1:
  2512. if b >= _b:
  2513. qb = 1.0
  2514. break
  2515. qb = self._cdf(b, *args)
  2516. if (qb < q):
  2517. b += 10
  2518. else:
  2519. break
  2520. else:
  2521. qb = 1.0
  2522. if isinf(a): # be sure starting point < q
  2523. a = int(min(-100*q, -10))
  2524. while 1:
  2525. if a <= _a:
  2526. qb = 0.0
  2527. break
  2528. qa = self._cdf(a, *args)
  2529. if (qa > q):
  2530. a -= 10
  2531. else:
  2532. break
  2533. else:
  2534. qa = self._cdf(a, *args)
  2535. while 1:
  2536. if (qa == q):
  2537. return a
  2538. if (qb == q):
  2539. return b
  2540. if b <= a+1:
  2541. if qa > q:
  2542. return a
  2543. else:
  2544. return b
  2545. c = int((a+b)/2.0)
  2546. qc = self._cdf(c, *args)
  2547. if (qc < q):
  2548. if a != c:
  2549. a = c
  2550. else:
  2551. raise RuntimeError('updating stopped, endless loop')
  2552. qa = qc
  2553. elif (qc > q):
  2554. if b != c:
  2555. b = c
  2556. else:
  2557. raise RuntimeError('updating stopped, endless loop')
  2558. qb = qc
  2559. else:
  2560. return c
  2561. # Must over-ride one of _pmf or _cdf or pass in
  2562. # x_k, p(x_k) lists in initialization
  2563. class rv_discrete(rv_generic):
  2564. """A generic discrete random variable class meant for subclassing.
  2565. `rv_discrete` is a base class to construct specific distribution classes
  2566. and instances for discrete random variables. It can also be used
  2567. to construct an arbitrary distribution defined by a list of support
  2568. points and corresponding probabilities.
  2569. Parameters
  2570. ----------
  2571. a : float, optional
  2572. Lower bound of the support of the distribution, default: 0
  2573. b : float, optional
  2574. Upper bound of the support of the distribution, default: plus infinity
  2575. moment_tol : float, optional
  2576. The tolerance for the generic calculation of moments.
  2577. values : tuple of two array_like, optional
  2578. ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero
  2579. probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk``
  2580. and ``pk`` must have the same shape.
  2581. inc : integer, optional
  2582. Increment for the support of the distribution.
  2583. Default is 1. (other values have not been tested)
  2584. badvalue : float, optional
  2585. The value in a result arrays that indicates a value that for which
  2586. some argument restriction is violated, default is np.nan.
  2587. name : str, optional
  2588. The name of the instance. This string is used to construct the default
  2589. example for distributions.
  2590. longname : str, optional
  2591. This string is used as part of the first line of the docstring returned
  2592. when a subclass has no docstring of its own. Note: `longname` exists
  2593. for backwards compatibility, do not use for new subclasses.
  2594. shapes : str, optional
  2595. The shape of the distribution. For example "m, n" for a distribution
  2596. that takes two integers as the two shape arguments for all its methods
  2597. If not provided, shape parameters will be inferred from
  2598. the signatures of the private methods, ``_pmf`` and ``_cdf`` of
  2599. the instance.
  2600. extradoc : str, optional, deprecated
  2601. This string is used as the last part of the docstring returned when a
  2602. subclass has no docstring of its own. Note: `extradoc` exists for
  2603. backwards compatibility and will be removed in SciPy 1.11.0, do not
  2604. use for new subclasses.
  2605. seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional
  2606. If `seed` is None (or `np.random`), the `numpy.random.RandomState`
  2607. singleton is used.
  2608. If `seed` is an int, a new ``RandomState`` instance is used,
  2609. seeded with `seed`.
  2610. If `seed` is already a ``Generator`` or ``RandomState`` instance then
  2611. that instance is used.
  2612. Methods
  2613. -------
  2614. rvs
  2615. pmf
  2616. logpmf
  2617. cdf
  2618. logcdf
  2619. sf
  2620. logsf
  2621. ppf
  2622. isf
  2623. moment
  2624. stats
  2625. entropy
  2626. expect
  2627. median
  2628. mean
  2629. std
  2630. var
  2631. interval
  2632. __call__
  2633. support
  2634. Notes
  2635. -----
  2636. This class is similar to `rv_continuous`. Whether a shape parameter is
  2637. valid is decided by an ``_argcheck`` method (which defaults to checking
  2638. that its arguments are strictly positive.)
  2639. The main differences are:
  2640. - the support of the distribution is a set of integers
  2641. - instead of the probability density function, ``pdf`` (and the
  2642. corresponding private ``_pdf``), this class defines the
  2643. *probability mass function*, `pmf` (and the corresponding
  2644. private ``_pmf``.)
  2645. - scale parameter is not defined.
  2646. To create a new discrete distribution, we would do the following:
  2647. >>> from scipy.stats import rv_discrete
  2648. >>> class poisson_gen(rv_discrete):
  2649. ... "Poisson distribution"
  2650. ... def _pmf(self, k, mu):
  2651. ... return exp(-mu) * mu**k / factorial(k)
  2652. and create an instance::
  2653. >>> poisson = poisson_gen(name="poisson")
  2654. Note that above we defined the Poisson distribution in the standard form.
  2655. Shifting the distribution can be done by providing the ``loc`` parameter
  2656. to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)``
  2657. delegates the work to ``poisson._pmf(x-loc, mu)``.
  2658. **Discrete distributions from a list of probabilities**
  2659. Alternatively, you can construct an arbitrary discrete rv defined
  2660. on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the
  2661. ``values`` keyword argument to the `rv_discrete` constructor.
  2662. Examples
  2663. --------
  2664. Custom made discrete distribution:
  2665. >>> import numpy as np
  2666. >>> from scipy import stats
  2667. >>> xk = np.arange(7)
  2668. >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)
  2669. >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))
  2670. >>>
  2671. >>> import matplotlib.pyplot as plt
  2672. >>> fig, ax = plt.subplots(1, 1)
  2673. >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
  2674. >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
  2675. >>> plt.show()
  2676. Random number generation:
  2677. >>> R = custm.rvs(size=100)
  2678. """
  2679. def __new__(cls, a=0, b=inf, name=None, badvalue=None,
  2680. moment_tol=1e-8, values=None, inc=1, longname=None,
  2681. shapes=None, extradoc=None, seed=None):
  2682. if values is not None:
  2683. # dispatch to a subclass
  2684. return super(rv_discrete, cls).__new__(rv_sample)
  2685. else:
  2686. # business as usual
  2687. return super(rv_discrete, cls).__new__(cls)
  2688. def __init__(self, a=0, b=inf, name=None, badvalue=None,
  2689. moment_tol=1e-8, values=None, inc=1, longname=None,
  2690. shapes=None, extradoc=None, seed=None):
  2691. super().__init__(seed)
  2692. if extradoc is not None:
  2693. warnings.warn("extradoc is deprecated and will be removed in "
  2694. "SciPy 1.11.0", DeprecationWarning)
  2695. # cf generic freeze
  2696. self._ctor_param = dict(
  2697. a=a, b=b, name=name, badvalue=badvalue,
  2698. moment_tol=moment_tol, values=values, inc=inc,
  2699. longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)
  2700. if badvalue is None:
  2701. badvalue = nan
  2702. self.badvalue = badvalue
  2703. self.a = a
  2704. self.b = b
  2705. self.moment_tol = moment_tol
  2706. self.inc = inc
  2707. self.shapes = shapes
  2708. if values is not None:
  2709. raise ValueError("rv_discrete.__init__(..., values != None, ...)")
  2710. self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],
  2711. locscale_in='loc=0',
  2712. # scale=1 for discrete RVs
  2713. locscale_out='loc, 1')
  2714. self._attach_methods()
  2715. self._construct_docstrings(name, longname, extradoc)
  2716. def __getstate__(self):
  2717. dct = self.__dict__.copy()
  2718. # these methods will be remade in __setstate__
  2719. attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs",
  2720. "_cdfvec", "_ppfvec", "generic_moment"]
  2721. [dct.pop(attr, None) for attr in attrs]
  2722. return dct
  2723. def _attach_methods(self):
  2724. """Attaches dynamically created methods to the rv_discrete instance."""
  2725. self._cdfvec = vectorize(self._cdf_single, otypes='d')
  2726. self.vecentropy = vectorize(self._entropy)
  2727. # _attach_methods is responsible for calling _attach_argparser_methods
  2728. self._attach_argparser_methods()
  2729. # nin correction needs to be after we know numargs
  2730. # correct nin for generic moment vectorization
  2731. _vec_generic_moment = vectorize(_drv2_moment, otypes='d')
  2732. _vec_generic_moment.nin = self.numargs + 2
  2733. self.generic_moment = types.MethodType(_vec_generic_moment, self)
  2734. # correct nin for ppf vectorization
  2735. _vppf = vectorize(_drv2_ppfsingle, otypes='d')
  2736. _vppf.nin = self.numargs + 2
  2737. self._ppfvec = types.MethodType(_vppf, self)
  2738. # now that self.numargs is defined, we can adjust nin
  2739. self._cdfvec.nin = self.numargs + 1
  2740. def _construct_docstrings(self, name, longname, extradoc):
  2741. if name is None:
  2742. name = 'Distribution'
  2743. self.name = name
  2744. self.extradoc = extradoc
  2745. # generate docstring for subclass instances
  2746. if longname is None:
  2747. if name[0] in ['aeiouAEIOU']:
  2748. hstr = "An "
  2749. else:
  2750. hstr = "A "
  2751. longname = hstr + name
  2752. if sys.flags.optimize < 2:
  2753. # Skip adding docstrings if interpreter is run with -OO
  2754. if self.__doc__ is None:
  2755. self._construct_default_doc(longname=longname,
  2756. extradoc=extradoc,
  2757. docdict=docdict_discrete,
  2758. discrete='discrete')
  2759. else:
  2760. dct = dict(distdiscrete)
  2761. self._construct_doc(docdict_discrete, dct.get(self.name))
  2762. # discrete RV do not have the scale parameter, remove it
  2763. self.__doc__ = self.__doc__.replace(
  2764. '\n scale : array_like, '
  2765. 'optional\n scale parameter (default=1)', '')
  2766. def _updated_ctor_param(self):
  2767. """Return the current version of _ctor_param, possibly updated by user.
  2768. Used by freezing.
  2769. Keep this in sync with the signature of __init__.
  2770. """
  2771. dct = self._ctor_param.copy()
  2772. dct['a'] = self.a
  2773. dct['b'] = self.b
  2774. dct['badvalue'] = self.badvalue
  2775. dct['moment_tol'] = self.moment_tol
  2776. dct['inc'] = self.inc
  2777. dct['name'] = self.name
  2778. dct['shapes'] = self.shapes
  2779. dct['extradoc'] = self.extradoc
  2780. return dct
  2781. def _nonzero(self, k, *args):
  2782. return floor(k) == k
  2783. def _pmf(self, k, *args):
  2784. return self._cdf(k, *args) - self._cdf(k-1, *args)
  2785. def _logpmf(self, k, *args):
  2786. return log(self._pmf(k, *args))
  2787. def _logpxf(self, k, *args):
  2788. # continuous distributions have PDF, discrete have PMF, but sometimes
  2789. # the distinction doesn't matter. This lets us use `_logpxf` for both
  2790. # discrete and continuous distributions.
  2791. return self._logpmf(k, *args)
  2792. def _unpack_loc_scale(self, theta):
  2793. try:
  2794. loc = theta[-1]
  2795. scale = 1
  2796. args = tuple(theta[:-1])
  2797. except IndexError as e:
  2798. raise ValueError("Not enough input arguments.") from e
  2799. return loc, scale, args
  2800. def _cdf_single(self, k, *args):
  2801. _a, _b = self._get_support(*args)
  2802. m = arange(int(_a), k+1)
  2803. return np.sum(self._pmf(m, *args), axis=0)
  2804. def _cdf(self, x, *args):
  2805. k = floor(x)
  2806. return self._cdfvec(k, *args)
  2807. # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic
  2808. def rvs(self, *args, **kwargs):
  2809. """Random variates of given type.
  2810. Parameters
  2811. ----------
  2812. arg1, arg2, arg3,... : array_like
  2813. The shape parameter(s) for the distribution (see docstring of the
  2814. instance object for more information).
  2815. loc : array_like, optional
  2816. Location parameter (default=0).
  2817. size : int or tuple of ints, optional
  2818. Defining number of random variates (Default is 1). Note that `size`
  2819. has to be given as keyword, not as positional argument.
  2820. random_state : {None, int, `numpy.random.Generator`,
  2821. `numpy.random.RandomState`}, optional
  2822. If `random_state` is None (or `np.random`), the
  2823. `numpy.random.RandomState` singleton is used.
  2824. If `random_state` is an int, a new ``RandomState`` instance is
  2825. used, seeded with `random_state`.
  2826. If `random_state` is already a ``Generator`` or ``RandomState``
  2827. instance, that instance is used.
  2828. Returns
  2829. -------
  2830. rvs : ndarray or scalar
  2831. Random variates of given `size`.
  2832. """
  2833. kwargs['discrete'] = True
  2834. return super().rvs(*args, **kwargs)
  2835. def pmf(self, k, *args, **kwds):
  2836. """Probability mass function at k of the given RV.
  2837. Parameters
  2838. ----------
  2839. k : array_like
  2840. Quantiles.
  2841. arg1, arg2, arg3,... : array_like
  2842. The shape parameter(s) for the distribution (see docstring of the
  2843. instance object for more information)
  2844. loc : array_like, optional
  2845. Location parameter (default=0).
  2846. Returns
  2847. -------
  2848. pmf : array_like
  2849. Probability mass function evaluated at k
  2850. """
  2851. args, loc, _ = self._parse_args(*args, **kwds)
  2852. k, loc = map(asarray, (k, loc))
  2853. args = tuple(map(asarray, args))
  2854. _a, _b = self._get_support(*args)
  2855. k = asarray((k-loc))
  2856. cond0 = self._argcheck(*args)
  2857. cond1 = (k >= _a) & (k <= _b)
  2858. if not isinstance(self, rv_sample):
  2859. cond1 = cond1 & self._nonzero(k, *args)
  2860. cond = cond0 & cond1
  2861. output = zeros(shape(cond), 'd')
  2862. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  2863. if np.any(cond):
  2864. goodargs = argsreduce(cond, *((k,)+args))
  2865. place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
  2866. if output.ndim == 0:
  2867. return output[()]
  2868. return output
  2869. def logpmf(self, k, *args, **kwds):
  2870. """Log of the probability mass function at k of the given RV.
  2871. Parameters
  2872. ----------
  2873. k : array_like
  2874. Quantiles.
  2875. arg1, arg2, arg3,... : array_like
  2876. The shape parameter(s) for the distribution (see docstring of the
  2877. instance object for more information).
  2878. loc : array_like, optional
  2879. Location parameter. Default is 0.
  2880. Returns
  2881. -------
  2882. logpmf : array_like
  2883. Log of the probability mass function evaluated at k.
  2884. """
  2885. args, loc, _ = self._parse_args(*args, **kwds)
  2886. k, loc = map(asarray, (k, loc))
  2887. args = tuple(map(asarray, args))
  2888. _a, _b = self._get_support(*args)
  2889. k = asarray((k-loc))
  2890. cond0 = self._argcheck(*args)
  2891. cond1 = (k >= _a) & (k <= _b)
  2892. if not isinstance(self, rv_sample):
  2893. cond1 = cond1 & self._nonzero(k, *args)
  2894. cond = cond0 & cond1
  2895. output = empty(shape(cond), 'd')
  2896. output.fill(NINF)
  2897. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  2898. if np.any(cond):
  2899. goodargs = argsreduce(cond, *((k,)+args))
  2900. place(output, cond, self._logpmf(*goodargs))
  2901. if output.ndim == 0:
  2902. return output[()]
  2903. return output
  2904. def cdf(self, k, *args, **kwds):
  2905. """Cumulative distribution function of the given RV.
  2906. Parameters
  2907. ----------
  2908. k : array_like, int
  2909. Quantiles.
  2910. arg1, arg2, arg3,... : array_like
  2911. The shape parameter(s) for the distribution (see docstring of the
  2912. instance object for more information).
  2913. loc : array_like, optional
  2914. Location parameter (default=0).
  2915. Returns
  2916. -------
  2917. cdf : ndarray
  2918. Cumulative distribution function evaluated at `k`.
  2919. """
  2920. args, loc, _ = self._parse_args(*args, **kwds)
  2921. k, loc = map(asarray, (k, loc))
  2922. args = tuple(map(asarray, args))
  2923. _a, _b = self._get_support(*args)
  2924. k = asarray((k-loc))
  2925. cond0 = self._argcheck(*args)
  2926. cond1 = (k >= _a) & (k < _b)
  2927. cond2 = (k >= _b)
  2928. cond3 = np.isneginf(k)
  2929. cond = cond0 & cond1 & np.isfinite(k)
  2930. output = zeros(shape(cond), 'd')
  2931. place(output, cond2*(cond0 == cond0), 1.0)
  2932. place(output, cond3*(cond0 == cond0), 0.0)
  2933. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  2934. if np.any(cond):
  2935. goodargs = argsreduce(cond, *((k,)+args))
  2936. place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
  2937. if output.ndim == 0:
  2938. return output[()]
  2939. return output
  2940. def logcdf(self, k, *args, **kwds):
  2941. """Log of the cumulative distribution function at k of the given RV.
  2942. Parameters
  2943. ----------
  2944. k : array_like, int
  2945. Quantiles.
  2946. arg1, arg2, arg3,... : array_like
  2947. The shape parameter(s) for the distribution (see docstring of the
  2948. instance object for more information).
  2949. loc : array_like, optional
  2950. Location parameter (default=0).
  2951. Returns
  2952. -------
  2953. logcdf : array_like
  2954. Log of the cumulative distribution function evaluated at k.
  2955. """
  2956. args, loc, _ = self._parse_args(*args, **kwds)
  2957. k, loc = map(asarray, (k, loc))
  2958. args = tuple(map(asarray, args))
  2959. _a, _b = self._get_support(*args)
  2960. k = asarray((k-loc))
  2961. cond0 = self._argcheck(*args)
  2962. cond1 = (k >= _a) & (k < _b)
  2963. cond2 = (k >= _b)
  2964. cond = cond0 & cond1
  2965. output = empty(shape(cond), 'd')
  2966. output.fill(NINF)
  2967. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  2968. place(output, cond2*(cond0 == cond0), 0.0)
  2969. if np.any(cond):
  2970. goodargs = argsreduce(cond, *((k,)+args))
  2971. place(output, cond, self._logcdf(*goodargs))
  2972. if output.ndim == 0:
  2973. return output[()]
  2974. return output
  2975. def sf(self, k, *args, **kwds):
  2976. """Survival function (1 - `cdf`) at k of the given RV.
  2977. Parameters
  2978. ----------
  2979. k : array_like
  2980. Quantiles.
  2981. arg1, arg2, arg3,... : array_like
  2982. The shape parameter(s) for the distribution (see docstring of the
  2983. instance object for more information).
  2984. loc : array_like, optional
  2985. Location parameter (default=0).
  2986. Returns
  2987. -------
  2988. sf : array_like
  2989. Survival function evaluated at k.
  2990. """
  2991. args, loc, _ = self._parse_args(*args, **kwds)
  2992. k, loc = map(asarray, (k, loc))
  2993. args = tuple(map(asarray, args))
  2994. _a, _b = self._get_support(*args)
  2995. k = asarray(k-loc)
  2996. cond0 = self._argcheck(*args)
  2997. cond1 = (k >= _a) & (k < _b)
  2998. cond2 = ((k < _a) | np.isneginf(k)) & cond0
  2999. cond = cond0 & cond1 & np.isfinite(k)
  3000. output = zeros(shape(cond), 'd')
  3001. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3002. place(output, cond2, 1.0)
  3003. if np.any(cond):
  3004. goodargs = argsreduce(cond, *((k,)+args))
  3005. place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
  3006. if output.ndim == 0:
  3007. return output[()]
  3008. return output
  3009. def logsf(self, k, *args, **kwds):
  3010. """Log of the survival function of the given RV.
  3011. Returns the log of the "survival function," defined as 1 - `cdf`,
  3012. evaluated at `k`.
  3013. Parameters
  3014. ----------
  3015. k : array_like
  3016. Quantiles.
  3017. arg1, arg2, arg3,... : array_like
  3018. The shape parameter(s) for the distribution (see docstring of the
  3019. instance object for more information).
  3020. loc : array_like, optional
  3021. Location parameter (default=0).
  3022. Returns
  3023. -------
  3024. logsf : ndarray
  3025. Log of the survival function evaluated at `k`.
  3026. """
  3027. args, loc, _ = self._parse_args(*args, **kwds)
  3028. k, loc = map(asarray, (k, loc))
  3029. args = tuple(map(asarray, args))
  3030. _a, _b = self._get_support(*args)
  3031. k = asarray(k-loc)
  3032. cond0 = self._argcheck(*args)
  3033. cond1 = (k >= _a) & (k < _b)
  3034. cond2 = (k < _a) & cond0
  3035. cond = cond0 & cond1
  3036. output = empty(shape(cond), 'd')
  3037. output.fill(NINF)
  3038. place(output, (1-cond0) + np.isnan(k), self.badvalue)
  3039. place(output, cond2, 0.0)
  3040. if np.any(cond):
  3041. goodargs = argsreduce(cond, *((k,)+args))
  3042. place(output, cond, self._logsf(*goodargs))
  3043. if output.ndim == 0:
  3044. return output[()]
  3045. return output
  3046. def ppf(self, q, *args, **kwds):
  3047. """Percent point function (inverse of `cdf`) at q of the given RV.
  3048. Parameters
  3049. ----------
  3050. q : array_like
  3051. Lower tail probability.
  3052. arg1, arg2, arg3,... : array_like
  3053. The shape parameter(s) for the distribution (see docstring of the
  3054. instance object for more information).
  3055. loc : array_like, optional
  3056. Location parameter (default=0).
  3057. Returns
  3058. -------
  3059. k : array_like
  3060. Quantile corresponding to the lower tail probability, q.
  3061. """
  3062. args, loc, _ = self._parse_args(*args, **kwds)
  3063. q, loc = map(asarray, (q, loc))
  3064. args = tuple(map(asarray, args))
  3065. _a, _b = self._get_support(*args)
  3066. cond0 = self._argcheck(*args) & (loc == loc)
  3067. cond1 = (q > 0) & (q < 1)
  3068. cond2 = (q == 1) & cond0
  3069. cond = cond0 & cond1
  3070. output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
  3071. # output type 'd' to handle nin and inf
  3072. place(output, (q == 0)*(cond == cond), _a-1 + loc)
  3073. place(output, cond2, _b + loc)
  3074. if np.any(cond):
  3075. goodargs = argsreduce(cond, *((q,)+args+(loc,)))
  3076. loc, goodargs = goodargs[-1], goodargs[:-1]
  3077. place(output, cond, self._ppf(*goodargs) + loc)
  3078. if output.ndim == 0:
  3079. return output[()]
  3080. return output
  3081. def isf(self, q, *args, **kwds):
  3082. """Inverse survival function (inverse of `sf`) at q of the given RV.
  3083. Parameters
  3084. ----------
  3085. q : array_like
  3086. Upper tail probability.
  3087. arg1, arg2, arg3,... : array_like
  3088. The shape parameter(s) for the distribution (see docstring of the
  3089. instance object for more information).
  3090. loc : array_like, optional
  3091. Location parameter (default=0).
  3092. Returns
  3093. -------
  3094. k : ndarray or scalar
  3095. Quantile corresponding to the upper tail probability, q.
  3096. """
  3097. args, loc, _ = self._parse_args(*args, **kwds)
  3098. q, loc = map(asarray, (q, loc))
  3099. args = tuple(map(asarray, args))
  3100. _a, _b = self._get_support(*args)
  3101. cond0 = self._argcheck(*args) & (loc == loc)
  3102. cond1 = (q > 0) & (q < 1)
  3103. cond2 = (q == 1) & cond0
  3104. cond3 = (q == 0) & cond0
  3105. cond = cond0 & cond1
  3106. # same problem as with ppf; copied from ppf and changed
  3107. output = np.full(shape(cond), fill_value=self.badvalue, dtype='d')
  3108. # output type 'd' to handle nin and inf
  3109. lower_bound = _a - 1 + loc
  3110. upper_bound = _b + loc
  3111. place(output, cond2*(cond == cond), lower_bound)
  3112. place(output, cond3*(cond == cond), upper_bound)
  3113. # call place only if at least 1 valid argument
  3114. if np.any(cond):
  3115. goodargs = argsreduce(cond, *((q,)+args+(loc,)))
  3116. loc, goodargs = goodargs[-1], goodargs[:-1]
  3117. # PB same as ticket 766
  3118. place(output, cond, self._isf(*goodargs) + loc)
  3119. if output.ndim == 0:
  3120. return output[()]
  3121. return output
  3122. def _entropy(self, *args):
  3123. if hasattr(self, 'pk'):
  3124. return stats.entropy(self.pk)
  3125. else:
  3126. _a, _b = self._get_support(*args)
  3127. return _expect(lambda x: entr(self.pmf(x, *args)),
  3128. _a, _b, self.ppf(0.5, *args), self.inc)
  3129. def expect(self, func=None, args=(), loc=0, lb=None, ub=None,
  3130. conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):
  3131. """
  3132. Calculate expected value of a function with respect to the distribution
  3133. for discrete distribution by numerical summation.
  3134. Parameters
  3135. ----------
  3136. func : callable, optional
  3137. Function for which the expectation value is calculated.
  3138. Takes only one argument.
  3139. The default is the identity mapping f(k) = k.
  3140. args : tuple, optional
  3141. Shape parameters of the distribution.
  3142. loc : float, optional
  3143. Location parameter.
  3144. Default is 0.
  3145. lb, ub : int, optional
  3146. Lower and upper bound for the summation, default is set to the
  3147. support of the distribution, inclusive (``lb <= k <= ub``).
  3148. conditional : bool, optional
  3149. If true then the expectation is corrected by the conditional
  3150. probability of the summation interval. The return value is the
  3151. expectation of the function, `func`, conditional on being in
  3152. the given interval (k such that ``lb <= k <= ub``).
  3153. Default is False.
  3154. maxcount : int, optional
  3155. Maximal number of terms to evaluate (to avoid an endless loop for
  3156. an infinite sum). Default is 1000.
  3157. tolerance : float, optional
  3158. Absolute tolerance for the summation. Default is 1e-10.
  3159. chunksize : int, optional
  3160. Iterate over the support of a distributions in chunks of this size.
  3161. Default is 32.
  3162. Returns
  3163. -------
  3164. expect : float
  3165. Expected value.
  3166. Notes
  3167. -----
  3168. For heavy-tailed distributions, the expected value may or
  3169. may not exist,
  3170. depending on the function, `func`. If it does exist, but the
  3171. sum converges
  3172. slowly, the accuracy of the result may be rather low. For instance, for
  3173. ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.
  3174. increasing `maxcount` and/or `chunksize` may improve the result,
  3175. but may also make zipf very slow.
  3176. The function is not vectorized.
  3177. """
  3178. if func is None:
  3179. def fun(x):
  3180. # loc and args from outer scope
  3181. return (x+loc)*self._pmf(x, *args)
  3182. else:
  3183. def fun(x):
  3184. # loc and args from outer scope
  3185. return func(x+loc)*self._pmf(x, *args)
  3186. # used pmf because _pmf does not check support in randint and there
  3187. # might be problems(?) with correct self.a, self.b at this stage maybe
  3188. # not anymore, seems to work now with _pmf
  3189. _a, _b = self._get_support(*args)
  3190. if lb is None:
  3191. lb = _a
  3192. else:
  3193. lb = lb - loc # convert bound for standardized distribution
  3194. if ub is None:
  3195. ub = _b
  3196. else:
  3197. ub = ub - loc # convert bound for standardized distribution
  3198. if conditional:
  3199. invfac = self.sf(lb-1, *args) - self.sf(ub, *args)
  3200. else:
  3201. invfac = 1.0
  3202. if isinstance(self, rv_sample):
  3203. res = self._expect(fun, lb, ub)
  3204. return res / invfac
  3205. # iterate over the support, starting from the median
  3206. x0 = self.ppf(0.5, *args)
  3207. res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)
  3208. return res / invfac
  3209. def _param_info(self):
  3210. shape_info = self._shape_info()
  3211. loc_info = _ShapeInfo("loc", True, (-np.inf, np.inf), (False, False))
  3212. param_info = shape_info + [loc_info]
  3213. return param_info
  3214. def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,
  3215. chunksize=32):
  3216. """Helper for computing the expectation value of `fun`."""
  3217. # short-circuit if the support size is small enough
  3218. if (ub - lb) <= chunksize:
  3219. supp = np.arange(lb, ub+1, inc)
  3220. vals = fun(supp)
  3221. return np.sum(vals)
  3222. # otherwise, iterate starting from x0
  3223. if x0 < lb:
  3224. x0 = lb
  3225. if x0 > ub:
  3226. x0 = ub
  3227. count, tot = 0, 0.
  3228. # iterate over [x0, ub] inclusive
  3229. for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):
  3230. count += x.size
  3231. delta = np.sum(fun(x))
  3232. tot += delta
  3233. if abs(delta) < tolerance * x.size:
  3234. break
  3235. if count > maxcount:
  3236. warnings.warn('expect(): sum did not converge', RuntimeWarning)
  3237. return tot
  3238. # iterate over [lb, x0)
  3239. for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):
  3240. count += x.size
  3241. delta = np.sum(fun(x))
  3242. tot += delta
  3243. if abs(delta) < tolerance * x.size:
  3244. break
  3245. if count > maxcount:
  3246. warnings.warn('expect(): sum did not converge', RuntimeWarning)
  3247. break
  3248. return tot
  3249. def _iter_chunked(x0, x1, chunksize=4, inc=1):
  3250. """Iterate from x0 to x1 in chunks of chunksize and steps inc.
  3251. x0 must be finite, x1 need not be. In the latter case, the iterator is
  3252. infinite.
  3253. Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards
  3254. (make sure to set inc < 0.)
  3255. >>> [x for x in _iter_chunked(2, 5, inc=2)]
  3256. [array([2, 4])]
  3257. >>> [x for x in _iter_chunked(2, 11, inc=2)]
  3258. [array([2, 4, 6, 8]), array([10])]
  3259. >>> [x for x in _iter_chunked(2, -5, inc=-2)]
  3260. [array([ 2, 0, -2, -4])]
  3261. >>> [x for x in _iter_chunked(2, -9, inc=-2)]
  3262. [array([ 2, 0, -2, -4]), array([-6, -8])]
  3263. """
  3264. if inc == 0:
  3265. raise ValueError('Cannot increment by zero.')
  3266. if chunksize <= 0:
  3267. raise ValueError('Chunk size must be positive; got %s.' % chunksize)
  3268. s = 1 if inc > 0 else -1
  3269. stepsize = abs(chunksize * inc)
  3270. x = x0
  3271. while (x - x1) * inc < 0:
  3272. delta = min(stepsize, abs(x - x1))
  3273. step = delta * s
  3274. supp = np.arange(x, x + step, inc)
  3275. x += step
  3276. yield supp
  3277. class rv_sample(rv_discrete):
  3278. """A 'sample' discrete distribution defined by the support and values.
  3279. The ctor ignores most of the arguments, only needs the `values` argument.
  3280. """
  3281. def __init__(self, a=0, b=inf, name=None, badvalue=None,
  3282. moment_tol=1e-8, values=None, inc=1, longname=None,
  3283. shapes=None, extradoc=None, seed=None):
  3284. super(rv_discrete, self).__init__(seed)
  3285. if extradoc is not None:
  3286. warnings.warn("extradoc is deprecated and will be removed in "
  3287. "SciPy 1.11.0", DeprecationWarning)
  3288. if values is None:
  3289. raise ValueError("rv_sample.__init__(..., values=None,...)")
  3290. # cf generic freeze
  3291. self._ctor_param = dict(
  3292. a=a, b=b, name=name, badvalue=badvalue,
  3293. moment_tol=moment_tol, values=values, inc=inc,
  3294. longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)
  3295. if badvalue is None:
  3296. badvalue = nan
  3297. self.badvalue = badvalue
  3298. self.moment_tol = moment_tol
  3299. self.inc = inc
  3300. self.shapes = shapes
  3301. self.vecentropy = self._entropy
  3302. xk, pk = values
  3303. if np.shape(xk) != np.shape(pk):
  3304. raise ValueError("xk and pk must have the same shape.")
  3305. if np.less(pk, 0.0).any():
  3306. raise ValueError("All elements of pk must be non-negative.")
  3307. if not np.allclose(np.sum(pk), 1):
  3308. raise ValueError("The sum of provided pk is not 1.")
  3309. indx = np.argsort(np.ravel(xk))
  3310. self.xk = np.take(np.ravel(xk), indx, 0)
  3311. self.pk = np.take(np.ravel(pk), indx, 0)
  3312. self.a = self.xk[0]
  3313. self.b = self.xk[-1]
  3314. self.qvals = np.cumsum(self.pk, axis=0)
  3315. self.shapes = ' ' # bypass inspection
  3316. self._construct_argparser(meths_to_inspect=[self._pmf],
  3317. locscale_in='loc=0',
  3318. # scale=1 for discrete RVs
  3319. locscale_out='loc, 1')
  3320. self._attach_methods()
  3321. self._construct_docstrings(name, longname, extradoc)
  3322. def __getstate__(self):
  3323. dct = self.__dict__.copy()
  3324. # these methods will be remade in rv_generic.__setstate__,
  3325. # which calls rv_generic._attach_methods
  3326. attrs = ["_parse_args", "_parse_args_stats", "_parse_args_rvs"]
  3327. [dct.pop(attr, None) for attr in attrs]
  3328. return dct
  3329. def _attach_methods(self):
  3330. """Attaches dynamically created argparser methods."""
  3331. self._attach_argparser_methods()
  3332. def _get_support(self, *args):
  3333. """Return the support of the (unscaled, unshifted) distribution.
  3334. Parameters
  3335. ----------
  3336. arg1, arg2, ... : array_like
  3337. The shape parameter(s) for the distribution (see docstring of the
  3338. instance object for more information).
  3339. Returns
  3340. -------
  3341. a, b : numeric (float, or int or +/-np.inf)
  3342. end-points of the distribution's support.
  3343. """
  3344. return self.a, self.b
  3345. def _pmf(self, x):
  3346. return np.select([x == k for k in self.xk],
  3347. [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)
  3348. def _cdf(self, x):
  3349. xx, xxk = np.broadcast_arrays(x[:, None], self.xk)
  3350. indx = np.argmax(xxk > xx, axis=-1) - 1
  3351. return self.qvals[indx]
  3352. def _ppf(self, q):
  3353. qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)
  3354. indx = argmax(sqq >= qq, axis=-1)
  3355. return self.xk[indx]
  3356. def _rvs(self, size=None, random_state=None):
  3357. # Need to define it explicitly, otherwise .rvs() with size=None
  3358. # fails due to explicit broadcasting in _ppf
  3359. U = random_state.uniform(size=size)
  3360. if size is None:
  3361. U = np.array(U, ndmin=1)
  3362. Y = self._ppf(U)[0]
  3363. else:
  3364. Y = self._ppf(U)
  3365. return Y
  3366. def _entropy(self):
  3367. return stats.entropy(self.pk)
  3368. def generic_moment(self, n):
  3369. n = asarray(n)
  3370. return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)
  3371. def _expect(self, fun, lb, ub, *args, **kwds):
  3372. # ignore all args, just do a brute force summation
  3373. supp = self.xk[(lb <= self.xk) & (self.xk <= ub)]
  3374. vals = fun(supp)
  3375. return np.sum(vals)
  3376. def _check_shape(argshape, size):
  3377. """
  3378. This is a utility function used by `_rvs()` in the class geninvgauss_gen.
  3379. It compares the tuple argshape to the tuple size.
  3380. Parameters
  3381. ----------
  3382. argshape : tuple of integers
  3383. Shape of the arguments.
  3384. size : tuple of integers or integer
  3385. Size argument of rvs().
  3386. Returns
  3387. -------
  3388. The function returns two tuples, scalar_shape and bc.
  3389. scalar_shape : tuple
  3390. Shape to which the 1-d array of random variates returned by
  3391. _rvs_scalar() is converted when it is copied into the
  3392. output array of _rvs().
  3393. bc : tuple of booleans
  3394. bc is an tuple the same length as size. bc[j] is True if the data
  3395. associated with that index is generated in one call of _rvs_scalar().
  3396. """
  3397. scalar_shape = []
  3398. bc = []
  3399. for argdim, sizedim in zip_longest(argshape[::-1], size[::-1],
  3400. fillvalue=1):
  3401. if sizedim > argdim or (argdim == sizedim == 1):
  3402. scalar_shape.append(sizedim)
  3403. bc.append(True)
  3404. else:
  3405. bc.append(False)
  3406. return tuple(scalar_shape[::-1]), tuple(bc[::-1])
  3407. def get_distribution_names(namespace_pairs, rv_base_class):
  3408. """Collect names of statistical distributions and their generators.
  3409. Parameters
  3410. ----------
  3411. namespace_pairs : sequence
  3412. A snapshot of (name, value) pairs in the namespace of a module.
  3413. rv_base_class : class
  3414. The base class of random variable generator classes in a module.
  3415. Returns
  3416. -------
  3417. distn_names : list of strings
  3418. Names of the statistical distributions.
  3419. distn_gen_names : list of strings
  3420. Names of the generators of the statistical distributions.
  3421. Note that these are not simply the names of the statistical
  3422. distributions, with a _gen suffix added.
  3423. """
  3424. distn_names = []
  3425. distn_gen_names = []
  3426. for name, value in namespace_pairs:
  3427. if name.startswith('_'):
  3428. continue
  3429. if name.endswith('_gen') and issubclass(value, rv_base_class):
  3430. distn_gen_names.append(name)
  3431. if isinstance(value, rv_base_class):
  3432. distn_names.append(name)
  3433. return distn_names, distn_gen_names