#
# Author: Travis Oliphant 2002-2011 with contributions from
# SciPy Developers 2004-2011
#
from functools import partial
import warnings

from scipy import special
from scipy.special import entr, logsumexp, betaln, gammaln as gamln, zeta
from scipy._lib._util import _lazywhere, rng_integers
from scipy.interpolate import interp1d

from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh

import numpy as np

from ._distn_infrastructure import (rv_discrete, get_distribution_names,
                                    _check_shape, _ShapeInfo)
import scipy.stats._boost as _boost
from ._biasedurn import (_PyFishersNCHypergeometric,
                         _PyWalleniusNCHypergeometric,
                         _PyStochasticLib3)


def _isintegral(x):
    return x == np.round(x)


class binom_gen(rv_discrete):
    r"""A binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `binom` is:

    .. math::

       f(k) = \binom{n}{k} p^k (1-p)^{n-k}

    for :math:`k \in \{0, 1, \dots, n\}`, :math:`0 \leq p \leq 1`

    `binom` takes :math:`n` and :math:`p` as shape parameters,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    %(example)s

    See Also
    --------
    hypergeom, nbinom, nhypergeom

    """
    def _shape_info(self):
        return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, n, p, size=None, random_state=None):
        return random_state.binomial(n, p, size)

    def _argcheck(self, n, p):
        return (n >= 0) & _isintegral(n) & (p >= 0) & (p <= 1)

    def _get_support(self, n, p):
        return self.a, n

    def _logpmf(self, x, n, p):
        k = floor(x)
        combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
        return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)

    def _pmf(self, x, n, p):
        # binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)
        return _boost._binom_pdf(x, n, p)

    def _cdf(self, x, n, p):
        k = floor(x)
        return _boost._binom_cdf(k, n, p)

    def _sf(self, x, n, p):
        k = floor(x)
        return _boost._binom_sf(k, n, p)

    def _isf(self, x, n, p):
        return _boost._binom_isf(x, n, p)

    def _ppf(self, q, n, p):
        return _boost._binom_ppf(q, n, p)

    def _stats(self, n, p, moments='mv'):
        mu = _boost._binom_mean(n, p)
        var = _boost._binom_variance(n, p)
        g1, g2 = None, None
        if 's' in moments:
            g1 = _boost._binom_skewness(n, p)
        if 'k' in moments:
            g2 = _boost._binom_kurtosis_excess(n, p)
        return mu, var, g1, g2

    def _entropy(self, n, p):
        k = np.r_[0:n + 1]
        vals = self._pmf(k, n, p)
        return np.sum(entr(vals), axis=0)


binom = binom_gen(name='binom')
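
# --- Illustrative sanity check (an editorial addition, not part of the
# original module): the Boost-backed PMF above should agree with the
# closed form comb(n, k) * p**k * (1-p)**(n-k) from the docstring.
if __name__ == "__main__":
    _k = np.arange(0, 11)
    _closed_form = special.comb(10, _k) * 0.3**_k * 0.7**(10 - _k)
    assert np.allclose(binom.pmf(_k, n=10, p=0.3), _closed_form)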
class bernoulli_gen(binom_gen):
    r"""A Bernoulli discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `bernoulli` is:

    .. math::

       f(k) = \begin{cases}1-p  &\text{if } k = 0\\
                           p    &\text{if } k = 1\end{cases}

    for :math:`k` in :math:`\{0, 1\}`, :math:`0 \leq p \leq 1`

    `bernoulli` takes :math:`p` as shape parameter,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, p, size=None, random_state=None):
        return binom_gen._rvs(self, 1, p, size=size, random_state=random_state)

    def _argcheck(self, p):
        return (p >= 0) & (p <= 1)

    def _get_support(self, p):
        # Overrides binom_gen._get_support!
        return self.a, self.b

    def _logpmf(self, x, p):
        return binom._logpmf(x, 1, p)

    def _pmf(self, x, p):
        # bernoulli.pmf(k) = 1-p  if k = 0
        #                  = p    if k = 1
        return binom._pmf(x, 1, p)

    def _cdf(self, x, p):
        return binom._cdf(x, 1, p)

    def _sf(self, x, p):
        return binom._sf(x, 1, p)

    def _isf(self, x, p):
        return binom._isf(x, 1, p)

    def _ppf(self, q, p):
        return binom._ppf(q, 1, p)

    def _stats(self, p):
        return binom._stats(1, p)

    def _entropy(self, p):
        return entr(p) + entr(1-p)


bernoulli = bernoulli_gen(b=1, name='bernoulli')
class betabinom_gen(rv_discrete):
    r"""A beta-binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The beta-binomial distribution is a binomial distribution with a
    probability of success `p` that follows a beta distribution.

    The probability mass function for `betabinom` is:

    .. math::

       f(k) = \binom{n}{k} \frac{B(k + a, n - k + b)}{B(a, b)}

    for :math:`k \in \{0, 1, \dots, n\}`, :math:`n \geq 0`, :math:`a > 0`,
    :math:`b > 0`, where :math:`B(a, b)` is the beta function.

    `betabinom` takes :math:`n`, :math:`a`, and :math:`b` as shape parameters.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Beta-binomial_distribution

    %(after_notes)s

    .. versionadded:: 1.4.0

    See Also
    --------
    beta, binom

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("a", False, (0, np.inf), (False, False)),
                _ShapeInfo("b", False, (0, np.inf), (False, False))]

    def _rvs(self, n, a, b, size=None, random_state=None):
        p = random_state.beta(a, b, size)
        return random_state.binomial(n, p, size)

    def _get_support(self, n, a, b):
        return 0, n

    def _argcheck(self, n, a, b):
        return (n >= 0) & _isintegral(n) & (a > 0) & (b > 0)

    def _logpmf(self, x, n, a, b):
        k = floor(x)
        combiln = -log(n + 1) - betaln(n - k + 1, k + 1)
        return combiln + betaln(k + a, n - k + b) - betaln(a, b)

    def _pmf(self, x, n, a, b):
        return exp(self._logpmf(x, n, a, b))

    def _stats(self, n, a, b, moments='mv'):
        e_p = a / (a + b)
        e_q = 1 - e_p
        mu = n * e_p
        var = n * (a + b + n) * e_p * e_q / (a + b + 1)
        g1, g2 = None, None
        if 's' in moments:
            g1 = 1.0 / sqrt(var)
            g1 *= (a + b + 2 * n) * (b - a)
            g1 /= (a + b + 2) * (a + b)
        if 'k' in moments:
            g2 = a + b
            g2 *= (a + b - 1 + 6 * n)
            g2 += 3 * a * b * (n - 2)
            g2 += 6 * n ** 2
            g2 -= 3 * e_p * b * n * (6 - n)
            g2 -= 18 * e_p * e_q * n ** 2
            g2 *= (a + b) ** 2 * (1 + a + b)
            g2 /= (n * a * b * (a + b + 2) * (a + b + 3) * (a + b + n))
            g2 -= 3
        return mu, var, g1, g2


betabinom = betabinom_gen(name='betabinom')
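
# --- Illustrative check (an editorial addition, not part of the original
# module): the mean and variance from `betabinom.stats` should match the
# closed forms used in `_stats` above; the shape values are arbitrary.
if __name__ == "__main__":
    _n, _a, _b = 12, 2.5, 4.0
    _mu, _var = betabinom.stats(_n, _a, _b, moments='mv')
    assert np.isclose(_mu, _n * _a / (_a + _b))
    assert np.isclose(_var, _n * _a * _b * (_a + _b + _n)
                      / ((_a + _b)**2 * (_a + _b + 1)))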
class nbinom_gen(rv_discrete):
    r"""A negative binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The negative binomial distribution describes a sequence of i.i.d.
    Bernoulli trials, repeated until a predefined, non-random number of
    successes occurs.

    The probability mass function of the number of failures for `nbinom` is:

    .. math::

       f(k) = \binom{k+n-1}{n-1} p^n (1-p)^k

    for :math:`k \ge 0`, :math:`0 < p \leq 1`

    `nbinom` takes :math:`n` and :math:`p` as shape parameters where :math:`n`
    is the number of successes, :math:`p` is the probability of a single
    success, and :math:`1-p` is the probability of a single failure.

    Another common parameterization of the negative binomial distribution is
    in terms of the mean number of failures :math:`\mu` to achieve :math:`n`
    successes. The mean :math:`\mu` is related to the probability of success
    as

    .. math::

       p = \frac{n}{n + \mu}

    The number of successes :math:`n` may also be specified in terms of a
    "dispersion", "heterogeneity", or "aggregation" parameter :math:`\alpha`,
    which relates the mean :math:`\mu` to the variance :math:`\sigma^2`,
    e.g. :math:`\sigma^2 = \mu + \alpha \mu^2`. Regardless of the convention
    used for :math:`\alpha`,

    .. math::

       p &= \frac{\mu}{\sigma^2} \\
       n &= \frac{\mu^2}{\sigma^2 - \mu}

    %(after_notes)s

    %(example)s

    See Also
    --------
    hypergeom, binom, nhypergeom

    """
    def _shape_info(self):
        return [_ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, n, p, size=None, random_state=None):
        return random_state.negative_binomial(n, p, size)

    def _argcheck(self, n, p):
        return (n > 0) & (p > 0) & (p <= 1)

    def _pmf(self, x, n, p):
        # nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
        return _boost._nbinom_pdf(x, n, p)

    def _logpmf(self, x, n, p):
        coeff = gamln(n+x) - gamln(x+1) - gamln(n)
        return coeff + n*log(p) + special.xlog1py(x, -p)

    def _cdf(self, x, n, p):
        k = floor(x)
        return _boost._nbinom_cdf(k, n, p)

    def _logcdf(self, x, n, p):
        k = floor(x)
        cdf = self._cdf(k, n, p)
        cond = cdf > 0.5

        def f1(k, n, p):
            return np.log1p(-special.betainc(k + 1, n, 1 - p))

        # do the calculation in place
        logcdf = cdf
        with np.errstate(divide='ignore'):
            logcdf[cond] = f1(k[cond], n[cond], p[cond])
            logcdf[~cond] = np.log(cdf[~cond])
        return logcdf

    def _sf(self, x, n, p):
        k = floor(x)
        return _boost._nbinom_sf(k, n, p)

    def _isf(self, x, n, p):
        with warnings.catch_warnings():
            # See gh-14901
            message = "overflow encountered in _nbinom_isf"
            warnings.filterwarnings('ignore', message=message)
            return _boost._nbinom_isf(x, n, p)

    def _ppf(self, q, n, p):
        with warnings.catch_warnings():
            message = "overflow encountered in _nbinom_ppf"
            warnings.filterwarnings('ignore', message=message)
            return _boost._nbinom_ppf(q, n, p)

    def _stats(self, n, p):
        return (
            _boost._nbinom_mean(n, p),
            _boost._nbinom_variance(n, p),
            _boost._nbinom_skewness(n, p),
            _boost._nbinom_kurtosis_excess(n, p),
        )


nbinom = nbinom_gen(name='nbinom')
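
# --- Illustrative conversion (an editorial addition, not part of the
# original module): the docstring above gives the mean parameterization
# p = n / (n + mu); a sketch of recovering (n, p) from a desired mean.
if __name__ == "__main__":
    _n, _mu = 5.0, 8.0
    _p = _n / (_n + _mu)
    assert np.isclose(nbinom.mean(_n, _p), _mu)
    # the variance then follows sigma**2 = mu + mu**2 / n
    assert np.isclose(nbinom.var(_n, _p), _mu + _mu**2 / _n)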
class geom_gen(rv_discrete):
    r"""A geometric discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `geom` is:

    .. math::

       f(k) = (1-p)^{k-1} p

    for :math:`k \ge 1`, :math:`0 < p \leq 1`

    `geom` takes :math:`p` as shape parameter,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    See Also
    --------
    planck

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, p, size=None, random_state=None):
        return random_state.geometric(p, size=size)

    def _argcheck(self, p):
        return (p <= 1) & (p > 0)

    def _pmf(self, k, p):
        return np.power(1-p, k-1) * p

    def _logpmf(self, k, p):
        return special.xlog1py(k - 1, -p) + log(p)

    def _cdf(self, x, p):
        k = floor(x)
        return -expm1(log1p(-p)*k)

    def _sf(self, x, p):
        return np.exp(self._logsf(x, p))

    def _logsf(self, x, p):
        k = floor(x)
        return k*log1p(-p)

    def _ppf(self, q, p):
        vals = ceil(log1p(-q) / log1p(-p))
        temp = self._cdf(vals-1, p)
        return np.where((temp >= q) & (vals > 0), vals-1, vals)

    def _stats(self, p):
        mu = 1.0/p
        qr = 1.0-p
        var = qr / p / p
        g1 = (2.0-p) / sqrt(qr)
        g2 = np.polyval([1, -6, 6], p)/(1.0-p)
        return mu, var, g1, g2


geom = geom_gen(a=1, name='geom', longname="A geometric")
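
# --- Illustrative identity check (an editorial addition, not part of the
# original module): the CDF implemented above as -expm1(log1p(-p)*k) is a
# numerically stable rewrite of 1 - (1-p)**k.
if __name__ == "__main__":
    _k, _p = np.arange(1, 8), 0.35
    assert np.allclose(geom.cdf(_k, _p), 1 - (1 - _p)**_k)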
class hypergeom_gen(rv_discrete):
    r"""A hypergeometric discrete random variable.

    The hypergeometric distribution models drawing objects from a bin.
    `M` is the total number of objects, `n` is the total number of Type I
    objects. The random variate represents the number of Type I objects in
    `N` drawn without replacement from the total population.

    %(before_notes)s

    Notes
    -----
    The symbols used to denote the shape parameters (`M`, `n`, and `N`) are
    not universally accepted. See the Examples for a clarification of the
    definitions used here.

    The probability mass function is defined as,

    .. math:: p(k, M, n, N) = \frac{\binom{n}{k} \binom{M - n}{N - k}}
                                   {\binom{M}{N}}

    for :math:`k \in [\max(0, N - M + n), \min(n, N)]`, where the binomial
    coefficients are defined as,

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    %(after_notes)s

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import hypergeom
    >>> import matplotlib.pyplot as plt

    Suppose we have a collection of 20 animals, of which 7 are dogs. Then if
    we want to know the probability of finding a given number of dogs if we
    choose at random 12 of the 20 animals, we can initialize a frozen
    distribution and plot the probability mass function:

    >>> [M, n, N] = [20, 7, 12]
    >>> rv = hypergeom(M, n, N)
    >>> x = np.arange(0, n+1)
    >>> pmf_dogs = rv.pmf(x)

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.plot(x, pmf_dogs, 'bo')
    >>> ax.vlines(x, 0, pmf_dogs, lw=2)
    >>> ax.set_xlabel('# of dogs in our group of chosen animals')
    >>> ax.set_ylabel('hypergeom PMF')
    >>> plt.show()

    Instead of using a frozen distribution we can also use `hypergeom`
    methods directly. For example, to obtain the cumulative distribution
    function, use:

    >>> prb = hypergeom.cdf(x, M, n, N)

    And to generate random numbers:

    >>> R = hypergeom.rvs(M, n, N, size=10)

    See Also
    --------
    nhypergeom, binom, nbinom

    """
    def _shape_info(self):
        return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("N", True, (0, np.inf), (True, False))]

    def _rvs(self, M, n, N, size=None, random_state=None):
        return random_state.hypergeometric(n, M-n, N, size=size)

    def _get_support(self, M, n, N):
        return np.maximum(N-(M-n), 0), np.minimum(n, N)

    def _argcheck(self, M, n, N):
        cond = (M > 0) & (n >= 0) & (N >= 0)
        cond &= (n <= M) & (N <= M)
        cond &= _isintegral(M) & _isintegral(n) & _isintegral(N)
        return cond

    def _logpmf(self, k, M, n, N):
        tot, good = M, n
        bad = tot - good
        result = (betaln(good+1, 1) + betaln(bad+1, 1) + betaln(tot-N+1, N+1) -
                  betaln(k+1, good-k+1) - betaln(N-k+1, bad-N+k+1) -
                  betaln(tot+1, 1))
        return result

    def _pmf(self, k, M, n, N):
        return _boost._hypergeom_pdf(k, n, N, M)

    def _cdf(self, k, M, n, N):
        return _boost._hypergeom_cdf(k, n, N, M)

    def _stats(self, M, n, N):
        M, n, N = 1. * M, 1. * n, 1. * N
        m = M - n

        # Boost kurtosis_excess doesn't return the same as the value
        # computed here.
        g2 = M * (M + 1) - 6. * N * (M - N) - 6. * n * m
        g2 *= (M - 1) * M * M
        g2 += 6. * n * N * (M - N) * m * (5. * M - 6)
        g2 /= n * N * (M - N) * m * (M - 2.) * (M - 3.)
        return (
            _boost._hypergeom_mean(n, N, M),
            _boost._hypergeom_variance(n, N, M),
            _boost._hypergeom_skewness(n, N, M),
            g2,
        )

    def _entropy(self, M, n, N):
        k = np.r_[N - (M - n):min(n, N) + 1]
        vals = self.pmf(k, M, n, N)
        return np.sum(entr(vals), axis=0)

    def _sf(self, k, M, n, N):
        return _boost._hypergeom_sf(k, n, N, M)

    def _logsf(self, k, M, n, N):
        res = []
        for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
            if (quant + 0.5) * (tot + 0.5) < (good - 0.5) * (draw - 0.5):
                # Fewer terms to sum if we calculate log(1-cdf)
                res.append(log1p(-exp(self.logcdf(quant, tot, good, draw))))
            else:
                # Integration over probability mass function using logsumexp
                k2 = np.arange(quant + 1, draw + 1)
                res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)

    def _logcdf(self, k, M, n, N):
        res = []
        for quant, tot, good, draw in zip(*np.broadcast_arrays(k, M, n, N)):
            if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
                # Fewer terms to sum if we calculate log(1-sf)
                res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
            else:
                # Integration over probability mass function using logsumexp
                k2 = np.arange(0, quant + 1)
                res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)


hypergeom = hypergeom_gen(name='hypergeom')
class nhypergeom_gen(rv_discrete):
    r"""A negative hypergeometric discrete random variable.

    Consider a box containing :math:`M` balls: :math:`n` red and
    :math:`M-n` blue. We randomly sample balls from the box, one
    at a time and *without* replacement, until we have picked :math:`r`
    blue balls. `nhypergeom` is the distribution of the number of
    red balls :math:`k` we have picked.

    %(before_notes)s

    Notes
    -----
    The symbols used to denote the shape parameters (`M`, `n`, and `r`) are
    not universally accepted. See the Examples for a clarification of the
    definitions used here.

    The probability mass function is defined as,

    .. math:: f(k; M, n, r) = \frac{{{k+r-1}\choose{k}}{{M-r-k}\choose{n-k}}}
                                   {{M \choose n}}

    for :math:`k \in [0, n]`, :math:`n \in [0, M]`, :math:`r \in [0, M-n]`,
    and the binomial coefficient is:

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    It is equivalent to observing :math:`k` successes in :math:`k+r-1`
    samples with the :math:`(k+r)`-th sample being a failure. The former
    can be modelled as a hypergeometric distribution. The probability
    of the latter is simply the number of failures remaining
    :math:`M-n-(r-1)` divided by the size of the remaining population
    :math:`M-(k+r-1)`. This relationship can be shown as:

    .. math:: NHG(k;M,n,r) = HG(k;M,n,k+r-1)\frac{(M-n-(r-1))}{(M-(k+r-1))}

    where :math:`NHG` is the probability mass function (PMF) of the
    negative hypergeometric distribution and :math:`HG` is the
    PMF of the hypergeometric distribution.

    %(after_notes)s

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats import nhypergeom
    >>> import matplotlib.pyplot as plt

    Suppose we have a collection of 20 animals, of which 7 are dogs.
    Then if we want to know the probability of finding a given number
    of dogs (successes) in a sample with exactly 12 animals that
    aren't dogs (failures), we can initialize a frozen distribution
    and plot the probability mass function:

    >>> M, n, r = [20, 7, 12]
    >>> rv = nhypergeom(M, n, r)
    >>> x = np.arange(0, n+2)
    >>> pmf_dogs = rv.pmf(x)

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.plot(x, pmf_dogs, 'bo')
    >>> ax.vlines(x, 0, pmf_dogs, lw=2)
    >>> ax.set_xlabel('# of dogs in our group with given 12 failures')
    >>> ax.set_ylabel('nhypergeom PMF')
    >>> plt.show()

    Instead of using a frozen distribution we can also use `nhypergeom`
    methods directly. For example, to obtain the probability mass
    function, use:

    >>> prb = nhypergeom.pmf(x, M, n, r)

    And to generate random numbers:

    >>> R = nhypergeom.rvs(M, n, r, size=10)

    To verify the relationship between `hypergeom` and `nhypergeom`, use:

    >>> from scipy.stats import hypergeom, nhypergeom
    >>> M, n, r = 45, 13, 8
    >>> k = 6
    >>> nhypergeom.pmf(k, M, n, r)
    0.06180776620271643
    >>> hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1))
    0.06180776620271644

    See Also
    --------
    hypergeom, binom, nbinom

    References
    ----------
    .. [1] Negative Hypergeometric Distribution on Wikipedia
           https://en.wikipedia.org/wiki/Negative_hypergeometric_distribution

    .. [2] Negative Hypergeometric Distribution from
           http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Negativehypergeometric.pdf

    """
    def _shape_info(self):
        return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("r", True, (0, np.inf), (True, False))]

    def _get_support(self, M, n, r):
        return 0, n

    def _argcheck(self, M, n, r):
        cond = (n >= 0) & (n <= M) & (r >= 0) & (r <= M-n)
        cond &= _isintegral(M) & _isintegral(n) & _isintegral(r)
        return cond

    def _rvs(self, M, n, r, size=None, random_state=None):

        @_vectorize_rvs_over_shapes
        def _rvs1(M, n, r, size, random_state):
            # invert cdf by calculating all values in support, scalar M, n, r
            a, b = self.support(M, n, r)
            ks = np.arange(a, b+1)
            cdf = self.cdf(ks, M, n, r)
            ppf = interp1d(cdf, ks, kind='next', fill_value='extrapolate')
            rvs = ppf(random_state.uniform(size=size)).astype(int)
            if size is None:
                return rvs.item()
            return rvs

        return _rvs1(M, n, r, size=size, random_state=random_state)

    def _logpmf(self, k, M, n, r):
        cond = ((r == 0) & (k == 0))
        result = _lazywhere(~cond, (k, M, n, r),
                            lambda k, M, n, r:
                                (-betaln(k+1, r) + betaln(k+r, 1) -
                                 betaln(n-k+1, M-r-n+1) + betaln(M-r-k+1, 1) +
                                 betaln(n+1, M-n+1) - betaln(M+1, 1)),
                            fillvalue=0.0)
        return result

    def _pmf(self, k, M, n, r):
        # same as the following but numerically more precise
        # return comb(k+r-1, k) * comb(M-r-k, n-k) / comb(M, n)
        return exp(self._logpmf(k, M, n, r))

    def _stats(self, M, n, r):
        # Promote the datatype to at least float
        # mu = rn / (M-n+1)
        M, n, r = 1.*M, 1.*n, 1.*r
        mu = r*n / (M-n+1)

        var = r*(M+1)*n / ((M-n+1)*(M-n+2)) * (1 - r / (M-n+1))

        # The skew and kurtosis are mathematically
        # intractable so return `None`. See [2]_.
        g1, g2 = None, None
        return mu, var, g1, g2


nhypergeom = nhypergeom_gen(name='nhypergeom')
# FIXME: Fails _cdfvec
class logser_gen(rv_discrete):
    r"""A Logarithmic (Log-Series, Series) discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `logser` is:

    .. math::

       f(k) = - \frac{p^k}{k \log(1-p)}

    for :math:`k \ge 1`, :math:`0 < p < 1`

    `logser` takes :math:`p` as shape parameter,
    where :math:`p` is the probability of a single success
    and :math:`1-p` is the probability of a single failure.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("p", False, (0, 1), (True, True))]

    def _rvs(self, p, size=None, random_state=None):
        # looks wrong for p>0.5, too few k=1
        # trying to use generic is worse, no k=1 at all
        return random_state.logseries(p, size=size)

    def _argcheck(self, p):
        return (p > 0) & (p < 1)

    def _pmf(self, k, p):
        # logser.pmf(k) = - p**k / (k*log(1-p))
        return -np.power(p, k) * 1.0 / k / special.log1p(-p)

    def _stats(self, p):
        r = special.log1p(-p)
        mu = p / (p - 1.0) / r
        mu2p = -p / r / (p - 1.0)**2
        var = mu2p - mu*mu
        mu3p = -p / r * (1.0+p) / (1.0 - p)**3
        mu3 = mu3p - 3*mu*mu2p + 2*mu**3
        g1 = mu3 / np.power(var, 1.5)

        mu4p = -p / r * (
            1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
        mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4
        g2 = mu4 / var**2 - 3.0
        return mu, var, g1, g2


logser = logser_gen(a=1, name='logser', longname='A logarithmic')
class poisson_gen(rv_discrete):
    r"""A Poisson discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `poisson` is:

    .. math::

       f(k) = \exp(-\mu) \frac{\mu^k}{k!}

    for :math:`k \ge 0`.

    `poisson` takes :math:`\mu \geq 0` as shape parameter.
    When :math:`\mu = 0`, the ``pmf`` method
    returns ``1.0`` at quantile :math:`k = 0`.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("mu", False, (0, np.inf), (True, False))]

    # Override rv_discrete._argcheck to allow mu=0.
    def _argcheck(self, mu):
        return mu >= 0

    def _rvs(self, mu, size=None, random_state=None):
        return random_state.poisson(mu, size)

    def _logpmf(self, k, mu):
        Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
        return Pk

    def _pmf(self, k, mu):
        # poisson.pmf(k) = exp(-mu) * mu**k / k!
        return exp(self._logpmf(k, mu))

    def _cdf(self, x, mu):
        k = floor(x)
        return special.pdtr(k, mu)

    def _sf(self, x, mu):
        k = floor(x)
        return special.pdtrc(k, mu)

    def _ppf(self, q, mu):
        vals = ceil(special.pdtrik(q, mu))
        vals1 = np.maximum(vals - 1, 0)
        temp = special.pdtr(vals1, mu)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, mu):
        var = mu
        tmp = np.asarray(mu)
        mu_nonzero = tmp > 0
        g1 = _lazywhere(mu_nonzero, (tmp,), lambda x: sqrt(1.0/x), np.inf)
        g2 = _lazywhere(mu_nonzero, (tmp,), lambda x: 1.0/x, np.inf)
        return mu, var, g1, g2


poisson = poisson_gen(name="poisson", longname='A Poisson')
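
# --- Illustrative edge case (an editorial addition, not part of the original
# module): as documented above, mu = 0 is a valid shape and the PMF puts all
# of its mass at k = 0.
if __name__ == "__main__":
    assert poisson.pmf(0, mu=0) == 1.0
    assert poisson.pmf(1, mu=0) == 0.0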
class planck_gen(rv_discrete):
    r"""A Planck discrete exponential random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `planck` is:

    .. math::

       f(k) = (1-\exp(-\lambda)) \exp(-\lambda k)

    for :math:`k \ge 0` and :math:`\lambda > 0`.

    `planck` takes :math:`\lambda` as shape parameter. The Planck distribution
    can be written as a geometric distribution (`geom`) with
    :math:`p = 1 - \exp(-\lambda)` shifted by ``loc = -1``.

    %(after_notes)s

    See Also
    --------
    geom

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("lambda", False, (0, np.inf), (False, False))]

    def _argcheck(self, lambda_):
        return lambda_ > 0

    def _pmf(self, k, lambda_):
        return -expm1(-lambda_)*exp(-lambda_*k)

    def _cdf(self, x, lambda_):
        k = floor(x)
        return -expm1(-lambda_*(k+1))

    def _sf(self, x, lambda_):
        return exp(self._logsf(x, lambda_))

    def _logsf(self, x, lambda_):
        k = floor(x)
        return -lambda_*(k+1)

    def _ppf(self, q, lambda_):
        vals = ceil(-1.0/lambda_ * log1p(-q)-1)
        vals1 = (vals-1).clip(*(self._get_support(lambda_)))
        temp = self._cdf(vals1, lambda_)
        return np.where(temp >= q, vals1, vals)

    def _rvs(self, lambda_, size=None, random_state=None):
        # use relation to geometric distribution for sampling
        p = -expm1(-lambda_)
        return random_state.geometric(p, size=size) - 1.0

    def _stats(self, lambda_):
        mu = 1/expm1(lambda_)
        var = exp(-lambda_)/(expm1(-lambda_))**2
        g1 = 2*cosh(lambda_/2.0)
        g2 = 4+2*cosh(lambda_)
        return mu, var, g1, g2

    def _entropy(self, lambda_):
        C = -expm1(-lambda_)
        return lambda_*exp(-lambda_)/C - log(C)


planck = planck_gen(a=0, name='planck', longname='A discrete exponential ')
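
# --- Illustrative relation check (an editorial addition, not part of the
# original module): per the docstring, `planck` equals `geom` with
# p = 1 - exp(-lambda) shifted by loc = -1.
if __name__ == "__main__":
    _lam = 0.7
    _k = np.arange(0, 6)
    assert np.allclose(planck.pmf(_k, _lam),
                       geom.pmf(_k, -np.expm1(-_lam), loc=-1))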
class boltzmann_gen(rv_discrete):
    r"""A Boltzmann (Truncated Discrete Exponential) random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `boltzmann` is:

    .. math::

       f(k) = (1-\exp(-\lambda)) \exp(-\lambda k) / (1-\exp(-\lambda N))

    for :math:`k = 0,..., N-1`.

    `boltzmann` takes :math:`\lambda > 0` and :math:`N > 0` as shape
    parameters.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("lambda_", False, (0, np.inf), (False, False)),
                _ShapeInfo("N", True, (0, np.inf), (False, False))]

    def _argcheck(self, lambda_, N):
        return (lambda_ > 0) & (N > 0) & _isintegral(N)

    def _get_support(self, lambda_, N):
        return self.a, N - 1

    def _pmf(self, k, lambda_, N):
        # boltzmann.pmf(k) =
        #     (1-exp(-lambda_))*exp(-lambda_*k)/(1-exp(-lambda_*N))
        fact = (1-exp(-lambda_))/(1-exp(-lambda_*N))
        return fact*exp(-lambda_*k)

    def _cdf(self, x, lambda_, N):
        k = floor(x)
        return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))

    def _ppf(self, q, lambda_, N):
        qnew = q*(1-exp(-lambda_*N))
        vals = ceil(-1.0/lambda_ * log(1-qnew)-1)
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, lambda_, N)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, lambda_, N):
        z = exp(-lambda_)
        zN = exp(-lambda_*N)
        mu = z/(1.0-z)-N*zN/(1-zN)
        var = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
        trm = (1-zN)/(1-z)
        trm2 = (z*trm**2 - N*N*zN)
        g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
        g1 = g1 / trm2**(1.5)
        g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
        g2 = g2 / trm2 / trm2
        return mu, var, g1, g2


boltzmann = boltzmann_gen(name='boltzmann', a=0,
                          longname='A truncated discrete exponential ')
class randint_gen(rv_discrete):
    r"""A uniform discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `randint` is:

    .. math::

       f(k) = \frac{1}{\texttt{high} - \texttt{low}}

    for :math:`k \in \{\texttt{low}, \dots, \texttt{high} - 1\}`.

    `randint` takes :math:`\texttt{low}` and :math:`\texttt{high}` as shape
    parameters.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("low", True, (-np.inf, np.inf), (False, False)),
                _ShapeInfo("high", True, (-np.inf, np.inf), (False, False))]

    def _argcheck(self, low, high):
        return (high > low) & _isintegral(low) & _isintegral(high)

    def _get_support(self, low, high):
        return low, high-1

    def _pmf(self, k, low, high):
        # randint.pmf(k) = 1./(high - low)
        p = np.ones_like(k) / (high - low)
        return np.where((k >= low) & (k < high), p, 0.)

    def _cdf(self, x, low, high):
        k = floor(x)
        return (k - low + 1.) / (high - low)

    def _ppf(self, q, low, high):
        vals = ceil(q * (high - low) + low) - 1
        vals1 = (vals - 1).clip(low, high)
        temp = self._cdf(vals1, low, high)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, low, high):
        m2, m1 = np.asarray(high), np.asarray(low)
        mu = (m2 + m1 - 1.0) / 2
        d = m2 - m1
        var = (d*d - 1) / 12.0
        g1 = 0.0
        g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)
        return mu, var, g1, g2

    def _rvs(self, low, high, size=None, random_state=None):
        """An array of *size* random integers >= ``low`` and < ``high``."""
        if np.asarray(low).size == 1 and np.asarray(high).size == 1:
            # no need to vectorize in that case
            return rng_integers(random_state, low, high, size=size)
        if size is not None:
            # NumPy's RandomState.randint() doesn't broadcast its arguments.
            # Use `broadcast_to()` to extend the shapes of low and high
            # up to size. Then we can use the numpy.vectorize'd
            # randint without needing to pass it a `size` argument.
            low = np.broadcast_to(low, size)
            high = np.broadcast_to(high, size)
        randint = np.vectorize(partial(rng_integers, random_state),
                               otypes=[np.int_])
        return randint(low, high)

    def _entropy(self, low, high):
        return log(high - low)


randint = randint_gen(name='randint', longname='A discrete uniform '
                                               '(random integer)')
# FIXME: problems sampling.
class zipf_gen(rv_discrete):
    r"""A Zipf (Zeta) discrete random variable.

    %(before_notes)s

    See Also
    --------
    zipfian

    Notes
    -----
    The probability mass function for `zipf` is:

    .. math::

       f(k, a) = \frac{1}{\zeta(a) k^a}

    for :math:`k \ge 1`, :math:`a > 1`.

    `zipf` takes :math:`a > 1` as shape parameter. :math:`\zeta` is the
    Riemann zeta function (`scipy.special.zeta`).

    The Zipf distribution is also known as the zeta distribution, which is
    a special case of the Zipfian distribution (`zipfian`).

    %(after_notes)s

    References
    ----------
    .. [1] "Zeta Distribution", Wikipedia,
           https://en.wikipedia.org/wiki/Zeta_distribution

    %(example)s

    Confirm that `zipf` is the large `n` limit of `zipfian`.

    >>> import numpy as np
    >>> from scipy.stats import zipfian
    >>> k = np.arange(11)
    >>> np.allclose(zipf.pmf(k, a=3.5), zipfian.pmf(k, a=3.5, n=10000000))
    True

    """
    def _shape_info(self):
        return [_ShapeInfo("a", False, (1, np.inf), (False, False))]

    def _rvs(self, a, size=None, random_state=None):
        return random_state.zipf(a, size=size)

    def _argcheck(self, a):
        return a > 1

    def _pmf(self, k, a):
        # zipf.pmf(k, a) = 1/(zeta(a) * k**a)
        Pk = 1.0 / special.zeta(a, 1) / k**a
        return Pk

    def _munp(self, n, a):
        return _lazywhere(
            a > n + 1, (a, n),
            lambda a, n: special.zeta(a - n, 1) / special.zeta(a, 1),
            np.inf)


zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')
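
# --- Illustrative moment check (an editorial addition, not part of the
# original module): per `_munp` above, the m-th moment is
# zeta(a - m) / zeta(a) when a > m + 1 and infinite otherwise.
if __name__ == "__main__":
    assert np.isclose(zipf.mean(3.0),
                      special.zeta(2.0, 1) / special.zeta(3.0, 1))
    assert np.isinf(zipf.mean(1.5))  # the mean diverges for a <= 2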
def _gen_harmonic_gt1(n, a):
    """Generalized harmonic number, a > 1"""
    # See https://en.wikipedia.org/wiki/Harmonic_number; search for "hurwitz"
    return zeta(a, 1) - zeta(a, n+1)


def _gen_harmonic_leq1(n, a):
    """Generalized harmonic number, a <= 1"""
    if not np.size(n):
        return n
    n_max = np.max(n)  # loop starts at maximum of all n
    out = np.zeros_like(a, dtype=float)
    # add terms of harmonic series; starting from smallest to avoid roundoff
    for i in np.arange(n_max, 0, -1, dtype=float):
        mask = i <= n  # don't add terms after nth
        out[mask] += 1/i**a[mask]
    return out


def _gen_harmonic(n, a):
    """Generalized harmonic number"""
    n, a = np.broadcast_arrays(n, a)
    return _lazywhere(a > 1, (n, a),
                      f=_gen_harmonic_gt1, f2=_gen_harmonic_leq1)
class zipfian_gen(rv_discrete):
    r"""A Zipfian discrete random variable.

    %(before_notes)s

    See Also
    --------
    zipf

    Notes
    -----
    The probability mass function for `zipfian` is:

    .. math::

       f(k, a, n) = \frac{1}{H_{n,a} k^a}

    for :math:`k \in \{1, 2, \dots, n-1, n\}`, :math:`a \ge 0`,
    :math:`n \in \{1, 2, 3, \dots\}`.

    `zipfian` takes :math:`a` and :math:`n` as shape parameters.
    :math:`H_{n,a}` is the :math:`n`:sup:`th` generalized harmonic
    number of order :math:`a`.

    The Zipfian distribution reduces to the Zipf (zeta) distribution as
    :math:`n \rightarrow \infty`.

    %(after_notes)s

    References
    ----------
    .. [1] "Zipf's Law", Wikipedia, https://en.wikipedia.org/wiki/Zipf's_law
    .. [2] Larry Leemis, "Zipf Distribution", Univariate Distribution
           Relationships. http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf

    %(example)s

    Confirm that `zipfian` reduces to `zipf` for large `n`, `a > 1`.

    >>> import numpy as np
    >>> from scipy.stats import zipf
    >>> k = np.arange(11)
    >>> np.allclose(zipfian.pmf(k, a=3.5, n=10000000), zipf.pmf(k, a=3.5))
    True

    """
    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (False, False))]

    def _argcheck(self, a, n):
        # we need np.asarray here because moment (maybe others) don't convert
        return (a >= 0) & (n > 0) & (n == np.asarray(n, dtype=int))

    def _get_support(self, a, n):
        return 1, n

    def _pmf(self, k, a, n):
        return 1.0 / _gen_harmonic(n, a) / k**a

    def _cdf(self, k, a, n):
        return _gen_harmonic(k, a) / _gen_harmonic(n, a)

    def _sf(self, k, a, n):
        k = k + 1  # to match SciPy convention
        # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
        return ((k**a*(_gen_harmonic(n, a) - _gen_harmonic(k, a)) + 1)
                / (k**a*_gen_harmonic(n, a)))

    def _stats(self, a, n):
        # see http://www.math.wm.edu/~leemis/chart/UDR/PDFs/Zipf.pdf
        Hna = _gen_harmonic(n, a)
        Hna1 = _gen_harmonic(n, a-1)
        Hna2 = _gen_harmonic(n, a-2)
        Hna3 = _gen_harmonic(n, a-3)
        Hna4 = _gen_harmonic(n, a-4)
        mu1 = Hna1/Hna
        mu2n = (Hna2*Hna - Hna1**2)
        mu2d = Hna**2
        mu2 = mu2n / mu2d
        g1 = (Hna3/Hna - 3*Hna1*Hna2/Hna**2 + 2*Hna1**3/Hna**3)/mu2**(3/2)
        g2 = (Hna**3*Hna4 - 4*Hna**2*Hna1*Hna3 + 6*Hna*Hna1**2*Hna2
              - 3*Hna1**4) / mu2n**2
        g2 -= 3
        return mu1, mu2, g1, g2


zipfian = zipfian_gen(a=1, name='zipfian', longname='A Zipfian')
class dlaplace_gen(rv_discrete):
    r"""A Laplacian discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `dlaplace` is:

    .. math::

       f(k) = \tanh(a/2) \exp(-a |k|)

    for integers :math:`k` and :math:`a > 0`.

    `dlaplace` takes :math:`a` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("a", False, (0, np.inf), (False, False))]

    def _pmf(self, k, a):
        # dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
        return tanh(a/2.0) * exp(-a * abs(k))

    def _cdf(self, x, a):
        k = floor(x)

        def f(k, a):
            return 1.0 - exp(-a * k) / (exp(a) + 1)

        def f2(k, a):
            return exp(a * (k+1)) / (exp(a) + 1)

        return _lazywhere(k >= 0, (k, a), f=f, f2=f2)

    def _ppf(self, q, a):
        const = 1 + exp(a)
        vals = ceil(np.where(q < 1.0 / (1 + exp(-a)),
                             log(q*const) / a - 1,
                             -log((1-q) * const) / a))
        vals1 = vals - 1
        return np.where(self._cdf(vals1, a) >= q, vals1, vals)

    def _stats(self, a):
        ea = exp(a)
        mu2 = 2.*ea/(ea-1.)**2
        mu4 = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
        return 0., mu2, 0., mu4/mu2**2 - 3.

    def _entropy(self, a):
        return a / sinh(a) - log(tanh(a/2.0))

    def _rvs(self, a, size=None, random_state=None):
        # The discrete Laplace is equivalent to the two-sided geometric
        # distribution with PMF:
        #   f(k) = (1 - alpha)/(1 + alpha) * alpha^abs(k)
        # Reference:
        #   https://www.sciencedirect.com/science/
        #   article/abs/pii/S0378375804003519
        # Furthermore, the two-sided geometric distribution is
        # equivalent to the difference between two iid geometric
        # distributions.
        # Reference (page 179):
        #   https://pdfs.semanticscholar.org/61b3/
        #   b99f466815808fd0d03f5d2791eea8b541a1.pdf
        # Thus, we can leverage the following:
        #   1) alpha = e^-a
        #   2) probability_of_success = 1 - alpha (Bernoulli trial)
        probOfSuccess = -np.expm1(-np.asarray(a))
        x = random_state.geometric(probOfSuccess, size=size)
        y = random_state.geometric(probOfSuccess, size=size)
        return x - y


dlaplace = dlaplace_gen(a=-np.inf,
                        name='dlaplace', longname='A discrete Laplacian')
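
# --- Illustrative identity (an editorial addition, not part of the original
# module): with alpha = exp(-a), the two-sided geometric PMF
# (1 - alpha)/(1 + alpha) * alpha**abs(k) quoted in the `_rvs` comments
# equals tanh(a/2) * exp(-a*abs(k)).
if __name__ == "__main__":
    _a, _k = 1.3, np.arange(-5, 6)
    _alpha = np.exp(-_a)
    assert np.allclose(dlaplace.pmf(_k, _a),
                       (1 - _alpha) / (1 + _alpha) * _alpha**np.abs(_k))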
class skellam_gen(rv_discrete):
    r"""A Skellam discrete random variable.

    %(before_notes)s

    Notes
    -----
    Probability distribution of the difference of two correlated or
    uncorrelated Poisson random variables.

    Let :math:`k_1` and :math:`k_2` be two Poisson-distributed r.v. with
    expected values :math:`\lambda_1` and :math:`\lambda_2`. Then,
    :math:`k_1 - k_2` follows a Skellam distribution with parameters
    :math:`\mu_1 = \lambda_1 - \rho \sqrt{\lambda_1 \lambda_2}` and
    :math:`\mu_2 = \lambda_2 - \rho \sqrt{\lambda_1 \lambda_2}`, where
    :math:`\rho` is the correlation coefficient between :math:`k_1` and
    :math:`k_2`. If the two Poisson-distributed r.v. are independent then
    :math:`\rho = 0`.

    Parameters :math:`\mu_1` and :math:`\mu_2` must be strictly positive.

    For details see: https://en.wikipedia.org/wiki/Skellam_distribution

    `skellam` takes :math:`\mu_1` and :math:`\mu_2` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("mu1", False, (0, np.inf), (False, False)),
                _ShapeInfo("mu2", False, (0, np.inf), (False, False))]

    def _rvs(self, mu1, mu2, size=None, random_state=None):
        n = size
        return (random_state.poisson(mu1, n) -
                random_state.poisson(mu2, n))

    def _pmf(self, x, mu1, mu2):
        with warnings.catch_warnings():
            message = "overflow encountered in _ncx2_pdf"
            warnings.filterwarnings("ignore", message=message)
            px = np.where(x < 0,
                          _boost._ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2,
                          _boost._ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2)
        # ncx2.pdf() returns nan's for extremely low probabilities
        return px

    def _cdf(self, x, mu1, mu2):
        x = floor(x)
        px = np.where(x < 0,
                      _boost._ncx2_cdf(2*mu2, -2*x, 2*mu1),
                      1 - _boost._ncx2_cdf(2*mu1, 2*(x+1), 2*mu2))
        return px

    def _stats(self, mu1, mu2):
        mean = mu1 - mu2
        var = mu1 + mu2
        g1 = mean / sqrt((var)**3)
        g2 = 1 / var
        return mean, var, g1, g2


skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')
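
# --- Illustrative construction (an editorial addition, not part of the
# original module): a Skellam variate is the difference of two independent
# Poisson variates, so sample moments of such differences should approach
# `skellam.stats`; the seed and sample size here are arbitrary choices.
if __name__ == "__main__":
    _rng = np.random.default_rng(12345)
    _d = _rng.poisson(3.0, 100_000) - _rng.poisson(1.5, 100_000)
    _mean, _var = skellam.stats(3.0, 1.5, moments='mv')
    assert np.isclose(_d.mean(), _mean, atol=0.05)
    assert np.isclose(_d.var(), _var, atol=0.1)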
class yulesimon_gen(rv_discrete):
    r"""A Yule-Simon discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `yulesimon` is:

    .. math::

       f(k) = \alpha B(k, \alpha+1)

    for :math:`k=1,2,3,...`, where :math:`\alpha>0`.
    Here :math:`B` refers to the `scipy.special.beta` function.

    The sampling of random variates is based on pg 553, Section 6.3 of [1]_.
    Our notation maps to the referenced logic via :math:`\alpha=a-1`.

    For details see the wikipedia entry [2]_.

    References
    ----------
    .. [1] Devroye, Luc. "Non-uniform Random Variate Generation",
           (1986) Springer, New York.

    .. [2] https://en.wikipedia.org/wiki/Yule-Simon_distribution

    %(after_notes)s

    %(example)s

    """
    def _shape_info(self):
        return [_ShapeInfo("alpha", False, (0, np.inf), (False, False))]

    def _rvs(self, alpha, size=None, random_state=None):
        E1 = random_state.standard_exponential(size)
        E2 = random_state.standard_exponential(size)
        ans = ceil(-E1 / log1p(-exp(-E2 / alpha)))
        return ans

    def _pmf(self, x, alpha):
        return alpha * special.beta(x, alpha + 1)

    def _argcheck(self, alpha):
        return (alpha > 0)

    def _logpmf(self, x, alpha):
        return log(alpha) + special.betaln(x, alpha + 1)

    def _cdf(self, x, alpha):
        return 1 - x * special.beta(x, alpha + 1)

    def _sf(self, x, alpha):
        return x * special.beta(x, alpha + 1)

    def _logsf(self, x, alpha):
        return log(x) + special.betaln(x, alpha + 1)

    def _stats(self, alpha):
        mu = np.where(alpha <= 1, np.inf, alpha / (alpha - 1))
        mu2 = np.where(alpha > 2,
                       alpha**2 / ((alpha - 2.0) * (alpha - 1)**2),
                       np.inf)
        mu2 = np.where(alpha <= 1, np.nan, mu2)
        g1 = np.where(alpha > 3,
                      sqrt(alpha - 2) * (alpha + 1)**2 / (alpha * (alpha - 3)),
                      np.inf)
        g1 = np.where(alpha <= 2, np.nan, g1)
        g2 = np.where(alpha > 4,
                      (alpha + 3) + (alpha**3 - 49 * alpha - 22)
                      / (alpha * (alpha - 4) * (alpha - 3)),
                      np.inf)
        g2 = np.where(alpha <= 2, np.nan, g2)
        return mu, mu2, g1, g2


yulesimon = yulesimon_gen(name='yulesimon', a=1)
def _vectorize_rvs_over_shapes(_rvs1):
    """Decorator that vectorizes _rvs method to work on ndarray shapes"""
    # _rvs1 must be a _function_ that accepts _scalar_ args as positional
    # arguments, `size` and `random_state` as keyword arguments.
    # _rvs1 must return a random variate array with shape `size`. If `size` is
    # None, _rvs1 must return a scalar.
    # When applied to _rvs1, this decorator broadcasts ndarray args
    # and loops over them, calling _rvs1 for each set of scalar args.
    # For usage example, see _nchypergeom_gen
    def _rvs(*args, size, random_state):
        _rvs1_size, _rvs1_indices = _check_shape(args[0].shape, size)

        size = np.array(size)
        _rvs1_size = np.array(_rvs1_size)
        _rvs1_indices = np.array(_rvs1_indices)

        if np.all(_rvs1_indices):  # all args are scalars
            return _rvs1(*args, size, random_state)

        out = np.empty(size)

        # out.shape can mix dimensions associated with arg_shape and
        # _rvs1_size. Sort them to arg_shape + _rvs1_size for easy indexing
        # of dimensions corresponding with the different sets of scalar args
        j0 = np.arange(out.ndim)
        j1 = np.hstack((j0[~_rvs1_indices], j0[_rvs1_indices]))
        out = np.moveaxis(out, j1, j0)

        for i in np.ndindex(*size[~_rvs1_indices]):
            # arg can be squeezed because singleton dimensions will be
            # associated with _rvs1_size, not arg_shape per _check_shape
            out[i] = _rvs1(*[np.squeeze(arg)[i] for arg in args],
                           _rvs1_size, random_state)

        return np.moveaxis(out, j0, j1)  # move axes back before returning
    return _rvs
class _nchypergeom_gen(rv_discrete):
    r"""A noncentral hypergeometric discrete random variable.

    For subclassing by nchypergeom_fisher_gen and nchypergeom_wallenius_gen.

    """

    rvs_name = None
    dist = None

    def _shape_info(self):
        return [_ShapeInfo("M", True, (0, np.inf), (True, False)),
                _ShapeInfo("n", True, (0, np.inf), (True, False)),
                _ShapeInfo("N", True, (0, np.inf), (True, False)),
                _ShapeInfo("odds", False, (0, np.inf), (False, False))]

    def _get_support(self, M, n, N, odds):
        N, m1, n = M, n, N  # follow Wikipedia notation
        m2 = N - m1
        x_min = np.maximum(0, n - m2)
        x_max = np.minimum(n, m1)
        return x_min, x_max

    def _argcheck(self, M, n, N, odds):
        M, n = np.asarray(M), np.asarray(n)
        N, odds = np.asarray(N), np.asarray(odds)
        cond1 = (M.astype(int) == M) & (M >= 0)
        cond2 = (n.astype(int) == n) & (n >= 0)
        cond3 = (N.astype(int) == N) & (N >= 0)
        cond4 = odds > 0
        cond5 = N <= M
        cond6 = n <= M
        return cond1 & cond2 & cond3 & cond4 & cond5 & cond6

    def _rvs(self, M, n, N, odds, size=None, random_state=None):

        @_vectorize_rvs_over_shapes
        def _rvs1(M, n, N, odds, size, random_state):
            length = np.prod(size)
            urn = _PyStochasticLib3()
            rv_gen = getattr(urn, self.rvs_name)
            rvs = rv_gen(N, n, M, odds, length, random_state)
            rvs = rvs.reshape(size)
            return rvs

        return _rvs1(M, n, N, odds, size=size, random_state=random_state)

    def _pmf(self, x, M, n, N, odds):
        x, M, n, N, odds = np.broadcast_arrays(x, M, n, N, odds)
        if x.size == 0:  # np.vectorize doesn't work with zero size input
            return np.empty_like(x)

        @np.vectorize
        def _pmf1(x, M, n, N, odds):
            urn = self.dist(N, n, M, odds, 1e-12)
            return urn.probability(x)

        return _pmf1(x, M, n, N, odds)

    def _stats(self, M, n, N, odds, moments):

        @np.vectorize
        def _moments1(M, n, N, odds):
            urn = self.dist(N, n, M, odds, 1e-12)
            return urn.moments()

        m, v = (_moments1(M, n, N, odds) if ("m" in moments or "v" in moments)
                else (None, None))
        s, k = None, None
        return m, v, s, k
class nchypergeom_fisher_gen(_nchypergeom_gen):
    r"""A Fisher's noncentral hypergeometric discrete random variable.

    Fisher's noncentral hypergeometric distribution models drawing objects of
    two types from a bin. `M` is the total number of objects, `n` is the
    number of Type I objects, and `odds` is the odds ratio: the odds of
    selecting a Type I object rather than a Type II object when there is only
    one object of each type.

    The random variate represents the number of Type I objects drawn if we
    take a handful of objects from the bin at once and find out afterwards
    that we took `N` objects.

    %(before_notes)s

    See Also
    --------
    nchypergeom_wallenius, hypergeom, nhypergeom

    Notes
    -----
    Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
    with parameters `N`, `n`, and `M` (respectively) as defined above.

    The probability mass function is defined as

    .. math::

        p(x; M, n, N, \omega) =
        \frac{\binom{n}{x}\binom{M - n}{N-x}\omega^x}{P_0},

    for
    :math:`x \in [x_l, x_u]`,
    :math:`M \in {\mathbb N}`,
    :math:`n \in [0, M]`,
    :math:`N \in [0, M]`,
    :math:`\omega > 0`,
    where
    :math:`x_l = \max(0, N - (M - n))`,
    :math:`x_u = \min(N, n)`,

    .. math::

        P_0 = \sum_{y=x_l}^{x_u} \binom{n}{y}\binom{M - n}{N-y}\omega^y,

    and the binomial coefficients are defined as

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    `nchypergeom_fisher` uses the BiasedUrn package by Agner Fog with
    permission for it to be distributed under SciPy's license.

    The symbols used to denote the shape parameters (`N`, `n`, and `M`) are
    not universally accepted; they are chosen for consistency with
    `hypergeom`.

    Note that Fisher's noncentral hypergeometric distribution is distinct
    from Wallenius' noncentral hypergeometric distribution, which models
    drawing a pre-determined number of objects `N` from a bin one by one.
    When the odds ratio is unity, however, both distributions reduce to the
    ordinary hypergeometric distribution.

    %(after_notes)s

    References
    ----------
    .. [1] Agner Fog, "Biased Urn Theory".
           https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf

    .. [2] "Fisher's noncentral hypergeometric distribution", Wikipedia,
           https://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution

    %(example)s

    """

    rvs_name = "rvs_fisher"
    dist = _PyFishersNCHypergeometric


nchypergeom_fisher = nchypergeom_fisher_gen(
    name='nchypergeom_fisher',
    longname="A Fisher's noncentral hypergeometric")
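
# --- Illustrative reduction (an editorial addition, not part of the original
# module): as noted in the docstring, with odds = 1 Fisher's noncentral
# distribution reduces to the ordinary hypergeometric distribution.
if __name__ == "__main__":
    _x = np.arange(0, 8)
    assert np.allclose(nchypergeom_fisher.pmf(_x, 20, 7, 12, 1),
                       hypergeom.pmf(_x, 20, 7, 12))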
class nchypergeom_wallenius_gen(_nchypergeom_gen):
    r"""A Wallenius' noncentral hypergeometric discrete random variable.

    Wallenius' noncentral hypergeometric distribution models drawing objects
    of two types from a bin. `M` is the total number of objects, `n` is the
    number of Type I objects, and `odds` is the odds ratio: the odds of
    selecting a Type I object rather than a Type II object when there is only
    one object of each type.

    The random variate represents the number of Type I objects drawn if we
    draw a pre-determined number of objects `N` from a bin one by one.

    %(before_notes)s

    See Also
    --------
    nchypergeom_fisher, hypergeom, nhypergeom

    Notes
    -----
    Let mathematical symbols :math:`N`, :math:`n`, and :math:`M` correspond
    with parameters `N`, `n`, and `M` (respectively) as defined above.

    The probability mass function is defined as

    .. math::

        p(x; N, n, M) = \binom{n}{x} \binom{M - n}{N-x}
        \int_0^1 \left(1-t^{\omega/D}\right)^x\left(1-t^{1/D}\right)^{N-x} dt

    for
    :math:`x \in [x_l, x_u]`,
    :math:`M \in {\mathbb N}`,
    :math:`n \in [0, M]`,
    :math:`N \in [0, M]`,
    :math:`\omega > 0`,
    where
    :math:`x_l = \max(0, N - (M - n))`,
    :math:`x_u = \min(N, n)`,

    .. math::

        D = \omega(n - x) + ((M - n)-(N-x)),

    and the binomial coefficients are defined as

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    `nchypergeom_wallenius` uses the BiasedUrn package by Agner Fog with
    permission for it to be distributed under SciPy's license.

    The symbols used to denote the shape parameters (`N`, `n`, and `M`) are
    not universally accepted; they are chosen for consistency with
    `hypergeom`.

    Note that Wallenius' noncentral hypergeometric distribution is distinct
    from Fisher's noncentral hypergeometric distribution, which models
    taking a handful of objects from the bin at once and finding out
    afterwards that `N` objects were taken.
    When the odds ratio is unity, however, both distributions reduce to the
    ordinary hypergeometric distribution.

    %(after_notes)s

    References
    ----------
    .. [1] Agner Fog, "Biased Urn Theory".
           https://cran.r-project.org/web/packages/BiasedUrn/vignettes/UrnTheory.pdf

    .. [2] "Wallenius' noncentral hypergeometric distribution", Wikipedia,
           https://en.wikipedia.org/wiki/Wallenius'_noncentral_hypergeometric_distribution

    %(example)s

    """

    rvs_name = "rvs_wallenius"
    dist = _PyWalleniusNCHypergeometric


nchypergeom_wallenius = nchypergeom_wallenius_gen(
    name='nchypergeom_wallenius',
    longname="A Wallenius' noncentral hypergeometric")
# Collect names of classes and objects in this module.
pairs = list(globals().copy().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)

__all__ = _distn_names + _distn_gen_names