test_sampling.py 50 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357
  1. import threading
  2. import pickle
  3. import pytest
  4. from copy import deepcopy
  5. import platform
  6. import sys
  7. import math
  8. import numpy as np
  9. from numpy.testing import assert_allclose, assert_equal, suppress_warnings
  10. from numpy.lib import NumpyVersion
  11. from scipy.stats.sampling import (
  12. TransformedDensityRejection,
  13. DiscreteAliasUrn,
  14. DiscreteGuideTable,
  15. NumericalInversePolynomial,
  16. NumericalInverseHermite,
  17. SimpleRatioUniforms,
  18. UNURANError
  19. )
  20. from scipy import stats
  21. from scipy import special
  22. from scipy.stats import chisquare, cramervonmises
  23. from scipy.stats._distr_params import distdiscrete, distcont
  24. from scipy._lib._util import check_random_state
  25. # common test data: this data can be shared between all the tests.
  26. # Normal distribution shared between all the continuous methods
  27. class StandardNormal:
  28. def pdf(self, x):
  29. # normalization constant needed for NumericalInverseHermite
  30. return 1./np.sqrt(2.*np.pi) * np.exp(-0.5 * x*x)
  31. def dpdf(self, x):
  32. return 1./np.sqrt(2.*np.pi) * -x * np.exp(-0.5 * x*x)
  33. def cdf(self, x):
  34. return special.ndtr(x)
  35. all_methods = [
  36. ("TransformedDensityRejection", {"dist": StandardNormal()}),
  37. ("DiscreteAliasUrn", {"dist": [0.02, 0.18, 0.8]}),
  38. ("DiscreteGuideTable", {"dist": [0.02, 0.18, 0.8]}),
  39. ("NumericalInversePolynomial", {"dist": StandardNormal()}),
  40. ("NumericalInverseHermite", {"dist": StandardNormal()}),
  41. ("SimpleRatioUniforms", {"dist": StandardNormal(), "mode": 0})
  42. ]
  43. if (sys.implementation.name == 'pypy'
  44. and sys.implementation.version < (7, 3, 10)):
  45. # changed in PyPy for v7.3.10
  46. floaterr = r"unsupported operand type for float\(\): 'list'"
  47. else:
  48. floaterr = r"must be real number, not list"
  49. # Make sure an internal error occurs in UNU.RAN when invalid callbacks are
  50. # passed. Moreover, different generators throw different error messages.
  51. # So, in case of an `UNURANError`, we do not validate the error message.
  52. bad_pdfs_common = [
  53. # Negative PDF
  54. (lambda x: -x, UNURANError, r"..."),
  55. # Returning wrong type
  56. (lambda x: [], TypeError, floaterr),
  57. # Undefined name inside the function
  58. (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa
  59. # Infinite value returned => Overflow error.
  60. (lambda x: np.inf, UNURANError, r"..."),
  61. # NaN value => internal error in UNU.RAN
  62. (lambda x: np.nan, UNURANError, r"..."),
  63. # signature of PDF wrong
  64. (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
  65. ]
  66. # same approach for dpdf
  67. bad_dpdf_common = [
  68. # Infinite value returned.
  69. (lambda x: np.inf, UNURANError, r"..."),
  70. # NaN value => internal error in UNU.RAN
  71. (lambda x: np.nan, UNURANError, r"..."),
  72. # Returning wrong type
  73. (lambda x: [], TypeError, floaterr),
  74. # Undefined name inside the function
  75. (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa
  76. # signature of dPDF wrong
  77. (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
  78. ]
  79. # same approach for logpdf
  80. bad_logpdfs_common = [
  81. # Returning wrong type
  82. (lambda x: [], TypeError, floaterr),
  83. # Undefined name inside the function
  84. (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa
  85. # Infinite value returned => Overflow error.
  86. (lambda x: np.inf, UNURANError, r"..."),
  87. # NaN value => internal error in UNU.RAN
  88. (lambda x: np.nan, UNURANError, r"..."),
  89. # signature of logpdf wrong
  90. (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
  91. ]
  92. bad_pv_common = [
  93. ([], r"must contain at least one element"),
  94. ([[1.0, 0.0]], r"wrong number of dimensions \(expected 1, got 2\)"),
  95. ([0.2, 0.4, np.nan, 0.8], r"must contain only finite / non-nan values"),
  96. ([0.2, 0.4, np.inf, 0.8], r"must contain only finite / non-nan values"),
  97. ([0.0, 0.0], r"must contain at least one non-zero value"),
  98. ]
  99. # size of the domains is incorrect
  100. bad_sized_domains = [
  101. # > 2 elements in the domain
  102. ((1, 2, 3), ValueError, r"must be a length 2 tuple"),
  103. # empty domain
  104. ((), ValueError, r"must be a length 2 tuple")
  105. ]
  106. # domain values are incorrect
  107. bad_domains = [
  108. ((2, 1), UNURANError, r"left >= right"),
  109. ((1, 1), UNURANError, r"left >= right"),
  110. ]
  111. # infinite and nan values present in domain.
  112. inf_nan_domains = [
  113. # left >= right
  114. ((10, 10), UNURANError, r"left >= right"),
  115. ((np.inf, np.inf), UNURANError, r"left >= right"),
  116. ((-np.inf, -np.inf), UNURANError, r"left >= right"),
  117. ((np.inf, -np.inf), UNURANError, r"left >= right"),
  118. # Also include nans in some of the domains.
  119. ((-np.inf, np.nan), ValueError, r"only non-nan values"),
  120. ((np.nan, np.inf), ValueError, r"only non-nan values")
  121. ]
  122. # `nan` values present in domain. Some distributions don't support
  123. # infinite tails, so don't mix the nan values with infinities.
  124. nan_domains = [
  125. ((0, np.nan), ValueError, r"only non-nan values"),
  126. ((np.nan, np.nan), ValueError, r"only non-nan values")
  127. ]
  128. # all the methods should throw errors for nan, bad sized, and bad valued
  129. # domains.
  130. @pytest.mark.parametrize("domain, err, msg",
  131. bad_domains + bad_sized_domains +
  132. nan_domains) # type: ignore[operator]
  133. @pytest.mark.parametrize("method, kwargs", all_methods)
  134. def test_bad_domain(domain, err, msg, method, kwargs):
  135. Method = getattr(stats.sampling, method)
  136. with pytest.raises(err, match=msg):
  137. Method(**kwargs, domain=domain)
  138. @pytest.mark.parametrize("method, kwargs", all_methods)
  139. def test_random_state(method, kwargs):
  140. Method = getattr(stats.sampling, method)
  141. # simple seed that works for any version of NumPy
  142. seed = 123
  143. rng1 = Method(**kwargs, random_state=seed)
  144. rng2 = Method(**kwargs, random_state=seed)
  145. assert_equal(rng1.rvs(100), rng2.rvs(100))
  146. # global seed
  147. np.random.seed(123)
  148. rng1 = Method(**kwargs)
  149. rvs1 = rng1.rvs(100)
  150. np.random.seed(None)
  151. rng2 = Method(**kwargs, random_state=123)
  152. rvs2 = rng2.rvs(100)
  153. assert_equal(rvs1, rvs2)
  154. # Generator seed for new NumPy
  155. # when a RandomState is given, it should take the bitgen_t
  156. # member of the class and create a Generator instance.
  157. seed1 = np.random.RandomState(np.random.MT19937(123))
  158. seed2 = np.random.Generator(np.random.MT19937(123))
  159. rng1 = Method(**kwargs, random_state=seed1)
  160. rng2 = Method(**kwargs, random_state=seed2)
  161. assert_equal(rng1.rvs(100), rng2.rvs(100))
  162. def test_set_random_state():
  163. rng1 = TransformedDensityRejection(StandardNormal(), random_state=123)
  164. rng2 = TransformedDensityRejection(StandardNormal())
  165. rng2.set_random_state(123)
  166. assert_equal(rng1.rvs(100), rng2.rvs(100))
  167. rng = TransformedDensityRejection(StandardNormal(), random_state=123)
  168. rvs1 = rng.rvs(100)
  169. rng.set_random_state(123)
  170. rvs2 = rng.rvs(100)
  171. assert_equal(rvs1, rvs2)
  172. def test_threading_behaviour():
  173. # Test if the API is thread-safe.
  174. # This verifies if the lock mechanism and the use of `PyErr_Occurred`
  175. # is correct.
  176. errors = {"err1": None, "err2": None}
  177. class Distribution:
  178. def __init__(self, pdf_msg):
  179. self.pdf_msg = pdf_msg
  180. def pdf(self, x):
  181. if 49.9 < x < 50.0:
  182. raise ValueError(self.pdf_msg)
  183. return x
  184. def dpdf(self, x):
  185. return 1
  186. def func1():
  187. dist = Distribution('foo')
  188. rng = TransformedDensityRejection(dist, domain=(10, 100),
  189. random_state=12)
  190. try:
  191. rng.rvs(100000)
  192. except ValueError as e:
  193. errors['err1'] = e.args[0]
  194. def func2():
  195. dist = Distribution('bar')
  196. rng = TransformedDensityRejection(dist, domain=(10, 100),
  197. random_state=2)
  198. try:
  199. rng.rvs(100000)
  200. except ValueError as e:
  201. errors['err2'] = e.args[0]
  202. t1 = threading.Thread(target=func1)
  203. t2 = threading.Thread(target=func2)
  204. t1.start()
  205. t2.start()
  206. t1.join()
  207. t2.join()
  208. assert errors['err1'] == 'foo'
  209. assert errors['err2'] == 'bar'
  210. @pytest.mark.parametrize("method, kwargs", all_methods)
  211. def test_pickle(method, kwargs):
  212. Method = getattr(stats.sampling, method)
  213. rng1 = Method(**kwargs, random_state=123)
  214. obj = pickle.dumps(rng1)
  215. rng2 = pickle.loads(obj)
  216. assert_equal(rng1.rvs(100), rng2.rvs(100))
  217. @pytest.mark.parametrize("size", [None, 0, (0, ), 1, (10, 3), (2, 3, 4, 5),
  218. (0, 0), (0, 1)])
  219. def test_rvs_size(size):
  220. # As the `rvs` method is present in the base class and shared between
  221. # all the classes, we can just test with one of the methods.
  222. rng = TransformedDensityRejection(StandardNormal())
  223. if size is None:
  224. assert np.isscalar(rng.rvs(size))
  225. else:
  226. if np.isscalar(size):
  227. size = (size, )
  228. assert rng.rvs(size).shape == size
  229. def test_with_scipy_distribution():
  230. # test if the setup works with SciPy's rv_frozen distributions
  231. dist = stats.norm()
  232. urng = np.random.default_rng(0)
  233. rng = NumericalInverseHermite(dist, random_state=urng)
  234. u = np.linspace(0, 1, num=100)
  235. check_cont_samples(rng, dist, dist.stats())
  236. assert_allclose(dist.ppf(u), rng.ppf(u))
  237. # test if it works with `loc` and `scale`
  238. dist = stats.norm(loc=10., scale=5.)
  239. rng = NumericalInverseHermite(dist, random_state=urng)
  240. check_cont_samples(rng, dist, dist.stats())
  241. assert_allclose(dist.ppf(u), rng.ppf(u))
  242. # check for discrete distributions
  243. dist = stats.binom(10, 0.2)
  244. rng = DiscreteAliasUrn(dist, random_state=urng)
  245. domain = dist.support()
  246. pv = dist.pmf(np.arange(domain[0], domain[1]+1))
  247. check_discr_samples(rng, pv, dist.stats())
  248. def check_cont_samples(rng, dist, mv_ex):
  249. rvs = rng.rvs(100000)
  250. mv = rvs.mean(), rvs.var()
  251. # test the moments only if the variance is finite
  252. if np.isfinite(mv_ex[1]):
  253. assert_allclose(mv, mv_ex, rtol=1e-7, atol=1e-1)
  254. # Cramer Von Mises test for goodness-of-fit
  255. rvs = rng.rvs(500)
  256. dist.cdf = np.vectorize(dist.cdf)
  257. pval = cramervonmises(rvs, dist.cdf).pvalue
  258. assert pval > 0.1
  259. def check_discr_samples(rng, pv, mv_ex):
  260. rvs = rng.rvs(100000)
  261. # test if the first few moments match
  262. mv = rvs.mean(), rvs.var()
  263. assert_allclose(mv, mv_ex, rtol=1e-3, atol=1e-1)
  264. # normalize
  265. pv = pv / pv.sum()
  266. # chi-squared test for goodness-of-fit
  267. obs_freqs = np.zeros_like(pv)
  268. _, freqs = np.unique(rvs, return_counts=True)
  269. freqs = freqs / freqs.sum()
  270. obs_freqs[:freqs.size] = freqs
  271. pval = chisquare(obs_freqs, pv).pvalue
  272. assert pval > 0.1
  273. def test_warning_center_not_in_domain():
  274. # UNURAN will warn if the center provided or the one computed w/o the
  275. # domain is outside of the domain
  276. msg = "102 : center moved into domain of distribution"
  277. with pytest.warns(RuntimeWarning, match=msg):
  278. NumericalInversePolynomial(StandardNormal(), center=0, domain=(3, 5))
  279. with pytest.warns(RuntimeWarning, match=msg):
  280. NumericalInversePolynomial(StandardNormal(), domain=(3, 5))
  281. @pytest.mark.parametrize('method', ["SimpleRatioUniforms",
  282. "NumericalInversePolynomial",
  283. "TransformedDensityRejection"])
  284. def test_error_mode_not_in_domain(method):
  285. # UNURAN raises an error if the mode is not in the domain
  286. # the behavior is different compared to the case that center is not in the
  287. # domain. mode is supposed to be the exact value, center can be an
  288. # approximate value
  289. Method = getattr(stats.sampling, method)
  290. msg = "17 : mode not in domain"
  291. with pytest.raises(UNURANError, match=msg):
  292. Method(StandardNormal(), mode=0, domain=(3, 5))
  293. @pytest.mark.parametrize('method', ["NumericalInverseHermite",
  294. "NumericalInversePolynomial"])
  295. class TestQRVS:
  296. def test_input_validation(self, method):
  297. match = "`qmc_engine` must be an instance of..."
  298. with pytest.raises(ValueError, match=match):
  299. Method = getattr(stats.sampling, method)
  300. gen = Method(StandardNormal())
  301. gen.qrvs(qmc_engine=0)
  302. # issues with QMCEngines and old NumPy
  303. Method = getattr(stats.sampling, method)
  304. gen = Method(StandardNormal())
  305. match = "`d` must be consistent with dimension of `qmc_engine`."
  306. with pytest.raises(ValueError, match=match):
  307. gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2))
  308. qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)]
  309. # `size=None` should not add anything to the shape, `size=1` should
  310. sizes = [(None, tuple()), (1, (1,)), (4, (4,)),
  311. ((4,), (4,)), ((2, 4), (2, 4))] # type: ignore
  312. # Neither `d=None` nor `d=1` should add anything to the shape
  313. ds = [(None, tuple()), (1, tuple()), (3, (3,))]
  314. @pytest.mark.parametrize('qrng', qrngs)
  315. @pytest.mark.parametrize('size_in, size_out', sizes)
  316. @pytest.mark.parametrize('d_in, d_out', ds)
  317. def test_QRVS_shape_consistency(self, qrng, size_in, size_out,
  318. d_in, d_out, method):
  319. w32 = sys.platform == "win32" and platform.architecture()[0] == "32bit"
  320. if w32 and method == "NumericalInversePolynomial":
  321. pytest.xfail("NumericalInversePolynomial.qrvs fails for Win "
  322. "32-bit")
  323. dist = StandardNormal()
  324. Method = getattr(stats.sampling, method)
  325. gen = Method(dist)
  326. # If d and qrng.d are inconsistent, an error is raised
  327. if d_in is not None and qrng is not None and qrng.d != d_in:
  328. match = "`d` must be consistent with dimension of `qmc_engine`."
  329. with pytest.raises(ValueError, match=match):
  330. gen.qrvs(size_in, d=d_in, qmc_engine=qrng)
  331. return
  332. # Sometimes d is really determined by qrng
  333. if d_in is None and qrng is not None and qrng.d != 1:
  334. d_out = (qrng.d,)
  335. shape_expected = size_out + d_out
  336. qrng2 = deepcopy(qrng)
  337. qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng)
  338. if size_in is not None:
  339. assert qrvs.shape == shape_expected
  340. if qrng2 is not None:
  341. uniform = qrng2.random(np.prod(size_in) or 1)
  342. qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected)
  343. assert_allclose(qrvs, qrvs2, atol=1e-12)
  344. def test_QRVS_size_tuple(self, method):
  345. # QMCEngine samples are always of shape (n, d). When `size` is a tuple,
  346. # we set `n = prod(size)` in the call to qmc_engine.random, transform
  347. # the sample, and reshape it to the final dimensions. When we reshape,
  348. # we need to be careful, because the _columns_ of the sample returned
  349. # by a QMCEngine are "independent"-ish, but the elements within the
  350. # columns are not. We need to make sure that this doesn't get mixed up
  351. # by reshaping: qrvs[..., i] should remain "independent"-ish of
  352. # qrvs[..., i+1], but the elements within qrvs[..., i] should be
  353. # transformed from the same low-discrepancy sequence.
  354. dist = StandardNormal()
  355. Method = getattr(stats.sampling, method)
  356. gen = Method(dist)
  357. size = (3, 4)
  358. d = 5
  359. qrng = stats.qmc.Halton(d, seed=0)
  360. qrng2 = stats.qmc.Halton(d, seed=0)
  361. uniform = qrng2.random(np.prod(size))
  362. qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng)
  363. qrvs2 = stats.norm.ppf(uniform)
  364. for i in range(d):
  365. sample = qrvs[..., i]
  366. sample2 = qrvs2[:, i].reshape(size)
  367. assert_allclose(sample, sample2, atol=1e-12)
  368. class TestTransformedDensityRejection:
  369. # Simple Custom Distribution
  370. class dist0:
  371. def pdf(self, x):
  372. return 3/4 * (1-x*x)
  373. def dpdf(self, x):
  374. return 3/4 * (-2*x)
  375. def cdf(self, x):
  376. return 3/4 * (x - x**3/3 + 2/3)
  377. def support(self):
  378. return -1, 1
  379. # Standard Normal Distribution
  380. class dist1:
  381. def pdf(self, x):
  382. return stats.norm._pdf(x / 0.1)
  383. def dpdf(self, x):
  384. return -x / 0.01 * stats.norm._pdf(x / 0.1)
  385. def cdf(self, x):
  386. return stats.norm._cdf(x / 0.1)
  387. # pdf with piecewise linear function as transformed density
  388. # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
  389. # (from file t_tdr_ps.c)
  390. class dist2:
  391. def __init__(self, shift):
  392. self.shift = shift
  393. def pdf(self, x):
  394. x -= self.shift
  395. y = 1. / (abs(x) + 1.)
  396. return 0.5 * y * y
  397. def dpdf(self, x):
  398. x -= self.shift
  399. y = 1. / (abs(x) + 1.)
  400. y = y * y * y
  401. return y if (x < 0.) else -y
  402. def cdf(self, x):
  403. x -= self.shift
  404. if x <= 0.:
  405. return 0.5 / (1. - x)
  406. else:
  407. return 1. - 0.5 / (1. + x)
  408. dists = [dist0(), dist1(), dist2(0.), dist2(10000.)]
  409. # exact mean and variance of the distributions in the list dists
  410. mv0 = [0., 4./15.]
  411. mv1 = [0., 0.01]
  412. mv2 = [0., np.inf]
  413. mv3 = [10000., np.inf]
  414. mvs = [mv0, mv1, mv2, mv3]
  415. @pytest.mark.parametrize("dist, mv_ex",
  416. zip(dists, mvs))
  417. def test_basic(self, dist, mv_ex):
  418. with suppress_warnings() as sup:
  419. # filter the warnings thrown by UNU.RAN
  420. sup.filter(RuntimeWarning)
  421. rng = TransformedDensityRejection(dist, random_state=42)
  422. check_cont_samples(rng, dist, mv_ex)
  423. # PDF 0 everywhere => bad construction points
  424. bad_pdfs = [(lambda x: 0, UNURANError, r"50 : bad construction points.")]
  425. bad_pdfs += bad_pdfs_common # type: ignore[arg-type]
  426. @pytest.mark.parametrize("pdf, err, msg", bad_pdfs)
  427. def test_bad_pdf(self, pdf, err, msg):
  428. class dist:
  429. pass
  430. dist.pdf = pdf
  431. dist.dpdf = lambda x: 1 # an arbitrary dPDF
  432. with pytest.raises(err, match=msg):
  433. TransformedDensityRejection(dist)
  434. @pytest.mark.parametrize("dpdf, err, msg", bad_dpdf_common)
  435. def test_bad_dpdf(self, dpdf, err, msg):
  436. class dist:
  437. pass
  438. dist.pdf = lambda x: x
  439. dist.dpdf = dpdf
  440. with pytest.raises(err, match=msg):
  441. TransformedDensityRejection(dist, domain=(1, 10))
  442. # test domains with inf + nan in them. need to write a custom test for
  443. # this because not all methods support infinite tails.
  444. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  445. def test_inf_nan_domains(self, domain, err, msg):
  446. with pytest.raises(err, match=msg):
  447. TransformedDensityRejection(StandardNormal(), domain=domain)
  448. @pytest.mark.parametrize("construction_points", [-1, 0, 0.1])
  449. def test_bad_construction_points_scalar(self, construction_points):
  450. with pytest.raises(ValueError, match=r"`construction_points` must be "
  451. r"a positive integer."):
  452. TransformedDensityRejection(
  453. StandardNormal(), construction_points=construction_points
  454. )
  455. def test_bad_construction_points_array(self):
  456. # empty array
  457. construction_points = []
  458. with pytest.raises(ValueError, match=r"`construction_points` must "
  459. r"either be a "
  460. r"scalar or a non-empty array."):
  461. TransformedDensityRejection(
  462. StandardNormal(), construction_points=construction_points
  463. )
  464. # construction_points not monotonically increasing
  465. construction_points = [1, 1, 1, 1, 1, 1]
  466. with pytest.warns(RuntimeWarning, match=r"33 : starting points not "
  467. r"strictly monotonically "
  468. r"increasing"):
  469. TransformedDensityRejection(
  470. StandardNormal(), construction_points=construction_points
  471. )
  472. # construction_points containing nans
  473. construction_points = [np.nan, np.nan, np.nan]
  474. with pytest.raises(UNURANError, match=r"50 : bad construction "
  475. r"points."):
  476. TransformedDensityRejection(
  477. StandardNormal(), construction_points=construction_points
  478. )
  479. # construction_points out of domain
  480. construction_points = [-10, 10]
  481. with pytest.warns(RuntimeWarning, match=r"50 : starting point out of "
  482. r"domain"):
  483. TransformedDensityRejection(
  484. StandardNormal(), domain=(-3, 3),
  485. construction_points=construction_points
  486. )
  487. @pytest.mark.parametrize("c", [-1., np.nan, np.inf, 0.1, 1.])
  488. def test_bad_c(self, c):
  489. msg = r"`c` must either be -0.5 or 0."
  490. with pytest.raises(ValueError, match=msg):
  491. TransformedDensityRejection(StandardNormal(), c=-1.)
  492. u = [np.linspace(0, 1, num=1000), [], [[]], [np.nan],
  493. [-np.inf, np.nan, np.inf], 0,
  494. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]]
  495. @pytest.mark.parametrize("u", u)
  496. def test_ppf_hat(self, u):
  497. # Increase the `max_squeeze_hat_ratio` so the ppf_hat is more
  498. # accurate.
  499. rng = TransformedDensityRejection(StandardNormal(),
  500. max_squeeze_hat_ratio=0.9999)
  501. # Older versions of NumPy throw RuntimeWarnings for comparisons
  502. # with nan.
  503. with suppress_warnings() as sup:
  504. sup.filter(RuntimeWarning, "invalid value encountered in greater")
  505. sup.filter(RuntimeWarning, "invalid value encountered in "
  506. "greater_equal")
  507. sup.filter(RuntimeWarning, "invalid value encountered in less")
  508. sup.filter(RuntimeWarning, "invalid value encountered in "
  509. "less_equal")
  510. res = rng.ppf_hat(u)
  511. expected = stats.norm.ppf(u)
  512. assert_allclose(res, expected, rtol=1e-3, atol=1e-5)
  513. assert res.shape == expected.shape
  514. def test_bad_dist(self):
  515. # Empty distribution
  516. class dist:
  517. ...
  518. msg = r"`pdf` required but not found."
  519. with pytest.raises(ValueError, match=msg):
  520. TransformedDensityRejection(dist)
  521. # dPDF not present in dist
  522. class dist:
  523. pdf = lambda x: 1-x*x # noqa: E731
  524. msg = r"`dpdf` required but not found."
  525. with pytest.raises(ValueError, match=msg):
  526. TransformedDensityRejection(dist)
  527. class TestDiscreteAliasUrn:
  528. # DAU fails on these probably because of large domains and small
  529. # computation errors in PMF. Mean/SD match but chi-squared test fails.
  530. basic_fail_dists = {
  531. 'nchypergeom_fisher', # numerical erros on tails
  532. 'nchypergeom_wallenius', # numerical erros on tails
  533. 'randint' # fails on 32-bit ubuntu
  534. }
  535. @pytest.mark.parametrize("distname, params", distdiscrete)
  536. def test_basic(self, distname, params):
  537. if distname in self.basic_fail_dists:
  538. msg = ("DAU fails on these probably because of large domains "
  539. "and small computation errors in PMF.")
  540. pytest.skip(msg)
  541. if not isinstance(distname, str):
  542. dist = distname
  543. else:
  544. dist = getattr(stats, distname)
  545. dist = dist(*params)
  546. domain = dist.support()
  547. if not np.isfinite(domain[1] - domain[0]):
  548. # DAU only works with finite domain. So, skip the distributions
  549. # with infinite tails.
  550. pytest.skip("DAU only works with a finite domain.")
  551. k = np.arange(domain[0], domain[1]+1)
  552. pv = dist.pmf(k)
  553. mv_ex = dist.stats('mv')
  554. rng = DiscreteAliasUrn(dist, random_state=42)
  555. check_discr_samples(rng, pv, mv_ex)
  556. # Can't use bad_pmf_common here as we evaluate PMF early on to avoid
  557. # unhelpful errors from UNU.RAN.
  558. bad_pmf = [
  559. # inf returned
  560. (lambda x: np.inf, ValueError,
  561. r"must contain only finite / non-nan values"),
  562. # nan returned
  563. (lambda x: np.nan, ValueError,
  564. r"must contain only finite / non-nan values"),
  565. # all zeros
  566. (lambda x: 0.0, ValueError,
  567. r"must contain at least one non-zero value"),
  568. # Undefined name inside the function
  569. (lambda x: foo, NameError, # type: ignore[name-defined] # noqa
  570. r"name 'foo' is not defined"),
  571. # Returning wrong type.
  572. (lambda x: [], ValueError,
  573. r"setting an array element with a sequence."),
  574. # probabilities < 0
  575. (lambda x: -x, UNURANError,
  576. r"50 : probability < 0"),
  577. # signature of PMF wrong
  578. (lambda: 1.0, TypeError,
  579. r"takes 0 positional arguments but 1 was given")
  580. ]
  581. @pytest.mark.parametrize("pmf, err, msg", bad_pmf)
  582. def test_bad_pmf(self, pmf, err, msg):
  583. class dist:
  584. pass
  585. dist.pmf = pmf
  586. with pytest.raises(err, match=msg):
  587. DiscreteAliasUrn(dist, domain=(1, 10))
  588. @pytest.mark.parametrize("pv", [[0.18, 0.02, 0.8],
  589. [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]])
  590. def test_sampling_with_pv(self, pv):
  591. pv = np.asarray(pv, dtype=np.float64)
  592. rng = DiscreteAliasUrn(pv, random_state=123)
  593. rvs = rng.rvs(100_000)
  594. pv = pv / pv.sum()
  595. variates = np.arange(0, len(pv))
  596. # test if the first few moments match
  597. m_expected = np.average(variates, weights=pv)
  598. v_expected = np.average((variates - m_expected) ** 2, weights=pv)
  599. mv_expected = m_expected, v_expected
  600. check_discr_samples(rng, pv, mv_expected)
  601. @pytest.mark.parametrize("pv, msg", bad_pv_common)
  602. def test_bad_pv(self, pv, msg):
  603. with pytest.raises(ValueError, match=msg):
  604. DiscreteAliasUrn(pv)
  605. # DAU doesn't support infinite tails. So, it should throw an error when
  606. # inf is present in the domain.
  607. inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf),
  608. (0, np.inf), (-np.inf, 0)]
  609. @pytest.mark.parametrize("domain", inf_domain)
  610. def test_inf_domain(self, domain):
  611. with pytest.raises(ValueError, match=r"must be finite"):
  612. DiscreteAliasUrn(stats.binom(10, 0.2), domain=domain)
  613. def test_bad_urn_factor(self):
  614. with pytest.warns(RuntimeWarning, match=r"relative urn size < 1."):
  615. DiscreteAliasUrn([0.5, 0.5], urn_factor=-1)
  616. def test_bad_args(self):
  617. msg = (r"`domain` must be provided when the "
  618. r"probability vector is not available.")
  619. class dist:
  620. def pmf(self, x):
  621. return x
  622. with pytest.raises(ValueError, match=msg):
  623. DiscreteAliasUrn(dist)
  624. class TestNumericalInversePolynomial:
  625. # Simple Custom Distribution
  626. class dist0:
  627. def pdf(self, x):
  628. return 3/4 * (1-x*x)
  629. def cdf(self, x):
  630. return 3/4 * (x - x**3/3 + 2/3)
  631. def support(self):
  632. return -1, 1
  633. # Standard Normal Distribution
  634. class dist1:
  635. def pdf(self, x):
  636. return stats.norm._pdf(x / 0.1)
  637. def cdf(self, x):
  638. return stats.norm._cdf(x / 0.1)
  639. # Sin 2 distribution
  640. # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 1
  641. # f(x) = <
  642. # \ 0 otherwise
  643. # Taken from UNU.RAN test suite (from file t_pinv.c)
  644. class dist2:
  645. def pdf(self, x):
  646. return 0.05 + 0.45 * (1 + np.sin(2*np.pi*x))
  647. def cdf(self, x):
  648. return (0.05*(x + 1) +
  649. 0.9*(1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) /
  650. (4.*np.pi))
  651. def support(self):
  652. return -1, 1
  653. # Sin 10 distribution
  654. # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 5
  655. # f(x) = <
  656. # \ 0 otherwise
  657. # Taken from UNU.RAN test suite (from file t_pinv.c)
  658. class dist3:
  659. def pdf(self, x):
  660. return 0.2 * (0.05 + 0.45 * (1 + np.sin(2*np.pi*x)))
  661. def cdf(self, x):
  662. return x/10. + 0.5 + 0.09/(2*np.pi) * (np.cos(10*np.pi) -
  663. np.cos(2*np.pi*x))
  664. def support(self):
  665. return -5, 5
  666. dists = [dist0(), dist1(), dist2(), dist3()]
  667. # exact mean and variance of the distributions in the list dists
  668. mv0 = [0., 4./15.]
  669. mv1 = [0., 0.01]
  670. mv2 = [-0.45/np.pi, 2/3*0.5 - 0.45**2/np.pi**2]
  671. mv3 = [-0.45/np.pi, 0.2 * 250/3 * 0.5 - 0.45**2/np.pi**2]
  672. mvs = [mv0, mv1, mv2, mv3]
  673. @pytest.mark.parametrize("dist, mv_ex",
  674. zip(dists, mvs))
  675. def test_basic(self, dist, mv_ex):
  676. rng = NumericalInversePolynomial(dist, random_state=42)
  677. check_cont_samples(rng, dist, mv_ex)
  678. very_slow_dists = ['studentized_range', 'trapezoid', 'triang', 'vonmises',
  679. 'levy_stable', 'kappa4', 'ksone', 'kstwo', 'levy_l',
  680. 'gausshyper', 'anglit']
  681. # for these distributions, some assertions fail due to minor
  682. # numerical differences. They can be avoided either by changing
  683. # the seed or by increasing the u_resolution.
  684. fail_dists = ['ncf', 'pareto', 'chi2', 'fatiguelife', 'halfgennorm',
  685. 'gibrat', 'lognorm', 'ncx2', 't']
  686. @pytest.mark.xslow
  687. @pytest.mark.parametrize("distname, params", distcont)
  688. def test_basic_all_scipy_dists(self, distname, params):
  689. if distname in self.very_slow_dists:
  690. pytest.skip(f"PINV too slow for {distname}")
  691. if distname in self.fail_dists:
  692. pytest.skip(f"PINV fails for {distname}")
  693. dist = (getattr(stats, distname)
  694. if isinstance(distname, str)
  695. else distname)
  696. dist = dist(*params)
  697. with suppress_warnings() as sup:
  698. sup.filter(RuntimeWarning)
  699. rng = NumericalInversePolynomial(dist, random_state=42)
  700. check_cont_samples(rng, dist, [dist.mean(), dist.var()])
  701. @pytest.mark.parametrize("pdf, err, msg", bad_pdfs_common)
  702. def test_bad_pdf(self, pdf, err, msg):
  703. class dist:
  704. pass
  705. dist.pdf = pdf
  706. with pytest.raises(err, match=msg):
  707. NumericalInversePolynomial(dist, domain=[0, 5])
  708. @pytest.mark.parametrize("logpdf, err, msg", bad_logpdfs_common)
  709. def test_bad_logpdf(self, logpdf, err, msg):
  710. class dist:
  711. pass
  712. dist.logpdf = logpdf
  713. with pytest.raises(err, match=msg):
  714. NumericalInversePolynomial(dist, domain=[0, 5])
  715. # test domains with inf + nan in them. need to write a custom test for
  716. # this because not all methods support infinite tails.
  717. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  718. def test_inf_nan_domains(self, domain, err, msg):
  719. with pytest.raises(err, match=msg):
  720. NumericalInversePolynomial(StandardNormal(), domain=domain)
  721. u = [
  722. # test if quantile 0 and 1 return -inf and inf respectively and check
  723. # the correctness of the PPF for equidistant points between 0 and 1.
  724. np.linspace(0, 1, num=10000),
  725. # test the PPF method for empty arrays
  726. [], [[]],
  727. # test if nans and infs return nan result.
  728. [np.nan], [-np.inf, np.nan, np.inf],
  729. # test if a scalar is returned for a scalar input.
  730. 0,
  731. # test for arrays with nans, values greater than 1 and less than 0,
  732. # and some valid values.
  733. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
  734. ]
  735. @pytest.mark.parametrize("u", u)
  736. def test_ppf(self, u):
  737. dist = StandardNormal()
  738. rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
  739. # Older versions of NumPy throw RuntimeWarnings for comparisons
  740. # with nan.
  741. with suppress_warnings() as sup:
  742. sup.filter(RuntimeWarning, "invalid value encountered in greater")
  743. sup.filter(RuntimeWarning, "invalid value encountered in "
  744. "greater_equal")
  745. sup.filter(RuntimeWarning, "invalid value encountered in less")
  746. sup.filter(RuntimeWarning, "invalid value encountered in "
  747. "less_equal")
  748. res = rng.ppf(u)
  749. expected = stats.norm.ppf(u)
  750. assert_allclose(res, expected, rtol=1e-11, atol=1e-11)
  751. assert res.shape == expected.shape
  752. x = [np.linspace(-10, 10, num=10000), [], [[]], [np.nan],
  753. [-np.inf, np.nan, np.inf], 0,
  754. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-np.inf, 3, 4]]]
  755. @pytest.mark.parametrize("x", x)
  756. def test_cdf(self, x):
  757. dist = StandardNormal()
  758. rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
  759. # Older versions of NumPy throw RuntimeWarnings for comparisons
  760. # with nan.
  761. with suppress_warnings() as sup:
  762. sup.filter(RuntimeWarning, "invalid value encountered in greater")
  763. sup.filter(RuntimeWarning, "invalid value encountered in "
  764. "greater_equal")
  765. sup.filter(RuntimeWarning, "invalid value encountered in less")
  766. sup.filter(RuntimeWarning, "invalid value encountered in "
  767. "less_equal")
  768. res = rng.cdf(x)
  769. expected = stats.norm.cdf(x)
  770. assert_allclose(res, expected, rtol=1e-11, atol=1e-11)
  771. assert res.shape == expected.shape
  772. def test_u_error(self):
  773. dist = StandardNormal()
  774. rng = NumericalInversePolynomial(dist, u_resolution=1e-10)
  775. max_error, mae = rng.u_error()
  776. assert max_error < 1e-10
  777. assert mae <= max_error
  778. rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
  779. max_error, mae = rng.u_error()
  780. assert max_error < 1e-14
  781. assert mae <= max_error
  782. bad_orders = [1, 4.5, 20, np.inf, np.nan]
  783. bad_u_resolution = [1e-20, 1e-1, np.inf, np.nan]
  784. @pytest.mark.parametrize("order", bad_orders)
  785. def test_bad_orders(self, order):
  786. dist = StandardNormal()
  787. msg = r"`order` must be an integer in the range \[3, 17\]."
  788. with pytest.raises(ValueError, match=msg):
  789. NumericalInversePolynomial(dist, order=order)
  790. @pytest.mark.parametrize("u_resolution", bad_u_resolution)
  791. def test_bad_u_resolution(self, u_resolution):
  792. msg = r"`u_resolution` must be between 1e-15 and 1e-5."
  793. with pytest.raises(ValueError, match=msg):
  794. NumericalInversePolynomial(StandardNormal(),
  795. u_resolution=u_resolution)
  796. def test_bad_args(self):
  797. class BadDist:
  798. def cdf(self, x):
  799. return stats.norm._cdf(x)
  800. dist = BadDist()
  801. msg = r"Either of the methods `pdf` or `logpdf` must be specified"
  802. with pytest.raises(ValueError, match=msg):
  803. rng = NumericalInversePolynomial(dist)
  804. dist = StandardNormal()
  805. rng = NumericalInversePolynomial(dist)
  806. msg = r"`sample_size` must be greater than or equal to 1000."
  807. with pytest.raises(ValueError, match=msg):
  808. rng.u_error(10)
  809. class Distribution:
  810. def pdf(self, x):
  811. return np.exp(-0.5 * x*x)
  812. dist = Distribution()
  813. rng = NumericalInversePolynomial(dist)
  814. msg = r"Exact CDF required but not found."
  815. with pytest.raises(ValueError, match=msg):
  816. rng.u_error()
  817. def test_logpdf_pdf_consistency(self):
  818. # 1. check that PINV works with pdf and logpdf only
  819. # 2. check that generated ppf is the same (up to a small tolerance)
  820. class MyDist:
  821. pass
  822. # create genrator from dist with only pdf
  823. dist_pdf = MyDist()
  824. dist_pdf.pdf = lambda x: math.exp(-x*x/2)
  825. rng1 = NumericalInversePolynomial(dist_pdf)
  826. # create dist with only logpdf
  827. dist_logpdf = MyDist()
  828. dist_logpdf.logpdf = lambda x: -x*x/2
  829. rng2 = NumericalInversePolynomial(dist_logpdf)
  830. q = np.linspace(1e-5, 1-1e-5, num=100)
  831. assert_allclose(rng1.ppf(q), rng2.ppf(q))
  832. class TestNumericalInverseHermite:
  833. # / (1 +sin(2 Pi x))/2 if |x| <= 1
  834. # f(x) = <
  835. # \ 0 otherwise
  836. # Taken from UNU.RAN test suite (from file t_hinv.c)
  837. class dist0:
  838. def pdf(self, x):
  839. return 0.5*(1. + np.sin(2.*np.pi*x))
  840. def dpdf(self, x):
  841. return np.pi*np.cos(2.*np.pi*x)
  842. def cdf(self, x):
  843. return (1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) / (4.*np.pi)
  844. def support(self):
  845. return -1, 1
  846. # / Max(sin(2 Pi x)),0)Pi/2 if -1 < x <0.5
  847. # f(x) = <
  848. # \ 0 otherwise
  849. # Taken from UNU.RAN test suite (from file t_hinv.c)
  850. class dist1:
  851. def pdf(self, x):
  852. if (x <= -0.5):
  853. return np.sin((2. * np.pi) * x) * 0.5 * np.pi
  854. if (x < 0.):
  855. return 0.
  856. if (x <= 0.5):
  857. return np.sin((2. * np.pi) * x) * 0.5 * np.pi
  858. def dpdf(self, x):
  859. if (x <= -0.5):
  860. return np.cos((2. * np.pi) * x) * np.pi * np.pi
  861. if (x < 0.):
  862. return 0.
  863. if (x <= 0.5):
  864. return np.cos((2. * np.pi) * x) * np.pi * np.pi
  865. def cdf(self, x):
  866. if (x <= -0.5):
  867. return 0.25 * (1 - np.cos((2. * np.pi) * x))
  868. if (x < 0.):
  869. return 0.5
  870. if (x <= 0.5):
  871. return 0.75 - 0.25 * np.cos((2. * np.pi) * x)
  872. def support(self):
  873. return -1, 0.5
  874. dists = [dist0(), dist1()]
  875. # exact mean and variance of the distributions in the list dists
  876. mv0 = [-1/(2*np.pi), 1/3 - 1/(4*np.pi*np.pi)]
  877. mv1 = [-1/4, 3/8-1/(2*np.pi*np.pi) - 1/16]
  878. mvs = [mv0, mv1]
  879. @pytest.mark.parametrize("dist, mv_ex",
  880. zip(dists, mvs))
  881. @pytest.mark.parametrize("order", [3, 5])
  882. def test_basic(self, dist, mv_ex, order):
  883. rng = NumericalInverseHermite(dist, order=order, random_state=42)
  884. check_cont_samples(rng, dist, mv_ex)
  885. # test domains with inf + nan in them. need to write a custom test for
  886. # this because not all methods support infinite tails.
  887. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  888. def test_inf_nan_domains(self, domain, err, msg):
  889. with pytest.raises(err, match=msg):
  890. NumericalInverseHermite(StandardNormal(), domain=domain)
  891. def basic_test_all_scipy_dists(self, distname, shapes):
  892. slow_dists = {'ksone', 'kstwo', 'levy_stable', 'skewnorm'}
  893. fail_dists = {'beta', 'gausshyper', 'geninvgauss', 'ncf', 'nct',
  894. 'norminvgauss', 'genhyperbolic', 'studentized_range',
  895. 'vonmises', 'kappa4', 'invgauss', 'wald'}
  896. if distname in slow_dists:
  897. pytest.skip("Distribution is too slow")
  898. if distname in fail_dists:
  899. # specific reasons documented in gh-13319
  900. # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
  901. pytest.xfail("Fails - usually due to inaccurate CDF/PDF")
  902. np.random.seed(0)
  903. dist = getattr(stats, distname)(*shapes)
  904. fni = NumericalInverseHermite(dist)
  905. x = np.random.rand(10)
  906. p_tol = np.max(np.abs(dist.ppf(x)-fni.ppf(x))/np.abs(dist.ppf(x)))
  907. u_tol = np.max(np.abs(dist.cdf(fni.ppf(x)) - x))
  908. assert p_tol < 1e-8
  909. assert u_tol < 1e-12
  910. @pytest.mark.filterwarnings('ignore::RuntimeWarning')
  911. @pytest.mark.xslow
  912. @pytest.mark.parametrize(("distname", "shapes"), distcont)
  913. def test_basic_all_scipy_dists(self, distname, shapes):
  914. # if distname == "truncnorm":
  915. # pytest.skip("Tested separately")
  916. self.basic_test_all_scipy_dists(distname, shapes)
  917. @pytest.mark.filterwarnings('ignore::RuntimeWarning')
  918. def test_basic_truncnorm_gh17155(self):
  919. self.basic_test_all_scipy_dists("truncnorm", (0.1, 2))
  920. def test_input_validation(self):
  921. match = r"`order` must be either 1, 3, or 5."
  922. with pytest.raises(ValueError, match=match):
  923. NumericalInverseHermite(StandardNormal(), order=2)
  924. match = "`cdf` required but not found"
  925. with pytest.raises(ValueError, match=match):
  926. NumericalInverseHermite("norm")
  927. match = "could not convert string to float"
  928. with pytest.raises(ValueError, match=match):
  929. NumericalInverseHermite(StandardNormal(),
  930. u_resolution='ekki')
  931. rngs = [None, 0, np.random.RandomState(0)]
  932. rngs.append(np.random.default_rng(0)) # type: ignore
  933. sizes = [(None, tuple()), (8, (8,)), ((4, 5, 6), (4, 5, 6))]
  934. @pytest.mark.parametrize('rng', rngs)
  935. @pytest.mark.parametrize('size_in, size_out', sizes)
  936. def test_RVS(self, rng, size_in, size_out):
  937. dist = StandardNormal()
  938. fni = NumericalInverseHermite(dist)
  939. rng2 = deepcopy(rng)
  940. rvs = fni.rvs(size=size_in, random_state=rng)
  941. if size_in is not None:
  942. assert rvs.shape == size_out
  943. if rng2 is not None:
  944. rng2 = check_random_state(rng2)
  945. uniform = rng2.uniform(size=size_in)
  946. rvs2 = stats.norm.ppf(uniform)
  947. assert_allclose(rvs, rvs2)
  948. def test_inaccurate_CDF(self):
  949. # CDF function with inaccurate tail cannot be inverted; see gh-13319
  950. # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
  951. shapes = (2.3098496451481823, 0.6268795430096368)
  952. match = ("98 : one or more intervals very short; possibly due to "
  953. "numerical problems with a pole or very flat tail")
  954. # fails with default tol
  955. with pytest.warns(RuntimeWarning, match=match):
  956. NumericalInverseHermite(stats.beta(*shapes))
  957. # no error with coarser tol
  958. NumericalInverseHermite(stats.beta(*shapes), u_resolution=1e-8)
  959. def test_custom_distribution(self):
  960. dist1 = StandardNormal()
  961. fni1 = NumericalInverseHermite(dist1)
  962. dist2 = stats.norm()
  963. fni2 = NumericalInverseHermite(dist2)
  964. assert_allclose(fni1.rvs(random_state=0), fni2.rvs(random_state=0))
  965. u = [
  966. # check the correctness of the PPF for equidistant points between
  967. # 0.02 and 0.98.
  968. np.linspace(0., 1., num=10000),
  969. # test the PPF method for empty arrays
  970. [], [[]],
  971. # test if nans and infs return nan result.
  972. [np.nan], [-np.inf, np.nan, np.inf],
  973. # test if a scalar is returned for a scalar input.
  974. 0,
  975. # test for arrays with nans, values greater than 1 and less than 0,
  976. # and some valid values.
  977. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
  978. ]
  979. @pytest.mark.parametrize("u", u)
  980. def test_ppf(self, u):
  981. dist = StandardNormal()
  982. rng = NumericalInverseHermite(dist, u_resolution=1e-12)
  983. # Older versions of NumPy throw RuntimeWarnings for comparisons
  984. # with nan.
  985. with suppress_warnings() as sup:
  986. sup.filter(RuntimeWarning, "invalid value encountered in greater")
  987. sup.filter(RuntimeWarning, "invalid value encountered in "
  988. "greater_equal")
  989. sup.filter(RuntimeWarning, "invalid value encountered in less")
  990. sup.filter(RuntimeWarning, "invalid value encountered in "
  991. "less_equal")
  992. res = rng.ppf(u)
  993. expected = stats.norm.ppf(u)
  994. assert_allclose(res, expected, rtol=1e-9, atol=3e-10)
  995. assert res.shape == expected.shape
  996. def test_u_error(self):
  997. dist = StandardNormal()
  998. rng = NumericalInverseHermite(dist, u_resolution=1e-10)
  999. max_error, mae = rng.u_error()
  1000. assert max_error < 1e-10
  1001. assert mae <= max_error
  1002. with suppress_warnings() as sup:
  1003. # ignore warning about u-resolution being too small.
  1004. sup.filter(RuntimeWarning)
  1005. rng = NumericalInverseHermite(dist, u_resolution=1e-14)
  1006. max_error, mae = rng.u_error()
  1007. assert max_error < 1e-14
  1008. assert mae <= max_error
  1009. class TestDiscreteGuideTable:
  1010. basic_fail_dists = {
  1011. 'nchypergeom_fisher', # numerical errors on tails
  1012. 'nchypergeom_wallenius', # numerical errors on tails
  1013. 'randint' # fails on 32-bit ubuntu
  1014. }
  1015. def test_guide_factor_gt3_raises_warning(self):
  1016. pv = [0.1, 0.3, 0.6]
  1017. urng = np.random.default_rng()
  1018. with pytest.warns(RuntimeWarning):
  1019. DiscreteGuideTable(pv, random_state=urng, guide_factor=7)
  1020. def test_guide_factor_zero_raises_warning(self):
  1021. pv = [0.1, 0.3, 0.6]
  1022. urng = np.random.default_rng()
  1023. with pytest.warns(RuntimeWarning):
  1024. DiscreteGuideTable(pv, random_state=urng, guide_factor=0)
  1025. def test_negative_guide_factor_raises_warning(self):
  1026. # This occurs from the UNU.RAN wrapper automatically.
  1027. # however it already gives a useful warning
  1028. # Here we just test that a warning is raised.
  1029. pv = [0.1, 0.3, 0.6]
  1030. urng = np.random.default_rng()
  1031. with pytest.warns(RuntimeWarning):
  1032. DiscreteGuideTable(pv, random_state=urng, guide_factor=-1)
  1033. @pytest.mark.parametrize("distname, params", distdiscrete)
  1034. def test_basic(self, distname, params):
  1035. if distname in self.basic_fail_dists:
  1036. msg = ("DGT fails on these probably because of large domains "
  1037. "and small computation errors in PMF.")
  1038. pytest.skip(msg)
  1039. if not isinstance(distname, str):
  1040. dist = distname
  1041. else:
  1042. dist = getattr(stats, distname)
  1043. dist = dist(*params)
  1044. domain = dist.support()
  1045. if not np.isfinite(domain[1] - domain[0]):
  1046. # DGT only works with finite domain. So, skip the distributions
  1047. # with infinite tails.
  1048. pytest.skip("DGT only works with a finite domain.")
  1049. k = np.arange(domain[0], domain[1]+1)
  1050. pv = dist.pmf(k)
  1051. mv_ex = dist.stats('mv')
  1052. rng = DiscreteGuideTable(dist, random_state=42)
  1053. check_discr_samples(rng, pv, mv_ex)
  1054. u = [
  1055. # the correctness of the PPF for equidistant points between 0 and 1.
  1056. np.linspace(0, 1, num=10000),
  1057. # test the PPF method for empty arrays
  1058. [], [[]],
  1059. # test if nans and infs return nan result.
  1060. [np.nan], [-np.inf, np.nan, np.inf],
  1061. # test if a scalar is returned for a scalar input.
  1062. 0,
  1063. # test for arrays with nans, values greater than 1 and less than 0,
  1064. # and some valid values.
  1065. [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
  1066. ]
  1067. @pytest.mark.parametrize('u', u)
  1068. def test_ppf(self, u):
  1069. n, p = 4, 0.1
  1070. dist = stats.binom(n, p)
  1071. rng = DiscreteGuideTable(dist, random_state=42)
  1072. # Older versions of NumPy throw RuntimeWarnings for comparisons
  1073. # with nan.
  1074. with suppress_warnings() as sup:
  1075. sup.filter(RuntimeWarning, "invalid value encountered in greater")
  1076. sup.filter(RuntimeWarning, "invalid value encountered in "
  1077. "greater_equal")
  1078. sup.filter(RuntimeWarning, "invalid value encountered in less")
  1079. sup.filter(RuntimeWarning, "invalid value encountered in "
  1080. "less_equal")
  1081. res = rng.ppf(u)
  1082. expected = stats.binom.ppf(u, n, p)
  1083. assert_equal(res.shape, expected.shape)
  1084. assert_equal(res, expected)
  1085. @pytest.mark.parametrize("pv, msg", bad_pv_common)
  1086. def test_bad_pv(self, pv, msg):
  1087. with pytest.raises(ValueError, match=msg):
  1088. DiscreteGuideTable(pv)
  1089. # DGT doesn't support infinite tails. So, it should throw an error when
  1090. # inf is present in the domain.
  1091. inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf),
  1092. (0, np.inf), (-np.inf, 0)]
  1093. @pytest.mark.parametrize("domain", inf_domain)
  1094. def test_inf_domain(self, domain):
  1095. with pytest.raises(ValueError, match=r"must be finite"):
  1096. DiscreteGuideTable(stats.binom(10, 0.2), domain=domain)
  1097. class TestSimpleRatioUniforms:
  1098. # pdf with piecewise linear function as transformed density
  1099. # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
  1100. # (from file t_srou.c)
  1101. class dist:
  1102. def __init__(self, shift):
  1103. self.shift = shift
  1104. self.mode = shift
  1105. def pdf(self, x):
  1106. x -= self.shift
  1107. y = 1. / (abs(x) + 1.)
  1108. return 0.5 * y * y
  1109. def cdf(self, x):
  1110. x -= self.shift
  1111. if x <= 0.:
  1112. return 0.5 / (1. - x)
  1113. else:
  1114. return 1. - 0.5 / (1. + x)
  1115. dists = [dist(0.), dist(10000.)]
  1116. # exact mean and variance of the distributions in the list dists
  1117. mv1 = [0., np.inf]
  1118. mv2 = [10000., np.inf]
  1119. mvs = [mv1, mv2]
  1120. @pytest.mark.parametrize("dist, mv_ex",
  1121. zip(dists, mvs))
  1122. def test_basic(self, dist, mv_ex):
  1123. rng = SimpleRatioUniforms(dist, mode=dist.mode, random_state=42)
  1124. check_cont_samples(rng, dist, mv_ex)
  1125. rng = SimpleRatioUniforms(dist, mode=dist.mode,
  1126. cdf_at_mode=dist.cdf(dist.mode),
  1127. random_state=42)
  1128. check_cont_samples(rng, dist, mv_ex)
  1129. # test domains with inf + nan in them. need to write a custom test for
  1130. # this because not all methods support infinite tails.
  1131. @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
  1132. def test_inf_nan_domains(self, domain, err, msg):
  1133. with pytest.raises(err, match=msg):
  1134. SimpleRatioUniforms(StandardNormal(), domain=domain)
  1135. def test_bad_args(self):
  1136. # pdf_area < 0
  1137. with pytest.raises(ValueError, match=r"`pdf_area` must be > 0"):
  1138. SimpleRatioUniforms(StandardNormal(), mode=0, pdf_area=-1)