1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357 |
- import threading
- import pickle
- import pytest
- from copy import deepcopy
- import platform
- import sys
- import math
- import numpy as np
- from numpy.testing import assert_allclose, assert_equal, suppress_warnings
- from numpy.lib import NumpyVersion
- from scipy.stats.sampling import (
- TransformedDensityRejection,
- DiscreteAliasUrn,
- DiscreteGuideTable,
- NumericalInversePolynomial,
- NumericalInverseHermite,
- SimpleRatioUniforms,
- UNURANError
- )
- from scipy import stats
- from scipy import special
- from scipy.stats import chisquare, cramervonmises
- from scipy.stats._distr_params import distdiscrete, distcont
- from scipy._lib._util import check_random_state
- # common test data: this data can be shared between all the tests.
- # Normal distribution shared between all the continuous methods
- class StandardNormal:
- def pdf(self, x):
- # normalization constant needed for NumericalInverseHermite
- return 1./np.sqrt(2.*np.pi) * np.exp(-0.5 * x*x)
- def dpdf(self, x):
- return 1./np.sqrt(2.*np.pi) * -x * np.exp(-0.5 * x*x)
- def cdf(self, x):
- return special.ndtr(x)
- all_methods = [
- ("TransformedDensityRejection", {"dist": StandardNormal()}),
- ("DiscreteAliasUrn", {"dist": [0.02, 0.18, 0.8]}),
- ("DiscreteGuideTable", {"dist": [0.02, 0.18, 0.8]}),
- ("NumericalInversePolynomial", {"dist": StandardNormal()}),
- ("NumericalInverseHermite", {"dist": StandardNormal()}),
- ("SimpleRatioUniforms", {"dist": StandardNormal(), "mode": 0})
- ]
- if (sys.implementation.name == 'pypy'
- and sys.implementation.version < (7, 3, 10)):
- # changed in PyPy for v7.3.10
- floaterr = r"unsupported operand type for float\(\): 'list'"
- else:
- floaterr = r"must be real number, not list"
- # Make sure an internal error occurs in UNU.RAN when invalid callbacks are
- # passed. Moreover, different generators throw different error messages.
- # So, in case of an `UNURANError`, we do not validate the error message.
- bad_pdfs_common = [
- # Negative PDF
- (lambda x: -x, UNURANError, r"..."),
- # Returning wrong type
- (lambda x: [], TypeError, floaterr),
- # Undefined name inside the function
- (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa
- # Infinite value returned => Overflow error.
- (lambda x: np.inf, UNURANError, r"..."),
- # NaN value => internal error in UNU.RAN
- (lambda x: np.nan, UNURANError, r"..."),
- # signature of PDF wrong
- (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
- ]
- # same approach for dpdf
- bad_dpdf_common = [
- # Infinite value returned.
- (lambda x: np.inf, UNURANError, r"..."),
- # NaN value => internal error in UNU.RAN
- (lambda x: np.nan, UNURANError, r"..."),
- # Returning wrong type
- (lambda x: [], TypeError, floaterr),
- # Undefined name inside the function
- (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa
- # signature of dPDF wrong
- (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
- ]
- # same approach for logpdf
- bad_logpdfs_common = [
- # Returning wrong type
- (lambda x: [], TypeError, floaterr),
- # Undefined name inside the function
- (lambda x: foo, NameError, r"name 'foo' is not defined"), # type: ignore[name-defined] # noqa
- # Infinite value returned => Overflow error.
- (lambda x: np.inf, UNURANError, r"..."),
- # NaN value => internal error in UNU.RAN
- (lambda x: np.nan, UNURANError, r"..."),
- # signature of logpdf wrong
- (lambda: 1.0, TypeError, r"takes 0 positional arguments but 1 was given")
- ]
- bad_pv_common = [
- ([], r"must contain at least one element"),
- ([[1.0, 0.0]], r"wrong number of dimensions \(expected 1, got 2\)"),
- ([0.2, 0.4, np.nan, 0.8], r"must contain only finite / non-nan values"),
- ([0.2, 0.4, np.inf, 0.8], r"must contain only finite / non-nan values"),
- ([0.0, 0.0], r"must contain at least one non-zero value"),
- ]
- # size of the domains is incorrect
- bad_sized_domains = [
- # > 2 elements in the domain
- ((1, 2, 3), ValueError, r"must be a length 2 tuple"),
- # empty domain
- ((), ValueError, r"must be a length 2 tuple")
- ]
- # domain values are incorrect
- bad_domains = [
- ((2, 1), UNURANError, r"left >= right"),
- ((1, 1), UNURANError, r"left >= right"),
- ]
- # infinite and nan values present in domain.
- inf_nan_domains = [
- # left >= right
- ((10, 10), UNURANError, r"left >= right"),
- ((np.inf, np.inf), UNURANError, r"left >= right"),
- ((-np.inf, -np.inf), UNURANError, r"left >= right"),
- ((np.inf, -np.inf), UNURANError, r"left >= right"),
- # Also include nans in some of the domains.
- ((-np.inf, np.nan), ValueError, r"only non-nan values"),
- ((np.nan, np.inf), ValueError, r"only non-nan values")
- ]
- # `nan` values present in domain. Some distributions don't support
- # infinite tails, so don't mix the nan values with infinities.
- nan_domains = [
- ((0, np.nan), ValueError, r"only non-nan values"),
- ((np.nan, np.nan), ValueError, r"only non-nan values")
- ]
- # all the methods should throw errors for nan, bad sized, and bad valued
- # domains.
- @pytest.mark.parametrize("domain, err, msg",
- bad_domains + bad_sized_domains +
- nan_domains) # type: ignore[operator]
- @pytest.mark.parametrize("method, kwargs", all_methods)
- def test_bad_domain(domain, err, msg, method, kwargs):
- Method = getattr(stats.sampling, method)
- with pytest.raises(err, match=msg):
- Method(**kwargs, domain=domain)
- @pytest.mark.parametrize("method, kwargs", all_methods)
- def test_random_state(method, kwargs):
- Method = getattr(stats.sampling, method)
- # simple seed that works for any version of NumPy
- seed = 123
- rng1 = Method(**kwargs, random_state=seed)
- rng2 = Method(**kwargs, random_state=seed)
- assert_equal(rng1.rvs(100), rng2.rvs(100))
- # global seed
- np.random.seed(123)
- rng1 = Method(**kwargs)
- rvs1 = rng1.rvs(100)
- np.random.seed(None)
- rng2 = Method(**kwargs, random_state=123)
- rvs2 = rng2.rvs(100)
- assert_equal(rvs1, rvs2)
- # Generator seed for new NumPy
- # when a RandomState is given, it should take the bitgen_t
- # member of the class and create a Generator instance.
- seed1 = np.random.RandomState(np.random.MT19937(123))
- seed2 = np.random.Generator(np.random.MT19937(123))
- rng1 = Method(**kwargs, random_state=seed1)
- rng2 = Method(**kwargs, random_state=seed2)
- assert_equal(rng1.rvs(100), rng2.rvs(100))
- def test_set_random_state():
- rng1 = TransformedDensityRejection(StandardNormal(), random_state=123)
- rng2 = TransformedDensityRejection(StandardNormal())
- rng2.set_random_state(123)
- assert_equal(rng1.rvs(100), rng2.rvs(100))
- rng = TransformedDensityRejection(StandardNormal(), random_state=123)
- rvs1 = rng.rvs(100)
- rng.set_random_state(123)
- rvs2 = rng.rvs(100)
- assert_equal(rvs1, rvs2)
- def test_threading_behaviour():
- # Test if the API is thread-safe.
- # This verifies if the lock mechanism and the use of `PyErr_Occurred`
- # is correct.
- errors = {"err1": None, "err2": None}
- class Distribution:
- def __init__(self, pdf_msg):
- self.pdf_msg = pdf_msg
- def pdf(self, x):
- if 49.9 < x < 50.0:
- raise ValueError(self.pdf_msg)
- return x
- def dpdf(self, x):
- return 1
- def func1():
- dist = Distribution('foo')
- rng = TransformedDensityRejection(dist, domain=(10, 100),
- random_state=12)
- try:
- rng.rvs(100000)
- except ValueError as e:
- errors['err1'] = e.args[0]
- def func2():
- dist = Distribution('bar')
- rng = TransformedDensityRejection(dist, domain=(10, 100),
- random_state=2)
- try:
- rng.rvs(100000)
- except ValueError as e:
- errors['err2'] = e.args[0]
- t1 = threading.Thread(target=func1)
- t2 = threading.Thread(target=func2)
- t1.start()
- t2.start()
- t1.join()
- t2.join()
- assert errors['err1'] == 'foo'
- assert errors['err2'] == 'bar'
- @pytest.mark.parametrize("method, kwargs", all_methods)
- def test_pickle(method, kwargs):
- Method = getattr(stats.sampling, method)
- rng1 = Method(**kwargs, random_state=123)
- obj = pickle.dumps(rng1)
- rng2 = pickle.loads(obj)
- assert_equal(rng1.rvs(100), rng2.rvs(100))
- @pytest.mark.parametrize("size", [None, 0, (0, ), 1, (10, 3), (2, 3, 4, 5),
- (0, 0), (0, 1)])
- def test_rvs_size(size):
- # As the `rvs` method is present in the base class and shared between
- # all the classes, we can just test with one of the methods.
- rng = TransformedDensityRejection(StandardNormal())
- if size is None:
- assert np.isscalar(rng.rvs(size))
- else:
- if np.isscalar(size):
- size = (size, )
- assert rng.rvs(size).shape == size
- def test_with_scipy_distribution():
- # test if the setup works with SciPy's rv_frozen distributions
- dist = stats.norm()
- urng = np.random.default_rng(0)
- rng = NumericalInverseHermite(dist, random_state=urng)
- u = np.linspace(0, 1, num=100)
- check_cont_samples(rng, dist, dist.stats())
- assert_allclose(dist.ppf(u), rng.ppf(u))
- # test if it works with `loc` and `scale`
- dist = stats.norm(loc=10., scale=5.)
- rng = NumericalInverseHermite(dist, random_state=urng)
- check_cont_samples(rng, dist, dist.stats())
- assert_allclose(dist.ppf(u), rng.ppf(u))
- # check for discrete distributions
- dist = stats.binom(10, 0.2)
- rng = DiscreteAliasUrn(dist, random_state=urng)
- domain = dist.support()
- pv = dist.pmf(np.arange(domain[0], domain[1]+1))
- check_discr_samples(rng, pv, dist.stats())
- def check_cont_samples(rng, dist, mv_ex):
- rvs = rng.rvs(100000)
- mv = rvs.mean(), rvs.var()
- # test the moments only if the variance is finite
- if np.isfinite(mv_ex[1]):
- assert_allclose(mv, mv_ex, rtol=1e-7, atol=1e-1)
- # Cramer Von Mises test for goodness-of-fit
- rvs = rng.rvs(500)
- dist.cdf = np.vectorize(dist.cdf)
- pval = cramervonmises(rvs, dist.cdf).pvalue
- assert pval > 0.1
- def check_discr_samples(rng, pv, mv_ex):
- rvs = rng.rvs(100000)
- # test if the first few moments match
- mv = rvs.mean(), rvs.var()
- assert_allclose(mv, mv_ex, rtol=1e-3, atol=1e-1)
- # normalize
- pv = pv / pv.sum()
- # chi-squared test for goodness-of-fit
- obs_freqs = np.zeros_like(pv)
- _, freqs = np.unique(rvs, return_counts=True)
- freqs = freqs / freqs.sum()
- obs_freqs[:freqs.size] = freqs
- pval = chisquare(obs_freqs, pv).pvalue
- assert pval > 0.1
- def test_warning_center_not_in_domain():
- # UNURAN will warn if the center provided or the one computed w/o the
- # domain is outside of the domain
- msg = "102 : center moved into domain of distribution"
- with pytest.warns(RuntimeWarning, match=msg):
- NumericalInversePolynomial(StandardNormal(), center=0, domain=(3, 5))
- with pytest.warns(RuntimeWarning, match=msg):
- NumericalInversePolynomial(StandardNormal(), domain=(3, 5))
- @pytest.mark.parametrize('method', ["SimpleRatioUniforms",
- "NumericalInversePolynomial",
- "TransformedDensityRejection"])
- def test_error_mode_not_in_domain(method):
- # UNURAN raises an error if the mode is not in the domain
- # the behavior is different compared to the case that center is not in the
- # domain. mode is supposed to be the exact value, center can be an
- # approximate value
- Method = getattr(stats.sampling, method)
- msg = "17 : mode not in domain"
- with pytest.raises(UNURANError, match=msg):
- Method(StandardNormal(), mode=0, domain=(3, 5))
- @pytest.mark.parametrize('method', ["NumericalInverseHermite",
- "NumericalInversePolynomial"])
- class TestQRVS:
- def test_input_validation(self, method):
- match = "`qmc_engine` must be an instance of..."
- with pytest.raises(ValueError, match=match):
- Method = getattr(stats.sampling, method)
- gen = Method(StandardNormal())
- gen.qrvs(qmc_engine=0)
- # issues with QMCEngines and old NumPy
- Method = getattr(stats.sampling, method)
- gen = Method(StandardNormal())
- match = "`d` must be consistent with dimension of `qmc_engine`."
- with pytest.raises(ValueError, match=match):
- gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2))
- qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)]
- # `size=None` should not add anything to the shape, `size=1` should
- sizes = [(None, tuple()), (1, (1,)), (4, (4,)),
- ((4,), (4,)), ((2, 4), (2, 4))] # type: ignore
- # Neither `d=None` nor `d=1` should add anything to the shape
- ds = [(None, tuple()), (1, tuple()), (3, (3,))]
- @pytest.mark.parametrize('qrng', qrngs)
- @pytest.mark.parametrize('size_in, size_out', sizes)
- @pytest.mark.parametrize('d_in, d_out', ds)
- def test_QRVS_shape_consistency(self, qrng, size_in, size_out,
- d_in, d_out, method):
- w32 = sys.platform == "win32" and platform.architecture()[0] == "32bit"
- if w32 and method == "NumericalInversePolynomial":
- pytest.xfail("NumericalInversePolynomial.qrvs fails for Win "
- "32-bit")
- dist = StandardNormal()
- Method = getattr(stats.sampling, method)
- gen = Method(dist)
- # If d and qrng.d are inconsistent, an error is raised
- if d_in is not None and qrng is not None and qrng.d != d_in:
- match = "`d` must be consistent with dimension of `qmc_engine`."
- with pytest.raises(ValueError, match=match):
- gen.qrvs(size_in, d=d_in, qmc_engine=qrng)
- return
- # Sometimes d is really determined by qrng
- if d_in is None and qrng is not None and qrng.d != 1:
- d_out = (qrng.d,)
- shape_expected = size_out + d_out
- qrng2 = deepcopy(qrng)
- qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng)
- if size_in is not None:
- assert qrvs.shape == shape_expected
- if qrng2 is not None:
- uniform = qrng2.random(np.prod(size_in) or 1)
- qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected)
- assert_allclose(qrvs, qrvs2, atol=1e-12)
- def test_QRVS_size_tuple(self, method):
- # QMCEngine samples are always of shape (n, d). When `size` is a tuple,
- # we set `n = prod(size)` in the call to qmc_engine.random, transform
- # the sample, and reshape it to the final dimensions. When we reshape,
- # we need to be careful, because the _columns_ of the sample returned
- # by a QMCEngine are "independent"-ish, but the elements within the
- # columns are not. We need to make sure that this doesn't get mixed up
- # by reshaping: qrvs[..., i] should remain "independent"-ish of
- # qrvs[..., i+1], but the elements within qrvs[..., i] should be
- # transformed from the same low-discrepancy sequence.
- dist = StandardNormal()
- Method = getattr(stats.sampling, method)
- gen = Method(dist)
- size = (3, 4)
- d = 5
- qrng = stats.qmc.Halton(d, seed=0)
- qrng2 = stats.qmc.Halton(d, seed=0)
- uniform = qrng2.random(np.prod(size))
- qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng)
- qrvs2 = stats.norm.ppf(uniform)
- for i in range(d):
- sample = qrvs[..., i]
- sample2 = qrvs2[:, i].reshape(size)
- assert_allclose(sample, sample2, atol=1e-12)
- class TestTransformedDensityRejection:
- # Simple Custom Distribution
- class dist0:
- def pdf(self, x):
- return 3/4 * (1-x*x)
- def dpdf(self, x):
- return 3/4 * (-2*x)
- def cdf(self, x):
- return 3/4 * (x - x**3/3 + 2/3)
- def support(self):
- return -1, 1
- # Standard Normal Distribution
- class dist1:
- def pdf(self, x):
- return stats.norm._pdf(x / 0.1)
- def dpdf(self, x):
- return -x / 0.01 * stats.norm._pdf(x / 0.1)
- def cdf(self, x):
- return stats.norm._cdf(x / 0.1)
- # pdf with piecewise linear function as transformed density
- # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
- # (from file t_tdr_ps.c)
- class dist2:
- def __init__(self, shift):
- self.shift = shift
- def pdf(self, x):
- x -= self.shift
- y = 1. / (abs(x) + 1.)
- return 0.5 * y * y
- def dpdf(self, x):
- x -= self.shift
- y = 1. / (abs(x) + 1.)
- y = y * y * y
- return y if (x < 0.) else -y
- def cdf(self, x):
- x -= self.shift
- if x <= 0.:
- return 0.5 / (1. - x)
- else:
- return 1. - 0.5 / (1. + x)
- dists = [dist0(), dist1(), dist2(0.), dist2(10000.)]
- # exact mean and variance of the distributions in the list dists
- mv0 = [0., 4./15.]
- mv1 = [0., 0.01]
- mv2 = [0., np.inf]
- mv3 = [10000., np.inf]
- mvs = [mv0, mv1, mv2, mv3]
- @pytest.mark.parametrize("dist, mv_ex",
- zip(dists, mvs))
- def test_basic(self, dist, mv_ex):
- with suppress_warnings() as sup:
- # filter the warnings thrown by UNU.RAN
- sup.filter(RuntimeWarning)
- rng = TransformedDensityRejection(dist, random_state=42)
- check_cont_samples(rng, dist, mv_ex)
- # PDF 0 everywhere => bad construction points
- bad_pdfs = [(lambda x: 0, UNURANError, r"50 : bad construction points.")]
- bad_pdfs += bad_pdfs_common # type: ignore[arg-type]
- @pytest.mark.parametrize("pdf, err, msg", bad_pdfs)
- def test_bad_pdf(self, pdf, err, msg):
- class dist:
- pass
- dist.pdf = pdf
- dist.dpdf = lambda x: 1 # an arbitrary dPDF
- with pytest.raises(err, match=msg):
- TransformedDensityRejection(dist)
- @pytest.mark.parametrize("dpdf, err, msg", bad_dpdf_common)
- def test_bad_dpdf(self, dpdf, err, msg):
- class dist:
- pass
- dist.pdf = lambda x: x
- dist.dpdf = dpdf
- with pytest.raises(err, match=msg):
- TransformedDensityRejection(dist, domain=(1, 10))
- # test domains with inf + nan in them. need to write a custom test for
- # this because not all methods support infinite tails.
- @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
- def test_inf_nan_domains(self, domain, err, msg):
- with pytest.raises(err, match=msg):
- TransformedDensityRejection(StandardNormal(), domain=domain)
- @pytest.mark.parametrize("construction_points", [-1, 0, 0.1])
- def test_bad_construction_points_scalar(self, construction_points):
- with pytest.raises(ValueError, match=r"`construction_points` must be "
- r"a positive integer."):
- TransformedDensityRejection(
- StandardNormal(), construction_points=construction_points
- )
- def test_bad_construction_points_array(self):
- # empty array
- construction_points = []
- with pytest.raises(ValueError, match=r"`construction_points` must "
- r"either be a "
- r"scalar or a non-empty array."):
- TransformedDensityRejection(
- StandardNormal(), construction_points=construction_points
- )
- # construction_points not monotonically increasing
- construction_points = [1, 1, 1, 1, 1, 1]
- with pytest.warns(RuntimeWarning, match=r"33 : starting points not "
- r"strictly monotonically "
- r"increasing"):
- TransformedDensityRejection(
- StandardNormal(), construction_points=construction_points
- )
- # construction_points containing nans
- construction_points = [np.nan, np.nan, np.nan]
- with pytest.raises(UNURANError, match=r"50 : bad construction "
- r"points."):
- TransformedDensityRejection(
- StandardNormal(), construction_points=construction_points
- )
- # construction_points out of domain
- construction_points = [-10, 10]
- with pytest.warns(RuntimeWarning, match=r"50 : starting point out of "
- r"domain"):
- TransformedDensityRejection(
- StandardNormal(), domain=(-3, 3),
- construction_points=construction_points
- )
- @pytest.mark.parametrize("c", [-1., np.nan, np.inf, 0.1, 1.])
- def test_bad_c(self, c):
- msg = r"`c` must either be -0.5 or 0."
- with pytest.raises(ValueError, match=msg):
- TransformedDensityRejection(StandardNormal(), c=-1.)
- u = [np.linspace(0, 1, num=1000), [], [[]], [np.nan],
- [-np.inf, np.nan, np.inf], 0,
- [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]]
- @pytest.mark.parametrize("u", u)
- def test_ppf_hat(self, u):
- # Increase the `max_squeeze_hat_ratio` so the ppf_hat is more
- # accurate.
- rng = TransformedDensityRejection(StandardNormal(),
- max_squeeze_hat_ratio=0.9999)
- # Older versions of NumPy throw RuntimeWarnings for comparisons
- # with nan.
- with suppress_warnings() as sup:
- sup.filter(RuntimeWarning, "invalid value encountered in greater")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "greater_equal")
- sup.filter(RuntimeWarning, "invalid value encountered in less")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "less_equal")
- res = rng.ppf_hat(u)
- expected = stats.norm.ppf(u)
- assert_allclose(res, expected, rtol=1e-3, atol=1e-5)
- assert res.shape == expected.shape
- def test_bad_dist(self):
- # Empty distribution
- class dist:
- ...
- msg = r"`pdf` required but not found."
- with pytest.raises(ValueError, match=msg):
- TransformedDensityRejection(dist)
- # dPDF not present in dist
- class dist:
- pdf = lambda x: 1-x*x # noqa: E731
- msg = r"`dpdf` required but not found."
- with pytest.raises(ValueError, match=msg):
- TransformedDensityRejection(dist)
- class TestDiscreteAliasUrn:
- # DAU fails on these probably because of large domains and small
- # computation errors in PMF. Mean/SD match but chi-squared test fails.
- basic_fail_dists = {
- 'nchypergeom_fisher', # numerical erros on tails
- 'nchypergeom_wallenius', # numerical erros on tails
- 'randint' # fails on 32-bit ubuntu
- }
- @pytest.mark.parametrize("distname, params", distdiscrete)
- def test_basic(self, distname, params):
- if distname in self.basic_fail_dists:
- msg = ("DAU fails on these probably because of large domains "
- "and small computation errors in PMF.")
- pytest.skip(msg)
- if not isinstance(distname, str):
- dist = distname
- else:
- dist = getattr(stats, distname)
- dist = dist(*params)
- domain = dist.support()
- if not np.isfinite(domain[1] - domain[0]):
- # DAU only works with finite domain. So, skip the distributions
- # with infinite tails.
- pytest.skip("DAU only works with a finite domain.")
- k = np.arange(domain[0], domain[1]+1)
- pv = dist.pmf(k)
- mv_ex = dist.stats('mv')
- rng = DiscreteAliasUrn(dist, random_state=42)
- check_discr_samples(rng, pv, mv_ex)
- # Can't use bad_pmf_common here as we evaluate PMF early on to avoid
- # unhelpful errors from UNU.RAN.
- bad_pmf = [
- # inf returned
- (lambda x: np.inf, ValueError,
- r"must contain only finite / non-nan values"),
- # nan returned
- (lambda x: np.nan, ValueError,
- r"must contain only finite / non-nan values"),
- # all zeros
- (lambda x: 0.0, ValueError,
- r"must contain at least one non-zero value"),
- # Undefined name inside the function
- (lambda x: foo, NameError, # type: ignore[name-defined] # noqa
- r"name 'foo' is not defined"),
- # Returning wrong type.
- (lambda x: [], ValueError,
- r"setting an array element with a sequence."),
- # probabilities < 0
- (lambda x: -x, UNURANError,
- r"50 : probability < 0"),
- # signature of PMF wrong
- (lambda: 1.0, TypeError,
- r"takes 0 positional arguments but 1 was given")
- ]
- @pytest.mark.parametrize("pmf, err, msg", bad_pmf)
- def test_bad_pmf(self, pmf, err, msg):
- class dist:
- pass
- dist.pmf = pmf
- with pytest.raises(err, match=msg):
- DiscreteAliasUrn(dist, domain=(1, 10))
- @pytest.mark.parametrize("pv", [[0.18, 0.02, 0.8],
- [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]])
- def test_sampling_with_pv(self, pv):
- pv = np.asarray(pv, dtype=np.float64)
- rng = DiscreteAliasUrn(pv, random_state=123)
- rvs = rng.rvs(100_000)
- pv = pv / pv.sum()
- variates = np.arange(0, len(pv))
- # test if the first few moments match
- m_expected = np.average(variates, weights=pv)
- v_expected = np.average((variates - m_expected) ** 2, weights=pv)
- mv_expected = m_expected, v_expected
- check_discr_samples(rng, pv, mv_expected)
- @pytest.mark.parametrize("pv, msg", bad_pv_common)
- def test_bad_pv(self, pv, msg):
- with pytest.raises(ValueError, match=msg):
- DiscreteAliasUrn(pv)
- # DAU doesn't support infinite tails. So, it should throw an error when
- # inf is present in the domain.
- inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf),
- (0, np.inf), (-np.inf, 0)]
- @pytest.mark.parametrize("domain", inf_domain)
- def test_inf_domain(self, domain):
- with pytest.raises(ValueError, match=r"must be finite"):
- DiscreteAliasUrn(stats.binom(10, 0.2), domain=domain)
- def test_bad_urn_factor(self):
- with pytest.warns(RuntimeWarning, match=r"relative urn size < 1."):
- DiscreteAliasUrn([0.5, 0.5], urn_factor=-1)
- def test_bad_args(self):
- msg = (r"`domain` must be provided when the "
- r"probability vector is not available.")
- class dist:
- def pmf(self, x):
- return x
- with pytest.raises(ValueError, match=msg):
- DiscreteAliasUrn(dist)
- class TestNumericalInversePolynomial:
- # Simple Custom Distribution
- class dist0:
- def pdf(self, x):
- return 3/4 * (1-x*x)
- def cdf(self, x):
- return 3/4 * (x - x**3/3 + 2/3)
- def support(self):
- return -1, 1
- # Standard Normal Distribution
- class dist1:
- def pdf(self, x):
- return stats.norm._pdf(x / 0.1)
- def cdf(self, x):
- return stats.norm._cdf(x / 0.1)
- # Sin 2 distribution
- # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 1
- # f(x) = <
- # \ 0 otherwise
- # Taken from UNU.RAN test suite (from file t_pinv.c)
- class dist2:
- def pdf(self, x):
- return 0.05 + 0.45 * (1 + np.sin(2*np.pi*x))
- def cdf(self, x):
- return (0.05*(x + 1) +
- 0.9*(1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) /
- (4.*np.pi))
- def support(self):
- return -1, 1
- # Sin 10 distribution
- # / 0.05 + 0.45*(1 +sin(2 Pi x)) if |x| <= 5
- # f(x) = <
- # \ 0 otherwise
- # Taken from UNU.RAN test suite (from file t_pinv.c)
- class dist3:
- def pdf(self, x):
- return 0.2 * (0.05 + 0.45 * (1 + np.sin(2*np.pi*x)))
- def cdf(self, x):
- return x/10. + 0.5 + 0.09/(2*np.pi) * (np.cos(10*np.pi) -
- np.cos(2*np.pi*x))
- def support(self):
- return -5, 5
- dists = [dist0(), dist1(), dist2(), dist3()]
- # exact mean and variance of the distributions in the list dists
- mv0 = [0., 4./15.]
- mv1 = [0., 0.01]
- mv2 = [-0.45/np.pi, 2/3*0.5 - 0.45**2/np.pi**2]
- mv3 = [-0.45/np.pi, 0.2 * 250/3 * 0.5 - 0.45**2/np.pi**2]
- mvs = [mv0, mv1, mv2, mv3]
- @pytest.mark.parametrize("dist, mv_ex",
- zip(dists, mvs))
- def test_basic(self, dist, mv_ex):
- rng = NumericalInversePolynomial(dist, random_state=42)
- check_cont_samples(rng, dist, mv_ex)
- very_slow_dists = ['studentized_range', 'trapezoid', 'triang', 'vonmises',
- 'levy_stable', 'kappa4', 'ksone', 'kstwo', 'levy_l',
- 'gausshyper', 'anglit']
- # for these distributions, some assertions fail due to minor
- # numerical differences. They can be avoided either by changing
- # the seed or by increasing the u_resolution.
- fail_dists = ['ncf', 'pareto', 'chi2', 'fatiguelife', 'halfgennorm',
- 'gibrat', 'lognorm', 'ncx2', 't']
- @pytest.mark.xslow
- @pytest.mark.parametrize("distname, params", distcont)
- def test_basic_all_scipy_dists(self, distname, params):
- if distname in self.very_slow_dists:
- pytest.skip(f"PINV too slow for {distname}")
- if distname in self.fail_dists:
- pytest.skip(f"PINV fails for {distname}")
- dist = (getattr(stats, distname)
- if isinstance(distname, str)
- else distname)
- dist = dist(*params)
- with suppress_warnings() as sup:
- sup.filter(RuntimeWarning)
- rng = NumericalInversePolynomial(dist, random_state=42)
- check_cont_samples(rng, dist, [dist.mean(), dist.var()])
- @pytest.mark.parametrize("pdf, err, msg", bad_pdfs_common)
- def test_bad_pdf(self, pdf, err, msg):
- class dist:
- pass
- dist.pdf = pdf
- with pytest.raises(err, match=msg):
- NumericalInversePolynomial(dist, domain=[0, 5])
- @pytest.mark.parametrize("logpdf, err, msg", bad_logpdfs_common)
- def test_bad_logpdf(self, logpdf, err, msg):
- class dist:
- pass
- dist.logpdf = logpdf
- with pytest.raises(err, match=msg):
- NumericalInversePolynomial(dist, domain=[0, 5])
- # test domains with inf + nan in them. need to write a custom test for
- # this because not all methods support infinite tails.
- @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
- def test_inf_nan_domains(self, domain, err, msg):
- with pytest.raises(err, match=msg):
- NumericalInversePolynomial(StandardNormal(), domain=domain)
- u = [
- # test if quantile 0 and 1 return -inf and inf respectively and check
- # the correctness of the PPF for equidistant points between 0 and 1.
- np.linspace(0, 1, num=10000),
- # test the PPF method for empty arrays
- [], [[]],
- # test if nans and infs return nan result.
- [np.nan], [-np.inf, np.nan, np.inf],
- # test if a scalar is returned for a scalar input.
- 0,
- # test for arrays with nans, values greater than 1 and less than 0,
- # and some valid values.
- [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
- ]
- @pytest.mark.parametrize("u", u)
- def test_ppf(self, u):
- dist = StandardNormal()
- rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
- # Older versions of NumPy throw RuntimeWarnings for comparisons
- # with nan.
- with suppress_warnings() as sup:
- sup.filter(RuntimeWarning, "invalid value encountered in greater")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "greater_equal")
- sup.filter(RuntimeWarning, "invalid value encountered in less")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "less_equal")
- res = rng.ppf(u)
- expected = stats.norm.ppf(u)
- assert_allclose(res, expected, rtol=1e-11, atol=1e-11)
- assert res.shape == expected.shape
- x = [np.linspace(-10, 10, num=10000), [], [[]], [np.nan],
- [-np.inf, np.nan, np.inf], 0,
- [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-np.inf, 3, 4]]]
- @pytest.mark.parametrize("x", x)
- def test_cdf(self, x):
- dist = StandardNormal()
- rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
- # Older versions of NumPy throw RuntimeWarnings for comparisons
- # with nan.
- with suppress_warnings() as sup:
- sup.filter(RuntimeWarning, "invalid value encountered in greater")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "greater_equal")
- sup.filter(RuntimeWarning, "invalid value encountered in less")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "less_equal")
- res = rng.cdf(x)
- expected = stats.norm.cdf(x)
- assert_allclose(res, expected, rtol=1e-11, atol=1e-11)
- assert res.shape == expected.shape
- def test_u_error(self):
- dist = StandardNormal()
- rng = NumericalInversePolynomial(dist, u_resolution=1e-10)
- max_error, mae = rng.u_error()
- assert max_error < 1e-10
- assert mae <= max_error
- rng = NumericalInversePolynomial(dist, u_resolution=1e-14)
- max_error, mae = rng.u_error()
- assert max_error < 1e-14
- assert mae <= max_error
- bad_orders = [1, 4.5, 20, np.inf, np.nan]
- bad_u_resolution = [1e-20, 1e-1, np.inf, np.nan]
- @pytest.mark.parametrize("order", bad_orders)
- def test_bad_orders(self, order):
- dist = StandardNormal()
- msg = r"`order` must be an integer in the range \[3, 17\]."
- with pytest.raises(ValueError, match=msg):
- NumericalInversePolynomial(dist, order=order)
- @pytest.mark.parametrize("u_resolution", bad_u_resolution)
- def test_bad_u_resolution(self, u_resolution):
- msg = r"`u_resolution` must be between 1e-15 and 1e-5."
- with pytest.raises(ValueError, match=msg):
- NumericalInversePolynomial(StandardNormal(),
- u_resolution=u_resolution)
- def test_bad_args(self):
- class BadDist:
- def cdf(self, x):
- return stats.norm._cdf(x)
- dist = BadDist()
- msg = r"Either of the methods `pdf` or `logpdf` must be specified"
- with pytest.raises(ValueError, match=msg):
- rng = NumericalInversePolynomial(dist)
- dist = StandardNormal()
- rng = NumericalInversePolynomial(dist)
- msg = r"`sample_size` must be greater than or equal to 1000."
- with pytest.raises(ValueError, match=msg):
- rng.u_error(10)
- class Distribution:
- def pdf(self, x):
- return np.exp(-0.5 * x*x)
- dist = Distribution()
- rng = NumericalInversePolynomial(dist)
- msg = r"Exact CDF required but not found."
- with pytest.raises(ValueError, match=msg):
- rng.u_error()
- def test_logpdf_pdf_consistency(self):
- # 1. check that PINV works with pdf and logpdf only
- # 2. check that generated ppf is the same (up to a small tolerance)
- class MyDist:
- pass
- # create genrator from dist with only pdf
- dist_pdf = MyDist()
- dist_pdf.pdf = lambda x: math.exp(-x*x/2)
- rng1 = NumericalInversePolynomial(dist_pdf)
- # create dist with only logpdf
- dist_logpdf = MyDist()
- dist_logpdf.logpdf = lambda x: -x*x/2
- rng2 = NumericalInversePolynomial(dist_logpdf)
- q = np.linspace(1e-5, 1-1e-5, num=100)
- assert_allclose(rng1.ppf(q), rng2.ppf(q))
- class TestNumericalInverseHermite:
- # / (1 +sin(2 Pi x))/2 if |x| <= 1
- # f(x) = <
- # \ 0 otherwise
- # Taken from UNU.RAN test suite (from file t_hinv.c)
- class dist0:
- def pdf(self, x):
- return 0.5*(1. + np.sin(2.*np.pi*x))
- def dpdf(self, x):
- return np.pi*np.cos(2.*np.pi*x)
- def cdf(self, x):
- return (1. + 2.*np.pi*(1 + x) - np.cos(2.*np.pi*x)) / (4.*np.pi)
- def support(self):
- return -1, 1
- # / Max(sin(2 Pi x)),0)Pi/2 if -1 < x <0.5
- # f(x) = <
- # \ 0 otherwise
- # Taken from UNU.RAN test suite (from file t_hinv.c)
- class dist1:
- def pdf(self, x):
- if (x <= -0.5):
- return np.sin((2. * np.pi) * x) * 0.5 * np.pi
- if (x < 0.):
- return 0.
- if (x <= 0.5):
- return np.sin((2. * np.pi) * x) * 0.5 * np.pi
- def dpdf(self, x):
- if (x <= -0.5):
- return np.cos((2. * np.pi) * x) * np.pi * np.pi
- if (x < 0.):
- return 0.
- if (x <= 0.5):
- return np.cos((2. * np.pi) * x) * np.pi * np.pi
- def cdf(self, x):
- if (x <= -0.5):
- return 0.25 * (1 - np.cos((2. * np.pi) * x))
- if (x < 0.):
- return 0.5
- if (x <= 0.5):
- return 0.75 - 0.25 * np.cos((2. * np.pi) * x)
- def support(self):
- return -1, 0.5
- dists = [dist0(), dist1()]
- # exact mean and variance of the distributions in the list dists
- mv0 = [-1/(2*np.pi), 1/3 - 1/(4*np.pi*np.pi)]
- mv1 = [-1/4, 3/8-1/(2*np.pi*np.pi) - 1/16]
- mvs = [mv0, mv1]
- @pytest.mark.parametrize("dist, mv_ex",
- zip(dists, mvs))
- @pytest.mark.parametrize("order", [3, 5])
- def test_basic(self, dist, mv_ex, order):
- rng = NumericalInverseHermite(dist, order=order, random_state=42)
- check_cont_samples(rng, dist, mv_ex)
- # test domains with inf + nan in them. need to write a custom test for
- # this because not all methods support infinite tails.
- @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
- def test_inf_nan_domains(self, domain, err, msg):
- with pytest.raises(err, match=msg):
- NumericalInverseHermite(StandardNormal(), domain=domain)
- def basic_test_all_scipy_dists(self, distname, shapes):
- slow_dists = {'ksone', 'kstwo', 'levy_stable', 'skewnorm'}
- fail_dists = {'beta', 'gausshyper', 'geninvgauss', 'ncf', 'nct',
- 'norminvgauss', 'genhyperbolic', 'studentized_range',
- 'vonmises', 'kappa4', 'invgauss', 'wald'}
- if distname in slow_dists:
- pytest.skip("Distribution is too slow")
- if distname in fail_dists:
- # specific reasons documented in gh-13319
- # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
- pytest.xfail("Fails - usually due to inaccurate CDF/PDF")
- np.random.seed(0)
- dist = getattr(stats, distname)(*shapes)
- fni = NumericalInverseHermite(dist)
- x = np.random.rand(10)
- p_tol = np.max(np.abs(dist.ppf(x)-fni.ppf(x))/np.abs(dist.ppf(x)))
- u_tol = np.max(np.abs(dist.cdf(fni.ppf(x)) - x))
- assert p_tol < 1e-8
- assert u_tol < 1e-12
- @pytest.mark.filterwarnings('ignore::RuntimeWarning')
- @pytest.mark.xslow
- @pytest.mark.parametrize(("distname", "shapes"), distcont)
- def test_basic_all_scipy_dists(self, distname, shapes):
- # if distname == "truncnorm":
- # pytest.skip("Tested separately")
- self.basic_test_all_scipy_dists(distname, shapes)
- @pytest.mark.filterwarnings('ignore::RuntimeWarning')
- def test_basic_truncnorm_gh17155(self):
- self.basic_test_all_scipy_dists("truncnorm", (0.1, 2))
- def test_input_validation(self):
- match = r"`order` must be either 1, 3, or 5."
- with pytest.raises(ValueError, match=match):
- NumericalInverseHermite(StandardNormal(), order=2)
- match = "`cdf` required but not found"
- with pytest.raises(ValueError, match=match):
- NumericalInverseHermite("norm")
- match = "could not convert string to float"
- with pytest.raises(ValueError, match=match):
- NumericalInverseHermite(StandardNormal(),
- u_resolution='ekki')
- rngs = [None, 0, np.random.RandomState(0)]
- rngs.append(np.random.default_rng(0)) # type: ignore
- sizes = [(None, tuple()), (8, (8,)), ((4, 5, 6), (4, 5, 6))]
- @pytest.mark.parametrize('rng', rngs)
- @pytest.mark.parametrize('size_in, size_out', sizes)
- def test_RVS(self, rng, size_in, size_out):
- dist = StandardNormal()
- fni = NumericalInverseHermite(dist)
- rng2 = deepcopy(rng)
- rvs = fni.rvs(size=size_in, random_state=rng)
- if size_in is not None:
- assert rvs.shape == size_out
- if rng2 is not None:
- rng2 = check_random_state(rng2)
- uniform = rng2.uniform(size=size_in)
- rvs2 = stats.norm.ppf(uniform)
- assert_allclose(rvs, rvs2)
- def test_inaccurate_CDF(self):
- # CDF function with inaccurate tail cannot be inverted; see gh-13319
- # https://github.com/scipy/scipy/pull/13319#discussion_r626188955
- shapes = (2.3098496451481823, 0.6268795430096368)
- match = ("98 : one or more intervals very short; possibly due to "
- "numerical problems with a pole or very flat tail")
- # fails with default tol
- with pytest.warns(RuntimeWarning, match=match):
- NumericalInverseHermite(stats.beta(*shapes))
- # no error with coarser tol
- NumericalInverseHermite(stats.beta(*shapes), u_resolution=1e-8)
- def test_custom_distribution(self):
- dist1 = StandardNormal()
- fni1 = NumericalInverseHermite(dist1)
- dist2 = stats.norm()
- fni2 = NumericalInverseHermite(dist2)
- assert_allclose(fni1.rvs(random_state=0), fni2.rvs(random_state=0))
- u = [
- # check the correctness of the PPF for equidistant points between
- # 0.02 and 0.98.
- np.linspace(0., 1., num=10000),
- # test the PPF method for empty arrays
- [], [[]],
- # test if nans and infs return nan result.
- [np.nan], [-np.inf, np.nan, np.inf],
- # test if a scalar is returned for a scalar input.
- 0,
- # test for arrays with nans, values greater than 1 and less than 0,
- # and some valid values.
- [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
- ]
- @pytest.mark.parametrize("u", u)
- def test_ppf(self, u):
- dist = StandardNormal()
- rng = NumericalInverseHermite(dist, u_resolution=1e-12)
- # Older versions of NumPy throw RuntimeWarnings for comparisons
- # with nan.
- with suppress_warnings() as sup:
- sup.filter(RuntimeWarning, "invalid value encountered in greater")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "greater_equal")
- sup.filter(RuntimeWarning, "invalid value encountered in less")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "less_equal")
- res = rng.ppf(u)
- expected = stats.norm.ppf(u)
- assert_allclose(res, expected, rtol=1e-9, atol=3e-10)
- assert res.shape == expected.shape
- def test_u_error(self):
- dist = StandardNormal()
- rng = NumericalInverseHermite(dist, u_resolution=1e-10)
- max_error, mae = rng.u_error()
- assert max_error < 1e-10
- assert mae <= max_error
- with suppress_warnings() as sup:
- # ignore warning about u-resolution being too small.
- sup.filter(RuntimeWarning)
- rng = NumericalInverseHermite(dist, u_resolution=1e-14)
- max_error, mae = rng.u_error()
- assert max_error < 1e-14
- assert mae <= max_error
- class TestDiscreteGuideTable:
- basic_fail_dists = {
- 'nchypergeom_fisher', # numerical errors on tails
- 'nchypergeom_wallenius', # numerical errors on tails
- 'randint' # fails on 32-bit ubuntu
- }
- def test_guide_factor_gt3_raises_warning(self):
- pv = [0.1, 0.3, 0.6]
- urng = np.random.default_rng()
- with pytest.warns(RuntimeWarning):
- DiscreteGuideTable(pv, random_state=urng, guide_factor=7)
- def test_guide_factor_zero_raises_warning(self):
- pv = [0.1, 0.3, 0.6]
- urng = np.random.default_rng()
- with pytest.warns(RuntimeWarning):
- DiscreteGuideTable(pv, random_state=urng, guide_factor=0)
- def test_negative_guide_factor_raises_warning(self):
- # This occurs from the UNU.RAN wrapper automatically.
- # however it already gives a useful warning
- # Here we just test that a warning is raised.
- pv = [0.1, 0.3, 0.6]
- urng = np.random.default_rng()
- with pytest.warns(RuntimeWarning):
- DiscreteGuideTable(pv, random_state=urng, guide_factor=-1)
- @pytest.mark.parametrize("distname, params", distdiscrete)
- def test_basic(self, distname, params):
- if distname in self.basic_fail_dists:
- msg = ("DGT fails on these probably because of large domains "
- "and small computation errors in PMF.")
- pytest.skip(msg)
- if not isinstance(distname, str):
- dist = distname
- else:
- dist = getattr(stats, distname)
- dist = dist(*params)
- domain = dist.support()
- if not np.isfinite(domain[1] - domain[0]):
- # DGT only works with finite domain. So, skip the distributions
- # with infinite tails.
- pytest.skip("DGT only works with a finite domain.")
- k = np.arange(domain[0], domain[1]+1)
- pv = dist.pmf(k)
- mv_ex = dist.stats('mv')
- rng = DiscreteGuideTable(dist, random_state=42)
- check_discr_samples(rng, pv, mv_ex)
- u = [
- # the correctness of the PPF for equidistant points between 0 and 1.
- np.linspace(0, 1, num=10000),
- # test the PPF method for empty arrays
- [], [[]],
- # test if nans and infs return nan result.
- [np.nan], [-np.inf, np.nan, np.inf],
- # test if a scalar is returned for a scalar input.
- 0,
- # test for arrays with nans, values greater than 1 and less than 0,
- # and some valid values.
- [[np.nan, 0.5, 0.1], [0.2, 0.4, np.inf], [-2, 3, 4]]
- ]
- @pytest.mark.parametrize('u', u)
- def test_ppf(self, u):
- n, p = 4, 0.1
- dist = stats.binom(n, p)
- rng = DiscreteGuideTable(dist, random_state=42)
- # Older versions of NumPy throw RuntimeWarnings for comparisons
- # with nan.
- with suppress_warnings() as sup:
- sup.filter(RuntimeWarning, "invalid value encountered in greater")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "greater_equal")
- sup.filter(RuntimeWarning, "invalid value encountered in less")
- sup.filter(RuntimeWarning, "invalid value encountered in "
- "less_equal")
- res = rng.ppf(u)
- expected = stats.binom.ppf(u, n, p)
- assert_equal(res.shape, expected.shape)
- assert_equal(res, expected)
- @pytest.mark.parametrize("pv, msg", bad_pv_common)
- def test_bad_pv(self, pv, msg):
- with pytest.raises(ValueError, match=msg):
- DiscreteGuideTable(pv)
- # DGT doesn't support infinite tails. So, it should throw an error when
- # inf is present in the domain.
- inf_domain = [(-np.inf, np.inf), (np.inf, np.inf), (-np.inf, -np.inf),
- (0, np.inf), (-np.inf, 0)]
- @pytest.mark.parametrize("domain", inf_domain)
- def test_inf_domain(self, domain):
- with pytest.raises(ValueError, match=r"must be finite"):
- DiscreteGuideTable(stats.binom(10, 0.2), domain=domain)
- class TestSimpleRatioUniforms:
- # pdf with piecewise linear function as transformed density
- # with T = -1/sqrt with shift. Taken from UNU.RAN test suite
- # (from file t_srou.c)
- class dist:
- def __init__(self, shift):
- self.shift = shift
- self.mode = shift
- def pdf(self, x):
- x -= self.shift
- y = 1. / (abs(x) + 1.)
- return 0.5 * y * y
- def cdf(self, x):
- x -= self.shift
- if x <= 0.:
- return 0.5 / (1. - x)
- else:
- return 1. - 0.5 / (1. + x)
- dists = [dist(0.), dist(10000.)]
- # exact mean and variance of the distributions in the list dists
- mv1 = [0., np.inf]
- mv2 = [10000., np.inf]
- mvs = [mv1, mv2]
- @pytest.mark.parametrize("dist, mv_ex",
- zip(dists, mvs))
- def test_basic(self, dist, mv_ex):
- rng = SimpleRatioUniforms(dist, mode=dist.mode, random_state=42)
- check_cont_samples(rng, dist, mv_ex)
- rng = SimpleRatioUniforms(dist, mode=dist.mode,
- cdf_at_mode=dist.cdf(dist.mode),
- random_state=42)
- check_cont_samples(rng, dist, mv_ex)
- # test domains with inf + nan in them. need to write a custom test for
- # this because not all methods support infinite tails.
- @pytest.mark.parametrize("domain, err, msg", inf_nan_domains)
- def test_inf_nan_domains(self, domain, err, msg):
- with pytest.raises(err, match=msg):
- SimpleRatioUniforms(StandardNormal(), domain=domain)
- def test_bad_args(self):
- # pdf_area < 0
- with pytest.raises(ValueError, match=r"`pdf_area` must be > 0"):
- SimpleRatioUniforms(StandardNormal(), mode=0, pdf_area=-1)
|