12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570 |
- """Quasi-Monte Carlo engines and helpers."""
- from __future__ import annotations
- import copy
- import math
- import numbers
- import os
- import warnings
- from abc import ABC, abstractmethod
- from functools import partial
- from typing import (
- Callable,
- ClassVar,
- Dict,
- List,
- Literal,
- Optional,
- overload,
- Tuple,
- TYPE_CHECKING,
- )
- import numpy as np
- if TYPE_CHECKING:
- import numpy.typing as npt
- from scipy._lib._util import (
- DecimalNumber, GeneratorType, IntNumber, SeedType
- )
- import scipy.stats as stats
- from scipy._lib._util import rng_integers
- from scipy.spatial import distance, Voronoi
- from scipy.special import gammainc
- from ._sobol import (
- _initialize_v, _cscramble, _fill_p_cumulative, _draw, _fast_forward,
- _categorize, _MAXDIM
- )
- from ._qmc_cy import (
- _cy_wrapper_centered_discrepancy,
- _cy_wrapper_wrap_around_discrepancy,
- _cy_wrapper_mixture_discrepancy,
- _cy_wrapper_l2_star_discrepancy,
- _cy_wrapper_update_discrepancy,
- _cy_van_der_corput_scrambled,
- _cy_van_der_corput,
- )
- __all__ = ['scale', 'discrepancy', 'update_discrepancy',
- 'QMCEngine', 'Sobol', 'Halton', 'LatinHypercube', 'PoissonDisk',
- 'MultinomialQMC', 'MultivariateNormalQMC']
- @overload
- def check_random_state(seed: Optional[IntNumber] = ...) -> np.random.Generator:
- ...
- @overload
- def check_random_state(seed: GeneratorType) -> GeneratorType:
- ...
- # Based on scipy._lib._util.check_random_state
- def check_random_state(seed=None):
- """Turn `seed` into a `numpy.random.Generator` instance.
- Parameters
- ----------
- seed : {None, int, `numpy.random.Generator`, `numpy.random.RandomState`}, optional # noqa
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` or ``RandomState`` instance, then
- the provided instance is used.
- Returns
- -------
- seed : {`numpy.random.Generator`, `numpy.random.RandomState`}
- Random number generator.
- """
- if seed is None or isinstance(seed, (numbers.Integral, np.integer)):
- return np.random.default_rng(seed)
- elif isinstance(seed, (np.random.RandomState, np.random.Generator)):
- return seed
- else:
- raise ValueError(f'{seed!r} cannot be used to seed a'
- ' numpy.random.Generator instance')
- def scale(
- sample: npt.ArrayLike,
- l_bounds: npt.ArrayLike,
- u_bounds: npt.ArrayLike,
- *,
- reverse: bool = False
- ) -> np.ndarray:
- r"""Sample scaling from unit hypercube to different bounds.
- To convert a sample from :math:`[0, 1)` to :math:`[a, b), b>a`,
- with :math:`a` the lower bounds and :math:`b` the upper bounds.
- The following transformation is used:
- .. math::
- (b - a) \cdot \text{sample} + a
- Parameters
- ----------
- sample : array_like (n, d)
- Sample to scale.
- l_bounds, u_bounds : array_like (d,)
- Lower and upper bounds (resp. :math:`a`, :math:`b`) of transformed
- data. If `reverse` is True, range of the original data to transform
- to the unit hypercube.
- reverse : bool, optional
- Reverse the transformation from different bounds to the unit hypercube.
- Default is False.
- Returns
- -------
- sample : array_like (n, d)
- Scaled sample.
- Examples
- --------
- Transform 3 samples in the unit hypercube to bounds:
- >>> from scipy.stats import qmc
- >>> l_bounds = [-2, 0]
- >>> u_bounds = [6, 5]
- >>> sample = [[0.5 , 0.75],
- ... [0.5 , 0.5],
- ... [0.75, 0.25]]
- >>> sample_scaled = qmc.scale(sample, l_bounds, u_bounds)
- >>> sample_scaled
- array([[2. , 3.75],
- [2. , 2.5 ],
- [4. , 1.25]])
- And convert back to the unit hypercube:
- >>> sample_ = qmc.scale(sample_scaled, l_bounds, u_bounds, reverse=True)
- >>> sample_
- array([[0.5 , 0.75],
- [0.5 , 0.5 ],
- [0.75, 0.25]])
- """
- sample = np.asarray(sample)
- # Checking bounds and sample
- if not sample.ndim == 2:
- raise ValueError('Sample is not a 2D array')
- lower, upper = _validate_bounds(
- l_bounds=l_bounds, u_bounds=u_bounds, d=sample.shape[1]
- )
- if not reverse:
- # Checking that sample is within the hypercube
- if (sample.max() > 1.) or (sample.min() < 0.):
- raise ValueError('Sample is not in unit hypercube')
- return sample * (upper - lower) + lower
- else:
- # Checking that sample is within the bounds
- if not (np.all(sample >= lower) and np.all(sample <= upper)):
- raise ValueError('Sample is out of bounds')
- return (sample - lower) / (upper - lower)
- def discrepancy(
- sample: npt.ArrayLike,
- *,
- iterative: bool = False,
- method: Literal["CD", "WD", "MD", "L2-star"] = "CD",
- workers: IntNumber = 1) -> float:
- """Discrepancy of a given sample.
- Parameters
- ----------
- sample : array_like (n, d)
- The sample to compute the discrepancy from.
- iterative : bool, optional
- Must be False if not using it for updating the discrepancy.
- Default is False. Refer to the notes for more details.
- method : str, optional
- Type of discrepancy, can be ``CD``, ``WD``, ``MD`` or ``L2-star``.
- Refer to the notes for more details. Default is ``CD``.
- workers : int, optional
- Number of workers to use for parallel processing. If -1 is given all
- CPU threads are used. Default is 1.
- Returns
- -------
- discrepancy : float
- Discrepancy.
- Notes
- -----
- The discrepancy is a uniformity criterion used to assess the space filling
- of a number of samples in a hypercube. A discrepancy quantifies the
- distance between the continuous uniform distribution on a hypercube and the
- discrete uniform distribution on :math:`n` distinct sample points.
- The lower the value is, the better the coverage of the parameter space is.
- For a collection of subsets of the hypercube, the discrepancy is the
- difference between the fraction of sample points in one of those
- subsets and the volume of that subset. There are different definitions of
- discrepancy corresponding to different collections of subsets. Some
- versions take a root mean square difference over subsets instead of
- a maximum.
- A measure of uniformity is reasonable if it satisfies the following
- criteria [1]_:
- 1. It is invariant under permuting factors and/or runs.
- 2. It is invariant under rotation of the coordinates.
- 3. It can measure not only uniformity of the sample over the hypercube,
- but also the projection uniformity of the sample over non-empty
- subset of lower dimension hypercubes.
- 4. There is some reasonable geometric meaning.
- 5. It is easy to compute.
- 6. It satisfies the Koksma-Hlawka-like inequality.
- 7. It is consistent with other criteria in experimental design.
- Four methods are available:
- * ``CD``: Centered Discrepancy - subspace involves a corner of the
- hypercube
- * ``WD``: Wrap-around Discrepancy - subspace can wrap around bounds
- * ``MD``: Mixture Discrepancy - mix between CD/WD covering more criteria
- * ``L2-star``: L2-star discrepancy - like CD BUT variant to rotation
- See [2]_ for precise definitions of each method.
- Lastly, using ``iterative=True``, it is possible to compute the
- discrepancy as if we had :math:`n+1` samples. This is useful if we want
- to add a point to a sampling and check the candidate which would give the
- lowest discrepancy. Then you could just update the discrepancy with
- each candidate using `update_discrepancy`. This method is faster than
- computing the discrepancy for a large number of candidates.
- References
- ----------
- .. [1] Fang et al. "Design and modeling for computer experiments".
- Computer Science and Data Analysis Series, 2006.
- .. [2] Zhou Y.-D. et al. "Mixture discrepancy for quasi-random point sets."
- Journal of Complexity, 29 (3-4) , pp. 283-301, 2013.
- .. [3] T. T. Warnock. "Computational investigations of low discrepancy
- point sets." Applications of Number Theory to Numerical
- Analysis, Academic Press, pp. 319-343, 1972.
- Examples
- --------
- Calculate the quality of the sample using the discrepancy:
- >>> import numpy as np
- >>> from scipy.stats import qmc
- >>> space = np.array([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]])
- >>> l_bounds = [0.5, 0.5]
- >>> u_bounds = [6.5, 6.5]
- >>> space = qmc.scale(space, l_bounds, u_bounds, reverse=True)
- >>> space
- array([[0.08333333, 0.41666667],
- [0.25 , 0.91666667],
- [0.41666667, 0.25 ],
- [0.58333333, 0.75 ],
- [0.75 , 0.08333333],
- [0.91666667, 0.58333333]])
- >>> qmc.discrepancy(space)
- 0.008142039609053464
- We can also compute iteratively the ``CD`` discrepancy by using
- ``iterative=True``.
- >>> disc_init = qmc.discrepancy(space[:-1], iterative=True)
- >>> disc_init
- 0.04769081147119336
- >>> qmc.update_discrepancy(space[-1], space[:-1], disc_init)
- 0.008142039609053513
- """
- sample = np.asarray(sample, dtype=np.float64, order="C")
- # Checking that sample is within the hypercube and 2D
- if not sample.ndim == 2:
- raise ValueError("Sample is not a 2D array")
- if (sample.max() > 1.) or (sample.min() < 0.):
- raise ValueError("Sample is not in unit hypercube")
- workers = _validate_workers(workers)
- methods = {
- "CD": _cy_wrapper_centered_discrepancy,
- "WD": _cy_wrapper_wrap_around_discrepancy,
- "MD": _cy_wrapper_mixture_discrepancy,
- "L2-star": _cy_wrapper_l2_star_discrepancy,
- }
- if method in methods:
- return methods[method](sample, iterative, workers=workers)
- else:
- raise ValueError(f"{method!r} is not a valid method. It must be one of"
- f" {set(methods)!r}")
- def update_discrepancy(
- x_new: npt.ArrayLike,
- sample: npt.ArrayLike,
- initial_disc: DecimalNumber) -> float:
- """Update the centered discrepancy with a new sample.
- Parameters
- ----------
- x_new : array_like (1, d)
- The new sample to add in `sample`.
- sample : array_like (n, d)
- The initial sample.
- initial_disc : float
- Centered discrepancy of the `sample`.
- Returns
- -------
- discrepancy : float
- Centered discrepancy of the sample composed of `x_new` and `sample`.
- Examples
- --------
- We can also compute iteratively the discrepancy by using
- ``iterative=True``.
- >>> import numpy as np
- >>> from scipy.stats import qmc
- >>> space = np.array([[1, 3], [2, 6], [3, 2], [4, 5], [5, 1], [6, 4]])
- >>> l_bounds = [0.5, 0.5]
- >>> u_bounds = [6.5, 6.5]
- >>> space = qmc.scale(space, l_bounds, u_bounds, reverse=True)
- >>> disc_init = qmc.discrepancy(space[:-1], iterative=True)
- >>> disc_init
- 0.04769081147119336
- >>> qmc.update_discrepancy(space[-1], space[:-1], disc_init)
- 0.008142039609053513
- """
- sample = np.asarray(sample, dtype=np.float64, order="C")
- x_new = np.asarray(x_new, dtype=np.float64, order="C")
- # Checking that sample is within the hypercube and 2D
- if not sample.ndim == 2:
- raise ValueError('Sample is not a 2D array')
- if (sample.max() > 1.) or (sample.min() < 0.):
- raise ValueError('Sample is not in unit hypercube')
- # Checking that x_new is within the hypercube and 1D
- if not x_new.ndim == 1:
- raise ValueError('x_new is not a 1D array')
- if not (np.all(x_new >= 0) and np.all(x_new <= 1)):
- raise ValueError('x_new is not in unit hypercube')
- if x_new.shape[0] != sample.shape[1]:
- raise ValueError("x_new and sample must be broadcastable")
- return _cy_wrapper_update_discrepancy(x_new, sample, initial_disc)
- def _perturb_discrepancy(sample: np.ndarray, i1: int, i2: int, k: int,
- disc: float):
- """Centered discrepancy after an elementary perturbation of a LHS.
- An elementary perturbation consists of an exchange of coordinates between
- two points: ``sample[i1, k] <-> sample[i2, k]``. By construction,
- this operation conserves the LHS properties.
- Parameters
- ----------
- sample : array_like (n, d)
- The sample (before permutation) to compute the discrepancy from.
- i1 : int
- The first line of the elementary permutation.
- i2 : int
- The second line of the elementary permutation.
- k : int
- The column of the elementary permutation.
- disc : float
- Centered discrepancy of the design before permutation.
- Returns
- -------
- discrepancy : float
- Centered discrepancy of the design after permutation.
- References
- ----------
- .. [1] Jin et al. "An efficient algorithm for constructing optimal design
- of computer experiments", Journal of Statistical Planning and
- Inference, 2005.
- """
- n = sample.shape[0]
- z_ij = sample - 0.5
- # Eq (19)
- c_i1j = (1. / n ** 2.
- * np.prod(0.5 * (2. + abs(z_ij[i1, :])
- + abs(z_ij) - abs(z_ij[i1, :] - z_ij)), axis=1))
- c_i2j = (1. / n ** 2.
- * np.prod(0.5 * (2. + abs(z_ij[i2, :])
- + abs(z_ij) - abs(z_ij[i2, :] - z_ij)), axis=1))
- # Eq (20)
- c_i1i1 = (1. / n ** 2 * np.prod(1 + abs(z_ij[i1, :]))
- - 2. / n * np.prod(1. + 0.5 * abs(z_ij[i1, :])
- - 0.5 * z_ij[i1, :] ** 2))
- c_i2i2 = (1. / n ** 2 * np.prod(1 + abs(z_ij[i2, :]))
- - 2. / n * np.prod(1. + 0.5 * abs(z_ij[i2, :])
- - 0.5 * z_ij[i2, :] ** 2))
- # Eq (22), typo in the article in the denominator i2 -> i1
- num = (2 + abs(z_ij[i2, k]) + abs(z_ij[:, k])
- - abs(z_ij[i2, k] - z_ij[:, k]))
- denum = (2 + abs(z_ij[i1, k]) + abs(z_ij[:, k])
- - abs(z_ij[i1, k] - z_ij[:, k]))
- gamma = num / denum
- # Eq (23)
- c_p_i1j = gamma * c_i1j
- # Eq (24)
- c_p_i2j = c_i2j / gamma
- alpha = (1 + abs(z_ij[i2, k])) / (1 + abs(z_ij[i1, k]))
- beta = (2 - abs(z_ij[i2, k])) / (2 - abs(z_ij[i1, k]))
- g_i1 = np.prod(1. + abs(z_ij[i1, :]))
- g_i2 = np.prod(1. + abs(z_ij[i2, :]))
- h_i1 = np.prod(1. + 0.5 * abs(z_ij[i1, :]) - 0.5 * (z_ij[i1, :] ** 2))
- h_i2 = np.prod(1. + 0.5 * abs(z_ij[i2, :]) - 0.5 * (z_ij[i2, :] ** 2))
- # Eq (25), typo in the article g is missing
- c_p_i1i1 = ((g_i1 * alpha) / (n ** 2) - 2. * alpha * beta * h_i1 / n)
- # Eq (26), typo in the article n ** 2
- c_p_i2i2 = ((g_i2 / ((n ** 2) * alpha)) - (2. * h_i2 / (n * alpha * beta)))
- # Eq (26)
- sum_ = c_p_i1j - c_i1j + c_p_i2j - c_i2j
- mask = np.ones(n, dtype=bool)
- mask[[i1, i2]] = False
- sum_ = sum(sum_[mask])
- disc_ep = (disc + c_p_i1i1 - c_i1i1 + c_p_i2i2 - c_i2i2 + 2 * sum_)
- return disc_ep
- def primes_from_2_to(n: int) -> np.ndarray:
- """Prime numbers from 2 to *n*.
- Parameters
- ----------
- n : int
- Sup bound with ``n >= 6``.
- Returns
- -------
- primes : list(int)
- Primes in ``2 <= p < n``.
- Notes
- -----
- Taken from [1]_ by P.T. Roy, written consent given on 23.04.2021
- by the original author, Bruno Astrolino, for free use in SciPy under
- the 3-clause BSD.
- References
- ----------
- .. [1] `StackOverflow <https://stackoverflow.com/questions/2068372>`_.
- """
- sieve = np.ones(n // 3 + (n % 6 == 2), dtype=bool)
- for i in range(1, int(n ** 0.5) // 3 + 1):
- k = 3 * i + 1 | 1
- sieve[k * k // 3::2 * k] = False
- sieve[k * (k - 2 * (i & 1) + 4) // 3::2 * k] = False
- return np.r_[2, 3, ((3 * np.nonzero(sieve)[0][1:] + 1) | 1)]
- def n_primes(n: IntNumber) -> List[int]:
- """List of the n-first prime numbers.
- Parameters
- ----------
- n : int
- Number of prime numbers wanted.
- Returns
- -------
- primes : list(int)
- List of primes.
- """
- primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59,
- 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127,
- 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193,
- 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
- 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
- 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431,
- 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
- 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
- 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, 673,
- 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761,
- 769, 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857,
- 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941, 947,
- 953, 967, 971, 977, 983, 991, 997][:n] # type: ignore[misc]
- if len(primes) < n:
- big_number = 2000
- while 'Not enough primes':
- primes = primes_from_2_to(big_number)[:n] # type: ignore
- if len(primes) == n:
- break
- big_number += 1000
- return primes
- def van_der_corput(
- n: IntNumber,
- base: IntNumber = 2,
- *,
- start_index: IntNumber = 0,
- scramble: bool = False,
- seed: SeedType = None,
- workers: IntNumber = 1) -> np.ndarray:
- """Van der Corput sequence.
- Pseudo-random number generator based on a b-adic expansion.
- Scrambling uses permutations of the remainders (see [1]_). Multiple
- permutations are applied to construct a point. The sequence of
- permutations has to be the same for all points of the sequence.
- Parameters
- ----------
- n : int
- Number of element of the sequence.
- base : int, optional
- Base of the sequence. Default is 2.
- start_index : int, optional
- Index to start the sequence from. Default is 0.
- scramble : bool, optional
- If True, use Owen scrambling. Otherwise no scrambling is done.
- Default is True.
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- workers : int, optional
- Number of workers to use for parallel processing. If -1 is
- given all CPU threads are used. Default is 1.
- Returns
- -------
- sequence : list (n,)
- Sequence of Van der Corput.
- References
- ----------
- .. [1] A. B. Owen. "A randomized Halton algorithm in R",
- :arxiv:`1706.02808`, 2017.
- """
- if base < 2:
- raise ValueError("'base' must be at least 2")
- if scramble:
- rng = check_random_state(seed)
- # In Algorithm 1 of Owen 2017, a permutation of `np.arange(base)` is
- # created for each positive integer `k` such that `1 - base**-k < 1`
- # using floating-point arithmetic. For double precision floats, the
- # condition `1 - base**-k < 1` can also be written as `base**-k >
- # 2**-54`, which makes it more apparent how many permutations we need
- # to create.
- count = math.ceil(54 / math.log2(base)) - 1
- permutations = np.repeat(np.arange(base)[None], count, axis=0)
- for perm in permutations:
- rng.shuffle(perm)
- return _cy_van_der_corput_scrambled(n, base, start_index,
- permutations, workers)
- else:
- return _cy_van_der_corput(n, base, start_index, workers)
- class QMCEngine(ABC):
- """A generic Quasi-Monte Carlo sampler class meant for subclassing.
- QMCEngine is a base class to construct a specific Quasi-Monte Carlo
- sampler. It cannot be used directly as a sampler.
- Parameters
- ----------
- d : int
- Dimension of the parameter space.
- optimization : {None, "random-cd", "lloyd"}, optional
- Whether to use an optimization scheme to improve the quality after
- sampling. Note that this is a post-processing step that does not
- guarantee that all properties of the sample will be conserved.
- Default is None.
- * ``random-cd``: random permutations of coordinates to lower the
- centered discrepancy. The best sample based on the centered
- discrepancy is constantly updated. Centered discrepancy-based
- sampling shows better space-filling robustness toward 2D and 3D
- subprojections compared to using other discrepancy measures.
- * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
- The process converges to equally spaced samples.
- .. versionadded:: 1.10.0
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Notes
- -----
- By convention samples are distributed over the half-open interval
- ``[0, 1)``. Instances of the class can access the attributes: ``d`` for
- the dimension; and ``rng`` for the random number generator (used for the
- ``seed``).
- **Subclassing**
- When subclassing `QMCEngine` to create a new sampler, ``__init__`` and
- ``random`` must be redefined.
- * ``__init__(d, seed=None)``: at least fix the dimension. If the sampler
- does not take advantage of a ``seed`` (deterministic methods like
- Halton), this parameter can be omitted.
- * ``_random(n, *, workers=1)``: draw ``n`` from the engine. ``workers``
- is used for parallelism. See `Halton` for example.
- Optionally, two other methods can be overwritten by subclasses:
- * ``reset``: Reset the engine to its original state.
- * ``fast_forward``: If the sequence is deterministic (like Halton
- sequence), then ``fast_forward(n)`` is skipping the ``n`` first draw.
- Examples
- --------
- To create a random sampler based on ``np.random.random``, we would do the
- following:
- >>> from scipy.stats import qmc
- >>> class RandomEngine(qmc.QMCEngine):
- ... def __init__(self, d, seed=None):
- ... super().__init__(d=d, seed=seed)
- ...
- ...
- ... def _random(self, n=1, *, workers=1):
- ... return self.rng.random((n, self.d))
- ...
- ...
- ... def reset(self):
- ... super().__init__(d=self.d, seed=self.rng_seed)
- ... return self
- ...
- ...
- ... def fast_forward(self, n):
- ... self.random(n)
- ... return self
- After subclassing `QMCEngine` to define the sampling strategy we want to
- use, we can create an instance to sample from.
- >>> engine = RandomEngine(2)
- >>> engine.random(5)
- array([[0.22733602, 0.31675834], # random
- [0.79736546, 0.67625467],
- [0.39110955, 0.33281393],
- [0.59830875, 0.18673419],
- [0.67275604, 0.94180287]])
- We can also reset the state of the generator and resample again.
- >>> _ = engine.reset()
- >>> engine.random(5)
- array([[0.22733602, 0.31675834], # random
- [0.79736546, 0.67625467],
- [0.39110955, 0.33281393],
- [0.59830875, 0.18673419],
- [0.67275604, 0.94180287]])
- """
- @abstractmethod
- def __init__(
- self,
- d: IntNumber,
- *,
- optimization: Optional[Literal["random-cd", "lloyd"]] = None,
- seed: SeedType = None
- ) -> None:
- if not np.issubdtype(type(d), np.integer) or d < 0:
- raise ValueError('d must be a non-negative integer value')
- self.d = d
- self.rng = check_random_state(seed)
- self.rng_seed = copy.deepcopy(seed)
- self.num_generated = 0
- config = {
- # random-cd
- "n_nochange": 100,
- "n_iters": 10_000,
- "rng": self.rng,
- # lloyd
- "tol": 1e-5,
- "maxiter": 10,
- "qhull_options": None,
- }
- self.optimization_method = _select_optimizer(optimization, config)
- @abstractmethod
- def _random(
- self, n: IntNumber = 1, *, workers: IntNumber = 1
- ) -> np.ndarray:
- ...
- def random(
- self, n: IntNumber = 1, *, workers: IntNumber = 1
- ) -> np.ndarray:
- """Draw `n` in the half-open interval ``[0, 1)``.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space.
- Default is 1.
- workers : int, optional
- Only supported with `Halton`.
- Number of workers to use for parallel processing. If -1 is
- given all CPU threads are used. Default is 1. It becomes faster
- than one worker for `n` greater than :math:`10^3`.
- Returns
- -------
- sample : array_like (n, d)
- QMC sample.
- """
- sample = self._random(n, workers=workers)
- if self.optimization_method is not None:
- sample = self.optimization_method(sample)
- self.num_generated += n
- return sample
- def integers(
- self,
- l_bounds: npt.ArrayLike,
- *,
- u_bounds: Optional[npt.ArrayLike] = None,
- n: IntNumber = 1,
- endpoint: bool = False,
- workers: IntNumber = 1
- ) -> np.ndarray:
- r"""
- Draw `n` integers from `l_bounds` (inclusive) to `u_bounds`
- (exclusive), or if endpoint=True, `l_bounds` (inclusive) to
- `u_bounds` (inclusive).
- Parameters
- ----------
- l_bounds : int or array-like of ints
- Lowest (signed) integers to be drawn (unless ``u_bounds=None``,
- in which case this parameter is 0 and this value is used for
- `u_bounds`).
- u_bounds : int or array-like of ints, optional
- If provided, one above the largest (signed) integer to be drawn
- (see above for behavior if ``u_bounds=None``).
- If array-like, must contain integer values.
- n : int, optional
- Number of samples to generate in the parameter space.
- Default is 1.
- endpoint : bool, optional
- If true, sample from the interval ``[l_bounds, u_bounds]`` instead
- of the default ``[l_bounds, u_bounds)``. Defaults is False.
- workers : int, optional
- Number of workers to use for parallel processing. If -1 is
- given all CPU threads are used. Only supported when using `Halton`
- Default is 1.
- Returns
- -------
- sample : array_like (n, d)
- QMC sample.
- Notes
- -----
- It is safe to just use the same ``[0, 1)`` to integer mapping
- with QMC that you would use with MC. You still get unbiasedness,
- a strong law of large numbers, an asymptotically infinite variance
- reduction and a finite sample variance bound.
- To convert a sample from :math:`[0, 1)` to :math:`[a, b), b>a`,
- with :math:`a` the lower bounds and :math:`b` the upper bounds,
- the following transformation is used:
- .. math::
- \text{floor}((b - a) \cdot \text{sample} + a)
- """
- if u_bounds is None:
- u_bounds = l_bounds
- l_bounds = 0
- u_bounds = np.atleast_1d(u_bounds)
- l_bounds = np.atleast_1d(l_bounds)
- if endpoint:
- u_bounds = u_bounds + 1
- if (not np.issubdtype(l_bounds.dtype, np.integer) or
- not np.issubdtype(u_bounds.dtype, np.integer)):
- message = ("'u_bounds' and 'l_bounds' must be integers or"
- " array-like of integers")
- raise ValueError(message)
- if isinstance(self, Halton):
- sample = self.random(n=n, workers=workers)
- else:
- sample = self.random(n=n)
- sample = scale(sample, l_bounds=l_bounds, u_bounds=u_bounds)
- sample = np.floor(sample).astype(np.int64)
- return sample
- def reset(self) -> QMCEngine:
- """Reset the engine to base state.
- Returns
- -------
- engine : QMCEngine
- Engine reset to its base state.
- """
- seed = copy.deepcopy(self.rng_seed)
- self.rng = check_random_state(seed)
- self.num_generated = 0
- return self
- def fast_forward(self, n: IntNumber) -> QMCEngine:
- """Fast-forward the sequence by `n` positions.
- Parameters
- ----------
- n : int
- Number of points to skip in the sequence.
- Returns
- -------
- engine : QMCEngine
- Engine reset to its base state.
- """
- self.random(n=n)
- return self
- class Halton(QMCEngine):
- """Halton sequence.
- Pseudo-random number generator that generalize the Van der Corput sequence
- for multiple dimensions. The Halton sequence uses the base-two Van der
- Corput sequence for the first dimension, base-three for its second and
- base-:math:`n` for its n-dimension.
- Parameters
- ----------
- d : int
- Dimension of the parameter space.
- scramble : bool, optional
- If True, use Owen scrambling. Otherwise no scrambling is done.
- Default is True.
- optimization : {None, "random-cd", "lloyd"}, optional
- Whether to use an optimization scheme to improve the quality after
- sampling. Note that this is a post-processing step that does not
- guarantee that all properties of the sample will be conserved.
- Default is None.
- * ``random-cd``: random permutations of coordinates to lower the
- centered discrepancy. The best sample based on the centered
- discrepancy is constantly updated. Centered discrepancy-based
- sampling shows better space-filling robustness toward 2D and 3D
- subprojections compared to using other discrepancy measures.
- * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
- The process converges to equally spaced samples.
- .. versionadded:: 1.10.0
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Notes
- -----
- The Halton sequence has severe striping artifacts for even modestly
- large dimensions. These can be ameliorated by scrambling. Scrambling
- also supports replication-based error estimates and extends
- applicabiltiy to unbounded integrands.
- References
- ----------
- .. [1] Halton, "On the efficiency of certain quasi-random sequences of
- points in evaluating multi-dimensional integrals", Numerische
- Mathematik, 1960.
- .. [2] A. B. Owen. "A randomized Halton algorithm in R",
- :arxiv:`1706.02808`, 2017.
- Examples
- --------
- Generate samples from a low discrepancy sequence of Halton.
- >>> from scipy.stats import qmc
- >>> sampler = qmc.Halton(d=2, scramble=False)
- >>> sample = sampler.random(n=5)
- >>> sample
- array([[0. , 0. ],
- [0.5 , 0.33333333],
- [0.25 , 0.66666667],
- [0.75 , 0.11111111],
- [0.125 , 0.44444444]])
- Compute the quality of the sample using the discrepancy criterion.
- >>> qmc.discrepancy(sample)
- 0.088893711419753
- If some wants to continue an existing design, extra points can be obtained
- by calling again `random`. Alternatively, you can skip some points like:
- >>> _ = sampler.fast_forward(5)
- >>> sample_continued = sampler.random(n=5)
- >>> sample_continued
- array([[0.3125 , 0.37037037],
- [0.8125 , 0.7037037 ],
- [0.1875 , 0.14814815],
- [0.6875 , 0.48148148],
- [0.4375 , 0.81481481]])
- Finally, samples can be scaled to bounds.
- >>> l_bounds = [0, 2]
- >>> u_bounds = [10, 5]
- >>> qmc.scale(sample_continued, l_bounds, u_bounds)
- array([[3.125 , 3.11111111],
- [8.125 , 4.11111111],
- [1.875 , 2.44444444],
- [6.875 , 3.44444444],
- [4.375 , 4.44444444]])
- """
- def __init__(
- self, d: IntNumber, *, scramble: bool = True,
- optimization: Optional[Literal["random-cd", "lloyd"]] = None,
- seed: SeedType = None
- ) -> None:
- # Used in `scipy.integrate.qmc_quad`
- self._init_quad = {'d': d, 'scramble': True,
- 'optimization': optimization}
- super().__init__(d=d, optimization=optimization, seed=seed)
- self.seed = seed
- self.base = n_primes(d)
- self.scramble = scramble
- def _random(
- self, n: IntNumber = 1, *, workers: IntNumber = 1
- ) -> np.ndarray:
- """Draw `n` in the half-open interval ``[0, 1)``.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space. Default is 1.
- workers : int, optional
- Number of workers to use for parallel processing. If -1 is
- given all CPU threads are used. Default is 1. It becomes faster
- than one worker for `n` greater than :math:`10^3`.
- Returns
- -------
- sample : array_like (n, d)
- QMC sample.
- """
- workers = _validate_workers(workers)
- # Generate a sample using a Van der Corput sequence per dimension.
- # important to have ``type(bdim) == int`` for performance reason
- sample = [van_der_corput(n, int(bdim), start_index=self.num_generated,
- scramble=self.scramble,
- seed=copy.deepcopy(self.seed),
- workers=workers)
- for bdim in self.base]
- return np.array(sample).T.reshape(n, self.d)
- class LatinHypercube(QMCEngine):
- r"""Latin hypercube sampling (LHS).
- A Latin hypercube sample [1]_ generates :math:`n` points in
- :math:`[0,1)^{d}`. Each univariate marginal distribution is stratified,
- placing exactly one point in :math:`[j/n, (j+1)/n)` for
- :math:`j=0,1,...,n-1`. They are still applicable when :math:`n << d`.
- Parameters
- ----------
- d : int
- Dimension of the parameter space.
- centered : bool, optional
- Center samples within cells of a multi-dimensional grid.
- Default is False.
- .. deprecated:: 1.10.0
- `centered` is deprecated as of SciPy 1.10.0 and will be removed in
- 1.12.0. Use `scramble` instead. ``centered=True`` corresponds to
- ``scramble=False``.
- scramble : bool, optional
- When False, center samples within cells of a multi-dimensional grid.
- Otherwise, samples are randomly placed within cells of the grid.
- .. note::
- Setting ``scramble=False`` does not ensure deterministic output.
- For that, use the `seed` parameter.
- Default is True.
- .. versionadded:: 1.10.0
- optimization : {None, "random-cd", "lloyd"}, optional
- Whether to use an optimization scheme to improve the quality after
- sampling. Note that this is a post-processing step that does not
- guarantee that all properties of the sample will be conserved.
- Default is None.
- * ``random-cd``: random permutations of coordinates to lower the
- centered discrepancy. The best sample based on the centered
- discrepancy is constantly updated. Centered discrepancy-based
- sampling shows better space-filling robustness toward 2D and 3D
- subprojections compared to using other discrepancy measures.
- * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
- The process converges to equally spaced samples.
- .. versionadded:: 1.8.0
- .. versionchanged:: 1.10.0
- Add ``lloyd``.
- strength : {1, 2}, optional
- Strength of the LHS. ``strength=1`` produces a plain LHS while
- ``strength=2`` produces an orthogonal array based LHS of strength 2
- [7]_, [8]_. In that case, only ``n=p**2`` points can be sampled,
- with ``p`` a prime number. It also constrains ``d <= p + 1``.
- Default is 1.
- .. versionadded:: 1.8.0
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Notes
- -----
- When LHS is used for integrating a function :math:`f` over :math:`n`,
- LHS is extremely effective on integrands that are nearly additive [2]_.
- With a LHS of :math:`n` points, the variance of the integral is always
- lower than plain MC on :math:`n-1` points [3]_. There is a central limit
- theorem for LHS on the mean and variance of the integral [4]_, but not
- necessarily for optimized LHS due to the randomization.
- :math:`A` is called an orthogonal array of strength :math:`t` if in each
- n-row-by-t-column submatrix of :math:`A`: all :math:`p^t` possible
- distinct rows occur the same number of times. The elements of :math:`A`
- are in the set :math:`\{0, 1, ..., p-1\}`, also called symbols.
- The constraint that :math:`p` must be a prime number is to allow modular
- arithmetic. Increasing strength adds some symmetry to the sub-projections
- of a sample. With strength 2, samples are symmetric along the diagonals of
- 2D sub-projections. This may be undesirable, but on the other hand, the
- sample dispersion is improved.
- Strength 1 (plain LHS) brings an advantage over strength 0 (MC) and
- strength 2 is a useful increment over strength 1. Going to strength 3 is
- a smaller increment and scrambled QMC like Sobol', Halton are more
- performant [7]_.
- To create a LHS of strength 2, the orthogonal array :math:`A` is
- randomized by applying a random, bijective map of the set of symbols onto
- itself. For example, in column 0, all 0s might become 2; in column 1,
- all 0s might become 1, etc.
- Then, for each column :math:`i` and symbol :math:`j`, we add a plain,
- one-dimensional LHS of size :math:`p` to the subarray where
- :math:`A^i = j`. The resulting matrix is finally divided by :math:`p`.
- References
- ----------
- .. [1] Mckay et al., "A Comparison of Three Methods for Selecting Values
- of Input Variables in the Analysis of Output from a Computer Code."
- Technometrics, 1979.
- .. [2] M. Stein, "Large sample properties of simulations using Latin
- hypercube sampling." Technometrics 29, no. 2: 143-151, 1987.
- .. [3] A. B. Owen, "Monte Carlo variance of scrambled net quadrature."
- SIAM Journal on Numerical Analysis 34, no. 5: 1884-1910, 1997
- .. [4] Loh, W.-L. "On Latin hypercube sampling." The annals of statistics
- 24, no. 5: 2058-2080, 1996.
- .. [5] Fang et al. "Design and modeling for computer experiments".
- Computer Science and Data Analysis Series, 2006.
- .. [6] Damblin et al., "Numerical studies of space filling designs:
- optimization of Latin Hypercube Samples and subprojection properties."
- Journal of Simulation, 2013.
- .. [7] A. B. Owen , "Orthogonal arrays for computer experiments,
- integration and visualization." Statistica Sinica, 1992.
- .. [8] B. Tang, "Orthogonal Array-Based Latin Hypercubes."
- Journal of the American Statistical Association, 1993.
- Examples
- --------
- Generate samples from a Latin hypercube generator.
- >>> from scipy.stats import qmc
- >>> sampler = qmc.LatinHypercube(d=2)
- >>> sample = sampler.random(n=5)
- >>> sample
- array([[0.1545328 , 0.53664833], # random
- [0.84052691, 0.06474907],
- [0.52177809, 0.93343721],
- [0.68033825, 0.36265316],
- [0.26544879, 0.61163943]])
- Compute the quality of the sample using the discrepancy criterion.
- >>> qmc.discrepancy(sample)
- 0.0196... # random
- Samples can be scaled to bounds.
- >>> l_bounds = [0, 2]
- >>> u_bounds = [10, 5]
- >>> qmc.scale(sample, l_bounds, u_bounds)
- array([[1.54532796, 3.609945 ], # random
- [8.40526909, 2.1942472 ],
- [5.2177809 , 4.80031164],
- [6.80338249, 3.08795949],
- [2.65448791, 3.83491828]])
- Use the `optimization` keyword argument to produce a LHS with
- lower discrepancy at higher computational cost.
- >>> sampler = qmc.LatinHypercube(d=2, optimization="random-cd")
- >>> sample = sampler.random(n=5)
- >>> qmc.discrepancy(sample)
- 0.0176... # random
- Use the `strength` keyword argument to produce an orthogonal array based
- LHS of strength 2. In this case, the number of sample points must be the
- square of a prime number.
- >>> sampler = qmc.LatinHypercube(d=2, strength=2)
- >>> sample = sampler.random(n=9)
- >>> qmc.discrepancy(sample)
- 0.00526... # random
- Options could be combined to produce an optimized centered
- orthogonal array based LHS. After optimization, the result would not
- be guaranteed to be of strength 2.
- """
- def __init__(
- self, d: IntNumber, *, centered: bool = False,
- scramble: bool = True,
- strength: int = 1,
- optimization: Optional[Literal["random-cd", "lloyd"]] = None,
- seed: SeedType = None
- ) -> None:
- if centered:
- scramble = False
- warnings.warn(
- "'centered' is deprecated and will be removed in SciPy 1.12."
- " Please use 'scramble' instead. 'centered=True' corresponds"
- " to 'scramble=False'.",
- stacklevel=2
- )
- # Used in `scipy.integrate.qmc_quad`
- self._init_quad = {'d': d, 'scramble': True, 'strength': strength,
- 'optimization': optimization}
- super().__init__(d=d, seed=seed, optimization=optimization)
- self.scramble = scramble
- lhs_method_strength = {
- 1: self._random_lhs,
- 2: self._random_oa_lhs
- }
- try:
- self.lhs_method: Callable = lhs_method_strength[strength]
- except KeyError as exc:
- message = (f"{strength!r} is not a valid strength. It must be one"
- f" of {set(lhs_method_strength)!r}")
- raise ValueError(message) from exc
- def _random(
- self, n: IntNumber = 1, *, workers: IntNumber = 1
- ) -> np.ndarray:
- lhs = self.lhs_method(n)
- return lhs
- def _random_lhs(self, n: IntNumber = 1) -> np.ndarray:
- """Base LHS algorithm."""
- if not self.scramble:
- samples: np.ndarray | float = 0.5
- else:
- samples = self.rng.uniform(size=(n, self.d))
- perms = np.tile(np.arange(1, n + 1),
- (self.d, 1)) # type: ignore[arg-type]
- for i in range(self.d):
- self.rng.shuffle(perms[i, :])
- perms = perms.T
- samples = (perms - samples) / n
- return samples
- def _random_oa_lhs(self, n: IntNumber = 4) -> np.ndarray:
- """Orthogonal array based LHS of strength 2."""
- p = np.sqrt(n).astype(int)
- n_row = p**2
- n_col = p + 1
- primes = primes_from_2_to(p + 1)
- if p not in primes or n != n_row:
- raise ValueError(
- "n is not the square of a prime number. Close"
- f" values are {primes[-2:]**2}"
- )
- if self.d > p + 1:
- raise ValueError("n is too small for d. Must be n > (d-1)**2")
- oa_sample = np.zeros(shape=(n_row, n_col), dtype=int)
- # OA of strength 2
- arrays = np.tile(np.arange(p), (2, 1))
- oa_sample[:, :2] = np.stack(np.meshgrid(*arrays),
- axis=-1).reshape(-1, 2)
- for p_ in range(1, p):
- oa_sample[:, 2+p_-1] = np.mod(oa_sample[:, 0]
- + p_*oa_sample[:, 1], p)
- # scramble the OA
- oa_sample_ = np.empty(shape=(n_row, n_col), dtype=int)
- for j in range(n_col):
- perms = self.rng.permutation(p)
- oa_sample_[:, j] = perms[oa_sample[:, j]]
- # following is making a scrambled OA into an OA-LHS
- oa_lhs_sample = np.zeros(shape=(n_row, n_col))
- lhs_engine = LatinHypercube(d=1, scramble=self.scramble, strength=1,
- seed=self.rng) # type: QMCEngine
- for j in range(n_col):
- for k in range(p):
- idx = oa_sample[:, j] == k
- lhs = lhs_engine.random(p).flatten()
- oa_lhs_sample[:, j][idx] = lhs + oa_sample[:, j][idx]
- lhs_engine = lhs_engine.reset()
- oa_lhs_sample /= p
- return oa_lhs_sample[:, :self.d] # type: ignore
- class Sobol(QMCEngine):
- """Engine for generating (scrambled) Sobol' sequences.
- Sobol' sequences are low-discrepancy, quasi-random numbers. Points
- can be drawn using two methods:
- * `random_base2`: safely draw :math:`n=2^m` points. This method
- guarantees the balance properties of the sequence.
- * `random`: draw an arbitrary number of points from the
- sequence. See warning below.
- Parameters
- ----------
- d : int
- Dimensionality of the sequence. Max dimensionality is 21201.
- scramble : bool, optional
- If True, use LMS+shift scrambling. Otherwise, no scrambling is done.
- Default is True.
- bits : int, optional
- Number of bits of the generator. Control the maximum number of points
- that can be generated, which is ``2**bits``. Maximal value is 64.
- It does not correspond to the return type, which is always
- ``np.float64`` to prevent points from repeating themselves.
- Default is None, which for backward compatibility, corresponds to 30.
- .. versionadded:: 1.9.0
- optimization : {None, "random-cd", "lloyd"}, optional
- Whether to use an optimization scheme to improve the quality after
- sampling. Note that this is a post-processing step that does not
- guarantee that all properties of the sample will be conserved.
- Default is None.
- * ``random-cd``: random permutations of coordinates to lower the
- centered discrepancy. The best sample based on the centered
- discrepancy is constantly updated. Centered discrepancy-based
- sampling shows better space-filling robustness toward 2D and 3D
- subprojections compared to using other discrepancy measures.
- * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
- The process converges to equally spaced samples.
- .. versionadded:: 1.10.0
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Notes
- -----
- Sobol' sequences [1]_ provide :math:`n=2^m` low discrepancy points in
- :math:`[0,1)^{d}`. Scrambling them [3]_ makes them suitable for singular
- integrands, provides a means of error estimation, and can improve their
- rate of convergence. The scrambling strategy which is implemented is a
- (left) linear matrix scramble (LMS) followed by a digital random shift
- (LMS+shift) [2]_.
- There are many versions of Sobol' sequences depending on their
- 'direction numbers'. This code uses direction numbers from [4]_. Hence,
- the maximum number of dimension is 21201. The direction numbers have been
- precomputed with search criterion 6 and can be retrieved at
- https://web.maths.unsw.edu.au/~fkuo/sobol/.
- .. warning::
- Sobol' sequences are a quadrature rule and they lose their balance
- properties if one uses a sample size that is not a power of 2, or skips
- the first point, or thins the sequence [5]_.
- If :math:`n=2^m` points are not enough then one should take :math:`2^M`
- points for :math:`M>m`. When scrambling, the number R of independent
- replicates does not have to be a power of 2.
- Sobol' sequences are generated to some number :math:`B` of bits.
- After :math:`2^B` points have been generated, the sequence would
- repeat. Hence, an error is raised.
- The number of bits can be controlled with the parameter `bits`.
- References
- ----------
- .. [1] I. M. Sobol', "The distribution of points in a cube and the accurate
- evaluation of integrals." Zh. Vychisl. Mat. i Mat. Phys., 7:784-802,
- 1967.
- .. [2] J. Matousek, "On the L2-discrepancy for anchored boxes."
- J. of Complexity 14, 527-556, 1998.
- .. [3] Art B. Owen, "Scrambling Sobol and Niederreiter-Xing points."
- Journal of Complexity, 14(4):466-489, December 1998.
- .. [4] S. Joe and F. Y. Kuo, "Constructing sobol sequences with better
- two-dimensional projections." SIAM Journal on Scientific Computing,
- 30(5):2635-2654, 2008.
- .. [5] Art B. Owen, "On dropping the first Sobol' point."
- :arxiv:`2008.08051`, 2020.
- Examples
- --------
- Generate samples from a low discrepancy sequence of Sobol'.
- >>> from scipy.stats import qmc
- >>> sampler = qmc.Sobol(d=2, scramble=False)
- >>> sample = sampler.random_base2(m=3)
- >>> sample
- array([[0. , 0. ],
- [0.5 , 0.5 ],
- [0.75 , 0.25 ],
- [0.25 , 0.75 ],
- [0.375, 0.375],
- [0.875, 0.875],
- [0.625, 0.125],
- [0.125, 0.625]])
- Compute the quality of the sample using the discrepancy criterion.
- >>> qmc.discrepancy(sample)
- 0.013882107204860938
- To continue an existing design, extra points can be obtained
- by calling again `random_base2`. Alternatively, you can skip some
- points like:
- >>> _ = sampler.reset()
- >>> _ = sampler.fast_forward(4)
- >>> sample_continued = sampler.random_base2(m=2)
- >>> sample_continued
- array([[0.375, 0.375],
- [0.875, 0.875],
- [0.625, 0.125],
- [0.125, 0.625]])
- Finally, samples can be scaled to bounds.
- >>> l_bounds = [0, 2]
- >>> u_bounds = [10, 5]
- >>> qmc.scale(sample_continued, l_bounds, u_bounds)
- array([[3.75 , 3.125],
- [8.75 , 4.625],
- [6.25 , 2.375],
- [1.25 , 3.875]])
- """
- MAXDIM: ClassVar[int] = _MAXDIM
- def __init__(
- self, d: IntNumber, *, scramble: bool = True,
- bits: Optional[IntNumber] = None, seed: SeedType = None,
- optimization: Optional[Literal["random-cd", "lloyd"]] = None
- ) -> None:
- # Used in `scipy.integrate.qmc_quad`
- self._init_quad = {'d': d, 'scramble': True, 'bits': bits,
- 'optimization': optimization}
- super().__init__(d=d, optimization=optimization, seed=seed)
- if d > self.MAXDIM:
- raise ValueError(
- f"Maximum supported dimensionality is {self.MAXDIM}."
- )
- self.bits = bits
- self.dtype_i: type
- if self.bits is None:
- self.bits = 30
- if self.bits <= 32:
- self.dtype_i = np.uint32
- elif 32 < self.bits <= 64:
- self.dtype_i = np.uint64
- else:
- raise ValueError("Maximum supported 'bits' is 64")
- self.maxn = 2**self.bits
- # v is d x maxbit matrix
- self._sv: np.ndarray = np.zeros((d, self.bits), dtype=self.dtype_i)
- _initialize_v(self._sv, dim=d, bits=self.bits)
- if not scramble:
- self._shift: np.ndarray = np.zeros(d, dtype=self.dtype_i)
- else:
- # scramble self._shift and self._sv
- self._scramble()
- self._quasi = self._shift.copy()
- # normalization constant with the largest possible number
- # calculate in Python to not overflow int with 2**64
- self._scale = 1.0 / 2 ** self.bits
- self._first_point = (self._quasi * self._scale).reshape(1, -1)
- # explicit casting to float64
- self._first_point = self._first_point.astype(np.float64)
- def _scramble(self) -> None:
- """Scramble the sequence using LMS+shift."""
- # Generate shift vector
- self._shift = np.dot(
- rng_integers(self.rng, 2, size=(self.d, self.bits),
- dtype=self.dtype_i),
- 2 ** np.arange(self.bits, dtype=self.dtype_i),
- )
- # Generate lower triangular matrices (stacked across dimensions)
- ltm = np.tril(rng_integers(self.rng, 2,
- size=(self.d, self.bits, self.bits),
- dtype=self.dtype_i))
- _cscramble(
- dim=self.d, bits=self.bits, # type: ignore[arg-type]
- ltm=ltm, sv=self._sv
- )
- def _random(
- self, n: IntNumber = 1, *, workers: IntNumber = 1
- ) -> np.ndarray:
- """Draw next point(s) in the Sobol' sequence.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space. Default is 1.
- Returns
- -------
- sample : array_like (n, d)
- Sobol' sample.
- """
- sample: np.ndarray = np.empty((n, self.d), dtype=np.float64)
- if n == 0:
- return sample
- total_n = self.num_generated + n
- if total_n > self.maxn:
- msg = (
- f"At most 2**{self.bits}={self.maxn} distinct points can be "
- f"generated. {self.num_generated} points have been previously "
- f"generated, then: n={self.num_generated}+{n}={total_n}. "
- )
- if self.bits != 64:
- msg += "Consider increasing `bits`."
- raise ValueError(msg)
- if self.num_generated == 0:
- # verify n is 2**n
- if not (n & (n - 1) == 0):
- warnings.warn("The balance properties of Sobol' points require"
- " n to be a power of 2.", stacklevel=2)
- if n == 1:
- sample = self._first_point
- else:
- _draw(
- n=n - 1, num_gen=self.num_generated, dim=self.d,
- scale=self._scale, sv=self._sv, quasi=self._quasi,
- sample=sample
- )
- sample = np.concatenate(
- [self._first_point, sample]
- )[:n] # type: ignore[misc]
- else:
- _draw(
- n=n, num_gen=self.num_generated - 1, dim=self.d,
- scale=self._scale, sv=self._sv, quasi=self._quasi,
- sample=sample
- )
- return sample
- def random_base2(self, m: IntNumber) -> np.ndarray:
- """Draw point(s) from the Sobol' sequence.
- This function draws :math:`n=2^m` points in the parameter space
- ensuring the balance properties of the sequence.
- Parameters
- ----------
- m : int
- Logarithm in base 2 of the number of samples; i.e., n = 2^m.
- Returns
- -------
- sample : array_like (n, d)
- Sobol' sample.
- """
- n = 2 ** m
- total_n = self.num_generated + n
- if not (total_n & (total_n - 1) == 0):
- raise ValueError("The balance properties of Sobol' points require "
- "n to be a power of 2. {0} points have been "
- "previously generated, then: n={0}+2**{1}={2}. "
- "If you still want to do this, the function "
- "'Sobol.random()' can be used."
- .format(self.num_generated, m, total_n))
- return self.random(n)
- def reset(self) -> Sobol:
- """Reset the engine to base state.
- Returns
- -------
- engine : Sobol
- Engine reset to its base state.
- """
- super().reset()
- self._quasi = self._shift.copy()
- return self
- def fast_forward(self, n: IntNumber) -> Sobol:
- """Fast-forward the sequence by `n` positions.
- Parameters
- ----------
- n : int
- Number of points to skip in the sequence.
- Returns
- -------
- engine : Sobol
- The fast-forwarded engine.
- """
- if self.num_generated == 0:
- _fast_forward(
- n=n - 1, num_gen=self.num_generated, dim=self.d,
- sv=self._sv, quasi=self._quasi
- )
- else:
- _fast_forward(
- n=n, num_gen=self.num_generated - 1, dim=self.d,
- sv=self._sv, quasi=self._quasi
- )
- self.num_generated += n
- return self
- class PoissonDisk(QMCEngine):
- """Poisson disk sampling.
- Parameters
- ----------
- d : int
- Dimension of the parameter space.
- radius : float
- Minimal distance to keep between points when sampling new candidates.
- hypersphere : {"volume", "surface"}, optional
- Sampling strategy to generate potential candidates to be added in the
- final sample. Default is "volume".
- * ``volume``: original Bridson algorithm as described in [1]_.
- New candidates are sampled *within* the hypersphere.
- * ``surface``: only sample the surface of the hypersphere.
- ncandidates : int
- Number of candidates to sample per iteration. More candidates result
- in a denser sampling as more candidates can be accepted per iteration.
- optimization : {None, "random-cd", "lloyd"}, optional
- Whether to use an optimization scheme to improve the quality after
- sampling. Note that this is a post-processing step that does not
- guarantee that all properties of the sample will be conserved.
- Default is None.
- * ``random-cd``: random permutations of coordinates to lower the
- centered discrepancy. The best sample based on the centered
- discrepancy is constantly updated. Centered discrepancy-based
- sampling shows better space-filling robustness toward 2D and 3D
- subprojections compared to using other discrepancy measures.
- * ``lloyd``: Perturb samples using a modified Lloyd-Max algorithm.
- The process converges to equally spaced samples.
- .. versionadded:: 1.10.0
- seed : {None, int, `numpy.random.Generator`}, optional
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Notes
- -----
- Poisson disk sampling is an iterative sampling strategy. Starting from
- a seed sample, `ncandidates` are sampled in the hypersphere
- surrounding the seed. Candidates bellow a certain `radius` or outside the
- domain are rejected. New samples are added in a pool of sample seed. The
- process stops when the pool is empty or when the number of required
- samples is reached.
- The maximum number of point that a sample can contain is directly linked
- to the `radius`. As the dimension of the space increases, a higher radius
- spreads the points further and help overcome the curse of dimensionality.
- See the :ref:`quasi monte carlo tutorial <quasi-monte-carlo>` for more
- details.
- .. warning::
- The algorithm is more suitable for low dimensions and sampling size
- due to its iterative nature and memory requirements.
- Selecting a small radius with a high dimension would
- mean that the space could contain more samples than using lower
- dimension or a bigger radius.
- Some code taken from [2]_, written consent given on 31.03.2021
- by the original author, Shamis, for free use in SciPy under
- the 3-clause BSD.
- References
- ----------
- .. [1] Robert Bridson, "Fast Poisson Disk Sampling in Arbitrary
- Dimensions." SIGGRAPH, 2007.
- .. [2] `StackOverflow <https://stackoverflow.com/questions/66047540>`__.
- Examples
- --------
- Generate a 2D sample using a `radius` of 0.2.
- >>> import numpy as np
- >>> import matplotlib.pyplot as plt
- >>> from matplotlib.collections import PatchCollection
- >>> from scipy.stats import qmc
- >>>
- >>> rng = np.random.default_rng()
- >>> radius = 0.2
- >>> engine = qmc.PoissonDisk(d=2, radius=radius, seed=rng)
- >>> sample = engine.random(20)
- Visualizing the 2D sample and showing that no points are closer than
- `radius`. ``radius/2`` is used to visualize non-intersecting circles.
- If two samples are exactly at `radius` from each other, then their circle
- of radius ``radius/2`` will touch.
- >>> fig, ax = plt.subplots()
- >>> _ = ax.scatter(sample[:, 0], sample[:, 1])
- >>> circles = [plt.Circle((xi, yi), radius=radius/2, fill=False)
- ... for xi, yi in sample]
- >>> collection = PatchCollection(circles, match_original=True)
- >>> ax.add_collection(collection)
- >>> _ = ax.set(aspect='equal', xlabel=r'$x_1$', ylabel=r'$x_2$',
- ... xlim=[0, 1], ylim=[0, 1])
- >>> plt.show()
- Such visualization can be seen as circle packing: how many circle can
- we put in the space. It is a np-hard problem. The method `fill_space`
- can be used to add samples until no more samples can be added. This is
- a hard problem and parameters may need to be adjusted manually. Beware of
- the dimension: as the dimensionality increases, the number of samples
- required to fill the space increases exponentially
- (curse-of-dimensionality).
- """
- def __init__(
- self,
- d: IntNumber,
- *,
- radius: DecimalNumber = 0.05,
- hypersphere: Literal["volume", "surface"] = "volume",
- ncandidates: IntNumber = 30,
- optimization: Optional[Literal["random-cd", "lloyd"]] = None,
- seed: SeedType = None
- ) -> None:
- # Used in `scipy.integrate.qmc_quad`
- self._init_quad = {'d': d, 'radius': radius,
- 'hypersphere': hypersphere,
- 'ncandidates': ncandidates,
- 'optimization': optimization}
- super().__init__(d=d, optimization=optimization, seed=seed)
- hypersphere_sample = {
- "volume": self._hypersphere_volume_sample,
- "surface": self._hypersphere_surface_sample
- }
- try:
- self.hypersphere_method = hypersphere_sample[hypersphere]
- except KeyError as exc:
- message = (
- f"{hypersphere!r} is not a valid hypersphere sampling"
- f" method. It must be one of {set(hypersphere_sample)!r}")
- raise ValueError(message) from exc
- # size of the sphere from which the samples are drawn relative to the
- # size of a disk (radius)
- # for the surface sampler, all new points are almost exactly 1 radius
- # away from at least one existing sample +eps to avoid rejection
- self.radius_factor = 2 if hypersphere == "volume" else 1.001
- self.radius = radius
- self.radius_squared = self.radius**2
- # sample to generate per iteration in the hypersphere around center
- self.ncandidates = ncandidates
- with np.errstate(divide='ignore'):
- self.cell_size = self.radius / np.sqrt(self.d)
- self.grid_size = (
- np.ceil(np.ones(self.d) / self.cell_size)
- ).astype(int)
- self._initialize_grid_pool()
- def _initialize_grid_pool(self):
- """Sampling pool and sample grid."""
- self.sample_pool = []
- # Positions of cells
- # n-dim value for each grid cell
- self.sample_grid = np.empty(
- np.append(self.grid_size, self.d),
- dtype=np.float32
- )
- # Initialise empty cells with NaNs
- self.sample_grid.fill(np.nan)
- def _random(
- self, n: IntNumber = 1, *, workers: IntNumber = 1
- ) -> np.ndarray:
- """Draw `n` in the interval ``[0, 1]``.
- Note that it can return fewer samples if the space is full.
- See the note section of the class.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space. Default is 1.
- Returns
- -------
- sample : array_like (n, d)
- QMC sample.
- """
- if n == 0 or self.d == 0:
- return np.empty((n, self.d))
- def in_limits(sample: np.ndarray) -> bool:
- return (sample.max() <= 1.) and (sample.min() >= 0.)
- def in_neighborhood(candidate: np.ndarray, n: int = 2) -> bool:
- """
- Check if there are samples closer than ``radius_squared`` to the
- `candidate` sample.
- """
- indices = (candidate / self.cell_size).astype(int)
- ind_min = np.maximum(indices - n, np.zeros(self.d, dtype=int))
- ind_max = np.minimum(indices + n + 1, self.grid_size)
- # Check if the center cell is empty
- if not np.isnan(self.sample_grid[tuple(indices)][0]):
- return True
- a = [slice(ind_min[i], ind_max[i]) for i in range(self.d)]
- # guards against: invalid value encountered in less as we are
- # comparing with nan and returns False. Which is wanted.
- with np.errstate(invalid='ignore'):
- if np.any(
- np.sum(
- np.square(candidate - self.sample_grid[tuple(a)]),
- axis=self.d
- ) < self.radius_squared
- ):
- return True
- return False
- def add_sample(candidate: np.ndarray) -> None:
- self.sample_pool.append(candidate)
- indices = (candidate / self.cell_size).astype(int)
- self.sample_grid[tuple(indices)] = candidate
- curr_sample.append(candidate)
- curr_sample: List[np.ndarray] = []
- if len(self.sample_pool) == 0:
- # the pool is being initialized with a single random sample
- add_sample(self.rng.random(self.d))
- num_drawn = 1
- else:
- num_drawn = 0
- # exhaust sample pool to have up to n sample
- while len(self.sample_pool) and num_drawn < n:
- # select a sample from the available pool
- idx_center = rng_integers(self.rng, len(self.sample_pool))
- center = self.sample_pool[idx_center]
- del self.sample_pool[idx_center]
- # generate candidates around the center sample
- candidates = self.hypersphere_method(
- center, self.radius * self.radius_factor, self.ncandidates
- )
- # keep candidates that satisfy some conditions
- for candidate in candidates:
- if in_limits(candidate) and not in_neighborhood(candidate):
- add_sample(candidate)
- num_drawn += 1
- if num_drawn >= n:
- break
- self.num_generated += num_drawn
- return np.array(curr_sample)
- def fill_space(self) -> np.ndarray:
- """Draw ``n`` samples in the interval ``[0, 1]``.
- Unlike `random`, this method will try to add points until
- the space is full. Depending on ``candidates`` (and to a lesser extent
- other parameters), some empty areas can still be present in the sample.
- .. warning::
- This can be extremely slow in high dimensions or if the
- ``radius`` is very small-with respect to the dimensionality.
- Returns
- -------
- sample : array_like (n, d)
- QMC sample.
- """
- return self.random(np.inf) # type: ignore[arg-type]
- def reset(self) -> PoissonDisk:
- """Reset the engine to base state.
- Returns
- -------
- engine : PoissonDisk
- Engine reset to its base state.
- """
- super().reset()
- self._initialize_grid_pool()
- return self
- def _hypersphere_volume_sample(
- self, center: np.ndarray, radius: DecimalNumber,
- candidates: IntNumber = 1
- ) -> np.ndarray:
- """Uniform sampling within hypersphere."""
- # should remove samples within r/2
- x = self.rng.standard_normal(size=(candidates, self.d))
- ssq = np.sum(x**2, axis=1)
- fr = radius * gammainc(self.d/2, ssq/2)**(1/self.d) / np.sqrt(ssq)
- fr_tiled = np.tile(
- fr.reshape(-1, 1), (1, self.d) # type: ignore[arg-type]
- )
- p = center + np.multiply(x, fr_tiled)
- return p
- def _hypersphere_surface_sample(
- self, center: np.ndarray, radius: DecimalNumber,
- candidates: IntNumber = 1
- ) -> np.ndarray:
- """Uniform sampling on the hypersphere's surface."""
- vec = self.rng.standard_normal(size=(candidates, self.d))
- vec /= np.linalg.norm(vec, axis=1)[:, None]
- p = center + np.multiply(vec, radius)
- return p
- class MultivariateNormalQMC:
- r"""QMC sampling from a multivariate Normal :math:`N(\mu, \Sigma)`.
- Parameters
- ----------
- mean : array_like (d,)
- The mean vector. Where ``d`` is the dimension.
- cov : array_like (d, d), optional
- The covariance matrix. If omitted, use `cov_root` instead.
- If both `cov` and `cov_root` are omitted, use the identity matrix.
- cov_root : array_like (d, d'), optional
- A root decomposition of the covariance matrix, where ``d'`` may be less
- than ``d`` if the covariance is not full rank. If omitted, use `cov`.
- inv_transform : bool, optional
- If True, use inverse transform instead of Box-Muller. Default is True.
- engine : QMCEngine, optional
- Quasi-Monte Carlo engine sampler. If None, `Sobol` is used.
- seed : {None, int, `numpy.random.Generator`}, optional
- Used only if `engine` is None.
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Examples
- --------
- >>> import matplotlib.pyplot as plt
- >>> from scipy.stats import qmc
- >>> dist = qmc.MultivariateNormalQMC(mean=[0, 5], cov=[[1, 0], [0, 1]])
- >>> sample = dist.random(512)
- >>> _ = plt.scatter(sample[:, 0], sample[:, 1])
- >>> plt.show()
- """
- def __init__(
- self, mean: npt.ArrayLike, cov: Optional[npt.ArrayLike] = None, *,
- cov_root: Optional[npt.ArrayLike] = None,
- inv_transform: bool = True,
- engine: Optional[QMCEngine] = None,
- seed: SeedType = None
- ) -> None:
- mean = np.array(mean, copy=False, ndmin=1)
- d = mean.shape[0]
- if cov is not None:
- # covariance matrix provided
- cov = np.array(cov, copy=False, ndmin=2)
- # check for square/symmetric cov matrix and mean vector has the
- # same d
- if not mean.shape[0] == cov.shape[0]:
- raise ValueError("Dimension mismatch between mean and "
- "covariance.")
- if not np.allclose(cov, cov.transpose()):
- raise ValueError("Covariance matrix is not symmetric.")
- # compute Cholesky decomp; if it fails, do the eigen decomposition
- try:
- cov_root = np.linalg.cholesky(cov).transpose()
- except np.linalg.LinAlgError:
- eigval, eigvec = np.linalg.eigh(cov)
- if not np.all(eigval >= -1.0e-8):
- raise ValueError("Covariance matrix not PSD.")
- eigval = np.clip(eigval, 0.0, None)
- cov_root = (eigvec * np.sqrt(eigval)).transpose()
- elif cov_root is not None:
- # root decomposition provided
- cov_root = np.atleast_2d(cov_root)
- if not mean.shape[0] == cov_root.shape[0]:
- raise ValueError("Dimension mismatch between mean and "
- "covariance.")
- else:
- # corresponds to identity covariance matrix
- cov_root = None
- self._inv_transform = inv_transform
- if not inv_transform:
- # to apply Box-Muller, we need an even number of dimensions
- engine_dim = 2 * math.ceil(d / 2)
- else:
- engine_dim = d
- if engine is None:
- self.engine = Sobol(
- d=engine_dim, scramble=True, bits=30, seed=seed
- ) # type: QMCEngine
- elif isinstance(engine, QMCEngine):
- if engine.d != engine_dim:
- raise ValueError("Dimension of `engine` must be consistent"
- " with dimensions of mean and covariance."
- " If `inv_transform` is False, it must be"
- " an even number.")
- self.engine = engine
- else:
- raise ValueError("`engine` must be an instance of "
- "`scipy.stats.qmc.QMCEngine` or `None`.")
- self._mean = mean
- self._corr_matrix = cov_root
- self._d = d
- def random(self, n: IntNumber = 1) -> np.ndarray:
- """Draw `n` QMC samples from the multivariate Normal.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space. Default is 1.
- Returns
- -------
- sample : array_like (n, d)
- Sample.
- """
- base_samples = self._standard_normal_samples(n)
- return self._correlate(base_samples)
- def _correlate(self, base_samples: np.ndarray) -> np.ndarray:
- if self._corr_matrix is not None:
- return base_samples @ self._corr_matrix + self._mean
- else:
- # avoid multiplying with identity here
- return base_samples + self._mean
- def _standard_normal_samples(self, n: IntNumber = 1) -> np.ndarray:
- """Draw `n` QMC samples from the standard Normal :math:`N(0, I_d)`.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space. Default is 1.
- Returns
- -------
- sample : array_like (n, d)
- Sample.
- """
- # get base samples
- samples = self.engine.random(n)
- if self._inv_transform:
- # apply inverse transform
- # (values to close to 0/1 result in inf values)
- return stats.norm.ppf(0.5 + (1 - 1e-10) * (samples - 0.5)) # type: ignore[attr-defined]
- else:
- # apply Box-Muller transform (note: indexes starting from 1)
- even = np.arange(0, samples.shape[-1], 2)
- Rs = np.sqrt(-2 * np.log(samples[:, even]))
- thetas = 2 * math.pi * samples[:, 1 + even]
- cos = np.cos(thetas)
- sin = np.sin(thetas)
- transf_samples = np.stack([Rs * cos, Rs * sin],
- -1).reshape(n, -1)
- # make sure we only return the number of dimension requested
- return transf_samples[:, : self._d]
- class MultinomialQMC:
- r"""QMC sampling from a multinomial distribution.
- Parameters
- ----------
- pvals : array_like (k,)
- Vector of probabilities of size ``k``, where ``k`` is the number
- of categories. Elements must be non-negative and sum to 1.
- n_trials : int
- Number of trials.
- engine : QMCEngine, optional
- Quasi-Monte Carlo engine sampler. If None, `Sobol` is used.
- seed : {None, int, `numpy.random.Generator`}, optional
- Used only if `engine` is None.
- If `seed` is an int or None, a new `numpy.random.Generator` is
- created using ``np.random.default_rng(seed)``.
- If `seed` is already a ``Generator`` instance, then the provided
- instance is used.
- Examples
- --------
- Let's define 3 categories and for a given sample, the sum of the trials
- of each category is 8. The number of trials per category is determined
- by the `pvals` associated to each category.
- Then, we sample this distribution 64 times.
- >>> import matplotlib.pyplot as plt
- >>> from scipy.stats import qmc
- >>> dist = qmc.MultinomialQMC(
- ... pvals=[0.2, 0.4, 0.4], n_trials=10, engine=qmc.Halton(d=1)
- ... )
- >>> sample = dist.random(64)
- We can plot the sample and verify that the median of number of trials
- for each category is following the `pvals`. That would be
- ``pvals * n_trials = [2, 4, 4]``.
- >>> fig, ax = plt.subplots()
- >>> ax.yaxis.get_major_locator().set_params(integer=True)
- >>> _ = ax.boxplot(sample)
- >>> ax.set(xlabel="Categories", ylabel="Trials")
- >>> plt.show()
- """
- def __init__(
- self, pvals: npt.ArrayLike, n_trials: IntNumber,
- *, engine: Optional[QMCEngine] = None,
- seed: SeedType = None
- ) -> None:
- self.pvals = np.array(pvals, copy=False, ndmin=1)
- if np.min(pvals) < 0:
- raise ValueError('Elements of pvals must be non-negative.')
- if not np.isclose(np.sum(pvals), 1):
- raise ValueError('Elements of pvals must sum to 1.')
- self.n_trials = n_trials
- if engine is None:
- self.engine = Sobol(
- d=1, scramble=True, bits=30, seed=seed
- ) # type: QMCEngine
- elif isinstance(engine, QMCEngine):
- if engine.d != 1:
- raise ValueError("Dimension of `engine` must be 1.")
- self.engine = engine
- else:
- raise ValueError("`engine` must be an instance of "
- "`scipy.stats.qmc.QMCEngine` or `None`.")
- def random(self, n: IntNumber = 1) -> np.ndarray:
- """Draw `n` QMC samples from the multinomial distribution.
- Parameters
- ----------
- n : int, optional
- Number of samples to generate in the parameter space. Default is 1.
- Returns
- -------
- samples : array_like (n, pvals)
- Sample.
- """
- sample = np.empty((n, len(self.pvals)))
- for i in range(n):
- base_draws = self.engine.random(self.n_trials).ravel()
- p_cumulative = np.empty_like(self.pvals, dtype=float)
- _fill_p_cumulative(np.array(self.pvals, dtype=float), p_cumulative)
- sample_ = np.zeros_like(self.pvals, dtype=int)
- _categorize(base_draws, p_cumulative, sample_)
- sample[i] = sample_
- return sample
- def _select_optimizer(
- optimization: Optional[Literal["random-cd", "lloyd"]], config: Dict
- ) -> Optional[Callable]:
- """A factory for optimization methods."""
- optimization_method: Dict[str, Callable] = {
- "random-cd": _random_cd,
- "lloyd": _lloyd_centroidal_voronoi_tessellation
- }
- optimizer: Optional[partial]
- if optimization is not None:
- try:
- optimization = optimization.lower() # type: ignore[assignment]
- optimizer_ = optimization_method[optimization]
- except KeyError as exc:
- message = (f"{optimization!r} is not a valid optimization"
- f" method. It must be one of"
- f" {set(optimization_method)!r}")
- raise ValueError(message) from exc
- # config
- optimizer = partial(optimizer_, **config)
- else:
- optimizer = None
- return optimizer
- def _random_cd(
- best_sample: np.ndarray, n_iters: int, n_nochange: int, rng: GeneratorType,
- **kwargs: Dict
- ) -> np.ndarray:
- """Optimal LHS on CD.
- Create a base LHS and do random permutations of coordinates to
- lower the centered discrepancy.
- Because it starts with a normal LHS, it also works with the
- `centered` keyword argument.
- Two stopping criterion are used to stop the algorithm: at most,
- `n_iters` iterations are performed; or if there is no improvement
- for `n_nochange` consecutive iterations.
- """
- del kwargs # only use keywords which are defined, needed by factory
- n, d = best_sample.shape
- if d == 0 or n == 0:
- return np.empty((n, d))
- best_disc = discrepancy(best_sample)
- if n == 1:
- return best_sample
- bounds = ([0, d - 1],
- [0, n - 1],
- [0, n - 1])
- n_nochange_ = 0
- n_iters_ = 0
- while n_nochange_ < n_nochange and n_iters_ < n_iters:
- n_iters_ += 1
- col = rng_integers(rng, *bounds[0], endpoint=True) # type: ignore[misc]
- row_1 = rng_integers(rng, *bounds[1], endpoint=True) # type: ignore[misc]
- row_2 = rng_integers(rng, *bounds[2], endpoint=True) # type: ignore[misc]
- disc = _perturb_discrepancy(best_sample,
- row_1, row_2, col,
- best_disc)
- if disc < best_disc:
- best_sample[row_1, col], best_sample[row_2, col] = (
- best_sample[row_2, col], best_sample[row_1, col])
- best_disc = disc
- n_nochange_ = 0
- else:
- n_nochange_ += 1
- return best_sample
- def _l1_norm(sample: np.ndarray) -> float:
- return distance.pdist(sample, 'cityblock').min()
- def _lloyd_iteration(
- sample: np.ndarray,
- decay: float,
- qhull_options: str
- ) -> np.ndarray:
- """Lloyd-Max algorithm iteration.
- Based on the implementation of Stéfan van der Walt:
- https://github.com/stefanv/lloyd
- which is:
- Copyright (c) 2021-04-21 Stéfan van der Walt
- https://github.com/stefanv/lloyd
- MIT License
- Parameters
- ----------
- sample : array_like (n, d)
- The sample to iterate on.
- decay : float
- Relaxation decay. A positive value would move the samples toward
- their centroid, and negative value would move them away.
- 1 would move the samples to their centroid.
- qhull_options : str
- Additional options to pass to Qhull. See Qhull manual
- for details. (Default: "Qbb Qc Qz Qj Qx" for ndim > 4 and
- "Qbb Qc Qz Qj" otherwise.)
- Returns
- -------
- sample : array_like (n, d)
- The sample after an iteration of Lloyd's algorithm.
- """
- new_sample = np.empty_like(sample)
- voronoi = Voronoi(sample, qhull_options=qhull_options)
- for ii, idx in enumerate(voronoi.point_region):
- # the region is a series of indices into self.voronoi.vertices
- # remove samples at infinity, designated by index -1
- region = [i for i in voronoi.regions[idx] if i != -1]
- # get the vertices for this region
- verts = voronoi.vertices[region]
- # clipping would be wrong, we need to intersect
- # verts = np.clip(verts, 0, 1)
- # move samples towards centroids:
- # Centroid in n-D is the mean for uniformly distributed nodes
- # of a geometry.
- centroid = np.mean(verts, axis=0)
- new_sample[ii] = sample[ii] + (centroid - sample[ii]) * decay
- # only update sample to centroid within the region
- is_valid = np.all(np.logical_and(new_sample >= 0, new_sample <= 1), axis=1)
- sample[is_valid] = new_sample[is_valid]
- return sample
- def _lloyd_centroidal_voronoi_tessellation(
- sample: npt.ArrayLike,
- *,
- tol: DecimalNumber = 1e-5,
- maxiter: IntNumber = 10,
- qhull_options: Optional[str] = None,
- **kwargs: Dict
- ) -> np.ndarray:
- """Approximate Centroidal Voronoi Tessellation.
- Perturb samples in N-dimensions using Lloyd-Max algorithm.
- Parameters
- ----------
- sample : array_like (n, d)
- The sample to iterate on. With ``n`` the number of samples and ``d``
- the dimension. Samples must be in :math:`[0, 1]^d`, with ``d>=2``.
- tol : float, optional
- Tolerance for termination. If the min of the L1-norm over the samples
- changes less than `tol`, it stops the algorithm. Default is 1e-5.
- maxiter : int, optional
- Maximum number of iterations. It will stop the algorithm even if
- `tol` is above the threshold.
- Too many iterations tend to cluster the samples as a hypersphere.
- Default is 10.
- qhull_options : str, optional
- Additional options to pass to Qhull. See Qhull manual
- for details. (Default: "Qbb Qc Qz Qj Qx" for ndim > 4 and
- "Qbb Qc Qz Qj" otherwise.)
- Returns
- -------
- sample : array_like (n, d)
- The sample after being processed by Lloyd-Max algorithm.
- Notes
- -----
- Lloyd-Max algorithm is an iterative process with the purpose of improving
- the dispersion of samples. For given sample: (i) compute a Voronoi
- Tessellation; (ii) find the centroid of each Voronoi cell; (iii) move the
- samples toward the centroid of their respective cell. See [1]_, [2]_.
- A relaxation factor is used to control how fast samples can move at each
- iteration. This factor is starting at 2 and ending at 1 after `maxiter`
- following an exponential decay.
- The process converges to equally spaced samples. It implies that measures
- like the discrepancy could suffer from too many iterations. On the other
- hand, L1 and L2 distances should improve. This is especially true with
- QMC methods which tend to favor the discrepancy over other criteria.
- .. note::
- The current implementation does not intersect the Voronoi Tessellation
- with the boundaries. This implies that for a low number of samples,
- empirically below 20, no Voronoi cell is touching the boundaries.
- Hence, samples cannot be moved close to the boundaries.
- Further improvements could consider the samples at infinity so that
- all boundaries are segments of some Voronoi cells. This would fix
- the computation of the centroid position.
- .. warning::
- The Voronoi Tessellation step is expensive and quickly becomes
- intractable with dimensions as low as 10 even for a sample
- of size as low as 1000.
- .. versionadded:: 1.9.0
- References
- ----------
- .. [1] Lloyd. "Least Squares Quantization in PCM".
- IEEE Transactions on Information Theory, 1982.
- .. [2] Max J. "Quantizing for minimum distortion".
- IEEE Transactions on Information Theory, 1960.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.spatial import distance
- >>> rng = np.random.default_rng()
- >>> sample = rng.random((128, 2))
- .. note::
- The samples need to be in :math:`[0, 1]^d`. `scipy.stats.qmc.scale`
- can be used to scale the samples from their
- original bounds to :math:`[0, 1]^d`. And back to their original bounds.
- Compute the quality of the sample using the L1 criterion.
- >>> def l1_norm(sample):
- ... return distance.pdist(sample, 'cityblock').min()
- >>> l1_norm(sample)
- 0.00161... # random
- Now process the sample using Lloyd's algorithm and check the improvement
- on the L1. The value should increase.
- >>> sample = _lloyd_centroidal_voronoi_tessellation(sample)
- >>> l1_norm(sample)
- 0.0278... # random
- """
- del kwargs # only use keywords which are defined, needed by factory
- sample = np.asarray(sample).copy()
- if not sample.ndim == 2:
- raise ValueError('`sample` is not a 2D array')
- if not sample.shape[1] >= 2:
- raise ValueError('`sample` dimension is not >= 2')
- # Checking that sample is within the hypercube
- if (sample.max() > 1.) or (sample.min() < 0.):
- raise ValueError('`sample` is not in unit hypercube')
- if qhull_options is None:
- qhull_options = 'Qbb Qc Qz QJ'
- if sample.shape[1] >= 5:
- qhull_options += ' Qx'
- # Fit an exponential to be 2 at 0 and 1 at `maxiter`.
- # The decay is used for relaxation.
- # analytical solution for y=exp(-maxiter/x) - 0.1
- root = -maxiter / np.log(0.1)
- decay = [np.exp(-x / root)+0.9 for x in range(maxiter)]
- l1_old = _l1_norm(sample=sample)
- for i in range(maxiter):
- sample = _lloyd_iteration(
- sample=sample, decay=decay[i],
- qhull_options=qhull_options,
- )
- l1_new = _l1_norm(sample=sample)
- if abs(l1_new - l1_old) < tol:
- break
- else:
- l1_old = l1_new
- return sample
- def _validate_workers(workers: IntNumber = 1) -> IntNumber:
- """Validate `workers` based on platform and value.
- Parameters
- ----------
- workers : int, optional
- Number of workers to use for parallel processing. If -1 is
- given all CPU threads are used. Default is 1.
- Returns
- -------
- Workers : int
- Number of CPU used by the algorithm
- """
- workers = int(workers)
- if workers == -1:
- workers = os.cpu_count() # type: ignore[assignment]
- if workers is None:
- raise NotImplementedError(
- "Cannot determine the number of cpus using os.cpu_count(), "
- "cannot use -1 for the number of workers"
- )
- elif workers <= 0:
- raise ValueError(f"Invalid number of workers: {workers}, must be -1 "
- "or > 0")
- return workers
- def _validate_bounds(
- l_bounds: npt.ArrayLike, u_bounds: npt.ArrayLike, d: int
- ) -> Tuple[np.ndarray, ...]:
- """Bounds input validation.
- Parameters
- ----------
- l_bounds, u_bounds : array_like (d,)
- Lower and upper bounds.
- d : int
- Dimension to use for broadcasting.
- Returns
- -------
- l_bounds, u_bounds : array_like (d,)
- Lower and upper bounds.
- """
- try:
- lower = np.broadcast_to(l_bounds, d)
- upper = np.broadcast_to(u_bounds, d)
- except ValueError as exc:
- msg = ("'l_bounds' and 'u_bounds' must be broadcastable and respect"
- " the sample dimension")
- raise ValueError(msg) from exc
- if not np.all(lower < upper):
- raise ValueError("Bounds are not consistent 'l_bounds' < 'u_bounds'")
- return lower, upper
|