test_multivariate.py 110 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905
  1. """
  2. Test functions for multivariate normal distributions.
  3. """
  4. import pickle
  5. from numpy.testing import (assert_allclose, assert_almost_equal,
  6. assert_array_almost_equal, assert_equal,
  7. assert_array_less, assert_)
  8. import pytest
  9. from pytest import raises as assert_raises
  10. from .test_continuous_basic import check_distribution_rvs
  11. import numpy
  12. import numpy as np
  13. import scipy.linalg
  14. from scipy.stats._multivariate import (_PSD,
  15. _lnB,
  16. _cho_inv_batch,
  17. multivariate_normal_frozen)
  18. from scipy.stats import (multivariate_normal, multivariate_hypergeom,
  19. matrix_normal, special_ortho_group, ortho_group,
  20. random_correlation, unitary_group, dirichlet,
  21. beta, wishart, multinomial, invwishart, chi2,
  22. invgamma, norm, uniform, ks_2samp, kstest, binom,
  23. hypergeom, multivariate_t, cauchy, normaltest,
  24. random_table, uniform_direction)
  25. from scipy.stats import _covariance, Covariance
  26. from scipy.integrate import romb
  27. from scipy.special import multigammaln
  28. from .common_tests import check_random_state_property
  29. from unittest.mock import patch
  30. def assert_close(res, ref, *args, **kwargs):
  31. res, ref = np.asarray(res), np.asarray(ref)
  32. assert_allclose(res, ref, *args, **kwargs)
  33. assert_equal(res.shape, ref.shape)
  34. class TestCovariance:
  35. def test_input_validation(self):
  36. message = "The input `precision` must be a square, two-dimensional..."
  37. with pytest.raises(ValueError, match=message):
  38. _covariance.CovViaPrecision(np.ones(2))
  39. message = "`precision.shape` must equal `covariance.shape`."
  40. with pytest.raises(ValueError, match=message):
  41. _covariance.CovViaPrecision(np.eye(3), covariance=np.eye(2))
  42. message = "The input `diagonal` must be a one-dimensional array..."
  43. with pytest.raises(ValueError, match=message):
  44. _covariance.CovViaDiagonal("alpaca")
  45. message = "The input `cholesky` must be a square, two-dimensional..."
  46. with pytest.raises(ValueError, match=message):
  47. _covariance.CovViaCholesky(np.ones(2))
  48. message = "The input `eigenvalues` must be a one-dimensional..."
  49. with pytest.raises(ValueError, match=message):
  50. _covariance.CovViaEigendecomposition(("alpaca", np.eye(2)))
  51. message = "The input `eigenvectors` must be a square..."
  52. with pytest.raises(ValueError, match=message):
  53. _covariance.CovViaEigendecomposition((np.ones(2), "alpaca"))
  54. message = "The shapes of `eigenvalues` and `eigenvectors` must be..."
  55. with pytest.raises(ValueError, match=message):
  56. _covariance.CovViaEigendecomposition(([1, 2, 3], np.eye(2)))
  57. _covariance_preprocessing = {"Diagonal": np.diag,
  58. "Precision": np.linalg.inv,
  59. "Cholesky": np.linalg.cholesky,
  60. "Eigendecomposition": np.linalg.eigh,
  61. "PSD": lambda x:
  62. _PSD(x, allow_singular=True)}
  63. _all_covariance_types = np.array(list(_covariance_preprocessing))
  64. _matrices = {"diagonal full rank": np.diag([1, 2, 3]),
  65. "general full rank": [[5, 1, 3], [1, 6, 4], [3, 4, 7]],
  66. "diagonal singular": np.diag([1, 0, 3]),
  67. "general singular": [[5, -1, 0], [-1, 5, 0], [0, 0, 0]]}
  68. _cov_types = {"diagonal full rank": _all_covariance_types,
  69. "general full rank": _all_covariance_types[1:],
  70. "diagonal singular": _all_covariance_types[[0, -2, -1]],
  71. "general singular": _all_covariance_types[-2:]}
  72. @pytest.mark.parametrize("cov_type_name", _all_covariance_types[:-1])
  73. def test_factories(self, cov_type_name):
  74. A = np.diag([1, 2, 3])
  75. x = [-4, 2, 5]
  76. cov_type = getattr(_covariance, f"CovVia{cov_type_name}")
  77. preprocessing = self._covariance_preprocessing[cov_type_name]
  78. factory = getattr(Covariance, f"from_{cov_type_name.lower()}")
  79. res = factory(preprocessing(A))
  80. ref = cov_type(preprocessing(A))
  81. assert type(res) == type(ref)
  82. assert_allclose(res.whiten(x), ref.whiten(x))
  83. @pytest.mark.parametrize("matrix_type", list(_matrices))
  84. @pytest.mark.parametrize("cov_type_name", _all_covariance_types)
  85. def test_covariance(self, matrix_type, cov_type_name):
  86. message = (f"CovVia{cov_type_name} does not support {matrix_type} "
  87. "matrices")
  88. if cov_type_name not in self._cov_types[matrix_type]:
  89. pytest.skip(message)
  90. A = self._matrices[matrix_type]
  91. cov_type = getattr(_covariance, f"CovVia{cov_type_name}")
  92. preprocessing = self._covariance_preprocessing[cov_type_name]
  93. psd = _PSD(A, allow_singular=True)
  94. # test properties
  95. cov_object = cov_type(preprocessing(A))
  96. assert_close(cov_object.log_pdet, psd.log_pdet)
  97. assert_equal(cov_object.rank, psd.rank)
  98. assert_equal(cov_object.shape, np.asarray(A).shape)
  99. assert_close(cov_object.covariance, np.asarray(A))
  100. # test whitening/coloring 1D x
  101. rng = np.random.default_rng(5292808890472453840)
  102. x = rng.random(size=3)
  103. res = cov_object.whiten(x)
  104. ref = x @ psd.U
  105. # res != ref in general; but res @ res == ref @ ref
  106. assert_close(res @ res, ref @ ref)
  107. if hasattr(cov_object, "_colorize") and "singular" not in matrix_type:
  108. # CovViaPSD does not have _colorize
  109. assert_close(cov_object.colorize(res), x)
  110. # test whitening/coloring 3D x
  111. x = rng.random(size=(2, 4, 3))
  112. res = cov_object.whiten(x)
  113. ref = x @ psd.U
  114. assert_close((res**2).sum(axis=-1), (ref**2).sum(axis=-1))
  115. if hasattr(cov_object, "_colorize") and "singular" not in matrix_type:
  116. assert_close(cov_object.colorize(res), x)
  117. @pytest.mark.parametrize("size", [None, tuple(), 1, (2, 4, 3)])
  118. @pytest.mark.parametrize("matrix_type", list(_matrices))
  119. @pytest.mark.parametrize("cov_type_name", _all_covariance_types)
  120. def test_mvn_with_covariance(self, size, matrix_type, cov_type_name):
  121. message = (f"CovVia{cov_type_name} does not support {matrix_type} "
  122. "matrices")
  123. if cov_type_name not in self._cov_types[matrix_type]:
  124. pytest.skip(message)
  125. A = self._matrices[matrix_type]
  126. cov_type = getattr(_covariance, f"CovVia{cov_type_name}")
  127. preprocessing = self._covariance_preprocessing[cov_type_name]
  128. mean = [0.1, 0.2, 0.3]
  129. cov_object = cov_type(preprocessing(A))
  130. mvn = multivariate_normal
  131. dist0 = multivariate_normal(mean, A, allow_singular=True)
  132. dist1 = multivariate_normal(mean, cov_object, allow_singular=True)
  133. rng = np.random.default_rng(5292808890472453840)
  134. x = rng.multivariate_normal(mean, A, size=size)
  135. rng = np.random.default_rng(5292808890472453840)
  136. x1 = mvn.rvs(mean, cov_object, size=size, random_state=rng)
  137. rng = np.random.default_rng(5292808890472453840)
  138. x2 = mvn(mean, cov_object, seed=rng).rvs(size=size)
  139. if isinstance(cov_object, _covariance.CovViaPSD):
  140. assert_close(x1, np.squeeze(x)) # for backward compatibility
  141. assert_close(x2, np.squeeze(x))
  142. else:
  143. assert_equal(x1.shape, x.shape)
  144. assert_equal(x2.shape, x.shape)
  145. assert_close(x2, x1)
  146. assert_close(mvn.pdf(x, mean, cov_object), dist0.pdf(x))
  147. assert_close(dist1.pdf(x), dist0.pdf(x))
  148. assert_close(mvn.logpdf(x, mean, cov_object), dist0.logpdf(x))
  149. assert_close(dist1.logpdf(x), dist0.logpdf(x))
  150. assert_close(mvn.entropy(mean, cov_object), dist0.entropy())
  151. assert_close(dist1.entropy(), dist0.entropy())
  152. @pytest.mark.parametrize("size", [tuple(), (2, 4, 3)])
  153. @pytest.mark.parametrize("cov_type_name", _all_covariance_types)
  154. def test_mvn_with_covariance_cdf(self, size, cov_type_name):
  155. # This is split from the test above because it's slow to be running
  156. # with all matrix types, and there's no need because _mvn.mvnun
  157. # does the calculation. All Covariance needs to do is pass is
  158. # provide the `covariance` attribute.
  159. matrix_type = "diagonal full rank"
  160. A = self._matrices[matrix_type]
  161. cov_type = getattr(_covariance, f"CovVia{cov_type_name}")
  162. preprocessing = self._covariance_preprocessing[cov_type_name]
  163. mean = [0.1, 0.2, 0.3]
  164. cov_object = cov_type(preprocessing(A))
  165. mvn = multivariate_normal
  166. dist0 = multivariate_normal(mean, A, allow_singular=True)
  167. dist1 = multivariate_normal(mean, cov_object, allow_singular=True)
  168. rng = np.random.default_rng(5292808890472453840)
  169. x = rng.multivariate_normal(mean, A, size=size)
  170. assert_close(mvn.cdf(x, mean, cov_object), dist0.cdf(x))
  171. assert_close(dist1.cdf(x), dist0.cdf(x))
  172. assert_close(mvn.logcdf(x, mean, cov_object), dist0.logcdf(x))
  173. assert_close(dist1.logcdf(x), dist0.logcdf(x))
  174. def test_covariance_instantiation(self):
  175. message = "The `Covariance` class cannot be instantiated directly."
  176. with pytest.raises(NotImplementedError, match=message):
  177. Covariance()
  178. @pytest.mark.filterwarnings("ignore::RuntimeWarning") # matrix not PSD
  179. def test_gh9942(self):
  180. # Originally there was a mistake in the `multivariate_normal_frozen`
  181. # `rvs` method that caused all covariance objects to be processed as
  182. # a `_CovViaPSD`. Ensure that this is resolved.
  183. A = np.diag([1, 2, -1e-8])
  184. n = A.shape[0]
  185. mean = np.zeros(n)
  186. # Error if the matrix is processed as a `_CovViaPSD`
  187. with pytest.raises(ValueError, match="The input matrix must be..."):
  188. multivariate_normal(mean, A).rvs()
  189. # No error if it is provided as a `CovViaEigendecomposition`
  190. seed = 3562050283508273023
  191. rng1 = np.random.default_rng(seed)
  192. rng2 = np.random.default_rng(seed)
  193. cov = Covariance.from_eigendecomposition(np.linalg.eigh(A))
  194. rv = multivariate_normal(mean, cov)
  195. res = rv.rvs(random_state=rng1)
  196. ref = multivariate_normal.rvs(mean, cov, random_state=rng2)
  197. assert_equal(res, ref)
  198. def _sample_orthonormal_matrix(n):
  199. M = np.random.randn(n, n)
  200. u, s, v = scipy.linalg.svd(M)
  201. return u
  202. class TestMultivariateNormal:
  203. def test_input_shape(self):
  204. mu = np.arange(3)
  205. cov = np.identity(2)
  206. assert_raises(ValueError, multivariate_normal.pdf, (0, 1), mu, cov)
  207. assert_raises(ValueError, multivariate_normal.pdf, (0, 1, 2), mu, cov)
  208. assert_raises(ValueError, multivariate_normal.cdf, (0, 1), mu, cov)
  209. assert_raises(ValueError, multivariate_normal.cdf, (0, 1, 2), mu, cov)
  210. def test_scalar_values(self):
  211. np.random.seed(1234)
  212. # When evaluated on scalar data, the pdf should return a scalar
  213. x, mean, cov = 1.5, 1.7, 2.5
  214. pdf = multivariate_normal.pdf(x, mean, cov)
  215. assert_equal(pdf.ndim, 0)
  216. # When evaluated on a single vector, the pdf should return a scalar
  217. x = np.random.randn(5)
  218. mean = np.random.randn(5)
  219. cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix
  220. pdf = multivariate_normal.pdf(x, mean, cov)
  221. assert_equal(pdf.ndim, 0)
  222. # When evaluated on scalar data, the cdf should return a scalar
  223. x, mean, cov = 1.5, 1.7, 2.5
  224. cdf = multivariate_normal.cdf(x, mean, cov)
  225. assert_equal(cdf.ndim, 0)
  226. # When evaluated on a single vector, the cdf should return a scalar
  227. x = np.random.randn(5)
  228. mean = np.random.randn(5)
  229. cov = np.abs(np.random.randn(5)) # Diagonal values for cov. matrix
  230. cdf = multivariate_normal.cdf(x, mean, cov)
  231. assert_equal(cdf.ndim, 0)
  232. def test_logpdf(self):
  233. # Check that the log of the pdf is in fact the logpdf
  234. np.random.seed(1234)
  235. x = np.random.randn(5)
  236. mean = np.random.randn(5)
  237. cov = np.abs(np.random.randn(5))
  238. d1 = multivariate_normal.logpdf(x, mean, cov)
  239. d2 = multivariate_normal.pdf(x, mean, cov)
  240. assert_allclose(d1, np.log(d2))
  241. def test_logpdf_default_values(self):
  242. # Check that the log of the pdf is in fact the logpdf
  243. # with default parameters Mean=None and cov = 1
  244. np.random.seed(1234)
  245. x = np.random.randn(5)
  246. d1 = multivariate_normal.logpdf(x)
  247. d2 = multivariate_normal.pdf(x)
  248. # check whether default values are being used
  249. d3 = multivariate_normal.logpdf(x, None, 1)
  250. d4 = multivariate_normal.pdf(x, None, 1)
  251. assert_allclose(d1, np.log(d2))
  252. assert_allclose(d3, np.log(d4))
  253. def test_logcdf(self):
  254. # Check that the log of the cdf is in fact the logcdf
  255. np.random.seed(1234)
  256. x = np.random.randn(5)
  257. mean = np.random.randn(5)
  258. cov = np.abs(np.random.randn(5))
  259. d1 = multivariate_normal.logcdf(x, mean, cov)
  260. d2 = multivariate_normal.cdf(x, mean, cov)
  261. assert_allclose(d1, np.log(d2))
  262. def test_logcdf_default_values(self):
  263. # Check that the log of the cdf is in fact the logcdf
  264. # with default parameters Mean=None and cov = 1
  265. np.random.seed(1234)
  266. x = np.random.randn(5)
  267. d1 = multivariate_normal.logcdf(x)
  268. d2 = multivariate_normal.cdf(x)
  269. # check whether default values are being used
  270. d3 = multivariate_normal.logcdf(x, None, 1)
  271. d4 = multivariate_normal.cdf(x, None, 1)
  272. assert_allclose(d1, np.log(d2))
  273. assert_allclose(d3, np.log(d4))
  274. def test_rank(self):
  275. # Check that the rank is detected correctly.
  276. np.random.seed(1234)
  277. n = 4
  278. mean = np.random.randn(n)
  279. for expected_rank in range(1, n + 1):
  280. s = np.random.randn(n, expected_rank)
  281. cov = np.dot(s, s.T)
  282. distn = multivariate_normal(mean, cov, allow_singular=True)
  283. assert_equal(distn.cov_object.rank, expected_rank)
  284. def test_degenerate_distributions(self):
  285. for n in range(1, 5):
  286. z = np.random.randn(n)
  287. for k in range(1, n):
  288. # Sample a small covariance matrix.
  289. s = np.random.randn(k, k)
  290. cov_kk = np.dot(s, s.T)
  291. # Embed the small covariance matrix into a larger singular one.
  292. cov_nn = np.zeros((n, n))
  293. cov_nn[:k, :k] = cov_kk
  294. # Embed part of the vector in the same way
  295. x = np.zeros(n)
  296. x[:k] = z[:k]
  297. # Define a rotation of the larger low rank matrix.
  298. u = _sample_orthonormal_matrix(n)
  299. cov_rr = np.dot(u, np.dot(cov_nn, u.T))
  300. y = np.dot(u, x)
  301. # Check some identities.
  302. distn_kk = multivariate_normal(np.zeros(k), cov_kk,
  303. allow_singular=True)
  304. distn_nn = multivariate_normal(np.zeros(n), cov_nn,
  305. allow_singular=True)
  306. distn_rr = multivariate_normal(np.zeros(n), cov_rr,
  307. allow_singular=True)
  308. assert_equal(distn_kk.cov_object.rank, k)
  309. assert_equal(distn_nn.cov_object.rank, k)
  310. assert_equal(distn_rr.cov_object.rank, k)
  311. pdf_kk = distn_kk.pdf(x[:k])
  312. pdf_nn = distn_nn.pdf(x)
  313. pdf_rr = distn_rr.pdf(y)
  314. assert_allclose(pdf_kk, pdf_nn)
  315. assert_allclose(pdf_kk, pdf_rr)
  316. logpdf_kk = distn_kk.logpdf(x[:k])
  317. logpdf_nn = distn_nn.logpdf(x)
  318. logpdf_rr = distn_rr.logpdf(y)
  319. assert_allclose(logpdf_kk, logpdf_nn)
  320. assert_allclose(logpdf_kk, logpdf_rr)
  321. # Add an orthogonal component and find the density
  322. y_orth = y + u[:, -1]
  323. pdf_rr_orth = distn_rr.pdf(y_orth)
  324. logpdf_rr_orth = distn_rr.logpdf(y_orth)
  325. # Ensure that this has zero probability
  326. assert_equal(pdf_rr_orth, 0.0)
  327. assert_equal(logpdf_rr_orth, -np.inf)
  328. def test_degenerate_array(self):
  329. # Test that we can generate arrays of random variate from a degenerate
  330. # multivariate normal, and that the pdf for these samples is non-zero
  331. # (i.e. samples from the distribution lie on the subspace)
  332. k = 10
  333. for n in range(2, 6):
  334. for r in range(1, n):
  335. mn = np.zeros(n)
  336. u = _sample_orthonormal_matrix(n)[:, :r]
  337. vr = np.dot(u, u.T)
  338. X = multivariate_normal.rvs(mean=mn, cov=vr, size=k)
  339. pdf = multivariate_normal.pdf(X, mean=mn, cov=vr,
  340. allow_singular=True)
  341. assert_equal(pdf.size, k)
  342. assert np.all(pdf > 0.0)
  343. logpdf = multivariate_normal.logpdf(X, mean=mn, cov=vr,
  344. allow_singular=True)
  345. assert_equal(logpdf.size, k)
  346. assert np.all(logpdf > -np.inf)
  347. def test_large_pseudo_determinant(self):
  348. # Check that large pseudo-determinants are handled appropriately.
  349. # Construct a singular diagonal covariance matrix
  350. # whose pseudo determinant overflows double precision.
  351. large_total_log = 1000.0
  352. npos = 100
  353. nzero = 2
  354. large_entry = np.exp(large_total_log / npos)
  355. n = npos + nzero
  356. cov = np.zeros((n, n), dtype=float)
  357. np.fill_diagonal(cov, large_entry)
  358. cov[-nzero:, -nzero:] = 0
  359. # Check some determinants.
  360. assert_equal(scipy.linalg.det(cov), 0)
  361. assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf)
  362. assert_allclose(np.linalg.slogdet(cov[:npos, :npos]),
  363. (1, large_total_log))
  364. # Check the pseudo-determinant.
  365. psd = _PSD(cov)
  366. assert_allclose(psd.log_pdet, large_total_log)
  367. def test_broadcasting(self):
  368. np.random.seed(1234)
  369. n = 4
  370. # Construct a random covariance matrix.
  371. data = np.random.randn(n, n)
  372. cov = np.dot(data, data.T)
  373. mean = np.random.randn(n)
  374. # Construct an ndarray which can be interpreted as
  375. # a 2x3 array whose elements are random data vectors.
  376. X = np.random.randn(2, 3, n)
  377. # Check that multiple data points can be evaluated at once.
  378. desired_pdf = multivariate_normal.pdf(X, mean, cov)
  379. desired_cdf = multivariate_normal.cdf(X, mean, cov)
  380. for i in range(2):
  381. for j in range(3):
  382. actual = multivariate_normal.pdf(X[i, j], mean, cov)
  383. assert_allclose(actual, desired_pdf[i,j])
  384. # Repeat for cdf
  385. actual = multivariate_normal.cdf(X[i, j], mean, cov)
  386. assert_allclose(actual, desired_cdf[i,j], rtol=1e-3)
  387. def test_normal_1D(self):
  388. # The probability density function for a 1D normal variable should
  389. # agree with the standard normal distribution in scipy.stats.distributions
  390. x = np.linspace(0, 2, 10)
  391. mean, cov = 1.2, 0.9
  392. scale = cov**0.5
  393. d1 = norm.pdf(x, mean, scale)
  394. d2 = multivariate_normal.pdf(x, mean, cov)
  395. assert_allclose(d1, d2)
  396. # The same should hold for the cumulative distribution function
  397. d1 = norm.cdf(x, mean, scale)
  398. d2 = multivariate_normal.cdf(x, mean, cov)
  399. assert_allclose(d1, d2)
  400. def test_marginalization(self):
  401. # Integrating out one of the variables of a 2D Gaussian should
  402. # yield a 1D Gaussian
  403. mean = np.array([2.5, 3.5])
  404. cov = np.array([[.5, 0.2], [0.2, .6]])
  405. n = 2 ** 8 + 1 # Number of samples
  406. delta = 6 / (n - 1) # Grid spacing
  407. v = np.linspace(0, 6, n)
  408. xv, yv = np.meshgrid(v, v)
  409. pos = np.empty((n, n, 2))
  410. pos[:, :, 0] = xv
  411. pos[:, :, 1] = yv
  412. pdf = multivariate_normal.pdf(pos, mean, cov)
  413. # Marginalize over x and y axis
  414. margin_x = romb(pdf, delta, axis=0)
  415. margin_y = romb(pdf, delta, axis=1)
  416. # Compare with standard normal distribution
  417. gauss_x = norm.pdf(v, loc=mean[0], scale=cov[0, 0] ** 0.5)
  418. gauss_y = norm.pdf(v, loc=mean[1], scale=cov[1, 1] ** 0.5)
  419. assert_allclose(margin_x, gauss_x, rtol=1e-2, atol=1e-2)
  420. assert_allclose(margin_y, gauss_y, rtol=1e-2, atol=1e-2)
  421. def test_frozen(self):
  422. # The frozen distribution should agree with the regular one
  423. np.random.seed(1234)
  424. x = np.random.randn(5)
  425. mean = np.random.randn(5)
  426. cov = np.abs(np.random.randn(5))
  427. norm_frozen = multivariate_normal(mean, cov)
  428. assert_allclose(norm_frozen.pdf(x), multivariate_normal.pdf(x, mean, cov))
  429. assert_allclose(norm_frozen.logpdf(x),
  430. multivariate_normal.logpdf(x, mean, cov))
  431. assert_allclose(norm_frozen.cdf(x), multivariate_normal.cdf(x, mean, cov))
  432. assert_allclose(norm_frozen.logcdf(x),
  433. multivariate_normal.logcdf(x, mean, cov))
  434. @pytest.mark.parametrize(
  435. 'covariance',
  436. [
  437. np.eye(2),
  438. Covariance.from_diagonal([1, 1]),
  439. ]
  440. )
  441. def test_frozen_multivariate_normal_exposes_attributes(self, covariance):
  442. mean = np.ones((2,))
  443. cov_should_be = np.eye(2)
  444. norm_frozen = multivariate_normal(mean, covariance)
  445. assert np.allclose(norm_frozen.mean, mean)
  446. assert np.allclose(norm_frozen.cov, cov_should_be)
  447. def test_pseudodet_pinv(self):
  448. # Make sure that pseudo-inverse and pseudo-det agree on cutoff
  449. # Assemble random covariance matrix with large and small eigenvalues
  450. np.random.seed(1234)
  451. n = 7
  452. x = np.random.randn(n, n)
  453. cov = np.dot(x, x.T)
  454. s, u = scipy.linalg.eigh(cov)
  455. s = np.full(n, 0.5)
  456. s[0] = 1.0
  457. s[-1] = 1e-7
  458. cov = np.dot(u, np.dot(np.diag(s), u.T))
  459. # Set cond so that the lowest eigenvalue is below the cutoff
  460. cond = 1e-5
  461. psd = _PSD(cov, cond=cond)
  462. psd_pinv = _PSD(psd.pinv, cond=cond)
  463. # Check that the log pseudo-determinant agrees with the sum
  464. # of the logs of all but the smallest eigenvalue
  465. assert_allclose(psd.log_pdet, np.sum(np.log(s[:-1])))
  466. # Check that the pseudo-determinant of the pseudo-inverse
  467. # agrees with 1 / pseudo-determinant
  468. assert_allclose(-psd.log_pdet, psd_pinv.log_pdet)
  469. def test_exception_nonsquare_cov(self):
  470. cov = [[1, 2, 3], [4, 5, 6]]
  471. assert_raises(ValueError, _PSD, cov)
  472. def test_exception_nonfinite_cov(self):
  473. cov_nan = [[1, 0], [0, np.nan]]
  474. assert_raises(ValueError, _PSD, cov_nan)
  475. cov_inf = [[1, 0], [0, np.inf]]
  476. assert_raises(ValueError, _PSD, cov_inf)
  477. def test_exception_non_psd_cov(self):
  478. cov = [[1, 0], [0, -1]]
  479. assert_raises(ValueError, _PSD, cov)
  480. def test_exception_singular_cov(self):
  481. np.random.seed(1234)
  482. x = np.random.randn(5)
  483. mean = np.random.randn(5)
  484. cov = np.ones((5, 5))
  485. e = np.linalg.LinAlgError
  486. assert_raises(e, multivariate_normal, mean, cov)
  487. assert_raises(e, multivariate_normal.pdf, x, mean, cov)
  488. assert_raises(e, multivariate_normal.logpdf, x, mean, cov)
  489. assert_raises(e, multivariate_normal.cdf, x, mean, cov)
  490. assert_raises(e, multivariate_normal.logcdf, x, mean, cov)
  491. # Message used to be "singular matrix", but this is more accurate.
  492. # See gh-15508
  493. cov = [[1., 0.], [1., 1.]]
  494. msg = "When `allow_singular is False`, the input matrix"
  495. with pytest.raises(np.linalg.LinAlgError, match=msg):
  496. multivariate_normal(cov=cov)
  497. def test_R_values(self):
  498. # Compare the multivariate pdf with some values precomputed
  499. # in R version 3.0.1 (2013-05-16) on Mac OS X 10.6.
  500. # The values below were generated by the following R-script:
  501. # > library(mnormt)
  502. # > x <- seq(0, 2, length=5)
  503. # > y <- 3*x - 2
  504. # > z <- x + cos(y)
  505. # > mu <- c(1, 3, 2)
  506. # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3)
  507. # > r_pdf <- dmnorm(cbind(x,y,z), mu, Sigma)
  508. r_pdf = np.array([0.0002214706, 0.0013819953, 0.0049138692,
  509. 0.0103803050, 0.0140250800])
  510. x = np.linspace(0, 2, 5)
  511. y = 3 * x - 2
  512. z = x + np.cos(y)
  513. r = np.array([x, y, z]).T
  514. mean = np.array([1, 3, 2], 'd')
  515. cov = np.array([[1, 2, 0], [2, 5, .5], [0, .5, 3]], 'd')
  516. pdf = multivariate_normal.pdf(r, mean, cov)
  517. assert_allclose(pdf, r_pdf, atol=1e-10)
  518. # Compare the multivariate cdf with some values precomputed
  519. # in R version 3.3.2 (2016-10-31) on Debian GNU/Linux.
  520. # The values below were generated by the following R-script:
  521. # > library(mnormt)
  522. # > x <- seq(0, 2, length=5)
  523. # > y <- 3*x - 2
  524. # > z <- x + cos(y)
  525. # > mu <- c(1, 3, 2)
  526. # > Sigma <- matrix(c(1,2,0,2,5,0.5,0,0.5,3), 3, 3)
  527. # > r_cdf <- pmnorm(cbind(x,y,z), mu, Sigma)
  528. r_cdf = np.array([0.0017866215, 0.0267142892, 0.0857098761,
  529. 0.1063242573, 0.2501068509])
  530. cdf = multivariate_normal.cdf(r, mean, cov)
  531. assert_allclose(cdf, r_cdf, atol=2e-5)
  532. # Also test bivariate cdf with some values precomputed
  533. # in R version 3.3.2 (2016-10-31) on Debian GNU/Linux.
  534. # The values below were generated by the following R-script:
  535. # > library(mnormt)
  536. # > x <- seq(0, 2, length=5)
  537. # > y <- 3*x - 2
  538. # > mu <- c(1, 3)
  539. # > Sigma <- matrix(c(1,2,2,5), 2, 2)
  540. # > r_cdf2 <- pmnorm(cbind(x,y), mu, Sigma)
  541. r_cdf2 = np.array([0.01262147, 0.05838989, 0.18389571,
  542. 0.40696599, 0.66470577])
  543. r2 = np.array([x, y]).T
  544. mean2 = np.array([1, 3], 'd')
  545. cov2 = np.array([[1, 2], [2, 5]], 'd')
  546. cdf2 = multivariate_normal.cdf(r2, mean2, cov2)
  547. assert_allclose(cdf2, r_cdf2, atol=1e-5)
  548. def test_multivariate_normal_rvs_zero_covariance(self):
  549. mean = np.zeros(2)
  550. covariance = np.zeros((2, 2))
  551. model = multivariate_normal(mean, covariance, allow_singular=True)
  552. sample = model.rvs()
  553. assert_equal(sample, [0, 0])
  554. def test_rvs_shape(self):
  555. # Check that rvs parses the mean and covariance correctly, and returns
  556. # an array of the right shape
  557. N = 300
  558. d = 4
  559. sample = multivariate_normal.rvs(mean=np.zeros(d), cov=1, size=N)
  560. assert_equal(sample.shape, (N, d))
  561. sample = multivariate_normal.rvs(mean=None,
  562. cov=np.array([[2, .1], [.1, 1]]),
  563. size=N)
  564. assert_equal(sample.shape, (N, 2))
  565. u = multivariate_normal(mean=0, cov=1)
  566. sample = u.rvs(N)
  567. assert_equal(sample.shape, (N, ))
  568. def test_large_sample(self):
  569. # Generate large sample and compare sample mean and sample covariance
  570. # with mean and covariance matrix.
  571. np.random.seed(2846)
  572. n = 3
  573. mean = np.random.randn(n)
  574. M = np.random.randn(n, n)
  575. cov = np.dot(M, M.T)
  576. size = 5000
  577. sample = multivariate_normal.rvs(mean, cov, size)
  578. assert_allclose(numpy.cov(sample.T), cov, rtol=1e-1)
  579. assert_allclose(sample.mean(0), mean, rtol=1e-1)
  580. def test_entropy(self):
  581. np.random.seed(2846)
  582. n = 3
  583. mean = np.random.randn(n)
  584. M = np.random.randn(n, n)
  585. cov = np.dot(M, M.T)
  586. rv = multivariate_normal(mean, cov)
  587. # Check that frozen distribution agrees with entropy function
  588. assert_almost_equal(rv.entropy(), multivariate_normal.entropy(mean, cov))
  589. # Compare entropy with manually computed expression involving
  590. # the sum of the logs of the eigenvalues of the covariance matrix
  591. eigs = np.linalg.eig(cov)[0]
  592. desired = 1 / 2 * (n * (np.log(2 * np.pi) + 1) + np.sum(np.log(eigs)))
  593. assert_almost_equal(desired, rv.entropy())
  594. def test_lnB(self):
  595. alpha = np.array([1, 1, 1])
  596. desired = .5 # e^lnB = 1/2 for [1, 1, 1]
  597. assert_almost_equal(np.exp(_lnB(alpha)), desired)
  598. def test_cdf_with_lower_limit_arrays(self):
  599. # test CDF with lower limit in several dimensions
  600. rng = np.random.default_rng(2408071309372769818)
  601. mean = [0, 0]
  602. cov = np.eye(2)
  603. a = rng.random((4, 3, 2))*6 - 3
  604. b = rng.random((4, 3, 2))*6 - 3
  605. cdf1 = multivariate_normal.cdf(b, mean, cov, lower_limit=a)
  606. cdf2a = multivariate_normal.cdf(b, mean, cov)
  607. cdf2b = multivariate_normal.cdf(a, mean, cov)
  608. ab1 = np.concatenate((a[..., 0:1], b[..., 1:2]), axis=-1)
  609. ab2 = np.concatenate((a[..., 1:2], b[..., 0:1]), axis=-1)
  610. cdf2ab1 = multivariate_normal.cdf(ab1, mean, cov)
  611. cdf2ab2 = multivariate_normal.cdf(ab2, mean, cov)
  612. cdf2 = cdf2a + cdf2b - cdf2ab1 - cdf2ab2
  613. assert_allclose(cdf1, cdf2)
  614. def test_cdf_with_lower_limit_consistency(self):
  615. # check that multivariate normal CDF functions are consistent
  616. rng = np.random.default_rng(2408071309372769818)
  617. mean = rng.random(3)
  618. cov = rng.random((3, 3))
  619. cov = cov @ cov.T
  620. a = rng.random((2, 3))*6 - 3
  621. b = rng.random((2, 3))*6 - 3
  622. cdf1 = multivariate_normal.cdf(b, mean, cov, lower_limit=a)
  623. cdf2 = multivariate_normal(mean, cov).cdf(b, lower_limit=a)
  624. cdf3 = np.exp(multivariate_normal.logcdf(b, mean, cov, lower_limit=a))
  625. cdf4 = np.exp(multivariate_normal(mean, cov).logcdf(b, lower_limit=a))
  626. assert_allclose(cdf2, cdf1, rtol=1e-4)
  627. assert_allclose(cdf3, cdf1, rtol=1e-4)
  628. assert_allclose(cdf4, cdf1, rtol=1e-4)
  629. def test_cdf_signs(self):
  630. # check that sign of output is correct when np.any(lower > x)
  631. mean = np.zeros(3)
  632. cov = np.eye(3)
  633. b = [[1, 1, 1], [0, 0, 0], [1, 0, 1], [0, 1, 0]]
  634. a = [[0, 0, 0], [1, 1, 1], [0, 1, 0], [1, 0, 1]]
  635. # when odd number of elements of b < a, output is negative
  636. expected_signs = np.array([1, -1, -1, 1])
  637. cdf = multivariate_normal.cdf(b, mean, cov, lower_limit=a)
  638. assert_allclose(cdf, cdf[0]*expected_signs)
  639. def test_mean_cov(self):
  640. # test the interaction between a Covariance object and mean
  641. P = np.diag(1 / np.array([1, 2, 3]))
  642. cov_object = _covariance.CovViaPrecision(P)
  643. message = "`cov` represents a covariance matrix in 3 dimensions..."
  644. with pytest.raises(ValueError, match=message):
  645. multivariate_normal.entropy([0, 0], cov_object)
  646. with pytest.raises(ValueError, match=message):
  647. multivariate_normal([0, 0], cov_object)
  648. x = [0.5, 0.5, 0.5]
  649. ref = multivariate_normal.pdf(x, [0, 0, 0], cov_object)
  650. assert_equal(multivariate_normal.pdf(x, cov=cov_object), ref)
  651. ref = multivariate_normal.pdf(x, [1, 1, 1], cov_object)
  652. assert_equal(multivariate_normal.pdf(x, 1, cov=cov_object), ref)
  653. class TestMatrixNormal:
  654. def test_bad_input(self):
  655. # Check that bad inputs raise errors
  656. num_rows = 4
  657. num_cols = 3
  658. M = np.full((num_rows,num_cols), 0.3)
  659. U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5)
  660. V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3)
  661. # Incorrect dimensions
  662. assert_raises(ValueError, matrix_normal, np.zeros((5,4,3)))
  663. assert_raises(ValueError, matrix_normal, M, np.zeros(10), V)
  664. assert_raises(ValueError, matrix_normal, M, U, np.zeros(10))
  665. assert_raises(ValueError, matrix_normal, M, U, U)
  666. assert_raises(ValueError, matrix_normal, M, V, V)
  667. assert_raises(ValueError, matrix_normal, M.T, U, V)
  668. e = np.linalg.LinAlgError
  669. # Singular covariance for the rvs method of a non-frozen instance
  670. assert_raises(e, matrix_normal.rvs,
  671. M, U, np.ones((num_cols, num_cols)))
  672. assert_raises(e, matrix_normal.rvs,
  673. M, np.ones((num_rows, num_rows)), V)
  674. # Singular covariance for a frozen instance
  675. assert_raises(e, matrix_normal, M, U, np.ones((num_cols, num_cols)))
  676. assert_raises(e, matrix_normal, M, np.ones((num_rows, num_rows)), V)
  677. def test_default_inputs(self):
  678. # Check that default argument handling works
  679. num_rows = 4
  680. num_cols = 3
  681. M = np.full((num_rows,num_cols), 0.3)
  682. U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5)
  683. V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3)
  684. Z = np.zeros((num_rows, num_cols))
  685. Zr = np.zeros((num_rows, 1))
  686. Zc = np.zeros((1, num_cols))
  687. Ir = np.identity(num_rows)
  688. Ic = np.identity(num_cols)
  689. I1 = np.identity(1)
  690. assert_equal(matrix_normal.rvs(mean=M, rowcov=U, colcov=V).shape,
  691. (num_rows, num_cols))
  692. assert_equal(matrix_normal.rvs(mean=M).shape,
  693. (num_rows, num_cols))
  694. assert_equal(matrix_normal.rvs(rowcov=U).shape,
  695. (num_rows, 1))
  696. assert_equal(matrix_normal.rvs(colcov=V).shape,
  697. (1, num_cols))
  698. assert_equal(matrix_normal.rvs(mean=M, colcov=V).shape,
  699. (num_rows, num_cols))
  700. assert_equal(matrix_normal.rvs(mean=M, rowcov=U).shape,
  701. (num_rows, num_cols))
  702. assert_equal(matrix_normal.rvs(rowcov=U, colcov=V).shape,
  703. (num_rows, num_cols))
  704. assert_equal(matrix_normal(mean=M).rowcov, Ir)
  705. assert_equal(matrix_normal(mean=M).colcov, Ic)
  706. assert_equal(matrix_normal(rowcov=U).mean, Zr)
  707. assert_equal(matrix_normal(rowcov=U).colcov, I1)
  708. assert_equal(matrix_normal(colcov=V).mean, Zc)
  709. assert_equal(matrix_normal(colcov=V).rowcov, I1)
  710. assert_equal(matrix_normal(mean=M, rowcov=U).colcov, Ic)
  711. assert_equal(matrix_normal(mean=M, colcov=V).rowcov, Ir)
  712. assert_equal(matrix_normal(rowcov=U, colcov=V).mean, Z)
  713. def test_covariance_expansion(self):
  714. # Check that covariance can be specified with scalar or vector
  715. num_rows = 4
  716. num_cols = 3
  717. M = np.full((num_rows, num_cols), 0.3)
  718. Uv = np.full(num_rows, 0.2)
  719. Us = 0.2
  720. Vv = np.full(num_cols, 0.1)
  721. Vs = 0.1
  722. Ir = np.identity(num_rows)
  723. Ic = np.identity(num_cols)
  724. assert_equal(matrix_normal(mean=M, rowcov=Uv, colcov=Vv).rowcov,
  725. 0.2*Ir)
  726. assert_equal(matrix_normal(mean=M, rowcov=Uv, colcov=Vv).colcov,
  727. 0.1*Ic)
  728. assert_equal(matrix_normal(mean=M, rowcov=Us, colcov=Vs).rowcov,
  729. 0.2*Ir)
  730. assert_equal(matrix_normal(mean=M, rowcov=Us, colcov=Vs).colcov,
  731. 0.1*Ic)
  732. def test_frozen_matrix_normal(self):
  733. for i in range(1,5):
  734. for j in range(1,5):
  735. M = np.full((i,j), 0.3)
  736. U = 0.5 * np.identity(i) + np.full((i,i), 0.5)
  737. V = 0.7 * np.identity(j) + np.full((j,j), 0.3)
  738. frozen = matrix_normal(mean=M, rowcov=U, colcov=V)
  739. rvs1 = frozen.rvs(random_state=1234)
  740. rvs2 = matrix_normal.rvs(mean=M, rowcov=U, colcov=V,
  741. random_state=1234)
  742. assert_equal(rvs1, rvs2)
  743. X = frozen.rvs(random_state=1234)
  744. pdf1 = frozen.pdf(X)
  745. pdf2 = matrix_normal.pdf(X, mean=M, rowcov=U, colcov=V)
  746. assert_equal(pdf1, pdf2)
  747. logpdf1 = frozen.logpdf(X)
  748. logpdf2 = matrix_normal.logpdf(X, mean=M, rowcov=U, colcov=V)
  749. assert_equal(logpdf1, logpdf2)
  750. def test_matches_multivariate(self):
  751. # Check that the pdfs match those obtained by vectorising and
  752. # treating as a multivariate normal.
  753. for i in range(1,5):
  754. for j in range(1,5):
  755. M = np.full((i,j), 0.3)
  756. U = 0.5 * np.identity(i) + np.full((i,i), 0.5)
  757. V = 0.7 * np.identity(j) + np.full((j,j), 0.3)
  758. frozen = matrix_normal(mean=M, rowcov=U, colcov=V)
  759. X = frozen.rvs(random_state=1234)
  760. pdf1 = frozen.pdf(X)
  761. logpdf1 = frozen.logpdf(X)
  762. vecX = X.T.flatten()
  763. vecM = M.T.flatten()
  764. cov = np.kron(V,U)
  765. pdf2 = multivariate_normal.pdf(vecX, mean=vecM, cov=cov)
  766. logpdf2 = multivariate_normal.logpdf(vecX, mean=vecM, cov=cov)
  767. assert_allclose(pdf1, pdf2, rtol=1E-10)
  768. assert_allclose(logpdf1, logpdf2, rtol=1E-10)
  769. def test_array_input(self):
  770. # Check array of inputs has the same output as the separate entries.
  771. num_rows = 4
  772. num_cols = 3
  773. M = np.full((num_rows,num_cols), 0.3)
  774. U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5)
  775. V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3)
  776. N = 10
  777. frozen = matrix_normal(mean=M, rowcov=U, colcov=V)
  778. X1 = frozen.rvs(size=N, random_state=1234)
  779. X2 = frozen.rvs(size=N, random_state=4321)
  780. X = np.concatenate((X1[np.newaxis,:,:,:],X2[np.newaxis,:,:,:]), axis=0)
  781. assert_equal(X.shape, (2, N, num_rows, num_cols))
  782. array_logpdf = frozen.logpdf(X)
  783. assert_equal(array_logpdf.shape, (2, N))
  784. for i in range(2):
  785. for j in range(N):
  786. separate_logpdf = matrix_normal.logpdf(X[i,j], mean=M,
  787. rowcov=U, colcov=V)
  788. assert_allclose(separate_logpdf, array_logpdf[i,j], 1E-10)
  789. def test_moments(self):
  790. # Check that the sample moments match the parameters
  791. num_rows = 4
  792. num_cols = 3
  793. M = np.full((num_rows,num_cols), 0.3)
  794. U = 0.5 * np.identity(num_rows) + np.full((num_rows, num_rows), 0.5)
  795. V = 0.7 * np.identity(num_cols) + np.full((num_cols, num_cols), 0.3)
  796. N = 1000
  797. frozen = matrix_normal(mean=M, rowcov=U, colcov=V)
  798. X = frozen.rvs(size=N, random_state=1234)
  799. sample_mean = np.mean(X,axis=0)
  800. assert_allclose(sample_mean, M, atol=0.1)
  801. sample_colcov = np.cov(X.reshape(N*num_rows,num_cols).T)
  802. assert_allclose(sample_colcov, V, atol=0.1)
  803. sample_rowcov = np.cov(np.swapaxes(X,1,2).reshape(
  804. N*num_cols,num_rows).T)
  805. assert_allclose(sample_rowcov, U, atol=0.1)
  806. def test_samples(self):
  807. # Regression test to ensure that we always generate the same stream of
  808. # random variates.
  809. actual = matrix_normal.rvs(
  810. mean=np.array([[1, 2], [3, 4]]),
  811. rowcov=np.array([[4, -1], [-1, 2]]),
  812. colcov=np.array([[5, 1], [1, 10]]),
  813. random_state=np.random.default_rng(0),
  814. size=2
  815. )
  816. expected = np.array(
  817. [[[1.56228264238181, -1.24136424071189],
  818. [2.46865788392114, 6.22964440489445]],
  819. [[3.86405716144353, 10.73714311429529],
  820. [2.59428444080606, 5.79987854490876]]]
  821. )
  822. assert_allclose(actual, expected)
  823. class TestDirichlet:
  824. def test_frozen_dirichlet(self):
  825. np.random.seed(2846)
  826. n = np.random.randint(1, 32)
  827. alpha = np.random.uniform(10e-10, 100, n)
  828. d = dirichlet(alpha)
  829. assert_equal(d.var(), dirichlet.var(alpha))
  830. assert_equal(d.mean(), dirichlet.mean(alpha))
  831. assert_equal(d.entropy(), dirichlet.entropy(alpha))
  832. num_tests = 10
  833. for i in range(num_tests):
  834. x = np.random.uniform(10e-10, 100, n)
  835. x /= np.sum(x)
  836. assert_equal(d.pdf(x[:-1]), dirichlet.pdf(x[:-1], alpha))
  837. assert_equal(d.logpdf(x[:-1]), dirichlet.logpdf(x[:-1], alpha))
  838. def test_numpy_rvs_shape_compatibility(self):
  839. np.random.seed(2846)
  840. alpha = np.array([1.0, 2.0, 3.0])
  841. x = np.random.dirichlet(alpha, size=7)
  842. assert_equal(x.shape, (7, 3))
  843. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  844. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  845. dirichlet.pdf(x.T, alpha)
  846. dirichlet.pdf(x.T[:-1], alpha)
  847. dirichlet.logpdf(x.T, alpha)
  848. dirichlet.logpdf(x.T[:-1], alpha)
  849. def test_alpha_with_zeros(self):
  850. np.random.seed(2846)
  851. alpha = [1.0, 0.0, 3.0]
  852. # don't pass invalid alpha to np.random.dirichlet
  853. x = np.random.dirichlet(np.maximum(1e-9, alpha), size=7).T
  854. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  855. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  856. def test_alpha_with_negative_entries(self):
  857. np.random.seed(2846)
  858. alpha = [1.0, -2.0, 3.0]
  859. # don't pass invalid alpha to np.random.dirichlet
  860. x = np.random.dirichlet(np.maximum(1e-9, alpha), size=7).T
  861. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  862. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  863. def test_data_with_zeros(self):
  864. alpha = np.array([1.0, 2.0, 3.0, 4.0])
  865. x = np.array([0.1, 0.0, 0.2, 0.7])
  866. dirichlet.pdf(x, alpha)
  867. dirichlet.logpdf(x, alpha)
  868. alpha = np.array([1.0, 1.0, 1.0, 1.0])
  869. assert_almost_equal(dirichlet.pdf(x, alpha), 6)
  870. assert_almost_equal(dirichlet.logpdf(x, alpha), np.log(6))
  871. def test_data_with_zeros_and_small_alpha(self):
  872. alpha = np.array([1.0, 0.5, 3.0, 4.0])
  873. x = np.array([0.1, 0.0, 0.2, 0.7])
  874. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  875. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  876. def test_data_with_negative_entries(self):
  877. alpha = np.array([1.0, 2.0, 3.0, 4.0])
  878. x = np.array([0.1, -0.1, 0.3, 0.7])
  879. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  880. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  881. def test_data_with_too_large_entries(self):
  882. alpha = np.array([1.0, 2.0, 3.0, 4.0])
  883. x = np.array([0.1, 1.1, 0.3, 0.7])
  884. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  885. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  886. def test_data_too_deep_c(self):
  887. alpha = np.array([1.0, 2.0, 3.0])
  888. x = np.full((2, 7, 7), 1 / 14)
  889. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  890. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  891. def test_alpha_too_deep(self):
  892. alpha = np.array([[1.0, 2.0], [3.0, 4.0]])
  893. x = np.full((2, 2, 7), 1 / 4)
  894. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  895. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  896. def test_alpha_correct_depth(self):
  897. alpha = np.array([1.0, 2.0, 3.0])
  898. x = np.full((3, 7), 1 / 3)
  899. dirichlet.pdf(x, alpha)
  900. dirichlet.logpdf(x, alpha)
  901. def test_non_simplex_data(self):
  902. alpha = np.array([1.0, 2.0, 3.0])
  903. x = np.full((3, 7), 1 / 2)
  904. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  905. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  906. def test_data_vector_too_short(self):
  907. alpha = np.array([1.0, 2.0, 3.0, 4.0])
  908. x = np.full((2, 7), 1 / 2)
  909. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  910. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  911. def test_data_vector_too_long(self):
  912. alpha = np.array([1.0, 2.0, 3.0, 4.0])
  913. x = np.full((5, 7), 1 / 5)
  914. assert_raises(ValueError, dirichlet.pdf, x, alpha)
  915. assert_raises(ValueError, dirichlet.logpdf, x, alpha)
  916. def test_mean_and_var(self):
  917. alpha = np.array([1., 0.8, 0.2])
  918. d = dirichlet(alpha)
  919. expected_var = [1. / 12., 0.08, 0.03]
  920. expected_mean = [0.5, 0.4, 0.1]
  921. assert_array_almost_equal(d.var(), expected_var)
  922. assert_array_almost_equal(d.mean(), expected_mean)
  923. def test_scalar_values(self):
  924. alpha = np.array([0.2])
  925. d = dirichlet(alpha)
  926. # For alpha of length 1, mean and var should be scalar instead of array
  927. assert_equal(d.mean().ndim, 0)
  928. assert_equal(d.var().ndim, 0)
  929. assert_equal(d.pdf([1.]).ndim, 0)
  930. assert_equal(d.logpdf([1.]).ndim, 0)
  931. def test_K_and_K_minus_1_calls_equal(self):
  932. # Test that calls with K and K-1 entries yield the same results.
  933. np.random.seed(2846)
  934. n = np.random.randint(1, 32)
  935. alpha = np.random.uniform(10e-10, 100, n)
  936. d = dirichlet(alpha)
  937. num_tests = 10
  938. for i in range(num_tests):
  939. x = np.random.uniform(10e-10, 100, n)
  940. x /= np.sum(x)
  941. assert_almost_equal(d.pdf(x[:-1]), d.pdf(x))
  942. def test_multiple_entry_calls(self):
  943. # Test that calls with multiple x vectors as matrix work
  944. np.random.seed(2846)
  945. n = np.random.randint(1, 32)
  946. alpha = np.random.uniform(10e-10, 100, n)
  947. d = dirichlet(alpha)
  948. num_tests = 10
  949. num_multiple = 5
  950. xm = None
  951. for i in range(num_tests):
  952. for m in range(num_multiple):
  953. x = np.random.uniform(10e-10, 100, n)
  954. x /= np.sum(x)
  955. if xm is not None:
  956. xm = np.vstack((xm, x))
  957. else:
  958. xm = x
  959. rm = d.pdf(xm.T)
  960. rs = None
  961. for xs in xm:
  962. r = d.pdf(xs)
  963. if rs is not None:
  964. rs = np.append(rs, r)
  965. else:
  966. rs = r
  967. assert_array_almost_equal(rm, rs)
  968. def test_2D_dirichlet_is_beta(self):
  969. np.random.seed(2846)
  970. alpha = np.random.uniform(10e-10, 100, 2)
  971. d = dirichlet(alpha)
  972. b = beta(alpha[0], alpha[1])
  973. num_tests = 10
  974. for i in range(num_tests):
  975. x = np.random.uniform(10e-10, 100, 2)
  976. x /= np.sum(x)
  977. assert_almost_equal(b.pdf(x), d.pdf([x]))
  978. assert_almost_equal(b.mean(), d.mean()[0])
  979. assert_almost_equal(b.var(), d.var()[0])
  980. def test_multivariate_normal_dimensions_mismatch():
  981. # Regression test for GH #3493. Check that setting up a PDF with a mean of
  982. # length M and a covariance matrix of size (N, N), where M != N, raises a
  983. # ValueError with an informative error message.
  984. mu = np.array([0.0, 0.0])
  985. sigma = np.array([[1.0]])
  986. assert_raises(ValueError, multivariate_normal, mu, sigma)
  987. # A simple check that the right error message was passed along. Checking
  988. # that the entire message is there, word for word, would be somewhat
  989. # fragile, so we just check for the leading part.
  990. try:
  991. multivariate_normal(mu, sigma)
  992. except ValueError as e:
  993. msg = "Dimension mismatch"
  994. assert_equal(str(e)[:len(msg)], msg)
  995. class TestWishart:
  996. def test_scale_dimensions(self):
  997. # Test that we can call the Wishart with various scale dimensions
  998. # Test case: dim=1, scale=1
  999. true_scale = np.array(1, ndmin=2)
  1000. scales = [
  1001. 1, # scalar
  1002. [1], # iterable
  1003. np.array(1), # 0-dim
  1004. np.r_[1], # 1-dim
  1005. np.array(1, ndmin=2) # 2-dim
  1006. ]
  1007. for scale in scales:
  1008. w = wishart(1, scale)
  1009. assert_equal(w.scale, true_scale)
  1010. assert_equal(w.scale.shape, true_scale.shape)
  1011. # Test case: dim=2, scale=[[1,0]
  1012. # [0,2]
  1013. true_scale = np.array([[1,0],
  1014. [0,2]])
  1015. scales = [
  1016. [1,2], # iterable
  1017. np.r_[1,2], # 1-dim
  1018. np.array([[1,0], # 2-dim
  1019. [0,2]])
  1020. ]
  1021. for scale in scales:
  1022. w = wishart(2, scale)
  1023. assert_equal(w.scale, true_scale)
  1024. assert_equal(w.scale.shape, true_scale.shape)
  1025. # We cannot call with a df < dim - 1
  1026. assert_raises(ValueError, wishart, 1, np.eye(2))
  1027. # But we can call with dim - 1 < df < dim
  1028. wishart(1.1, np.eye(2)) # no error
  1029. # see gh-5562
  1030. # We cannot call with a 3-dimension array
  1031. scale = np.array(1, ndmin=3)
  1032. assert_raises(ValueError, wishart, 1, scale)
  1033. def test_quantile_dimensions(self):
  1034. # Test that we can call the Wishart rvs with various quantile dimensions
  1035. # If dim == 1, consider x.shape = [1,1,1]
  1036. X = [
  1037. 1, # scalar
  1038. [1], # iterable
  1039. np.array(1), # 0-dim
  1040. np.r_[1], # 1-dim
  1041. np.array(1, ndmin=2), # 2-dim
  1042. np.array([1], ndmin=3) # 3-dim
  1043. ]
  1044. w = wishart(1,1)
  1045. density = w.pdf(np.array(1, ndmin=3))
  1046. for x in X:
  1047. assert_equal(w.pdf(x), density)
  1048. # If dim == 1, consider x.shape = [1,1,*]
  1049. X = [
  1050. [1,2,3], # iterable
  1051. np.r_[1,2,3], # 1-dim
  1052. np.array([1,2,3], ndmin=3) # 3-dim
  1053. ]
  1054. w = wishart(1,1)
  1055. density = w.pdf(np.array([1,2,3], ndmin=3))
  1056. for x in X:
  1057. assert_equal(w.pdf(x), density)
  1058. # If dim == 2, consider x.shape = [2,2,1]
  1059. # where x[:,:,*] = np.eye(1)*2
  1060. X = [
  1061. 2, # scalar
  1062. [2,2], # iterable
  1063. np.array(2), # 0-dim
  1064. np.r_[2,2], # 1-dim
  1065. np.array([[2,0],
  1066. [0,2]]), # 2-dim
  1067. np.array([[2,0],
  1068. [0,2]])[:,:,np.newaxis] # 3-dim
  1069. ]
  1070. w = wishart(2,np.eye(2))
  1071. density = w.pdf(np.array([[2,0],
  1072. [0,2]])[:,:,np.newaxis])
  1073. for x in X:
  1074. assert_equal(w.pdf(x), density)
  1075. def test_frozen(self):
  1076. # Test that the frozen and non-frozen Wishart gives the same answers
  1077. # Construct an arbitrary positive definite scale matrix
  1078. dim = 4
  1079. scale = np.diag(np.arange(dim)+1)
  1080. scale[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim-1) // 2)
  1081. scale = np.dot(scale.T, scale)
  1082. # Construct a collection of positive definite matrices to test the PDF
  1083. X = []
  1084. for i in range(5):
  1085. x = np.diag(np.arange(dim)+(i+1)**2)
  1086. x[np.tril_indices(dim, k=-1)] = np.arange(dim * (dim-1) // 2)
  1087. x = np.dot(x.T, x)
  1088. X.append(x)
  1089. X = np.array(X).T
  1090. # Construct a 1D and 2D set of parameters
  1091. parameters = [
  1092. (10, 1, np.linspace(0.1, 10, 5)), # 1D case
  1093. (10, scale, X)
  1094. ]
  1095. for (df, scale, x) in parameters:
  1096. w = wishart(df, scale)
  1097. assert_equal(w.var(), wishart.var(df, scale))
  1098. assert_equal(w.mean(), wishart.mean(df, scale))
  1099. assert_equal(w.mode(), wishart.mode(df, scale))
  1100. assert_equal(w.entropy(), wishart.entropy(df, scale))
  1101. assert_equal(w.pdf(x), wishart.pdf(x, df, scale))
  1102. def test_1D_is_chisquared(self):
  1103. # The 1-dimensional Wishart with an identity scale matrix is just a
  1104. # chi-squared distribution.
  1105. # Test variance, mean, entropy, pdf
  1106. # Kolgomorov-Smirnov test for rvs
  1107. np.random.seed(482974)
  1108. sn = 500
  1109. dim = 1
  1110. scale = np.eye(dim)
  1111. df_range = np.arange(1, 10, 2, dtype=float)
  1112. X = np.linspace(0.1,10,num=10)
  1113. for df in df_range:
  1114. w = wishart(df, scale)
  1115. c = chi2(df)
  1116. # Statistics
  1117. assert_allclose(w.var(), c.var())
  1118. assert_allclose(w.mean(), c.mean())
  1119. assert_allclose(w.entropy(), c.entropy())
  1120. # PDF
  1121. assert_allclose(w.pdf(X), c.pdf(X))
  1122. # rvs
  1123. rvs = w.rvs(size=sn)
  1124. args = (df,)
  1125. alpha = 0.01
  1126. check_distribution_rvs('chi2', args, alpha, rvs)
  1127. def test_is_scaled_chisquared(self):
  1128. # The 2-dimensional Wishart with an arbitrary scale matrix can be
  1129. # transformed to a scaled chi-squared distribution.
  1130. # For :math:`S \sim W_p(V,n)` and :math:`\lambda \in \mathbb{R}^p` we have
  1131. # :math:`\lambda' S \lambda \sim \lambda' V \lambda \times \chi^2(n)`
  1132. np.random.seed(482974)
  1133. sn = 500
  1134. df = 10
  1135. dim = 4
  1136. # Construct an arbitrary positive definite matrix
  1137. scale = np.diag(np.arange(4)+1)
  1138. scale[np.tril_indices(4, k=-1)] = np.arange(6)
  1139. scale = np.dot(scale.T, scale)
  1140. # Use :math:`\lambda = [1, \dots, 1]'`
  1141. lamda = np.ones((dim,1))
  1142. sigma_lamda = lamda.T.dot(scale).dot(lamda).squeeze()
  1143. w = wishart(df, sigma_lamda)
  1144. c = chi2(df, scale=sigma_lamda)
  1145. # Statistics
  1146. assert_allclose(w.var(), c.var())
  1147. assert_allclose(w.mean(), c.mean())
  1148. assert_allclose(w.entropy(), c.entropy())
  1149. # PDF
  1150. X = np.linspace(0.1,10,num=10)
  1151. assert_allclose(w.pdf(X), c.pdf(X))
  1152. # rvs
  1153. rvs = w.rvs(size=sn)
  1154. args = (df,0,sigma_lamda)
  1155. alpha = 0.01
  1156. check_distribution_rvs('chi2', args, alpha, rvs)
  1157. class TestMultinomial:
  1158. def test_logpmf(self):
  1159. vals1 = multinomial.logpmf((3,4), 7, (0.3, 0.7))
  1160. assert_allclose(vals1, -1.483270127243324, rtol=1e-8)
  1161. vals2 = multinomial.logpmf([3, 4], 0, [.3, .7])
  1162. assert_allclose(vals2, np.NAN, rtol=1e-8)
  1163. vals3 = multinomial.logpmf([3, 4], 0, [-2, 3])
  1164. assert_allclose(vals3, np.NAN, rtol=1e-8)
  1165. def test_reduces_binomial(self):
  1166. # test that the multinomial pmf reduces to the binomial pmf in the 2d
  1167. # case
  1168. val1 = multinomial.logpmf((3, 4), 7, (0.3, 0.7))
  1169. val2 = binom.logpmf(3, 7, 0.3)
  1170. assert_allclose(val1, val2, rtol=1e-8)
  1171. val1 = multinomial.pmf((6, 8), 14, (0.1, 0.9))
  1172. val2 = binom.pmf(6, 14, 0.1)
  1173. assert_allclose(val1, val2, rtol=1e-8)
  1174. def test_R(self):
  1175. # test against the values produced by this R code
  1176. # (https://stat.ethz.ch/R-manual/R-devel/library/stats/html/Multinom.html)
  1177. # X <- t(as.matrix(expand.grid(0:3, 0:3))); X <- X[, colSums(X) <= 3]
  1178. # X <- rbind(X, 3:3 - colSums(X)); dimnames(X) <- list(letters[1:3], NULL)
  1179. # X
  1180. # apply(X, 2, function(x) dmultinom(x, prob = c(1,2,5)))
  1181. n, p = 3, [1./8, 2./8, 5./8]
  1182. r_vals = {(0, 0, 3): 0.244140625, (1, 0, 2): 0.146484375,
  1183. (2, 0, 1): 0.029296875, (3, 0, 0): 0.001953125,
  1184. (0, 1, 2): 0.292968750, (1, 1, 1): 0.117187500,
  1185. (2, 1, 0): 0.011718750, (0, 2, 1): 0.117187500,
  1186. (1, 2, 0): 0.023437500, (0, 3, 0): 0.015625000}
  1187. for x in r_vals:
  1188. assert_allclose(multinomial.pmf(x, n, p), r_vals[x], atol=1e-14)
  1189. def test_rvs_np(self):
  1190. # test that .rvs agrees w/numpy
  1191. sc_rvs = multinomial.rvs(3, [1/4.]*3, size=7, random_state=123)
  1192. rndm = np.random.RandomState(123)
  1193. np_rvs = rndm.multinomial(3, [1/4.]*3, size=7)
  1194. assert_equal(sc_rvs, np_rvs)
  1195. def test_pmf(self):
  1196. vals0 = multinomial.pmf((5,), 5, (1,))
  1197. assert_allclose(vals0, 1, rtol=1e-8)
  1198. vals1 = multinomial.pmf((3,4), 7, (.3, .7))
  1199. assert_allclose(vals1, .22689449999999994, rtol=1e-8)
  1200. vals2 = multinomial.pmf([[[3,5],[0,8]], [[-1, 9], [1, 1]]], 8,
  1201. (.1, .9))
  1202. assert_allclose(vals2, [[.03306744, .43046721], [0, 0]], rtol=1e-8)
  1203. x = np.empty((0,2), dtype=np.float64)
  1204. vals3 = multinomial.pmf(x, 4, (.3, .7))
  1205. assert_equal(vals3, np.empty([], dtype=np.float64))
  1206. vals4 = multinomial.pmf([1,2], 4, (.3, .7))
  1207. assert_allclose(vals4, 0, rtol=1e-8)
  1208. vals5 = multinomial.pmf([3, 3, 0], 6, [2/3.0, 1/3.0, 0])
  1209. assert_allclose(vals5, 0.219478737997, rtol=1e-8)
  1210. def test_pmf_broadcasting(self):
  1211. vals0 = multinomial.pmf([1, 2], 3, [[.1, .9], [.2, .8]])
  1212. assert_allclose(vals0, [.243, .384], rtol=1e-8)
  1213. vals1 = multinomial.pmf([1, 2], [3, 4], [.1, .9])
  1214. assert_allclose(vals1, [.243, 0], rtol=1e-8)
  1215. vals2 = multinomial.pmf([[[1, 2], [1, 1]]], 3, [.1, .9])
  1216. assert_allclose(vals2, [[.243, 0]], rtol=1e-8)
  1217. vals3 = multinomial.pmf([1, 2], [[[3], [4]]], [.1, .9])
  1218. assert_allclose(vals3, [[[.243], [0]]], rtol=1e-8)
  1219. vals4 = multinomial.pmf([[1, 2], [1,1]], [[[[3]]]], [.1, .9])
  1220. assert_allclose(vals4, [[[[.243, 0]]]], rtol=1e-8)
  1221. def test_cov(self):
  1222. cov1 = multinomial.cov(5, (.2, .3, .5))
  1223. cov2 = [[5*.2*.8, -5*.2*.3, -5*.2*.5],
  1224. [-5*.3*.2, 5*.3*.7, -5*.3*.5],
  1225. [-5*.5*.2, -5*.5*.3, 5*.5*.5]]
  1226. assert_allclose(cov1, cov2, rtol=1e-8)
  1227. def test_cov_broadcasting(self):
  1228. cov1 = multinomial.cov(5, [[.1, .9], [.2, .8]])
  1229. cov2 = [[[.45, -.45],[-.45, .45]], [[.8, -.8], [-.8, .8]]]
  1230. assert_allclose(cov1, cov2, rtol=1e-8)
  1231. cov3 = multinomial.cov([4, 5], [.1, .9])
  1232. cov4 = [[[.36, -.36], [-.36, .36]], [[.45, -.45], [-.45, .45]]]
  1233. assert_allclose(cov3, cov4, rtol=1e-8)
  1234. cov5 = multinomial.cov([4, 5], [[.3, .7], [.4, .6]])
  1235. cov6 = [[[4*.3*.7, -4*.3*.7], [-4*.3*.7, 4*.3*.7]],
  1236. [[5*.4*.6, -5*.4*.6], [-5*.4*.6, 5*.4*.6]]]
  1237. assert_allclose(cov5, cov6, rtol=1e-8)
  1238. def test_entropy(self):
  1239. # this is equivalent to a binomial distribution with n=2, so the
  1240. # entropy .77899774929 is easily computed "by hand"
  1241. ent0 = multinomial.entropy(2, [.2, .8])
  1242. assert_allclose(ent0, binom.entropy(2, .2), rtol=1e-8)
  1243. def test_entropy_broadcasting(self):
  1244. ent0 = multinomial.entropy([2, 3], [.2, .3])
  1245. assert_allclose(ent0, [binom.entropy(2, .2), binom.entropy(3, .2)],
  1246. rtol=1e-8)
  1247. ent1 = multinomial.entropy([7, 8], [[.3, .7], [.4, .6]])
  1248. assert_allclose(ent1, [binom.entropy(7, .3), binom.entropy(8, .4)],
  1249. rtol=1e-8)
  1250. ent2 = multinomial.entropy([[7], [8]], [[.3, .7], [.4, .6]])
  1251. assert_allclose(ent2,
  1252. [[binom.entropy(7, .3), binom.entropy(7, .4)],
  1253. [binom.entropy(8, .3), binom.entropy(8, .4)]],
  1254. rtol=1e-8)
  1255. def test_mean(self):
  1256. mean1 = multinomial.mean(5, [.2, .8])
  1257. assert_allclose(mean1, [5*.2, 5*.8], rtol=1e-8)
  1258. def test_mean_broadcasting(self):
  1259. mean1 = multinomial.mean([5, 6], [.2, .8])
  1260. assert_allclose(mean1, [[5*.2, 5*.8], [6*.2, 6*.8]], rtol=1e-8)
  1261. def test_frozen(self):
  1262. # The frozen distribution should agree with the regular one
  1263. np.random.seed(1234)
  1264. n = 12
  1265. pvals = (.1, .2, .3, .4)
  1266. x = [[0,0,0,12],[0,0,1,11],[0,1,1,10],[1,1,1,9],[1,1,2,8]]
  1267. x = np.asarray(x, dtype=np.float64)
  1268. mn_frozen = multinomial(n, pvals)
  1269. assert_allclose(mn_frozen.pmf(x), multinomial.pmf(x, n, pvals))
  1270. assert_allclose(mn_frozen.logpmf(x), multinomial.logpmf(x, n, pvals))
  1271. assert_allclose(mn_frozen.entropy(), multinomial.entropy(n, pvals))
  1272. def test_gh_11860(self):
  1273. # gh-11860 reported cases in which the adjustments made by multinomial
  1274. # to the last element of `p` can cause `nan`s even when the input is
  1275. # essentially valid. Check that a pathological case returns a finite,
  1276. # nonzero result. (This would fail in main before the PR.)
  1277. n = 88
  1278. rng = np.random.default_rng(8879715917488330089)
  1279. p = rng.random(n)
  1280. p[-1] = 1e-30
  1281. p /= np.sum(p)
  1282. x = np.ones(n)
  1283. logpmf = multinomial.logpmf(x, n, p)
  1284. assert np.isfinite(logpmf)
  1285. class TestInvwishart:
  1286. def test_frozen(self):
  1287. # Test that the frozen and non-frozen inverse Wishart gives the same
  1288. # answers
  1289. # Construct an arbitrary positive definite scale matrix
  1290. dim = 4
  1291. scale = np.diag(np.arange(dim)+1)
  1292. scale[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)/2)
  1293. scale = np.dot(scale.T, scale)
  1294. # Construct a collection of positive definite matrices to test the PDF
  1295. X = []
  1296. for i in range(5):
  1297. x = np.diag(np.arange(dim)+(i+1)**2)
  1298. x[np.tril_indices(dim, k=-1)] = np.arange(dim*(dim-1)/2)
  1299. x = np.dot(x.T, x)
  1300. X.append(x)
  1301. X = np.array(X).T
  1302. # Construct a 1D and 2D set of parameters
  1303. parameters = [
  1304. (10, 1, np.linspace(0.1, 10, 5)), # 1D case
  1305. (10, scale, X)
  1306. ]
  1307. for (df, scale, x) in parameters:
  1308. iw = invwishart(df, scale)
  1309. assert_equal(iw.var(), invwishart.var(df, scale))
  1310. assert_equal(iw.mean(), invwishart.mean(df, scale))
  1311. assert_equal(iw.mode(), invwishart.mode(df, scale))
  1312. assert_allclose(iw.pdf(x), invwishart.pdf(x, df, scale))
  1313. def test_1D_is_invgamma(self):
  1314. # The 1-dimensional inverse Wishart with an identity scale matrix is
  1315. # just an inverse gamma distribution.
  1316. # Test variance, mean, pdf
  1317. # Kolgomorov-Smirnov test for rvs
  1318. np.random.seed(482974)
  1319. sn = 500
  1320. dim = 1
  1321. scale = np.eye(dim)
  1322. df_range = np.arange(5, 20, 2, dtype=float)
  1323. X = np.linspace(0.1,10,num=10)
  1324. for df in df_range:
  1325. iw = invwishart(df, scale)
  1326. ig = invgamma(df/2, scale=1./2)
  1327. # Statistics
  1328. assert_allclose(iw.var(), ig.var())
  1329. assert_allclose(iw.mean(), ig.mean())
  1330. # PDF
  1331. assert_allclose(iw.pdf(X), ig.pdf(X))
  1332. # rvs
  1333. rvs = iw.rvs(size=sn)
  1334. args = (df/2, 0, 1./2)
  1335. alpha = 0.01
  1336. check_distribution_rvs('invgamma', args, alpha, rvs)
  1337. def test_wishart_invwishart_2D_rvs(self):
  1338. dim = 3
  1339. df = 10
  1340. # Construct a simple non-diagonal positive definite matrix
  1341. scale = np.eye(dim)
  1342. scale[0,1] = 0.5
  1343. scale[1,0] = 0.5
  1344. # Construct frozen Wishart and inverse Wishart random variables
  1345. w = wishart(df, scale)
  1346. iw = invwishart(df, scale)
  1347. # Get the generated random variables from a known seed
  1348. np.random.seed(248042)
  1349. w_rvs = wishart.rvs(df, scale)
  1350. np.random.seed(248042)
  1351. frozen_w_rvs = w.rvs()
  1352. np.random.seed(248042)
  1353. iw_rvs = invwishart.rvs(df, scale)
  1354. np.random.seed(248042)
  1355. frozen_iw_rvs = iw.rvs()
  1356. # Manually calculate what it should be, based on the Bartlett (1933)
  1357. # decomposition of a Wishart into D A A' D', where D is the Cholesky
  1358. # factorization of the scale matrix and A is the lower triangular matrix
  1359. # with the square root of chi^2 variates on the diagonal and N(0,1)
  1360. # variates in the lower triangle.
  1361. np.random.seed(248042)
  1362. covariances = np.random.normal(size=3)
  1363. variances = np.r_[
  1364. np.random.chisquare(df),
  1365. np.random.chisquare(df-1),
  1366. np.random.chisquare(df-2),
  1367. ]**0.5
  1368. # Construct the lower-triangular A matrix
  1369. A = np.diag(variances)
  1370. A[np.tril_indices(dim, k=-1)] = covariances
  1371. # Wishart random variate
  1372. D = np.linalg.cholesky(scale)
  1373. DA = D.dot(A)
  1374. manual_w_rvs = np.dot(DA, DA.T)
  1375. # inverse Wishart random variate
  1376. # Supposing that the inverse wishart has scale matrix `scale`, then the
  1377. # random variate is the inverse of a random variate drawn from a Wishart
  1378. # distribution with scale matrix `inv_scale = np.linalg.inv(scale)`
  1379. iD = np.linalg.cholesky(np.linalg.inv(scale))
  1380. iDA = iD.dot(A)
  1381. manual_iw_rvs = np.linalg.inv(np.dot(iDA, iDA.T))
  1382. # Test for equality
  1383. assert_allclose(w_rvs, manual_w_rvs)
  1384. assert_allclose(frozen_w_rvs, manual_w_rvs)
  1385. assert_allclose(iw_rvs, manual_iw_rvs)
  1386. assert_allclose(frozen_iw_rvs, manual_iw_rvs)
  1387. def test_cho_inv_batch(self):
  1388. """Regression test for gh-8844."""
  1389. a0 = np.array([[2, 1, 0, 0.5],
  1390. [1, 2, 0.5, 0.5],
  1391. [0, 0.5, 3, 1],
  1392. [0.5, 0.5, 1, 2]])
  1393. a1 = np.array([[2, -1, 0, 0.5],
  1394. [-1, 2, 0.5, 0.5],
  1395. [0, 0.5, 3, 1],
  1396. [0.5, 0.5, 1, 4]])
  1397. a = np.array([a0, a1])
  1398. ainv = a.copy()
  1399. _cho_inv_batch(ainv)
  1400. ident = np.eye(4)
  1401. assert_allclose(a[0].dot(ainv[0]), ident, atol=1e-15)
  1402. assert_allclose(a[1].dot(ainv[1]), ident, atol=1e-15)
  1403. def test_logpdf_4x4(self):
  1404. """Regression test for gh-8844."""
  1405. X = np.array([[2, 1, 0, 0.5],
  1406. [1, 2, 0.5, 0.5],
  1407. [0, 0.5, 3, 1],
  1408. [0.5, 0.5, 1, 2]])
  1409. Psi = np.array([[9, 7, 3, 1],
  1410. [7, 9, 5, 1],
  1411. [3, 5, 8, 2],
  1412. [1, 1, 2, 9]])
  1413. nu = 6
  1414. prob = invwishart.logpdf(X, nu, Psi)
  1415. # Explicit calculation from the formula on wikipedia.
  1416. p = X.shape[0]
  1417. sig, logdetX = np.linalg.slogdet(X)
  1418. sig, logdetPsi = np.linalg.slogdet(Psi)
  1419. M = np.linalg.solve(X, Psi)
  1420. expected = ((nu/2)*logdetPsi
  1421. - (nu*p/2)*np.log(2)
  1422. - multigammaln(nu/2, p)
  1423. - (nu + p + 1)/2*logdetX
  1424. - 0.5*M.trace())
  1425. assert_allclose(prob, expected)
  1426. class TestSpecialOrthoGroup:
  1427. def test_reproducibility(self):
  1428. np.random.seed(514)
  1429. x = special_ortho_group.rvs(3)
  1430. expected = np.array([[-0.99394515, -0.04527879, 0.10011432],
  1431. [0.04821555, -0.99846897, 0.02711042],
  1432. [0.09873351, 0.03177334, 0.99460653]])
  1433. assert_array_almost_equal(x, expected)
  1434. random_state = np.random.RandomState(seed=514)
  1435. x = special_ortho_group.rvs(3, random_state=random_state)
  1436. assert_array_almost_equal(x, expected)
  1437. def test_invalid_dim(self):
  1438. assert_raises(ValueError, special_ortho_group.rvs, None)
  1439. assert_raises(ValueError, special_ortho_group.rvs, (2, 2))
  1440. assert_raises(ValueError, special_ortho_group.rvs, 1)
  1441. assert_raises(ValueError, special_ortho_group.rvs, 2.5)
  1442. def test_frozen_matrix(self):
  1443. dim = 7
  1444. frozen = special_ortho_group(dim)
  1445. rvs1 = frozen.rvs(random_state=1234)
  1446. rvs2 = special_ortho_group.rvs(dim, random_state=1234)
  1447. assert_equal(rvs1, rvs2)
  1448. def test_det_and_ortho(self):
  1449. xs = [special_ortho_group.rvs(dim)
  1450. for dim in range(2,12)
  1451. for i in range(3)]
  1452. # Test that determinants are always +1
  1453. dets = [np.linalg.det(x) for x in xs]
  1454. assert_allclose(dets, [1.]*30, rtol=1e-13)
  1455. # Test that these are orthogonal matrices
  1456. for x in xs:
  1457. assert_array_almost_equal(np.dot(x, x.T),
  1458. np.eye(x.shape[0]))
  1459. def test_haar(self):
  1460. # Test that the distribution is constant under rotation
  1461. # Every column should have the same distribution
  1462. # Additionally, the distribution should be invariant under another rotation
  1463. # Generate samples
  1464. dim = 5
  1465. samples = 1000 # Not too many, or the test takes too long
  1466. ks_prob = .05
  1467. np.random.seed(514)
  1468. xs = special_ortho_group.rvs(dim, size=samples)
  1469. # Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
  1470. # effectively picking off entries in the matrices of xs.
  1471. # These projections should all have the same disribution,
  1472. # establishing rotational invariance. We use the two-sided
  1473. # KS test to confirm this.
  1474. # We could instead test that angles between random vectors
  1475. # are uniformly distributed, but the below is sufficient.
  1476. # It is not feasible to consider all pairs, so pick a few.
  1477. els = ((0,0), (0,2), (1,4), (2,3))
  1478. #proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
  1479. proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
  1480. pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
  1481. ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
  1482. assert_array_less([ks_prob]*len(pairs), ks_tests)
  1483. class TestOrthoGroup:
  1484. def test_reproducibility(self):
  1485. seed = 514
  1486. np.random.seed(seed)
  1487. x = ortho_group.rvs(3)
  1488. x2 = ortho_group.rvs(3, random_state=seed)
  1489. # Note this matrix has det -1, distinguishing O(N) from SO(N)
  1490. assert_almost_equal(np.linalg.det(x), -1)
  1491. expected = np.array([[0.381686, -0.090374, 0.919863],
  1492. [0.905794, -0.161537, -0.391718],
  1493. [-0.183993, -0.98272, -0.020204]])
  1494. assert_array_almost_equal(x, expected)
  1495. assert_array_almost_equal(x2, expected)
  1496. def test_invalid_dim(self):
  1497. assert_raises(ValueError, ortho_group.rvs, None)
  1498. assert_raises(ValueError, ortho_group.rvs, (2, 2))
  1499. assert_raises(ValueError, ortho_group.rvs, 1)
  1500. assert_raises(ValueError, ortho_group.rvs, 2.5)
  1501. def test_frozen_matrix(self):
  1502. dim = 7
  1503. frozen = ortho_group(dim)
  1504. frozen_seed = ortho_group(dim, seed=1234)
  1505. rvs1 = frozen.rvs(random_state=1234)
  1506. rvs2 = ortho_group.rvs(dim, random_state=1234)
  1507. rvs3 = frozen_seed.rvs(size=1)
  1508. assert_equal(rvs1, rvs2)
  1509. assert_equal(rvs1, rvs3)
  1510. def test_det_and_ortho(self):
  1511. xs = [[ortho_group.rvs(dim)
  1512. for i in range(10)]
  1513. for dim in range(2,12)]
  1514. # Test that abs determinants are always +1
  1515. dets = np.array([[np.linalg.det(x) for x in xx] for xx in xs])
  1516. assert_allclose(np.fabs(dets), np.ones(dets.shape), rtol=1e-13)
  1517. # Test that we get both positive and negative determinants
  1518. # Check that we have at least one and less than 10 negative dets in a sample of 10. The rest are positive by the previous test.
  1519. # Test each dimension separately
  1520. assert_array_less([0]*10, [np.nonzero(d < 0)[0].shape[0] for d in dets])
  1521. assert_array_less([np.nonzero(d < 0)[0].shape[0] for d in dets], [10]*10)
  1522. # Test that these are orthogonal matrices
  1523. for xx in xs:
  1524. for x in xx:
  1525. assert_array_almost_equal(np.dot(x, x.T),
  1526. np.eye(x.shape[0]))
  1527. def test_haar(self):
  1528. # Test that the distribution is constant under rotation
  1529. # Every column should have the same distribution
  1530. # Additionally, the distribution should be invariant under another rotation
  1531. # Generate samples
  1532. dim = 5
  1533. samples = 1000 # Not too many, or the test takes too long
  1534. ks_prob = .05
  1535. np.random.seed(518) # Note that the test is sensitive to seed too
  1536. xs = ortho_group.rvs(dim, size=samples)
  1537. # Dot a few rows (0, 1, 2) with unit vectors (0, 2, 4, 3),
  1538. # effectively picking off entries in the matrices of xs.
  1539. # These projections should all have the same disribution,
  1540. # establishing rotational invariance. We use the two-sided
  1541. # KS test to confirm this.
  1542. # We could instead test that angles between random vectors
  1543. # are uniformly distributed, but the below is sufficient.
  1544. # It is not feasible to consider all pairs, so pick a few.
  1545. els = ((0,0), (0,2), (1,4), (2,3))
  1546. #proj = {(er, ec): [x[er][ec] for x in xs] for er, ec in els}
  1547. proj = dict(((er, ec), sorted([x[er][ec] for x in xs])) for er, ec in els)
  1548. pairs = [(e0, e1) for e0 in els for e1 in els if e0 > e1]
  1549. ks_tests = [ks_2samp(proj[p0], proj[p1])[1] for (p0, p1) in pairs]
  1550. assert_array_less([ks_prob]*len(pairs), ks_tests)
  1551. @pytest.mark.slow
  1552. def test_pairwise_distances(self):
  1553. # Test that the distribution of pairwise distances is close to correct.
  1554. np.random.seed(514)
  1555. def random_ortho(dim):
  1556. u, _s, v = np.linalg.svd(np.random.normal(size=(dim, dim)))
  1557. return np.dot(u, v)
  1558. for dim in range(2, 6):
  1559. def generate_test_statistics(rvs, N=1000, eps=1e-10):
  1560. stats = np.array([
  1561. np.sum((rvs(dim=dim) - rvs(dim=dim))**2)
  1562. for _ in range(N)
  1563. ])
  1564. # Add a bit of noise to account for numeric accuracy.
  1565. stats += np.random.uniform(-eps, eps, size=stats.shape)
  1566. return stats
  1567. expected = generate_test_statistics(random_ortho)
  1568. actual = generate_test_statistics(scipy.stats.ortho_group.rvs)
  1569. _D, p = scipy.stats.ks_2samp(expected, actual)
  1570. assert_array_less(.05, p)
  1571. class TestRandomCorrelation:
  1572. def test_reproducibility(self):
  1573. np.random.seed(514)
  1574. eigs = (.5, .8, 1.2, 1.5)
  1575. x = random_correlation.rvs(eigs)
  1576. x2 = random_correlation.rvs(eigs, random_state=514)
  1577. expected = np.array([[1., -0.184851, 0.109017, -0.227494],
  1578. [-0.184851, 1., 0.231236, 0.326669],
  1579. [0.109017, 0.231236, 1., -0.178912],
  1580. [-0.227494, 0.326669, -0.178912, 1.]])
  1581. assert_array_almost_equal(x, expected)
  1582. assert_array_almost_equal(x2, expected)
  1583. def test_invalid_eigs(self):
  1584. assert_raises(ValueError, random_correlation.rvs, None)
  1585. assert_raises(ValueError, random_correlation.rvs, 'test')
  1586. assert_raises(ValueError, random_correlation.rvs, 2.5)
  1587. assert_raises(ValueError, random_correlation.rvs, [2.5])
  1588. assert_raises(ValueError, random_correlation.rvs, [[1,2],[3,4]])
  1589. assert_raises(ValueError, random_correlation.rvs, [2.5, -.5])
  1590. assert_raises(ValueError, random_correlation.rvs, [1, 2, .1])
  1591. def test_frozen_matrix(self):
  1592. eigs = (.5, .8, 1.2, 1.5)
  1593. frozen = random_correlation(eigs)
  1594. frozen_seed = random_correlation(eigs, seed=514)
  1595. rvs1 = random_correlation.rvs(eigs, random_state=514)
  1596. rvs2 = frozen.rvs(random_state=514)
  1597. rvs3 = frozen_seed.rvs()
  1598. assert_equal(rvs1, rvs2)
  1599. assert_equal(rvs1, rvs3)
  1600. def test_definition(self):
  1601. # Test the definition of a correlation matrix in several dimensions:
  1602. #
  1603. # 1. Det is product of eigenvalues (and positive by construction
  1604. # in examples)
  1605. # 2. 1's on diagonal
  1606. # 3. Matrix is symmetric
  1607. def norm(i, e):
  1608. return i*e/sum(e)
  1609. np.random.seed(123)
  1610. eigs = [norm(i, np.random.uniform(size=i)) for i in range(2, 6)]
  1611. eigs.append([4,0,0,0])
  1612. ones = [[1.]*len(e) for e in eigs]
  1613. xs = [random_correlation.rvs(e) for e in eigs]
  1614. # Test that determinants are products of eigenvalues
  1615. # These are positive by construction
  1616. # Could also test that the eigenvalues themselves are correct,
  1617. # but this seems sufficient.
  1618. dets = [np.fabs(np.linalg.det(x)) for x in xs]
  1619. dets_known = [np.prod(e) for e in eigs]
  1620. assert_allclose(dets, dets_known, rtol=1e-13, atol=1e-13)
  1621. # Test for 1's on the diagonal
  1622. diags = [np.diag(x) for x in xs]
  1623. for a, b in zip(diags, ones):
  1624. assert_allclose(a, b, rtol=1e-13)
  1625. # Correlation matrices are symmetric
  1626. for x in xs:
  1627. assert_allclose(x, x.T, rtol=1e-13)
  1628. def test_to_corr(self):
  1629. # Check some corner cases in to_corr
  1630. # ajj == 1
  1631. m = np.array([[0.1, 0], [0, 1]], dtype=float)
  1632. m = random_correlation._to_corr(m)
  1633. assert_allclose(m, np.array([[1, 0], [0, 0.1]]))
  1634. # Floating point overflow; fails to compute the correct
  1635. # rotation, but should still produce some valid rotation
  1636. # rather than infs/nans
  1637. with np.errstate(over='ignore'):
  1638. g = np.array([[0, 1], [-1, 0]])
  1639. m0 = np.array([[1e300, 0], [0, np.nextafter(1, 0)]], dtype=float)
  1640. m = random_correlation._to_corr(m0.copy())
  1641. assert_allclose(m, g.T.dot(m0).dot(g))
  1642. m0 = np.array([[0.9, 1e300], [1e300, 1.1]], dtype=float)
  1643. m = random_correlation._to_corr(m0.copy())
  1644. assert_allclose(m, g.T.dot(m0).dot(g))
  1645. # Zero discriminant; should set the first diag entry to 1
  1646. m0 = np.array([[2, 1], [1, 2]], dtype=float)
  1647. m = random_correlation._to_corr(m0.copy())
  1648. assert_allclose(m[0,0], 1)
  1649. # Slightly negative discriminant; should be approx correct still
  1650. m0 = np.array([[2 + 1e-7, 1], [1, 2]], dtype=float)
  1651. m = random_correlation._to_corr(m0.copy())
  1652. assert_allclose(m[0,0], 1)
  1653. class TestUniformDirection:
  1654. @pytest.mark.parametrize("dim", [1, 3])
  1655. @pytest.mark.parametrize("size", [None, 1, 5, (5, 4)])
  1656. def test_samples(self, dim, size):
  1657. # test that samples have correct shape and norm 1
  1658. rng = np.random.default_rng(2777937887058094419)
  1659. uniform_direction_dist = uniform_direction(dim, seed=rng)
  1660. samples = uniform_direction_dist.rvs(size)
  1661. mean, cov = np.zeros(dim), np.eye(dim)
  1662. expected_shape = rng.multivariate_normal(mean, cov, size=size).shape
  1663. assert samples.shape == expected_shape
  1664. norms = np.linalg.norm(samples, axis=-1)
  1665. assert_allclose(norms, 1.)
  1666. @pytest.mark.parametrize("dim", [None, 0, (2, 2), 2.5])
  1667. def test_invalid_dim(self, dim):
  1668. message = ("Dimension of vector must be specified, "
  1669. "and must be an integer greater than 0.")
  1670. with pytest.raises(ValueError, match=message):
  1671. uniform_direction.rvs(dim)
  1672. def test_frozen_distribution(self):
  1673. dim = 5
  1674. frozen = uniform_direction(dim)
  1675. frozen_seed = uniform_direction(dim, seed=514)
  1676. rvs1 = frozen.rvs(random_state=514)
  1677. rvs2 = uniform_direction.rvs(dim, random_state=514)
  1678. rvs3 = frozen_seed.rvs()
  1679. assert_equal(rvs1, rvs2)
  1680. assert_equal(rvs1, rvs3)
  1681. @pytest.mark.parametrize("dim", [2, 5, 8])
  1682. def test_uniform(self, dim):
  1683. rng = np.random.default_rng(1036978481269651776)
  1684. spherical_dist = uniform_direction(dim, seed=rng)
  1685. # generate random, orthogonal vectors
  1686. v1, v2 = spherical_dist.rvs(size=2)
  1687. v2 -= v1 @ v2 * v1
  1688. v2 /= np.linalg.norm(v2)
  1689. assert_allclose(v1 @ v2, 0, atol=1e-14) # orthogonal
  1690. # generate data and project onto orthogonal vectors
  1691. samples = spherical_dist.rvs(size=10000)
  1692. s1 = samples @ v1
  1693. s2 = samples @ v2
  1694. angles = np.arctan2(s1, s2)
  1695. # test that angles follow a uniform distribution
  1696. # normalize angles to range [0, 1]
  1697. angles += np.pi
  1698. angles /= 2*np.pi
  1699. # perform KS test
  1700. uniform_dist = uniform()
  1701. kstest_result = kstest(angles, uniform_dist.cdf)
  1702. assert kstest_result.pvalue > 0.05
  1703. class TestUnitaryGroup:
  1704. def test_reproducibility(self):
  1705. np.random.seed(514)
  1706. x = unitary_group.rvs(3)
  1707. x2 = unitary_group.rvs(3, random_state=514)
  1708. expected = np.array([[0.308771+0.360312j, 0.044021+0.622082j, 0.160327+0.600173j],
  1709. [0.732757+0.297107j, 0.076692-0.4614j, -0.394349+0.022613j],
  1710. [-0.148844+0.357037j, -0.284602-0.557949j, 0.607051+0.299257j]])
  1711. assert_array_almost_equal(x, expected)
  1712. assert_array_almost_equal(x2, expected)
  1713. def test_invalid_dim(self):
  1714. assert_raises(ValueError, unitary_group.rvs, None)
  1715. assert_raises(ValueError, unitary_group.rvs, (2, 2))
  1716. assert_raises(ValueError, unitary_group.rvs, 1)
  1717. assert_raises(ValueError, unitary_group.rvs, 2.5)
  1718. def test_frozen_matrix(self):
  1719. dim = 7
  1720. frozen = unitary_group(dim)
  1721. frozen_seed = unitary_group(dim, seed=514)
  1722. rvs1 = frozen.rvs(random_state=514)
  1723. rvs2 = unitary_group.rvs(dim, random_state=514)
  1724. rvs3 = frozen_seed.rvs(size=1)
  1725. assert_equal(rvs1, rvs2)
  1726. assert_equal(rvs1, rvs3)
  1727. def test_unitarity(self):
  1728. xs = [unitary_group.rvs(dim)
  1729. for dim in range(2,12)
  1730. for i in range(3)]
  1731. # Test that these are unitary matrices
  1732. for x in xs:
  1733. assert_allclose(np.dot(x, x.conj().T), np.eye(x.shape[0]), atol=1e-15)
  1734. def test_haar(self):
  1735. # Test that the eigenvalues, which lie on the unit circle in
  1736. # the complex plane, are uncorrelated.
  1737. # Generate samples
  1738. dim = 5
  1739. samples = 1000 # Not too many, or the test takes too long
  1740. np.random.seed(514) # Note that the test is sensitive to seed too
  1741. xs = unitary_group.rvs(dim, size=samples)
  1742. # The angles "x" of the eigenvalues should be uniformly distributed
  1743. # Overall this seems to be a necessary but weak test of the distribution.
  1744. eigs = np.vstack([scipy.linalg.eigvals(x) for x in xs])
  1745. x = np.arctan2(eigs.imag, eigs.real)
  1746. res = kstest(x.ravel(), uniform(-np.pi, 2*np.pi).cdf)
  1747. assert_(res.pvalue > 0.05)
  1748. class TestMultivariateT:
  1749. # These tests were created by running vpa(mvtpdf(...)) in MATLAB. The
  1750. # function takes no `mu` parameter. The tests were run as
  1751. #
  1752. # >> ans = vpa(mvtpdf(x - mu, shape, df));
  1753. #
  1754. PDF_TESTS = [(
  1755. # x
  1756. [
  1757. [1, 2],
  1758. [4, 1],
  1759. [2, 1],
  1760. [2, 4],
  1761. [1, 4],
  1762. [4, 1],
  1763. [3, 2],
  1764. [3, 3],
  1765. [4, 4],
  1766. [5, 1],
  1767. ],
  1768. # loc
  1769. [0, 0],
  1770. # shape
  1771. [
  1772. [1, 0],
  1773. [0, 1]
  1774. ],
  1775. # df
  1776. 4,
  1777. # ans
  1778. [
  1779. 0.013972450422333741737457302178882,
  1780. 0.0010998721906793330026219646100571,
  1781. 0.013972450422333741737457302178882,
  1782. 0.00073682844024025606101402363634634,
  1783. 0.0010998721906793330026219646100571,
  1784. 0.0010998721906793330026219646100571,
  1785. 0.0020732579600816823488240725481546,
  1786. 0.00095660371505271429414668515889275,
  1787. 0.00021831953784896498569831346792114,
  1788. 0.00037725616140301147447000396084604
  1789. ]
  1790. ), (
  1791. # x
  1792. [
  1793. [0.9718, 0.1298, 0.8134],
  1794. [0.4922, 0.5522, 0.7185],
  1795. [0.3010, 0.1491, 0.5008],
  1796. [0.5971, 0.2585, 0.8940],
  1797. [0.5434, 0.5287, 0.9507],
  1798. ],
  1799. # loc
  1800. [-1, 1, 50],
  1801. # shape
  1802. [
  1803. [1.0000, 0.5000, 0.2500],
  1804. [0.5000, 1.0000, -0.1000],
  1805. [0.2500, -0.1000, 1.0000],
  1806. ],
  1807. # df
  1808. 8,
  1809. # ans
  1810. [
  1811. 0.00000000000000069609279697467772867405511133763,
  1812. 0.00000000000000073700739052207366474839369535934,
  1813. 0.00000000000000069522909962669171512174435447027,
  1814. 0.00000000000000074212293557998314091880208889767,
  1815. 0.00000000000000077039675154022118593323030449058,
  1816. ]
  1817. )]
  1818. @pytest.mark.parametrize("x, loc, shape, df, ans", PDF_TESTS)
  1819. def test_pdf_correctness(self, x, loc, shape, df, ans):
  1820. dist = multivariate_t(loc, shape, df, seed=0)
  1821. val = dist.pdf(x)
  1822. assert_array_almost_equal(val, ans)
  1823. @pytest.mark.parametrize("x, loc, shape, df, ans", PDF_TESTS)
  1824. def test_logpdf_correct(self, x, loc, shape, df, ans):
  1825. dist = multivariate_t(loc, shape, df, seed=0)
  1826. val1 = dist.pdf(x)
  1827. val2 = dist.logpdf(x)
  1828. assert_array_almost_equal(np.log(val1), val2)
  1829. # https://github.com/scipy/scipy/issues/10042#issuecomment-576795195
  1830. def test_mvt_with_df_one_is_cauchy(self):
  1831. x = [9, 7, 4, 1, -3, 9, 0, -3, -1, 3]
  1832. val = multivariate_t.pdf(x, df=1)
  1833. ans = cauchy.pdf(x)
  1834. assert_array_almost_equal(val, ans)
  1835. def test_mvt_with_high_df_is_approx_normal(self):
  1836. # `normaltest` returns the chi-squared statistic and the associated
  1837. # p-value. The null hypothesis is that `x` came from a normal
  1838. # distribution, so a low p-value represents rejecting the null, i.e.
  1839. # that it is unlikely that `x` came a normal distribution.
  1840. P_VAL_MIN = 0.1
  1841. dist = multivariate_t(0, 1, df=100000, seed=1)
  1842. samples = dist.rvs(size=100000)
  1843. _, p = normaltest(samples)
  1844. assert (p > P_VAL_MIN)
  1845. dist = multivariate_t([-2, 3], [[10, -1], [-1, 10]], df=100000,
  1846. seed=42)
  1847. samples = dist.rvs(size=100000)
  1848. _, p = normaltest(samples)
  1849. assert ((p > P_VAL_MIN).all())
  1850. @patch('scipy.stats.multivariate_normal._logpdf')
  1851. def test_mvt_with_inf_df_calls_normal(self, mock):
  1852. dist = multivariate_t(0, 1, df=np.inf, seed=7)
  1853. assert isinstance(dist, multivariate_normal_frozen)
  1854. multivariate_t.pdf(0, df=np.inf)
  1855. assert mock.call_count == 1
  1856. multivariate_t.logpdf(0, df=np.inf)
  1857. assert mock.call_count == 2
  1858. def test_shape_correctness(self):
  1859. # pdf and logpdf should return scalar when the
  1860. # number of samples in x is one.
  1861. dim = 4
  1862. loc = np.zeros(dim)
  1863. shape = np.eye(dim)
  1864. df = 4.5
  1865. x = np.zeros(dim)
  1866. res = multivariate_t(loc, shape, df).pdf(x)
  1867. assert np.isscalar(res)
  1868. res = multivariate_t(loc, shape, df).logpdf(x)
  1869. assert np.isscalar(res)
  1870. # pdf() and logpdf() should return probabilities of shape
  1871. # (n_samples,) when x has n_samples.
  1872. n_samples = 7
  1873. x = np.random.random((n_samples, dim))
  1874. res = multivariate_t(loc, shape, df).pdf(x)
  1875. assert (res.shape == (n_samples,))
  1876. res = multivariate_t(loc, shape, df).logpdf(x)
  1877. assert (res.shape == (n_samples,))
  1878. # rvs() should return scalar unless a size argument is applied.
  1879. res = multivariate_t(np.zeros(1), np.eye(1), 1).rvs()
  1880. assert np.isscalar(res)
  1881. # rvs() should return vector of shape (size,) if size argument
  1882. # is applied.
  1883. size = 7
  1884. res = multivariate_t(np.zeros(1), np.eye(1), 1).rvs(size=size)
  1885. assert (res.shape == (size,))
  1886. def test_default_arguments(self):
  1887. dist = multivariate_t()
  1888. assert_equal(dist.loc, [0])
  1889. assert_equal(dist.shape, [[1]])
  1890. assert (dist.df == 1)
  1891. DEFAULT_ARGS_TESTS = [
  1892. (None, None, None, 0, 1, 1),
  1893. (None, None, 7, 0, 1, 7),
  1894. (None, [[7, 0], [0, 7]], None, [0, 0], [[7, 0], [0, 7]], 1),
  1895. (None, [[7, 0], [0, 7]], 7, [0, 0], [[7, 0], [0, 7]], 7),
  1896. ([7, 7], None, None, [7, 7], [[1, 0], [0, 1]], 1),
  1897. ([7, 7], None, 7, [7, 7], [[1, 0], [0, 1]], 7),
  1898. ([7, 7], [[7, 0], [0, 7]], None, [7, 7], [[7, 0], [0, 7]], 1),
  1899. ([7, 7], [[7, 0], [0, 7]], 7, [7, 7], [[7, 0], [0, 7]], 7)
  1900. ]
  1901. @pytest.mark.parametrize("loc, shape, df, loc_ans, shape_ans, df_ans", DEFAULT_ARGS_TESTS)
  1902. def test_default_args(self, loc, shape, df, loc_ans, shape_ans, df_ans):
  1903. dist = multivariate_t(loc=loc, shape=shape, df=df)
  1904. assert_equal(dist.loc, loc_ans)
  1905. assert_equal(dist.shape, shape_ans)
  1906. assert (dist.df == df_ans)
  1907. ARGS_SHAPES_TESTS = [
  1908. (-1, 2, 3, [-1], [[2]], 3),
  1909. ([-1], [2], 3, [-1], [[2]], 3),
  1910. (np.array([-1]), np.array([2]), 3, [-1], [[2]], 3)
  1911. ]
  1912. @pytest.mark.parametrize("loc, shape, df, loc_ans, shape_ans, df_ans", ARGS_SHAPES_TESTS)
  1913. def test_scalar_list_and_ndarray_arguments(self, loc, shape, df, loc_ans, shape_ans, df_ans):
  1914. dist = multivariate_t(loc, shape, df)
  1915. assert_equal(dist.loc, loc_ans)
  1916. assert_equal(dist.shape, shape_ans)
  1917. assert_equal(dist.df, df_ans)
  1918. def test_argument_error_handling(self):
  1919. # `loc` should be a one-dimensional vector.
  1920. loc = [[1, 1]]
  1921. assert_raises(ValueError,
  1922. multivariate_t,
  1923. **dict(loc=loc))
  1924. # `shape` should be scalar or square matrix.
  1925. shape = [[1, 1], [2, 2], [3, 3]]
  1926. assert_raises(ValueError,
  1927. multivariate_t,
  1928. **dict(loc=loc, shape=shape))
  1929. # `df` should be greater than zero.
  1930. loc = np.zeros(2)
  1931. shape = np.eye(2)
  1932. df = -1
  1933. assert_raises(ValueError,
  1934. multivariate_t,
  1935. **dict(loc=loc, shape=shape, df=df))
  1936. df = 0
  1937. assert_raises(ValueError,
  1938. multivariate_t,
  1939. **dict(loc=loc, shape=shape, df=df))
  1940. def test_reproducibility(self):
  1941. rng = np.random.RandomState(4)
  1942. loc = rng.uniform(size=3)
  1943. shape = np.eye(3)
  1944. dist1 = multivariate_t(loc, shape, df=3, seed=2)
  1945. dist2 = multivariate_t(loc, shape, df=3, seed=2)
  1946. samples1 = dist1.rvs(size=10)
  1947. samples2 = dist2.rvs(size=10)
  1948. assert_equal(samples1, samples2)
  1949. def test_allow_singular(self):
  1950. # Make shape singular and verify error was raised.
  1951. args = dict(loc=[0,0], shape=[[0,0],[0,1]], df=1, allow_singular=False)
  1952. assert_raises(np.linalg.LinAlgError, multivariate_t, **args)
  1953. @pytest.mark.parametrize("size", [(10, 3), (5, 6, 4, 3)])
  1954. @pytest.mark.parametrize("dim", [2, 3, 4, 5])
  1955. @pytest.mark.parametrize("df", [1., 2., np.inf])
  1956. def test_rvs(self, size, dim, df):
  1957. dist = multivariate_t(np.zeros(dim), np.eye(dim), df)
  1958. rvs = dist.rvs(size=size)
  1959. assert rvs.shape == size + (dim, )
  1960. class TestMultivariateHypergeom:
  1961. @pytest.mark.parametrize(
  1962. "x, m, n, expected",
  1963. [
  1964. # Ground truth value from R dmvhyper
  1965. ([3, 4], [5, 10], 7, -1.119814),
  1966. # test for `n=0`
  1967. ([3, 4], [5, 10], 0, np.NINF),
  1968. # test for `x < 0`
  1969. ([-3, 4], [5, 10], 7, np.NINF),
  1970. # test for `m < 0` (RuntimeWarning issue)
  1971. ([3, 4], [-5, 10], 7, np.nan),
  1972. # test for all `m < 0` and `x.sum() != n`
  1973. ([[1, 2], [3, 4]], [[-4, -6], [-5, -10]],
  1974. [3, 7], [np.nan, np.nan]),
  1975. # test for `x < 0` and `m < 0` (RuntimeWarning issue)
  1976. ([-3, 4], [-5, 10], 1, np.nan),
  1977. # test for `x > m`
  1978. ([1, 11], [10, 1], 12, np.nan),
  1979. # test for `m < 0` (RuntimeWarning issue)
  1980. ([1, 11], [10, -1], 12, np.nan),
  1981. # test for `n < 0`
  1982. ([3, 4], [5, 10], -7, np.nan),
  1983. # test for `x.sum() != n`
  1984. ([3, 3], [5, 10], 7, np.NINF)
  1985. ]
  1986. )
  1987. def test_logpmf(self, x, m, n, expected):
  1988. vals = multivariate_hypergeom.logpmf(x, m, n)
  1989. assert_allclose(vals, expected, rtol=1e-6)
  1990. def test_reduces_hypergeom(self):
  1991. # test that the multivariate_hypergeom pmf reduces to the
  1992. # hypergeom pmf in the 2d case.
  1993. val1 = multivariate_hypergeom.pmf(x=[3, 1], m=[10, 5], n=4)
  1994. val2 = hypergeom.pmf(k=3, M=15, n=4, N=10)
  1995. assert_allclose(val1, val2, rtol=1e-8)
  1996. val1 = multivariate_hypergeom.pmf(x=[7, 3], m=[15, 10], n=10)
  1997. val2 = hypergeom.pmf(k=7, M=25, n=10, N=15)
  1998. assert_allclose(val1, val2, rtol=1e-8)
  1999. def test_rvs(self):
  2000. # test if `rvs` is unbiased and large sample size converges
  2001. # to the true mean.
  2002. rv = multivariate_hypergeom(m=[3, 5], n=4)
  2003. rvs = rv.rvs(size=1000, random_state=123)
  2004. assert_allclose(rvs.mean(0), rv.mean(), rtol=1e-2)
  2005. def test_rvs_broadcasting(self):
  2006. rv = multivariate_hypergeom(m=[[3, 5], [5, 10]], n=[4, 9])
  2007. rvs = rv.rvs(size=(1000, 2), random_state=123)
  2008. assert_allclose(rvs.mean(0), rv.mean(), rtol=1e-2)
  2009. @pytest.mark.parametrize('m, n', (
  2010. ([0, 0, 20, 0, 0], 5), ([0, 0, 0, 0, 0], 0),
  2011. ([0, 0], 0), ([0], 0)
  2012. ))
  2013. def test_rvs_gh16171(self, m, n):
  2014. res = multivariate_hypergeom.rvs(m, n)
  2015. m = np.asarray(m)
  2016. res_ex = m.copy()
  2017. res_ex[m != 0] = n
  2018. assert_equal(res, res_ex)
  2019. @pytest.mark.parametrize(
  2020. "x, m, n, expected",
  2021. [
  2022. ([5], [5], 5, 1),
  2023. ([3, 4], [5, 10], 7, 0.3263403),
  2024. # Ground truth value from R dmvhyper
  2025. ([[[3, 5], [0, 8]], [[-1, 9], [1, 1]]],
  2026. [5, 10], [[8, 8], [8, 2]],
  2027. [[0.3916084, 0.006993007], [0, 0.4761905]]),
  2028. # test with empty arrays.
  2029. (np.array([], np.int_), np.array([], np.int_), 0, []),
  2030. ([1, 2], [4, 5], 5, 0),
  2031. # Ground truth value from R dmvhyper
  2032. ([3, 3, 0], [5, 6, 7], 6, 0.01077354)
  2033. ]
  2034. )
  2035. def test_pmf(self, x, m, n, expected):
  2036. vals = multivariate_hypergeom.pmf(x, m, n)
  2037. assert_allclose(vals, expected, rtol=1e-7)
  2038. @pytest.mark.parametrize(
  2039. "x, m, n, expected",
  2040. [
  2041. ([3, 4], [[5, 10], [10, 15]], 7, [0.3263403, 0.3407531]),
  2042. ([[1], [2]], [[3], [4]], [1, 3], [1., 0.]),
  2043. ([[[1], [2]]], [[3], [4]], [1, 3], [[1., 0.]]),
  2044. ([[1], [2]], [[[[3]]]], [1, 3], [[[1., 0.]]])
  2045. ]
  2046. )
  2047. def test_pmf_broadcasting(self, x, m, n, expected):
  2048. vals = multivariate_hypergeom.pmf(x, m, n)
  2049. assert_allclose(vals, expected, rtol=1e-7)
  2050. def test_cov(self):
  2051. cov1 = multivariate_hypergeom.cov(m=[3, 7, 10], n=12)
  2052. cov2 = [[0.64421053, -0.26526316, -0.37894737],
  2053. [-0.26526316, 1.14947368, -0.88421053],
  2054. [-0.37894737, -0.88421053, 1.26315789]]
  2055. assert_allclose(cov1, cov2, rtol=1e-8)
  2056. def test_cov_broadcasting(self):
  2057. cov1 = multivariate_hypergeom.cov(m=[[7, 9], [10, 15]], n=[8, 12])
  2058. cov2 = [[[1.05, -1.05], [-1.05, 1.05]],
  2059. [[1.56, -1.56], [-1.56, 1.56]]]
  2060. assert_allclose(cov1, cov2, rtol=1e-8)
  2061. cov3 = multivariate_hypergeom.cov(m=[[4], [5]], n=[4, 5])
  2062. cov4 = [[[0.]], [[0.]]]
  2063. assert_allclose(cov3, cov4, rtol=1e-8)
  2064. cov5 = multivariate_hypergeom.cov(m=[7, 9], n=[8, 12])
  2065. cov6 = [[[1.05, -1.05], [-1.05, 1.05]],
  2066. [[0.7875, -0.7875], [-0.7875, 0.7875]]]
  2067. assert_allclose(cov5, cov6, rtol=1e-8)
  2068. def test_var(self):
  2069. # test with hypergeom
  2070. var0 = multivariate_hypergeom.var(m=[10, 5], n=4)
  2071. var1 = hypergeom.var(M=15, n=4, N=10)
  2072. assert_allclose(var0, var1, rtol=1e-8)
  2073. def test_var_broadcasting(self):
  2074. var0 = multivariate_hypergeom.var(m=[10, 5], n=[4, 8])
  2075. var1 = multivariate_hypergeom.var(m=[10, 5], n=4)
  2076. var2 = multivariate_hypergeom.var(m=[10, 5], n=8)
  2077. assert_allclose(var0[0], var1, rtol=1e-8)
  2078. assert_allclose(var0[1], var2, rtol=1e-8)
  2079. var3 = multivariate_hypergeom.var(m=[[10, 5], [10, 14]], n=[4, 8])
  2080. var4 = [[0.6984127, 0.6984127], [1.352657, 1.352657]]
  2081. assert_allclose(var3, var4, rtol=1e-8)
  2082. var5 = multivariate_hypergeom.var(m=[[5], [10]], n=[5, 10])
  2083. var6 = [[0.], [0.]]
  2084. assert_allclose(var5, var6, rtol=1e-8)
  2085. def test_mean(self):
  2086. # test with hypergeom
  2087. mean0 = multivariate_hypergeom.mean(m=[10, 5], n=4)
  2088. mean1 = hypergeom.mean(M=15, n=4, N=10)
  2089. assert_allclose(mean0[0], mean1, rtol=1e-8)
  2090. mean2 = multivariate_hypergeom.mean(m=[12, 8], n=10)
  2091. mean3 = [12.*10./20., 8.*10./20.]
  2092. assert_allclose(mean2, mean3, rtol=1e-8)
  2093. def test_mean_broadcasting(self):
  2094. mean0 = multivariate_hypergeom.mean(m=[[3, 5], [10, 5]], n=[4, 8])
  2095. mean1 = [[3.*4./8., 5.*4./8.], [10.*8./15., 5.*8./15.]]
  2096. assert_allclose(mean0, mean1, rtol=1e-8)
  2097. def test_mean_edge_cases(self):
  2098. mean0 = multivariate_hypergeom.mean(m=[0, 0, 0], n=0)
  2099. assert_equal(mean0, [0., 0., 0.])
  2100. mean1 = multivariate_hypergeom.mean(m=[1, 0, 0], n=2)
  2101. assert_equal(mean1, [np.nan, np.nan, np.nan])
  2102. mean2 = multivariate_hypergeom.mean(m=[[1, 0, 0], [1, 0, 1]], n=2)
  2103. assert_allclose(mean2, [[np.nan, np.nan, np.nan], [1., 0., 1.]],
  2104. rtol=1e-17)
  2105. mean3 = multivariate_hypergeom.mean(m=np.array([], np.int_), n=0)
  2106. assert_equal(mean3, [])
  2107. assert_(mean3.shape == (0, ))
  2108. def test_var_edge_cases(self):
  2109. var0 = multivariate_hypergeom.var(m=[0, 0, 0], n=0)
  2110. assert_allclose(var0, [0., 0., 0.], rtol=1e-16)
  2111. var1 = multivariate_hypergeom.var(m=[1, 0, 0], n=2)
  2112. assert_equal(var1, [np.nan, np.nan, np.nan])
  2113. var2 = multivariate_hypergeom.var(m=[[1, 0, 0], [1, 0, 1]], n=2)
  2114. assert_allclose(var2, [[np.nan, np.nan, np.nan], [0., 0., 0.]],
  2115. rtol=1e-17)
  2116. var3 = multivariate_hypergeom.var(m=np.array([], np.int_), n=0)
  2117. assert_equal(var3, [])
  2118. assert_(var3.shape == (0, ))
  2119. def test_cov_edge_cases(self):
  2120. cov0 = multivariate_hypergeom.cov(m=[1, 0, 0], n=1)
  2121. cov1 = [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]
  2122. assert_allclose(cov0, cov1, rtol=1e-17)
  2123. cov3 = multivariate_hypergeom.cov(m=[0, 0, 0], n=0)
  2124. cov4 = [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]
  2125. assert_equal(cov3, cov4)
  2126. cov5 = multivariate_hypergeom.cov(m=np.array([], np.int_), n=0)
  2127. cov6 = np.array([], dtype=np.float_).reshape(0, 0)
  2128. assert_allclose(cov5, cov6, rtol=1e-17)
  2129. assert_(cov5.shape == (0, 0))
  2130. def test_frozen(self):
  2131. # The frozen distribution should agree with the regular one
  2132. np.random.seed(1234)
  2133. n = 12
  2134. m = [7, 9, 11, 13]
  2135. x = [[0, 0, 0, 12], [0, 0, 1, 11], [0, 1, 1, 10],
  2136. [1, 1, 1, 9], [1, 1, 2, 8]]
  2137. x = np.asarray(x, dtype=np.int_)
  2138. mhg_frozen = multivariate_hypergeom(m, n)
  2139. assert_allclose(mhg_frozen.pmf(x),
  2140. multivariate_hypergeom.pmf(x, m, n))
  2141. assert_allclose(mhg_frozen.logpmf(x),
  2142. multivariate_hypergeom.logpmf(x, m, n))
  2143. assert_allclose(mhg_frozen.var(), multivariate_hypergeom.var(m, n))
  2144. assert_allclose(mhg_frozen.cov(), multivariate_hypergeom.cov(m, n))
  2145. def test_invalid_params(self):
  2146. assert_raises(ValueError, multivariate_hypergeom.pmf, 5, 10, 5)
  2147. assert_raises(ValueError, multivariate_hypergeom.pmf, 5, [10], 5)
  2148. assert_raises(ValueError, multivariate_hypergeom.pmf, [5, 4], [10], 5)
  2149. assert_raises(TypeError, multivariate_hypergeom.pmf, [5.5, 4.5],
  2150. [10, 15], 5)
  2151. assert_raises(TypeError, multivariate_hypergeom.pmf, [5, 4],
  2152. [10.5, 15.5], 5)
  2153. assert_raises(TypeError, multivariate_hypergeom.pmf, [5, 4],
  2154. [10, 15], 5.5)
  2155. class TestRandomTable:
  2156. def get_rng(self):
  2157. return np.random.default_rng(628174795866951638)
  2158. def test_process_parameters(self):
  2159. message = "`row` must be one-dimensional"
  2160. with pytest.raises(ValueError, match=message):
  2161. random_table([[1, 2]], [1, 2])
  2162. message = "`col` must be one-dimensional"
  2163. with pytest.raises(ValueError, match=message):
  2164. random_table([1, 2], [[1, 2]])
  2165. message = "each element of `row` must be non-negative"
  2166. with pytest.raises(ValueError, match=message):
  2167. random_table([1, -1], [1, 2])
  2168. message = "each element of `col` must be non-negative"
  2169. with pytest.raises(ValueError, match=message):
  2170. random_table([1, 2], [1, -2])
  2171. message = "sums over `row` and `col` must be equal"
  2172. with pytest.raises(ValueError, match=message):
  2173. random_table([1, 2], [1, 0])
  2174. message = "each element of `row` must be an integer"
  2175. with pytest.raises(ValueError, match=message):
  2176. random_table([2.1, 2.1], [1, 1, 2])
  2177. message = "each element of `col` must be an integer"
  2178. with pytest.raises(ValueError, match=message):
  2179. random_table([1, 2], [1.1, 1.1, 1])
  2180. row = [1, 3]
  2181. col = [2, 1, 1]
  2182. r, c, n = random_table._process_parameters([1, 3], [2, 1, 1])
  2183. assert_equal(row, r)
  2184. assert_equal(col, c)
  2185. assert n == np.sum(row)
  2186. @pytest.mark.parametrize("scale,method",
  2187. ((1, "boyett"), (100, "patefield")))
  2188. def test_process_rvs_method_on_None(self, scale, method):
  2189. row = np.array([1, 3]) * scale
  2190. col = np.array([2, 1, 1]) * scale
  2191. ct = random_table
  2192. expected = ct.rvs(row, col, method=method, random_state=1)
  2193. got = ct.rvs(row, col, method=None, random_state=1)
  2194. assert_equal(expected, got)
  2195. def test_process_rvs_method_bad_argument(self):
  2196. row = [1, 3]
  2197. col = [2, 1, 1]
  2198. # order of items in set is random, so cannot check that
  2199. message = "'foo' not recognized, must be one of"
  2200. with pytest.raises(ValueError, match=message):
  2201. random_table.rvs(row, col, method="foo")
  2202. @pytest.mark.parametrize('frozen', (True, False))
  2203. @pytest.mark.parametrize('log', (True, False))
  2204. def test_pmf_logpmf(self, frozen, log):
  2205. # The pmf is tested through random sample generation
  2206. # with Boyett's algorithm, whose implementation is simple
  2207. # enough to verify manually for correctness.
  2208. rng = self.get_rng()
  2209. row = [2, 6]
  2210. col = [1, 3, 4]
  2211. rvs = random_table.rvs(row, col, size=1000,
  2212. method="boyett", random_state=rng)
  2213. obj = random_table(row, col) if frozen else random_table
  2214. method = getattr(obj, "logpmf" if log else "pmf")
  2215. if not frozen:
  2216. original_method = method
  2217. def method(x):
  2218. return original_method(x, row, col)
  2219. pmf = (lambda x: np.exp(method(x))) if log else method
  2220. unique_rvs, counts = np.unique(rvs, axis=0, return_counts=True)
  2221. # rough accuracy check
  2222. p = pmf(unique_rvs)
  2223. assert_allclose(p * len(rvs), counts, rtol=0.1)
  2224. # accept any iterable
  2225. p2 = pmf(list(unique_rvs[0]))
  2226. assert_equal(p2, p[0])
  2227. # accept high-dimensional input and 2d input
  2228. rvs_nd = rvs.reshape((10, 100) + rvs.shape[1:])
  2229. p = pmf(rvs_nd)
  2230. assert p.shape == (10, 100)
  2231. for i in range(p.shape[0]):
  2232. for j in range(p.shape[1]):
  2233. pij = p[i, j]
  2234. rvij = rvs_nd[i, j]
  2235. qij = pmf(rvij)
  2236. assert_equal(pij, qij)
  2237. # probability is zero if column marginal does not match
  2238. x = [[0, 1, 1], [2, 1, 3]]
  2239. assert_equal(np.sum(x, axis=-1), row)
  2240. p = pmf(x)
  2241. assert p == 0
  2242. # probability is zero if row marginal does not match
  2243. x = [[0, 1, 2], [1, 2, 2]]
  2244. assert_equal(np.sum(x, axis=-2), col)
  2245. p = pmf(x)
  2246. assert p == 0
  2247. # response to invalid inputs
  2248. message = "`x` must be at least two-dimensional"
  2249. with pytest.raises(ValueError, match=message):
  2250. pmf([1])
  2251. message = "`x` must contain only integral values"
  2252. with pytest.raises(ValueError, match=message):
  2253. pmf([[1.1]])
  2254. message = "`x` must contain only integral values"
  2255. with pytest.raises(ValueError, match=message):
  2256. pmf([[np.nan]])
  2257. message = "`x` must contain only non-negative values"
  2258. with pytest.raises(ValueError, match=message):
  2259. pmf([[-1]])
  2260. message = "shape of `x` must agree with `row`"
  2261. with pytest.raises(ValueError, match=message):
  2262. pmf([[1, 2, 3]])
  2263. message = "shape of `x` must agree with `col`"
  2264. with pytest.raises(ValueError, match=message):
  2265. pmf([[1, 2],
  2266. [3, 4]])
  2267. @pytest.mark.parametrize("method", ("boyett", "patefield"))
  2268. def test_rvs_mean(self, method):
  2269. # test if `rvs` is unbiased and large sample size converges
  2270. # to the true mean.
  2271. rng = self.get_rng()
  2272. row = [2, 6]
  2273. col = [1, 3, 4]
  2274. rvs = random_table.rvs(row, col, size=1000, method=method,
  2275. random_state=rng)
  2276. mean = random_table.mean(row, col)
  2277. assert_equal(np.sum(mean), np.sum(row))
  2278. assert_allclose(rvs.mean(0), mean, atol=0.05)
  2279. assert_equal(rvs.sum(axis=-1), np.broadcast_to(row, (1000, 2)))
  2280. assert_equal(rvs.sum(axis=-2), np.broadcast_to(col, (1000, 3)))
  2281. def test_rvs_cov(self):
  2282. # test if `rvs` generated with patefield and boyett algorithms
  2283. # produce approximately the same covariance matrix
  2284. rng = self.get_rng()
  2285. row = [2, 6]
  2286. col = [1, 3, 4]
  2287. rvs1 = random_table.rvs(row, col, size=10000, method="boyett",
  2288. random_state=rng)
  2289. rvs2 = random_table.rvs(row, col, size=10000, method="patefield",
  2290. random_state=rng)
  2291. cov1 = np.var(rvs1, axis=0)
  2292. cov2 = np.var(rvs2, axis=0)
  2293. assert_allclose(cov1, cov2, atol=0.02)
  2294. @pytest.mark.parametrize("method", ("boyett", "patefield"))
  2295. def test_rvs_size(self, method):
  2296. row = [2, 6]
  2297. col = [1, 3, 4]
  2298. # test size `None`
  2299. rv = random_table.rvs(row, col, method=method,
  2300. random_state=self.get_rng())
  2301. assert rv.shape == (2, 3)
  2302. # test size 1
  2303. rv2 = random_table.rvs(row, col, size=1, method=method,
  2304. random_state=self.get_rng())
  2305. assert rv2.shape == (1, 2, 3)
  2306. assert_equal(rv, rv2[0])
  2307. # test size 0
  2308. rv3 = random_table.rvs(row, col, size=0, method=method,
  2309. random_state=self.get_rng())
  2310. assert rv3.shape == (0, 2, 3)
  2311. # test other valid size
  2312. rv4 = random_table.rvs(row, col, size=20, method=method,
  2313. random_state=self.get_rng())
  2314. assert rv4.shape == (20, 2, 3)
  2315. rv5 = random_table.rvs(row, col, size=(4, 5), method=method,
  2316. random_state=self.get_rng())
  2317. assert rv5.shape == (4, 5, 2, 3)
  2318. assert_allclose(rv5.reshape(20, 2, 3), rv4, rtol=1e-15)
  2319. # test invalid size
  2320. message = "`size` must be a non-negative integer or `None`"
  2321. with pytest.raises(ValueError, match=message):
  2322. random_table.rvs(row, col, size=-1, method=method,
  2323. random_state=self.get_rng())
  2324. with pytest.raises(ValueError, match=message):
  2325. random_table.rvs(row, col, size=np.nan, method=method,
  2326. random_state=self.get_rng())
  2327. @pytest.mark.parametrize("method", ("boyett", "patefield"))
  2328. def test_rvs_method(self, method):
  2329. # This test assumes that pmf is correct and checks that random samples
  2330. # follow this probability distribution. This seems like a circular
  2331. # argument, since pmf is checked in test_pmf_logpmf with random samples
  2332. # generated with the rvs method. This test is not redundant, because
  2333. # test_pmf_logpmf intentionally uses rvs generation with Boyett only,
  2334. # but here we test both Boyett and Patefield.
  2335. row = [2, 6]
  2336. col = [1, 3, 4]
  2337. ct = random_table
  2338. rvs = ct.rvs(row, col, size=100000, method=method,
  2339. random_state=self.get_rng())
  2340. unique_rvs, counts = np.unique(rvs, axis=0, return_counts=True)
  2341. # generated frequencies should match expected frequencies
  2342. p = ct.pmf(unique_rvs, row, col)
  2343. assert_allclose(p * len(rvs), counts, rtol=0.02)
  2344. @pytest.mark.parametrize("method", ("boyett", "patefield"))
  2345. def test_rvs_with_zeros_in_col_row(self, method):
  2346. row = [0, 1, 0]
  2347. col = [1, 0, 0, 0]
  2348. d = random_table(row, col)
  2349. rv = d.rvs(1000, method=method, random_state=self.get_rng())
  2350. expected = np.zeros((1000, len(row), len(col)))
  2351. expected[...] = [[0, 0, 0, 0],
  2352. [1, 0, 0, 0],
  2353. [0, 0, 0, 0]]
  2354. assert_equal(rv, expected)
  2355. @pytest.mark.parametrize("method", (None, "boyett", "patefield"))
  2356. @pytest.mark.parametrize("col", ([], [0]))
  2357. @pytest.mark.parametrize("row", ([], [0]))
  2358. def test_rvs_with_edge_cases(self, method, row, col):
  2359. d = random_table(row, col)
  2360. rv = d.rvs(10, method=method, random_state=self.get_rng())
  2361. expected = np.zeros((10, len(row), len(col)))
  2362. assert_equal(rv, expected)
  2363. @pytest.mark.parametrize('v', (1, 2))
  2364. def test_rvs_rcont(self, v):
  2365. # This test checks the internal low-level interface.
  2366. # It is implicitly also checked by the other test_rvs* calls.
  2367. import scipy.stats._rcont as _rcont
  2368. row = np.array([1, 3], dtype=np.int64)
  2369. col = np.array([2, 1, 1], dtype=np.int64)
  2370. rvs = getattr(_rcont, f"rvs_rcont{v}")
  2371. ntot = np.sum(row)
  2372. result = rvs(row, col, ntot, 1, self.get_rng())
  2373. assert result.shape == (1, len(row), len(col))
  2374. assert np.sum(result) == ntot
  2375. def test_frozen(self):
  2376. row = [2, 6]
  2377. col = [1, 3, 4]
  2378. d = random_table(row, col, seed=self.get_rng())
  2379. sample = d.rvs()
  2380. expected = random_table.mean(row, col)
  2381. assert_equal(expected, d.mean())
  2382. expected = random_table.pmf(sample, row, col)
  2383. assert_equal(expected, d.pmf(sample))
  2384. expected = random_table.logpmf(sample, row, col)
  2385. assert_equal(expected, d.logpmf(sample))
  2386. @pytest.mark.parametrize("method", ("boyett", "patefield"))
  2387. def test_rvs_frozen(self, method):
  2388. row = [2, 6]
  2389. col = [1, 3, 4]
  2390. d = random_table(row, col, seed=self.get_rng())
  2391. expected = random_table.rvs(row, col, size=10, method=method,
  2392. random_state=self.get_rng())
  2393. got = d.rvs(size=10, method=method)
  2394. assert_equal(expected, got)
  2395. def check_pickling(distfn, args):
  2396. # check that a distribution instance pickles and unpickles
  2397. # pay special attention to the random_state property
  2398. # save the random_state (restore later)
  2399. rndm = distfn.random_state
  2400. distfn.random_state = 1234
  2401. distfn.rvs(*args, size=8)
  2402. s = pickle.dumps(distfn)
  2403. r0 = distfn.rvs(*args, size=8)
  2404. unpickled = pickle.loads(s)
  2405. r1 = unpickled.rvs(*args, size=8)
  2406. assert_equal(r0, r1)
  2407. # restore the random_state
  2408. distfn.random_state = rndm
  2409. def test_random_state_property():
  2410. scale = np.eye(3)
  2411. scale[0, 1] = 0.5
  2412. scale[1, 0] = 0.5
  2413. dists = [
  2414. [multivariate_normal, ()],
  2415. [dirichlet, (np.array([1.]), )],
  2416. [wishart, (10, scale)],
  2417. [invwishart, (10, scale)],
  2418. [multinomial, (5, [0.5, 0.4, 0.1])],
  2419. [ortho_group, (2,)],
  2420. [special_ortho_group, (2,)]
  2421. ]
  2422. for distfn, args in dists:
  2423. check_random_state_property(distfn, args)
  2424. check_pickling(distfn, args)