# test_kdeoth.py

from scipy import stats, linalg, integrate
import numpy as np
from numpy.testing import (assert_almost_equal, assert_, assert_equal,
                           assert_array_almost_equal,
                           assert_array_almost_equal_nulp, assert_allclose)
import pytest
from pytest import raises as assert_raises


def test_kde_1d():
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]

    assert_(np.sum((kdepdf - normpdf)**2) * intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum() * intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf * normpdf).sum() * intervall, decimal=2)
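

# Hedged aside (not part of the original suite): the first assertion above is
# a Riemann-sum approximation of the integrated squared error between the KDE
# and the fitted normal density. A minimal sketch of that check, assuming a
# 1-d sample and a uniformly spaced grid `xs` (helper name is hypothetical):
def _ise_sketch(sample, xs):
    kde = stats.gaussian_kde(sample)
    ref = stats.norm.pdf(xs, loc=sample.mean(), scale=sample.std(ddof=1))
    dx = xs[1] - xs[0]
    # sum(f**2) * dx approximates the integral of f**2 over the grid
    return np.sum((kde(xs) - ref)**2) * dx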


def test_kde_1d_weighted():
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)
    xnmean = np.average(xn, weights=wn)
    xnstd = np.sqrt(np.average((xn - xnmean)**2, weights=wn))

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    intervall = xs[1] - xs[0]

    assert_(np.sum((kdepdf - normpdf)**2) * intervall < 0.01)
    prob1 = gkde.integrate_box_1d(xnmean, np.inf)
    prob2 = gkde.integrate_box_1d(-np.inf, xnmean)
    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum() * intervall, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf * normpdf).sum() * intervall, decimal=2)
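

# Hedged note (an aside, not from the original file): with weights,
# gaussian_kde bases its bandwidth on an effective sample size rather than n,
# exposed as the `neff` attribute. A sketch of Kish's formula, assuming
# nonnegative weights; uniform weights recover neff == n (helper name is
# hypothetical):
def _neff_sketch(weights):
    w = np.asarray(weights, dtype=float)
    return w.sum()**2 / (w**2).sum()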


@pytest.mark.slow
def test_kde_2d():
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum() * (intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf * normpdf).sum() * (intervall**2), decimal=2)


@pytest.mark.slow
def test_kde_2d_weighted():
    # some basic tests comparing to normal distribution
    np.random.seed(8765678)
    n_basesample = 500

    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])

    # Need transpose (shape (2, 500)) for kde
    xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T
    wn = np.random.rand(n_basesample)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn, weights=wn)

    # evaluate the density function for the kde for some points
    x, y = np.mgrid[-7:7:500j, -7:7:500j]
    grid_coords = np.vstack([x.ravel(), y.ravel()])
    kdepdf = gkde.evaluate(grid_coords)
    kdepdf = kdepdf.reshape(500, 500)

    normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]),
                                            mean=mean, cov=covariance)
    intervall = y.ravel()[1] - y.ravel()[0]

    assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01)

    small = -1e100
    large = 1e100
    prob1 = gkde.integrate_box([small, mean[1]], [large, large])
    prob2 = gkde.integrate_box([small, small], [large, mean[1]])

    assert_almost_equal(prob1, 0.5, decimal=1)
    assert_almost_equal(prob2, 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum() * (intervall**2), decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(mean, covariance),
                        (kdepdf * normpdf).sum() * (intervall**2), decimal=2)


def test_kde_bandwidth_method():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.n, -1. / (kde_obj.d + 4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')
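

# Hedged aside (not part of the original suite): the callable above
# reimplements Scott's rule, n**(-1/(d+4)). For comparison, a sketch of
# Silverman's rule as documented for gaussian_kde.silverman_factor, assuming
# `n` is the (effective) sample size and `d` the number of dimensions
# (helper name is hypothetical):
def _silverman_factor_sketch(n, d):
    return np.power(n * (d + 2.0) / 4.0, -1.0 / (d + 4.0))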


def test_kde_bandwidth_method_weighted():
    def scotts_factor(kde_obj):
        """Same as default, just check that it works."""
        return np.power(kde_obj.neff, -1. / (kde_obj.d + 4))

    np.random.seed(8765678)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)
    # Supply a callable
    gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor)
    # Supply a scalar
    gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor)

    xs = np.linspace(-7, 7, 51)
    kdepdf = gkde.evaluate(xs)
    kdepdf2 = gkde2.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf2)
    kdepdf3 = gkde3.evaluate(xs)
    assert_almost_equal(kdepdf, kdepdf3)

    assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring')


# Subclasses that should stay working (extracted from various sources).
# Unfortunately the earlier design of gaussian_kde made it necessary for users
# to create these kinds of subclasses, or call _compute_covariance() directly.
class _kde_subclass1(stats.gaussian_kde):
    def __init__(self, dataset):
        self.dataset = np.atleast_2d(dataset)
        self.d, self.n = self.dataset.shape
        self.covariance_factor = self.scotts_factor
        self._compute_covariance()


class _kde_subclass2(stats.gaussian_kde):
    def __init__(self, dataset):
        self.covariance_factor = self.scotts_factor
        super().__init__(dataset)


class _kde_subclass4(stats.gaussian_kde):
    def covariance_factor(self):
        return 0.5 * self.silverman_factor()
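

# Hedged note (an aside): on current scipy these subclasses should be
# unnecessary; the supported way to customize the bandwidth is the
# `bw_method` argument (a string, scalar, or callable, as tested above) or
# `set_bandwidth`, as tested further below.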


def test_gaussian_kde_subclassing():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # gaussian_kde itself
    kde = stats.gaussian_kde(x1)
    ys = kde(xs)

    # subclass 1
    kde1 = _kde_subclass1(x1)
    y1 = kde1(xs)
    assert_array_almost_equal_nulp(ys, y1, nulp=10)

    # subclass 2
    kde2 = _kde_subclass2(x1)
    y2 = kde2(xs)
    assert_array_almost_equal_nulp(ys, y2, nulp=10)

    # subclass 3 was removed because we have no obligation to maintain support
    # for user invocation of private methods

    # subclass 4
    kde4 = _kde_subclass4(x1)
    y4 = kde4(x1)
    y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017]
    assert_array_almost_equal(y_expected, y4, decimal=6)

    # Not a subclass, but check for use of _compute_covariance()
    kde5 = kde
    kde5.covariance_factor = lambda: kde.factor
    kde5._compute_covariance()
    y5 = kde5(xs)
    assert_array_almost_equal_nulp(ys, y5, nulp=10)


def test_gaussian_kde_covariance_caching():
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=5)
    # These expected values are from scipy 0.10, before some changes to
    # gaussian_kde. They were not compared with any external reference.
    y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475]

    # Set the bandwidth, then reset it to the default.
    kde = stats.gaussian_kde(x1)
    kde.set_bandwidth(bw_method=0.5)
    kde.set_bandwidth(bw_method='scott')
    y2 = kde(xs)

    assert_array_almost_equal(y_expected, y2, decimal=7)


def test_gaussian_kde_monkeypatch():
    """Ugly, but people may rely on this. See scipy pull request 123,
    specifically the linked ML thread "Width of the Gaussian in stats.kde".
    If it is necessary to break this later on, that is to be discussed on ML.
    """
    x1 = np.array([-7, -5, 1, 4, 5], dtype=float)
    xs = np.linspace(-10, 10, num=50)

    # The old monkeypatched version to get at Silverman's Rule.
    kde = stats.gaussian_kde(x1)
    kde.covariance_factor = kde.silverman_factor
    kde._compute_covariance()
    y1 = kde(xs)

    # The new saner version.
    kde2 = stats.gaussian_kde(x1, bw_method='silverman')
    y2 = kde2(xs)

    assert_array_almost_equal_nulp(y1, y2, nulp=10)


def test_kde_integer_input():
    """Regression test for #1181."""
    x1 = np.arange(5)
    kde = stats.gaussian_kde(x1)
    y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721]
    assert_array_almost_equal(kde(x1), y_expected, decimal=6)


_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64']


@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"])
@pytest.mark.parametrize("dtype", _ftypes)
def test_kde_output_dtype(dtype, bw_type):
    # Check whether the datatypes are available
    dtype = getattr(np, dtype, None)

    if bw_type in ["scott", "silverman"]:
        bw = bw_type
    else:
        bw_type = getattr(np, bw_type, None)
        bw = bw_type(3) if bw_type else None

    if any(dt is None for dt in [dtype, bw]):
        pytest.skip()

    weights = np.arange(5, dtype=dtype)
    dataset = np.arange(5, dtype=dtype)
    k = stats.gaussian_kde(dataset, bw_method=bw, weights=weights)
    points = np.arange(5, dtype=dtype)
    result = k(points)
    # weights are always cast to float64
    assert result.dtype == np.result_type(dataset, points, np.float64(weights),
                                          k.factor)
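

# Hedged illustration (an aside, not part of the original suite) of the
# promotion rule asserted above: because the weights are cast to float64,
# even all-float32 inputs promote the output to float64 under NumPy's
# np.result_type rules.
_promoted_example = np.result_type(np.float32, np.float32, np.float64)
# _promoted_example is dtype('float64')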


def test_pdf_logpdf_validation():
    rng = np.random.default_rng(64202298293133848336925499069837723291)
    xn = rng.standard_normal((2, 10))
    gkde = stats.gaussian_kde(xn)
    xs = rng.standard_normal((3, 10))

    msg = "points have dimension 3, dataset has dimension 2"
    with pytest.raises(ValueError, match=msg):
        gkde.logpdf(xs)


def test_pdf_logpdf():
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs)
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)
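

# Hedged note (an aside): `logpdf` is expected to agree with log(pdf(x))
# wherever the density is representable, as checked above, but it is
# evaluated directly in log space rather than as log(pdf(x)); that is what
# keeps it finite in the high-dimensional overflow test (gh-12988) below.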


def test_pdf_logpdf_weighted():
    np.random.seed(1)
    n_basesample = 50
    xn = np.random.randn(n_basesample)
    wn = np.random.rand(n_basesample)

    # Default
    gkde = stats.gaussian_kde(xn, weights=wn)

    xs = np.linspace(-15, 12, 25)
    pdf = gkde.evaluate(xs)
    pdf2 = gkde.pdf(xs)
    assert_almost_equal(pdf, pdf2, decimal=12)

    logpdf = np.log(pdf)
    logpdf2 = gkde.logpdf(xs)
    assert_almost_equal(logpdf, logpdf2, decimal=12)

    # There are more points than data
    gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs)))
    pdf = np.log(gkde.evaluate(xn))
    pdf2 = gkde.logpdf(xn)
    assert_almost_equal(pdf, pdf2, decimal=12)


def test_marginal_1_axis():
    rng = np.random.default_rng(6111799263660870475)
    n_data = 50
    n_dim = 10
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf_single(point):
        def f(x):
            x = np.concatenate(([x], point[dimensions]))
            return kde.pdf(x)[0]
        return integrate.quad(f, -np.inf, np.inf)[0]

    def marginal_pdf(points):
        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)
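

# Hedged note (an aside): `marginal(dimensions)` integrates out every axis
# not listed in `dimensions`. For a Gaussian KDE this marginal is itself a
# Gaussian KDE over the kept rows of the dataset (with the matching sub-block
# of the kernel covariance), which is why the brute-force quadrature over the
# dropped axis above can serve as a reference.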


@pytest.mark.xslow
def test_marginal_2_axis():
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    dimensions = np.array([1, 3])  # dimensions to keep

    kde = stats.gaussian_kde(dataset)
    marginal = kde.marginal(dimensions)
    pdf = marginal.pdf(points[dimensions])

    def marginal_pdf(points):
        def marginal_pdf_single(point):
            def f(y, x):
                w, z = point[dimensions]
                x = np.array([x, w, y, z])
                return kde.pdf(x)[0]
            return integrate.dblquad(f, -np.inf, np.inf, -np.inf, np.inf)[0]

        return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points)

    ref = marginal_pdf(points)

    assert_allclose(pdf, ref, rtol=1e-6)


def test_marginal_iv():
    # test input validation
    rng = np.random.default_rng(6111799263660870475)
    n_data = 30
    n_dim = 4
    dataset = rng.normal(size=(n_dim, n_data))
    points = rng.normal(size=(n_dim, 3))

    kde = stats.gaussian_kde(dataset)

    # check that positive and negative indices are equivalent
    dimensions1 = [-1, 1]
    marginal1 = kde.marginal(dimensions1)
    pdf1 = marginal1.pdf(points[dimensions1])

    dimensions2 = [3, -3]
    marginal2 = kde.marginal(dimensions2)
    pdf2 = marginal2.pdf(points[dimensions2])

    assert_equal(pdf1, pdf2)

    # IV for non-integer dimensions
    message = "Elements of `dimensions` must be integers..."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2.5])

    # IV for uniqueness
    message = "All elements of `dimensions` must be unique."
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, 2, 2])

    # IV for out-of-bounds dimensions
    message = (r"Dimensions \[-5  6\] are invalid for a distribution in 4...")
    with pytest.raises(ValueError, match=message):
        kde.marginal([1, -5, 6])


@pytest.mark.xslow
def test_logpdf_overflow():
    # regression test for gh-12988; testing against linalg instability for
    # very high dimensionality kde
    np.random.seed(1)
    n_dimensions = 2500
    n_samples = 5000
    xn = np.array([np.random.randn(n_samples) + n
                   for n in range(n_dimensions)])

    # Default
    gkde = stats.gaussian_kde(xn)

    logpdf = gkde.logpdf(np.arange(0, n_dimensions))
    np.testing.assert_equal(np.isneginf(logpdf[0]), False)
    np.testing.assert_equal(np.isnan(logpdf[0]), False)


def test_weights_intact():
    # regression test for gh-9709: weights are not modified
    np.random.seed(12345)
    vals = np.random.lognormal(size=100)
    weights = np.random.choice([1.0, 10.0, 100], size=vals.size)
    orig_weights = weights.copy()

    stats.gaussian_kde(np.log10(vals), weights=weights)
    assert_allclose(weights, orig_weights, atol=1e-14, rtol=1e-14)


def test_weights_integer():
    # integer weights are OK, cf gh-9709 (comment)
    np.random.seed(12345)
    values = [0.2, 13.5, 21.0, 75.0, 99.0]
    weights = [1, 2, 4, 8, 16]  # a list of integers
    pdf_i = stats.gaussian_kde(values, weights=weights)
    pdf_f = stats.gaussian_kde(values, weights=np.float64(weights))

    xn = [0.3, 11, 88]
    assert_allclose(pdf_i.evaluate(xn),
                    pdf_f.evaluate(xn), atol=1e-14, rtol=1e-14)


def test_seed():
    # Test the seed option of the resample method
    def test_seed_sub(gkde_trial):
        n_sample = 200
        # The results should be different without using seed
        samp1 = gkde_trial.resample(n_sample)
        samp2 = gkde_trial.resample(n_sample)
        assert_raises(
            AssertionError, assert_allclose, samp1, samp2, atol=1e-13
        )
        # Use integer seed
        seed = 831
        samp1 = gkde_trial.resample(n_sample, seed=seed)
        samp2 = gkde_trial.resample(n_sample, seed=seed)
        assert_allclose(samp1, samp2, atol=1e-13)
        # Use RandomState
        rstate1 = np.random.RandomState(seed=138)
        samp1 = gkde_trial.resample(n_sample, seed=rstate1)
        rstate2 = np.random.RandomState(seed=138)
        samp2 = gkde_trial.resample(n_sample, seed=rstate2)
        assert_allclose(samp1, samp2, atol=1e-13)

        # check that np.random.Generator can be used (numpy >= 1.17)
        if hasattr(np.random, 'default_rng'):
            # obtain a np.random.Generator object
            rng = np.random.default_rng(1234)
            gkde_trial.resample(n_sample, seed=rng)

    np.random.seed(8765678)
    n_basesample = 500
    wn = np.random.rand(n_basesample)

    # Test 1D case
    xn_1d = np.random.randn(n_basesample)

    gkde_1d = stats.gaussian_kde(xn_1d)
    test_seed_sub(gkde_1d)
    gkde_1d_weighted = stats.gaussian_kde(xn_1d, weights=wn)
    test_seed_sub(gkde_1d_weighted)

    # Test 2D case
    mean = np.array([1.0, 3.0])
    covariance = np.array([[1.0, 2.0], [2.0, 6.0]])
    xn_2d = np.random.multivariate_normal(mean, covariance, size=n_basesample).T

    gkde_2d = stats.gaussian_kde(xn_2d)
    test_seed_sub(gkde_2d)
    gkde_2d_weighted = stats.gaussian_kde(xn_2d, weights=wn)
    test_seed_sub(gkde_2d_weighted)
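

# Hedged note (an aside): as exercised above, `resample(size, seed=...)`
# accepts an int, a np.random.RandomState, or a np.random.Generator,
# following scipy's usual `random_state` handling.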


def test_singular_data_covariance_gh10205():
    # When the data lie in a lower-dimensional subspace and this causes
    # an exception, check that the error message is informative.
    rng = np.random.default_rng(2321583144339784787)
    mu = np.array([1, 10, 20])
    sigma = np.array([[4, 10, 0], [10, 25, 0], [0, 0, 100]])
    data = rng.multivariate_normal(mu, sigma, 1000)
    try:  # doesn't raise any error on some platforms, and that's OK
        stats.gaussian_kde(data.T)
    except linalg.LinAlgError:
        msg = "The data appears to lie in a lower-dimensional subspace..."
        with assert_raises(linalg.LinAlgError, match=msg):
            stats.gaussian_kde(data.T)


def test_fewer_points_than_dimensions_gh17436():
    # When the number of points is fewer than the number of dimensions, the
    # covariance matrix would be singular, and the exception tested in
    # test_singular_data_covariance_gh10205 would occur. However, sometimes
    # this occurs when the user passes in the transpose of what `gaussian_kde`
    # expects. This can result in a huge covariance matrix, so bail early.
    rng = np.random.default_rng(2046127537594925772)
    rvs = rng.multivariate_normal(np.zeros(3), np.eye(3), size=5)
    message = "Number of dimensions is greater than number of samples..."
    with pytest.raises(ValueError, match=message):
        stats.gaussian_kde(rvs)