test_entropy.py

import numpy as np
from numpy.testing import assert_equal, assert_allclose
# avoid new uses of the following; prefer assert/np.testing.assert_allclose
from numpy.testing import (assert_, assert_almost_equal,
                           assert_array_almost_equal)
import pytest
from pytest import raises as assert_raises
import scipy.stats as stats


class TestEntropy:
    def test_entropy_positive(self):
        # See ticket #497
        pk = [0.5, 0.2, 0.3]
        qk = [0.1, 0.25, 0.65]
        eself = stats.entropy(pk, pk)
        edouble = stats.entropy(pk, qk)
        assert_(0.0 == eself)
        assert_(edouble >= 0.0)

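    # For a uniform pmf over 16 outcomes the base-2 entropy is
    # log2(16) = 4 bits, which the first assertion below checks. Changing
    # the log base only rescales the result, so the natural-log value S
    # divided by the base-2 value S2 should equal ln(2).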
    def test_entropy_base(self):
        pk = np.ones(16, float)
        S = stats.entropy(pk, base=2.)
        assert_(abs(S - 4.) < 1.e-5)

        qk = np.ones(16, float)
        qk[:8] = 2.
        S = stats.entropy(pk, qk)
        S2 = stats.entropy(pk, qk, base=2.)
        assert_(abs(S/S2 - np.log(2.)) < 1.e-5)

    def test_entropy_zero(self):
        # Test for PR-479
        assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278,
                            decimal=12)

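    # With 2-D inputs, stats.entropy reduces along axis=0 by default and
    # normalizes each column to sum to 1, so the expected result below is
    # one relative-entropy value per column of pk/qk.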
    def test_entropy_2d(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.1933259, 0.18609809])

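    # The relative entropy sum(pk * log(pk / qk)) is infinite wherever qk is
    # zero but pk is not (first column below), while a zero entry in pk
    # contributes nothing, so zeroing pk[0][0] makes the result finite again.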
    def test_entropy_2d_zero(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [np.inf, 0.18609809])

        pk[0][0] = 0.0
        assert_array_almost_equal(stats.entropy(pk, qk),
                                  [0.17403988, 0.18609809])

    def test_entropy_base_2d_nondefault_axis(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        assert_array_almost_equal(stats.entropy(pk, axis=1),
                                  [0.63651417, 0.63651417, 0.66156324])

    def test_entropy_2d_nondefault_axis(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk, axis=1),
                                  [0.231049, 0.231049, 0.127706])

    def test_entropy_raises_value_error(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.1, 0.2], [0.6, 0.3]]
        assert_raises(ValueError, stats.entropy, pk, qk)

    def test_base_entropy_with_axis_0_is_equal_to_default(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        assert_array_almost_equal(stats.entropy(pk, axis=0),
                                  stats.entropy(pk))

    def test_entropy_with_axis_0_is_equal_to_default(self):
        pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]
        qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]
        assert_array_almost_equal(stats.entropy(pk, qk, axis=0),
                                  stats.entropy(pk, qk))

    def test_base_entropy_transposed(self):
        pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        assert_array_almost_equal(stats.entropy(pk.T).T,
                                  stats.entropy(pk, axis=1))

    def test_entropy_transposed(self):
        pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]])
        qk = np.array([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]])
        assert_array_almost_equal(stats.entropy(pk.T, qk.T).T,
                                  stats.entropy(pk, qk, axis=1))

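    # pk with shape (3,) and qk with shape (2, 1) broadcast to shape (2, 3),
    # so reducing over axis=-1 should give one relative entropy per row of qk.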
    def test_entropy_broadcasting(self):
        np.random.seed(0)  # seed for reproducibility
        x = np.random.rand(3)
        y = np.random.rand(2, 1)
        res = stats.entropy(x, y, axis=-1)
        assert_equal(res[0], stats.entropy(x, y[0]))
        assert_equal(res[1], stats.entropy(x, y[1]))

    def test_entropy_shape_mismatch(self):
        x = np.random.rand(10, 1, 12)
        y = np.random.rand(11, 2)
        message = "shape mismatch: objects cannot be broadcast"
        with pytest.raises(ValueError, match=message):
            stats.entropy(x, y)

    def test_input_validation(self):
        x = np.random.rand(10)
        message = "`base` must be a positive number."
        with pytest.raises(ValueError, match=message):
            stats.entropy(x, base=-2)


class TestDifferentialEntropy:
    """
    Vasicek results are compared with the R package vsgoftest.

    # library(vsgoftest)
    #
    # samp <- c(<values>)
    # entropy.estimate(x = samp, window = <window_length>)

    """
    def test_differential_entropy_vasicek(self):
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal(100)

        entropy = stats.differential_entropy(values, method='vasicek')
        assert_allclose(entropy, 1.342551, rtol=1e-6)

        entropy = stats.differential_entropy(values, window_length=1,
                                             method='vasicek')
        assert_allclose(entropy, 1.122044, rtol=1e-6)

        entropy = stats.differential_entropy(values, window_length=8,
                                             method='vasicek')
        assert_allclose(entropy, 1.349401, rtol=1e-6)

    def test_differential_entropy_vasicek_2d_nondefault_axis(self):
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        entropy = stats.differential_entropy(values, axis=1, method='vasicek')
        assert_allclose(
            entropy,
            [1.342551, 1.341826, 1.293775],
            rtol=1e-6,
        )

        entropy = stats.differential_entropy(values, axis=1, window_length=1,
                                             method='vasicek')
        assert_allclose(
            entropy,
            [1.122044, 1.102944, 1.129616],
            rtol=1e-6,
        )

        entropy = stats.differential_entropy(values, axis=1, window_length=8,
                                             method='vasicek')
        assert_allclose(
            entropy,
            [1.349401, 1.338514, 1.292332],
            rtol=1e-6,
        )

    def test_differential_entropy_raises_value_error(self):
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        error_str = (
            r"Window length \({window_length}\) must be positive and less "
            r"than half the sample size \({sample_size}\)."
        )

        sample_size = values.shape[1]

        for window_length in {-1, 0, sample_size//2, sample_size}:
            formatted_error_str = error_str.format(
                window_length=window_length,
                sample_size=sample_size,
            )

            with assert_raises(ValueError, match=formatted_error_str):
                stats.differential_entropy(
                    values,
                    window_length=window_length,
                    axis=1,
                )

    def test_base_differential_entropy_with_axis_0_is_equal_to_default(self):
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((100, 3))

        entropy = stats.differential_entropy(values, axis=0)
        default_entropy = stats.differential_entropy(values)
        assert_allclose(entropy, default_entropy)

    def test_base_differential_entropy_transposed(self):
        random_state = np.random.RandomState(0)
        values = random_state.standard_normal((3, 100))

        assert_allclose(
            stats.differential_entropy(values.T).T,
            stats.differential_entropy(values, axis=1),
        )

    def test_input_validation(self):
        x = np.random.rand(10)

        message = "`base` must be a positive number or `None`."
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, base=-2)

        message = "`method` must be one of..."
        with pytest.raises(ValueError, match=message):
            stats.differential_entropy(x, method='ekki-ekki')

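    # A consistent estimator converges to the true value as the sample size
    # grows; for the standard normal the true differential entropy is
    # 0.5 * log(2 * pi * e) ~= 1.4189, which is what stats.norm.entropy()
    # returns.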
    @pytest.mark.parametrize('method', ['vasicek', 'van es',
                                        'ebrahimi', 'correa'])
    def test_consistency(self, method):
        # test that method is a consistent estimator
        n = 10000 if method == 'correa' else 1000000
        rvs = stats.norm.rvs(size=n, random_state=0)
        expected = stats.norm.entropy()
        res = stats.differential_entropy(rvs, method=method)
        assert_allclose(res, expected, rtol=0.005)

    # values from differential_entropy reference [6], table 1, n=50, m=7
    norm_rmse_std_cases = {  # method: (RMSE, STD)
        'vasicek': (0.198, 0.109),
        'van es': (0.212, 0.110),
        'correa': (0.135, 0.112),
        'ebrahimi': (0.128, 0.109)
    }

    @pytest.mark.parametrize('method, expected',
                             list(norm_rmse_std_cases.items()))
    def test_norm_rmse_std(self, method, expected):
        # test that the RMSE and standard deviation of the estimators match
        # the values given in differential_entropy reference [6].
        # Incidentally, also tests vectorization.
        reps, n, m = 10000, 50, 7
        rmse_expected, std_expected = expected
        rvs = stats.norm.rvs(size=(reps, n), random_state=0)
        true_entropy = stats.norm.entropy()
        res = stats.differential_entropy(rvs, window_length=m,
                                         method=method, axis=-1)
        assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)),
                        rmse_expected, atol=0.005)
        assert_allclose(np.std(res), std_expected, atol=0.002)

    # values from differential_entropy reference [6], table 2, n=50, m=7
    expon_rmse_std_cases = {  # method: (RMSE, STD)
        'vasicek': (0.194, 0.148),
        'van es': (0.179, 0.149),
        'correa': (0.155, 0.152),
        'ebrahimi': (0.151, 0.148)
    }

    @pytest.mark.parametrize('method, expected',
                             list(expon_rmse_std_cases.items()))
    def test_expon_rmse_std(self, method, expected):
        # test that the RMSE and standard deviation of the estimators match
        # the values given in differential_entropy reference [6].
        # Incidentally, also tests vectorization.
        reps, n, m = 10000, 50, 7
        rmse_expected, std_expected = expected
        rvs = stats.expon.rvs(size=(reps, n), random_state=0)
        true_entropy = stats.expon.entropy()
        res = stats.differential_entropy(rvs, window_length=m,
                                         method=method, axis=-1)
        assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)),
                        rmse_expected, atol=0.005)
        assert_allclose(np.std(res), std_expected, atol=0.002)

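    # With method unspecified, differential_entropy selects an estimator
    # based on the sample size; the (n, method) pairs below encode the
    # expected choices ('van es' for small samples, 'ebrahimi' for moderate
    # ones, 'vasicek' for large ones).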
    @pytest.mark.parametrize('n, method', [(8, 'van es'),
                                           (12, 'ebrahimi'),
                                           (1001, 'vasicek')])
    def test_method_auto(self, n, method):
        rvs = stats.norm.rvs(size=(n,), random_state=0)
        res1 = stats.differential_entropy(rvs)
        res2 = stats.differential_entropy(rvs, method=method)
        assert res1 == res2