from collections import namedtuple
from dataclasses import make_dataclass
from math import comb
import numpy as np
import warnings
from itertools import combinations
import scipy.stats
from scipy.optimize import shgo
from . import distributions
from ._common import ConfidenceInterval
from ._continuous_distns import chi2, norm
from scipy.special import gamma, kv, gammaln
from scipy.fft import ifft
from ._stats_pythran import _a_ij_Aij_Dij2
from ._stats_pythran import (
    _concordant_pairs as _P, _discordant_pairs as _Q
)
from scipy.stats import _stats_py


__all__ = ['epps_singleton_2samp', 'cramervonmises', 'somersd',
           'barnard_exact', 'boschloo_exact', 'cramervonmises_2samp',
           'tukey_hsd', 'poisson_means_test']


Epps_Singleton_2sampResult = namedtuple('Epps_Singleton_2sampResult',
                                        ('statistic', 'pvalue'))


def epps_singleton_2samp(x, y, t=(0.4, 0.8)):
    """Compute the Epps-Singleton (ES) test statistic.

    Test the null hypothesis that two samples have the same underlying
    probability distribution.

    Parameters
    ----------
    x, y : array-like
        The two samples of observations to be tested. Input must not have
        more than one dimension. Samples can have different lengths.
    t : array-like, optional
        The points (t1, ..., tn) where the empirical characteristic function
        is to be evaluated. It should contain positive, distinct numbers. The
        default value (0.4, 0.8) is proposed in [1]_. Input must not have
        more than one dimension.

    Returns
    -------
    statistic : float
        The test statistic.
    pvalue : float
        The associated p-value based on the asymptotic chi2-distribution.

    See Also
    --------
    ks_2samp, anderson_ksamp

    Notes
    -----
    Testing whether two samples are generated by the same underlying
    distribution is a classical question in statistics. A widely used test is
    the Kolmogorov-Smirnov (KS) test which relies on the empirical
    distribution function. Epps and Singleton introduce a test based on the
    empirical characteristic function in [1]_.

    One advantage of the ES test compared to the KS test is that it does
    not assume a continuous distribution. In [1]_, the authors conclude
    that the test also has a higher power than the KS test in many
    examples. They recommend the use of the ES test for discrete samples as
    well as continuous samples with at least 25 observations each, whereas
    `anderson_ksamp` is recommended for smaller sample sizes in the
    continuous case.

    The p-value is computed from the asymptotic distribution of the test
    statistic which follows a `chi2` distribution. If the sample size of both
    `x` and `y` is below 25, the small sample correction proposed in [1]_ is
    applied to the test statistic.

    The default values of `t` are determined in [1]_ by considering
    various distributions and finding good values that lead to a high power
    of the test in general. Table III in [1]_ gives the optimal values for
    the distributions tested in that study. The values of `t` are scaled by
    the semi-interquartile range in the implementation, see [1]_.

    References
    ----------
    .. [1] T. W. Epps and K. J. Singleton, "An omnibus test for the two-sample
       problem using the empirical characteristic function", Journal of
       Statistical Computation and Simulation 26, p. 177--203, 1986.
    .. [2] S. J. Goerg and J. Kaiser, "Nonparametric testing of distributions
       - the Epps-Singleton two-sample test using the empirical characteristic
       function", The Stata Journal 9(3), p. 454--465, 2009.
    """
    x, y, t = np.asarray(x), np.asarray(y), np.asarray(t)
    # check if x and y are valid inputs
    if x.ndim > 1:
        raise ValueError('x must be 1d, but x.ndim equals {}.'.format(x.ndim))
    if y.ndim > 1:
        raise ValueError('y must be 1d, but y.ndim equals {}.'.format(y.ndim))
    nx, ny = len(x), len(y)
    if (nx < 5) or (ny < 5):
        raise ValueError('x and y should have at least 5 elements, but '
                         'len(x) = {} and len(y) = {}.'.format(nx, ny))
    if not np.isfinite(x).all():
        raise ValueError('x must not contain nonfinite values.')
    if not np.isfinite(y).all():
        raise ValueError('y must not contain nonfinite values.')
    n = nx + ny

    # check if t is valid
    if t.ndim > 1:
        raise ValueError('t must be 1d, but t.ndim equals {}.'.format(t.ndim))
    if np.less_equal(t, 0).any():
        raise ValueError('t must contain positive elements only.')

    # rescale t with semi-iqr as proposed in [1]; import iqr here to avoid
    # circular import
    from scipy.stats import iqr
    sigma = iqr(np.hstack((x, y))) / 2
    ts = np.reshape(t, (-1, 1)) / sigma

    # covariance estimation of ES test
    gx = np.vstack((np.cos(ts*x), np.sin(ts*x))).T  # shape = (nx, 2*len(t))
    gy = np.vstack((np.cos(ts*y), np.sin(ts*y))).T
    cov_x = np.cov(gx.T, bias=True)  # the test uses biased cov-estimate
    cov_y = np.cov(gy.T, bias=True)
    est_cov = (n/nx)*cov_x + (n/ny)*cov_y
    est_cov_inv = np.linalg.pinv(est_cov)
    r = np.linalg.matrix_rank(est_cov_inv)
    if r < 2*len(t):
        warnings.warn('Estimated covariance matrix does not have full rank. '
                      'This indicates a bad choice of the input t and the '
                      'test might not be consistent.')  # see p. 183 in [1]_

    # compute test statistic w distributed asympt. as chisquare with df=r
    g_diff = np.mean(gx, axis=0) - np.mean(gy, axis=0)
    w = n*np.dot(g_diff.T, np.dot(est_cov_inv, g_diff))

    # apply small-sample correction
    if max(nx, ny) < 25:
        corr = 1.0/(1.0 + n**(-0.45) + 10.1*(nx**(-1.7) + ny**(-1.7)))
        w = corr * w

    p = chi2.sf(w, r)

    return Epps_Singleton_2sampResult(w, p)
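

# The core of the ES test is comparing the empirical characteristic
# functions (ECFs) of the two samples at the points `t`. The sketch below is
# a hedged illustration, not part of the public API (the names `_ecf_demo`,
# `sample` and `t_pts` are invented here); it shows how the cos/sin stacking
# in `epps_singleton_2samp` evaluates the real and imaginary parts of the
# ECF, (1/n) * sum_j exp(i*t*x_j).
def _ecf_demo(sample, t_pts):
    # Real part: mean of cos(t*x); imaginary part: mean of sin(t*x).
    sample = np.asarray(sample, dtype=float)
    ts = np.reshape(np.asarray(t_pts, dtype=float), (-1, 1))
    g = np.vstack((np.cos(ts*sample), np.sin(ts*sample))).T
    # Each row of `g` corresponds to one observation; the column means are
    # the stacked (real, imaginary) parts of the ECF at `t_pts`.
    return g.mean(axis=0)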


def poisson_means_test(k1, n1, k2, n2, *, diff=0, alternative='two-sided'):
    r"""
    Performs the Poisson means test, AKA the "E-test".

    This is a test of the null hypothesis that the difference between means
    of two Poisson distributions is `diff`. The samples are provided as the
    number of events `k1` and `k2` observed within measurement intervals
    (e.g. of time, space, number of observations) of sizes `n1` and `n2`.

    Parameters
    ----------
    k1 : int
        Number of events observed from distribution 1.
    n1 : float
        Size of sample from distribution 1.
    k2 : int
        Number of events observed from distribution 2.
    n2 : float
        Size of sample from distribution 2.
    diff : float, default=0
        The hypothesized difference in means between the distributions
        underlying the samples.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis.
        The following options are available (default is 'two-sided'):

        * 'two-sided': the difference between distribution means is not
          equal to `diff`
        * 'less': the difference between distribution means is less than
          `diff`
        * 'greater': the difference between distribution means is greater
          than `diff`

    Returns
    -------
    statistic : float
        The test statistic (see [1]_ equation 3.3).
    pvalue : float
        The probability of achieving such an extreme value of the test
        statistic under the null hypothesis.

    Notes
    -----
    Let:

    .. math:: X_1 \sim \mbox{Poisson}(\mathtt{n1}\lambda_1)

    be a random variable independent of

    .. math:: X_2 \sim \mbox{Poisson}(\mathtt{n2}\lambda_2)

    and let ``k1`` and ``k2`` be the observed values of :math:`X_1`
    and :math:`X_2`, respectively. Then `poisson_means_test` uses the number
    of observed events ``k1`` and ``k2`` from samples of size ``n1`` and
    ``n2``, respectively, to test the null hypothesis that

    .. math::
       H_0: \lambda_1 - \lambda_2 = \mathtt{diff}

    A benefit of the E-test is that it has good power for small sample sizes,
    which can reduce sampling costs [1]_. It has been evaluated and determined
    to be more powerful than the comparable C-test, sometimes referred to as
    the Poisson exact test.

    References
    ----------
    .. [1] Krishnamoorthy, K., & Thomson, J. (2004). A more powerful test for
       comparing two Poisson means. Journal of Statistical Planning and
       Inference, 119(1), 23-35.
    .. [2] Przyborowski, J., & Wilenski, H. (1940). Homogeneity of results in
       testing samples from Poisson series: With an application to testing
       clover seed for dodder. Biometrika, 31(3/4), 313-323.

    Examples
    --------
    Suppose that a gardener wishes to test the number of dodder (weed) seeds
    in a sack of clover seeds that they buy from a seed company. It has
    previously been established that the number of dodder seeds in clover
    follows the Poisson distribution.

    A 100 gram sample is drawn from the sack before being shipped to the
    gardener. The sample is analyzed, and it is found to contain no dodder
    seeds; that is, `k1` is 0. However, upon arrival, the gardener draws
    another 100 gram sample from the sack. This time, three dodder seeds are
    found in the sample; that is, `k2` is 3. The gardener would like to
    know if the difference is significant and not due to chance. The
    null hypothesis is that the difference between the two samples is merely
    due to chance, or that :math:`\lambda_1 - \lambda_2 = \mathtt{diff}`
    where :math:`\mathtt{diff} = 0`. The alternative hypothesis is that the
    difference is not due to chance, or :math:`\lambda_1 - \lambda_2 \ne 0`.
    The gardener selects a significance level of 5% to reject the null
    hypothesis in favor of the alternative [2]_.

    >>> import scipy.stats as stats
    >>> res = stats.poisson_means_test(0, 100, 3, 100)
    >>> res.statistic, res.pvalue
    (-1.7320508075688772, 0.08837900929018157)

    The p-value is .088, indicating a near 9% chance of observing a value of
    the test statistic under the null hypothesis. This exceeds 5%, so the
    gardener does not reject the null hypothesis as the difference cannot be
    regarded as significant at this level.
    """
    _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative)

    # "for a given k_1 and k_2, an estimate of \lambda_2 is given by" [1] (3.4)
    lmbd_hat2 = ((k1 + k2) / (n1 + n2) - diff * n1 / (n1 + n2))

    # "\hat{\lambda_{2k}} may be less than or equal to zero ... and in this
    # case the null hypothesis cannot be rejected ... [and] it is not
    # necessary to compute the p-value". [1] page 26 below eq. (3.6).
    if lmbd_hat2 <= 0:
        return _stats_py.SignificanceResult(0, 1)

    # the unbiased variance estimate [1] (3.2)
    var = k1 / (n1 ** 2) + k2 / (n2 ** 2)

    # The _observed_ pivot statistic from the input. It follows the
    # unnumbered equation following equation (3.3). This is used later in
    # comparison with the computed pivot statistics in an indicator function.
    t_k1k2 = (k1 / n1 - k2 / n2 - diff) / np.sqrt(var)

    # Equation (3.5) of [1] is lengthy, so it is broken into several parts,
    # beginning here. Note that the probability mass function of poisson is
    # exp(-mu)*mu^k/k!, so the distribution is called with shape parameter
    # mu, denoted here as nlmbd_hat*. The strategy for evaluating the double
    # summation in (3.5) is to create two arrays of the values of the two
    # products inside the summation, broadcast them together into a matrix,
    # and then sum across the entire matrix.

    # Compute constants (as seen in the first and second separated products
    # in (3.5)). (This is the shape (mu) parameter of the poisson
    # distribution.)
    nlmbd_hat1 = n1 * (lmbd_hat2 + diff)
    nlmbd_hat2 = n2 * lmbd_hat2

    # Determine summation bounds for tail ends of distribution rather than
    # summing to infinity. `x1*` is for the outer sum and `x2*` is the inner
    # sum.
    x1_lb, x1_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat1)
    x2_lb, x2_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat2)

    # Construct arrays to function as the x_1 and x_2 counters on the
    # summation in (3.5). `x1` is in columns and `x2` is in rows to allow for
    # broadcasting.
    x1 = np.arange(x1_lb, x1_ub + 1)
    x2 = np.arange(x2_lb, x2_ub + 1)[:, None]

    # These are the two products in equation (3.5) with `prob_x1` being the
    # first (left side) and `prob_x2` being the second (right side). (To
    # make as clear as possible: the 1st contains a "+ d" term, the 2nd does
    # not.)
    prob_x1 = distributions.poisson.pmf(x1, nlmbd_hat1)
    prob_x2 = distributions.poisson.pmf(x2, nlmbd_hat2)

    # compute constants for use in the "pivot statistic" per the
    # unnumbered equation following (3.3).
    lmbd_x1 = x1 / n1
    lmbd_x2 = x2 / n2
    lmbds_diff = lmbd_x1 - lmbd_x2 - diff
    var_x1x2 = lmbd_x1 / n1 + lmbd_x2 / n2

    # This is the 'pivot statistic' for use in the indicator of the summation
    # (left side of "I[.]").
    with np.errstate(invalid='ignore', divide='ignore'):
        t_x1x2 = lmbds_diff / np.sqrt(var_x1x2)

    # `indicator` implements the "I[.] ... indicator function" per the
    # paragraph following equation (3.5).
    if alternative == 'two-sided':
        indicator = np.abs(t_x1x2) >= np.abs(t_k1k2)
    elif alternative == 'less':
        indicator = t_x1x2 <= t_k1k2
    else:
        indicator = t_x1x2 >= t_k1k2

    # Multiply all combinations of the products together, exclude terms
    # based on the `indicator` and then sum. (3.5)
    pvalue = np.sum((prob_x1 * prob_x2)[indicator])
    return _stats_py.SignificanceResult(t_k1k2, pvalue)
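

# The broadcasting above is just a vectorized form of the double sum in
# equation (3.5) of [1]. Below is a hedged, naive reference sketch with
# explicit loops (the helper name `_e_test_pvalue_naive` is invented here
# and is not part of the module's API; input validation and the
# lmbd_hat2 <= 0 early return of the public function are omitted).
def _e_test_pvalue_naive(k1, n1, k2, n2, diff=0):
    # Two-sided E-test p-value by direct double summation over (3.5).
    lmbd_hat2 = (k1 + k2) / (n1 + n2) - diff * n1 / (n1 + n2)
    t_obs = (k1/n1 - k2/n2 - diff) / np.sqrt(k1/n1**2 + k2/n2**2)
    mu1 = n1 * (lmbd_hat2 + diff)
    mu2 = n2 * lmbd_hat2
    # truncate the infinite sums far out in the right tails
    ub1 = int(distributions.poisson.ppf(1 - 1e-12, mu1))
    ub2 = int(distributions.poisson.ppf(1 - 1e-12, mu2))
    pvalue = 0.0
    for a in range(ub1 + 1):
        for b in range(ub2 + 1):
            denom = np.sqrt(a/n1**2 + b/n2**2)
            with np.errstate(invalid='ignore', divide='ignore'):
                t = (a/n1 - b/n2 - diff) / denom  # nan/inf handled as above
            if abs(t) >= abs(t_obs):  # the indicator I[.] in (3.5)
                pvalue += (distributions.poisson.pmf(a, mu1)
                           * distributions.poisson.pmf(b, mu2))
    return pvalue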


def _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative):
    """Check for valid types and values of input to `poisson_means_test`."""
    if k1 != int(k1) or k2 != int(k2):
        raise TypeError('`k1` and `k2` must be integers.')

    count_err = '`k1` and `k2` must be greater than or equal to 0.'
    if k1 < 0 or k2 < 0:
        raise ValueError(count_err)

    if n1 <= 0 or n2 <= 0:
        raise ValueError('`n1` and `n2` must be greater than 0.')

    if diff < 0:
        raise ValueError('diff must be greater than or equal to 0.')

    alternatives = {'two-sided', 'less', 'greater'}
    if alternative.lower() not in alternatives:
        raise ValueError(f"Alternative must be one of '{alternatives}'.")


class CramerVonMisesResult:
    def __init__(self, statistic, pvalue):
        self.statistic = statistic
        self.pvalue = pvalue

    def __repr__(self):
        return (f"{self.__class__.__name__}(statistic={self.statistic}, "
                f"pvalue={self.pvalue})")


def _psi1_mod(x):
    """
    psi1 is defined in equation 1.10 in Csörgő, S. and Faraway, J. (1996).
    This implements a modified version by excluding the term V(x) / 12
    (here: _cdf_cvm_inf(x) / 12) to avoid evaluating _cdf_cvm_inf(x)
    twice in _cdf_cvm.

    Implementation based on MAPLE code of Julian Faraway and R code of the
    function pCvM in the package goftest (v1.1.1), permission granted
    by Adrian Baddeley. Main difference in the implementation: the code
    here keeps adding terms of the series until the terms are small enough.
    """

    def _ed2(y):
        z = y**2 / 4
        b = kv(1/4, z) + kv(3/4, z)
        return np.exp(-z) * (y/2)**(3/2) * b / np.sqrt(np.pi)

    def _ed3(y):
        z = y**2 / 4
        c = np.exp(-z) / np.sqrt(np.pi)
        return c * (y/2)**(5/2) * (2*kv(1/4, z) + 3*kv(3/4, z) - kv(5/4, z))

    def _Ak(k, x):
        m = 2*k + 1
        sx = 2 * np.sqrt(x)
        y1 = x**(3/4)
        y2 = x**(5/4)

        e1 = m * gamma(k + 1/2) * _ed2((4 * k + 3)/sx) / (9 * y1)
        e2 = gamma(k + 1/2) * _ed3((4 * k + 1) / sx) / (72 * y2)
        e3 = 2 * (m + 2) * gamma(k + 3/2) * _ed3((4 * k + 5) / sx) / (12 * y2)
        e4 = 7 * m * gamma(k + 1/2) * _ed2((4 * k + 1) / sx) / (144 * y1)
        e5 = 7 * m * gamma(k + 1/2) * _ed2((4 * k + 5) / sx) / (144 * y1)

        return e1 + e2 + e3 + e4 + e5

    x = np.asarray(x)
    tot = np.zeros_like(x, dtype='float')
    cond = np.ones_like(x, dtype='bool')
    k = 0

    while np.any(cond):
        z = -_Ak(k, x[cond]) / (np.pi * gamma(k + 1))
        tot[cond] = tot[cond] + z
        cond[cond] = np.abs(z) >= 1e-7
        k += 1

    return tot


def _cdf_cvm_inf(x):
    """
    Calculate the cdf of the Cramér-von Mises statistic (infinite sample
    size).

    See equation 1.2 in Csörgő, S. and Faraway, J. (1996).

    Implementation based on MAPLE code of Julian Faraway and R code of the
    function pCvM in the package goftest (v1.1.1), permission granted
    by Adrian Baddeley. Main difference in the implementation: the code
    here keeps adding terms of the series until the terms are small enough.

    The function is not expected to be accurate for large values of x, say
    x > 4, when the cdf is very close to 1.
    """
    x = np.asarray(x)

    def term(x, k):
        # this expression can be found in [2], second line of (1.3)
        u = np.exp(gammaln(k + 0.5) - gammaln(k+1)) / (np.pi**1.5 * np.sqrt(x))
        y = 4*k + 1
        q = y**2 / (16*x)
        b = kv(0.25, q)
        return u * np.sqrt(y) * np.exp(-q) * b

    tot = np.zeros_like(x, dtype='float')
    cond = np.ones_like(x, dtype='bool')
    k = 0

    while np.any(cond):
        z = term(x[cond], k)
        tot[cond] = tot[cond] + z
        cond[cond] = np.abs(z) >= 1e-7
        k += 1

    return tot
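

# Both `_psi1_mod` and `_cdf_cvm_inf` use the same masked-accumulation
# pattern: keep a boolean mask of the entries whose series has not yet
# converged and only evaluate further terms for those. Below is a hedged,
# generic sketch of the idea (the names `_sum_series_demo` and `term_fn`
# are invented for illustration; it is not used by this module).
def _sum_series_demo(x, term_fn, tol=1e-7):
    # Sum term_fn(x, k) over k = 0, 1, 2, ... elementwise, stopping each
    # entry of `x` individually once its latest term drops below `tol`.
    x = np.asarray(x, dtype=float)
    tot = np.zeros_like(x)
    cond = np.ones_like(x, dtype=bool)
    k = 0
    while np.any(cond):
        z = term_fn(x[cond], k)
        tot[cond] += z
        # entries whose current term is still large stay active
        cond[cond] = np.abs(z) >= tol
        k += 1
    return tot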


def _cdf_cvm(x, n=None):
    """
    Calculate the cdf of the Cramér-von Mises statistic for a finite sample
    size n. If n is None, use the asymptotic cdf (n=inf).

    See equation 1.8 in Csörgő, S. and Faraway, J. (1996) for finite samples,
    1.2 for the asymptotic cdf.

    The function is not expected to be accurate for large values of x, say
    x > 2, when the cdf is very close to 1 and it might return values > 1
    in that case, e.g. _cdf_cvm(2.0, 12) = 1.0000027556716846. Moreover, it
    is not accurate for small values of n, especially close to the bounds of
    the distribution's domain, [1/(12*n), n/3], where the value jumps to 0
    and 1, respectively. These are limitations of the approximation by Csörgő
    and Faraway (1996) implemented in this function.
    """
    x = np.asarray(x)
    if n is None:
        y = _cdf_cvm_inf(x)
    else:
        # support of the test statistic is [1/(12*n), n/3], see 1.1 in [2]
        y = np.zeros_like(x, dtype='float')
        sup = (1./(12*n) < x) & (x < n/3.)
        # note: _psi1_mod does not include the term _cdf_cvm_inf(x) / 12
        # therefore, we need to add it here
        y[sup] = _cdf_cvm_inf(x[sup]) * (1 + 1./(12*n)) + _psi1_mod(x[sup]) / n
        y[x >= n/3] = 1

    if y.ndim == 0:
        return y[()]
    return y


def cramervonmises(rvs, cdf, args=()):
    """Perform the one-sample Cramér-von Mises test for goodness of fit.

    This performs a test of the goodness of fit of a cumulative distribution
    function (cdf) :math:`F` compared to the empirical distribution function
    :math:`F_n` of observed random variates :math:`X_1, ..., X_n` that are
    assumed to be independent and identically distributed ([1]_).
    The null hypothesis is that the :math:`X_i` have cumulative distribution
    :math:`F`.

    Parameters
    ----------
    rvs : array_like
        A 1-D array of observed values of the random variables :math:`X_i`.
    cdf : str or callable
        The cumulative distribution function :math:`F` to test the
        observations against. If a string, it should be the name of a
        distribution in `scipy.stats`. If a callable, that callable is used
        to calculate the cdf: ``cdf(x, *args) -> float``.
    args : tuple, optional
        Distribution parameters. These are assumed to be known; see Notes.

    Returns
    -------
    res : object with attributes
        statistic : float
            Cramér-von Mises statistic.
        pvalue : float
            The p-value.

    See Also
    --------
    kstest, cramervonmises_2samp

    Notes
    -----
    .. versionadded:: 1.6.0

    The p-value relies on the approximation given by equation 1.8 in [2]_.
    It is important to keep in mind that the p-value is only accurate if
    one tests a simple hypothesis, i.e. the parameters of the reference
    distribution are known. If the parameters are estimated from the data
    (composite hypothesis), the computed p-value is not reliable.

    References
    ----------
    .. [1] Cramér-von Mises criterion, Wikipedia,
       https://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion
    .. [2] Csörgő, S. and Faraway, J. (1996). The Exact and Asymptotic
       Distribution of Cramér-von Mises Statistics. Journal of the
       Royal Statistical Society, pp. 221-234.

    Examples
    --------
    Suppose we wish to test whether data generated by ``scipy.stats.norm.rvs``
    were, in fact, drawn from the standard normal distribution. We choose a
    significance level of alpha=0.05.

    >>> import numpy as np
    >>> from scipy import stats
    >>> rng = np.random.default_rng()
    >>> x = stats.norm.rvs(size=500, random_state=rng)
    >>> res = stats.cramervonmises(x, 'norm')
    >>> res.statistic, res.pvalue
    (0.07400330012187435, 0.7274595666160468)

    The p-value 0.73 exceeds our chosen significance level, so we do not
    reject the null hypothesis that the observed sample is drawn from the
    standard normal distribution.

    Now suppose we wish to check whether the same sample shifted by 2.1 is
    consistent with being drawn from a normal distribution with a mean of 2.

    >>> y = x + 2.1
    >>> res = stats.cramervonmises(y, 'norm', args=(2,))
    >>> res.statistic, res.pvalue
    (0.49121480855028343, 0.04189256516661377)

    Here we have used the `args` keyword to specify the mean (``loc``)
    of the normal distribution to test the data against. This is equivalent
    to the following, in which we create a frozen normal distribution with
    mean 2, then pass its ``cdf`` method as an argument.

    >>> frozen_dist = stats.norm(loc=2)
    >>> res = stats.cramervonmises(y, frozen_dist.cdf)
    >>> res.statistic, res.pvalue
    (0.49121480855028343, 0.04189256516661377)

    In either case, we would reject the null hypothesis that the observed
    sample is drawn from a normal distribution with a mean of 2 (and default
    variance of 1) because the p-value 0.04 is less than our chosen
    significance level.
    """
    if isinstance(cdf, str):
        cdf = getattr(distributions, cdf).cdf

    vals = np.sort(np.asarray(rvs))

    if vals.size <= 1:
        raise ValueError('The sample must contain at least two observations.')
    if vals.ndim > 1:
        raise ValueError('The sample must be one-dimensional.')

    n = len(vals)
    cdfvals = cdf(vals, *args)

    u = (2*np.arange(1, n+1) - 1)/(2*n)
    w = 1/(12*n) + np.sum((u - cdfvals)**2)

    # avoid small negative values that can occur due to the approximation
    p = max(0, 1. - _cdf_cvm(w, n))

    return CramerVonMisesResult(statistic=w, pvalue=p)
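

# The statistic above is the standard computational form
# w = 1/(12*n) + sum_i ((2*i - 1)/(2*n) - F(x_(i)))**2 over the sorted
# sample. A hedged, loop-based sketch of the same formula (the helper name
# `_cvm_statistic_naive` is invented for illustration; `cdf` is a plain
# callable with no extra arguments):
def _cvm_statistic_naive(rvs, cdf):
    vals = sorted(rvs)
    n = len(vals)
    w = 1 / (12 * n)
    for i, v in enumerate(vals, start=1):
        # compare the i-th plotting position with the hypothesized cdf
        w += ((2 * i - 1) / (2 * n) - cdf(v)) ** 2
    return w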


def _get_wilcoxon_distr(n):
    """
    Distribution of probability of the Wilcoxon signed-rank statistic r_plus
    (sum of ranks of positive differences).
    Returns an array with the probabilities of all the possible ranks
    r = 0, ..., n*(n+1)/2
    """
    c = np.ones(1, dtype=np.double)
    for k in range(1, n + 1):
        prev_c = c
        c = np.zeros(k * (k + 1) // 2 + 1, dtype=np.double)
        m = len(prev_c)
        c[:m] = prev_c * 0.5
        c[-m:] += prev_c * 0.5
    return c
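

# The loop above is a convolution recurrence: under H0, each difference of
# rank k is positive or negative with probability 1/2, shifting the running
# distribution by 0 or k. A hedged brute-force check for small n (the name
# `_wilcoxon_distr_bruteforce` is invented here), enumerating all 2**n sign
# assignments directly:
def _wilcoxon_distr_bruteforce(n):
    from itertools import product
    t = n * (n + 1) // 2
    counts = np.zeros(t + 1)
    # each assignment of signs to ranks 1..n is equally likely under H0
    for signs in product((0, 1), repeat=n):
        r_plus = sum(k for k, s in zip(range(1, n + 1), signs) if s)
        counts[r_plus] += 1
    return counts / 2**n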


def _get_wilcoxon_distr2(n):
    """
    Distribution of probability of the Wilcoxon signed-rank statistic r_plus
    (sum of ranks of positive differences).
    Returns an array with the probabilities of all the possible ranks
    r = 0, ..., n*(n+1)/2
    This is a slower reference function.

    References
    ----------
    .. [1] Harris T, Hardin JW. Exact Wilcoxon Signed-Rank and Wilcoxon
       Mann-Whitney Ranksum Tests. The Stata Journal. 2013;13(2):337-343.
    """
    ai = np.arange(1, n+1)[:, None]
    t = n*(n+1)/2
    q = 2*t
    j = np.arange(q)
    theta = 2*np.pi/q*j
    phi_sp = np.prod(np.cos(theta*ai), axis=0)
    phi_s = np.exp(1j*theta*t) * phi_sp
    p = np.real(ifft(phi_s))
    res = np.zeros(int(t)+1)
    res[:-1] = p[::2]
    res[0] /= 2
    res[-1] = res[0]
    return res
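

# A hedged sanity check, useful when modifying either implementation: the
# recurrence-based and characteristic-function-based distributions should
# agree for small n. This snippet is illustrative only and deliberately left
# as a comment so it does not run at import time:
#
#     for n in range(1, 10):
#         assert np.allclose(_get_wilcoxon_distr(n), _get_wilcoxon_distr2(n))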


def _tau_b(A):
    """Calculate Kendall's tau-b and p-value from contingency table."""
    # See [2] 2.2 and 4.2

    # contingency table must be truly 2D
    if A.shape[0] == 1 or A.shape[1] == 1:
        return np.nan, np.nan

    NA = A.sum()
    PA = _P(A)
    QA = _Q(A)
    Sri2 = (A.sum(axis=1)**2).sum()
    Scj2 = (A.sum(axis=0)**2).sum()
    denominator = (NA**2 - Sri2)*(NA**2 - Scj2)

    tau = (PA-QA)/(denominator)**0.5

    numerator = 4*(_a_ij_Aij_Dij2(A) - (PA - QA)**2 / NA)
    s02_tau_b = numerator/denominator
    if s02_tau_b == 0:  # Avoid divide by zero
        return tau, 0
    Z = tau/s02_tau_b**0.5
    p = 2*norm.sf(abs(Z))  # 2-sided p-value

    return tau, p


def _somers_d(A, alternative='two-sided'):
    """Calculate Somers' D and p-value from contingency table."""
    # See [3] page 1740

    # contingency table must be truly 2D
    if A.shape[0] <= 1 or A.shape[1] <= 1:
        return np.nan, np.nan

    NA = A.sum()
    NA2 = NA**2
    PA = _P(A)
    QA = _Q(A)
    Sri2 = (A.sum(axis=1)**2).sum()

    d = (PA - QA)/(NA2 - Sri2)

    S = _a_ij_Aij_Dij2(A) - (PA-QA)**2/NA

    with np.errstate(divide='ignore'):
        Z = (PA - QA)/(4*(S))**0.5

    _, p = scipy.stats._stats_py._normtest_finish(Z, alternative)

    return d, p
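

# For intuition, Somers' D(Y|X) equals (concordant - discordant pairs)
# divided by the number of pairs not tied in X. A hedged O(n**2) sketch over
# raw rankings (the name `_somers_d_naive` is invented here; the
# contingency-table routine above is the efficient path actually used):
def _somers_d_naive(x, y):
    n = len(x)
    concordant = discordant = tied_x = 0
    for i in range(n):
        for j in range(i + 1, n):
            dx, dy = x[i] - x[j], y[i] - y[j]
            if dx == 0:
                tied_x += 1       # pairs tied in x are excluded
            elif dx * dy > 0:
                concordant += 1   # pair ordered the same way in x and y
            elif dx * dy < 0:
                discordant += 1   # pair ordered oppositely
    n_pairs = n * (n - 1) // 2
    return (concordant - discordant) / (n_pairs - tied_x)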


SomersDResult = make_dataclass("SomersDResult",
                               ("statistic", "pvalue", "table"))


def somersd(x, y=None, alternative='two-sided'):
    r"""Calculates Somers' D, an asymmetric measure of ordinal association.

    Like Kendall's :math:`\tau`, Somers' :math:`D` is a measure of the
    correspondence between two rankings. Both statistics consider the
    difference between the number of concordant and discordant pairs in two
    rankings :math:`X` and :math:`Y`, and both are normalized such that values
    close to 1 indicate strong agreement and values close to -1 indicate
    strong disagreement. They differ in how they are normalized. To show the
    relationship, Somers' :math:`D` can be defined in terms of Kendall's
    :math:`\tau_a`:

    .. math::
        D(Y|X) = \frac{\tau_a(X, Y)}{\tau_a(X, X)}

    Suppose the first ranking :math:`X` has :math:`r` distinct ranks and the
    second ranking :math:`Y` has :math:`s` distinct ranks. These two lists of
    :math:`n` rankings can also be viewed as an :math:`r \times s` contingency
    table in which element :math:`i, j` is the number of rank pairs with rank
    :math:`i` in ranking :math:`X` and rank :math:`j` in ranking :math:`Y`.
    Accordingly, `somersd` also allows the input data to be supplied as a
    single, 2D contingency table instead of as two separate, 1D rankings.

    Note that the definition of Somers' :math:`D` is asymmetric: in general,
    :math:`D(Y|X) \neq D(X|Y)`. ``somersd(x, y)`` calculates Somers'
    :math:`D(Y|X)`: the "row" variable :math:`X` is treated as an independent
    variable, and the "column" variable :math:`Y` is dependent. For Somers'
    :math:`D(X|Y)`, swap the input lists or transpose the input table.

    Parameters
    ----------
    x : array_like
        1D array of rankings, treated as the (row) independent variable.
        Alternatively, a 2D contingency table.
    y : array_like, optional
        If `x` is a 1D array of rankings, `y` is a 1D array of rankings of the
        same length, treated as the (column) dependent variable.
        If `x` is 2D, `y` is ignored.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis. Default is 'two-sided'.
        The following options are available:

        * 'two-sided': the rank correlation is nonzero
        * 'less': the rank correlation is negative (less than zero)
        * 'greater': the rank correlation is positive (greater than zero)

    Returns
    -------
    res : SomersDResult
        A `SomersDResult` object with the following fields:

        statistic : float
            The Somers' :math:`D` statistic.
        pvalue : float
            The p-value for a hypothesis test whose null
            hypothesis is an absence of association, :math:`D=0`.
            See notes for more information.
        table : 2D array
            The contingency table formed from rankings `x` and `y` (or the
            provided contingency table, if `x` is a 2D array)

    See Also
    --------
    kendalltau : Calculates Kendall's tau, another correlation measure.
    weightedtau : Computes a weighted version of Kendall's tau.
    spearmanr : Calculates a Spearman rank-order correlation coefficient.
    pearsonr : Calculates a Pearson correlation coefficient.

    Notes
    -----
    This function follows the contingency table approach of [2]_ and
    [3]_. *p*-values are computed based on an asymptotic approximation of
    the test statistic distribution under the null hypothesis :math:`D=0`.

    Theoretically, hypothesis tests based on Kendall's :math:`\tau` and
    Somers' :math:`D` should be identical.
    However, the *p*-values returned by `kendalltau` are based
    on the null hypothesis of *independence* between :math:`X` and :math:`Y`
    (i.e. the population from which pairs in :math:`X` and :math:`Y` are
    sampled contains equal numbers of all possible pairs), which is more
    specific than the null hypothesis :math:`D=0` used here. If the null
    hypothesis of independence is desired, it is acceptable to use the
    *p*-value returned by `kendalltau` with the statistic returned by
    `somersd` and vice versa. For more information, see [2]_.

    Contingency tables are formatted according to the convention used by
    SAS and R: the first ranking supplied (``x``) is the "row" variable, and
    the second ranking supplied (``y``) is the "column" variable. This is
    opposite the convention of Somers' original paper [1]_.

    References
    ----------
    .. [1] Robert H. Somers, "A New Asymmetric Measure of Association for
       Ordinal Variables", *American Sociological Review*, Vol. 27, No. 6,
       pp. 799--811, 1962.
    .. [2] Morton B. Brown and Jacqueline K. Benedetti, "Sampling Behavior of
       Tests for Correlation in Two-Way Contingency Tables", *Journal of
       the American Statistical Association* Vol. 72, No. 358, pp.
       309--315, 1977.
    .. [3] SAS Institute, Inc., "The FREQ Procedure (Book Excerpt)",
       *SAS/STAT 9.2 User's Guide, Second Edition*, SAS Publishing, 2009.
    .. [4] Laerd Statistics, "Somers' d using SPSS Statistics", *SPSS
       Statistics Tutorials and Statistical Guides*,
       https://statistics.laerd.com/spss-tutorials/somers-d-using-spss-statistics.php,
       Accessed July 31, 2020.

    Examples
    --------
    We calculate Somers' D for the example given in [4]_, in which a hotel
    chain owner seeks to determine the association between hotel room
    cleanliness and customer satisfaction. The independent variable, hotel
    room cleanliness, is ranked on an ordinal scale: "below average (1)",
    "average (2)", or "above average (3)". The dependent variable, customer
    satisfaction, is ranked on a second scale: "very dissatisfied (1)",
    "moderately dissatisfied (2)", "neither dissatisfied nor satisfied (3)",
    "moderately satisfied (4)", or "very satisfied (5)". 189 customers
    respond to the survey, and the results are cast into a contingency table
    with the hotel room cleanliness as the "row" variable and customer
    satisfaction as the "column" variable.

    +-----+-----+-----+-----+-----+-----+
    |     | (1) | (2) | (3) | (4) | (5) |
    +=====+=====+=====+=====+=====+=====+
    | (1) | 27  | 25  | 14  | 7   | 0   |
    +-----+-----+-----+-----+-----+-----+
    | (2) | 7   | 14  | 18  | 35  | 12  |
    +-----+-----+-----+-----+-----+-----+
    | (3) | 1   | 3   | 2   | 7   | 17  |
    +-----+-----+-----+-----+-----+-----+

    For example, 27 customers assigned their room a cleanliness ranking of
    "below average (1)" and a corresponding satisfaction of "very
    dissatisfied (1)". We perform the analysis as follows.

    >>> from scipy.stats import somersd
    >>> table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]
    >>> res = somersd(table)
    >>> res.statistic
    0.6032766111513396
    >>> res.pvalue
    1.0007091191074533e-27

    The value of the Somers' D statistic is approximately 0.6, indicating
    a positive correlation between room cleanliness and customer satisfaction
    in the sample.

    The *p*-value is very small, indicating a very small probability of
    observing such an extreme value of the statistic under the null
    hypothesis that the statistic of the entire population (from which
    our sample of 189 customers is drawn) is zero. This supports the
    alternative hypothesis that the true value of Somers' D for the
    population is nonzero.
    """
    x, y = np.array(x), np.array(y)
    if x.ndim == 1:
        if x.size != y.size:
            raise ValueError("Rankings must be of equal length.")
        table = scipy.stats.contingency.crosstab(x, y)[1]
    elif x.ndim == 2:
        if np.any(x < 0):
            raise ValueError("All elements of the contingency table must be "
                             "non-negative.")
        if np.any(x != x.astype(int)):
            raise ValueError("All elements of the contingency table must be "
                             "integer.")
        if x.nonzero()[0].size < 2:
            raise ValueError("At least two elements of the contingency table "
                             "must be nonzero.")
        table = x
    else:
        raise ValueError("x must be either a 1D or 2D array")
    d, p = _somers_d(table, alternative)

    # add alias for consistency with other correlation functions
    res = SomersDResult(d, p, table)
    res.correlation = d
    return res
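

# The docstring above defines D(Y|X) as tau_a(X, Y) / tau_a(X, X). Since
# tau_a is proportional to (concordant - discordant) over all pairs, the
# denominator tau_a(X, X) just counts the fraction of pairs not tied in X,
# which recovers the pairwise form used in `_somers_d_naive` above. A hedged,
# illustrative check (kept as a comment so it does not run at import time):
#
#     x = [1, 2, 3, 1, 2]
#     y = [1, 3, 2, 3, 3]
#     res = somersd(x, y)
#     # res.statistic should match _somers_d_naive(x, y)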


# This could be combined with `_all_partitions` in `_resampling.py`
def _all_partitions(nx, ny):
    """
    Partition a set of indices into two fixed-length sets in all possible ways

    Partition a set of indices 0 ... nx + ny - 1 into two sets of length nx
    and ny in all possible ways (ignoring order of elements).
    """
    z = np.arange(nx+ny)
    for c in combinations(z, nx):
        x = np.array(c)
        mask = np.ones(nx+ny, bool)
        mask[x] = False
        y = z[mask]
        yield x, y
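

# A hedged usage sketch: `_all_partitions` yields each of the
# C(nx + ny, nx) ways to split the pooled indices, which is the basis of
# exact permutation tests. Illustrative only (kept as a comment):
#
#     parts = list(_all_partitions(2, 1))
#     assert len(parts) == comb(3, 2)  # `comb` is imported from math above
#     # e.g. the first split is x = [0, 1], y = [2]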


def _compute_log_combinations(n):
    """Compute the log of the binomial coefficients C(n, k) for k = 0..n."""
    gammaln_arr = gammaln(np.arange(n + 1) + 1)
    return gammaln(n + 1) - gammaln_arr - gammaln_arr[::-1]
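

# The identity used above is log C(n, k) = log n! - log k! - log (n-k)!,
# computed with `gammaln` (log-gamma, where gammaln(m + 1) = log m!) for
# numerical stability. A hedged check against exact integer arithmetic
# (illustrative comment only):
#
#     n = 20
#     log_combs = _compute_log_combinations(n)
#     assert np.allclose(np.exp(log_combs),
#                        [comb(n, k) for k in range(n + 1)])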


BarnardExactResult = make_dataclass(
    "BarnardExactResult", [("statistic", float), ("pvalue", float)]
)


def barnard_exact(table, alternative="two-sided", pooled=True, n=32):
    r"""Perform a Barnard exact test on a 2x2 contingency table.

    Parameters
    ----------
    table : array_like of ints
        A 2x2 contingency table. Elements should be non-negative integers.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the null and alternative hypotheses. Default is 'two-sided'.
        Please see explanations in the Notes section below.
    pooled : bool, optional
        Whether to compute score statistic with pooled variance (as in
        Student's t-test, for example) or unpooled variance (as in Welch's
        t-test). Default is ``True``.
    n : int, optional
        Number of sampling points used in the construction of the sampling
        method. Note that this argument will automatically be converted to
        the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to
        select sample points. Default is 32. Must be positive. In most cases,
        32 points is enough to reach good precision. More points come at a
        performance cost.

    Returns
    -------
    ber : BarnardExactResult
        A result object with the following attributes.

        statistic : float
            The Wald statistic with pooled or unpooled variance, depending
            on the user choice of `pooled`.
        pvalue : float
            P-value, the probability of obtaining a distribution at least as
            extreme as the one that was actually observed, assuming that the
            null hypothesis is true.

    See Also
    --------
    chi2_contingency : Chi-square test of independence of variables in a
        contingency table.
    fisher_exact : Fisher exact test on a 2x2 contingency table.
    boschloo_exact : Boschloo's exact test on a 2x2 contingency table,
        which is a uniformly more powerful alternative to Fisher's exact
        test.

    Notes
    -----
    Barnard's test is an exact test used in the analysis of contingency
    tables. It examines the association of two categorical variables, and
    is a more powerful alternative than Fisher's exact test
    for 2x2 contingency tables.

    Let's define :math:`X_0` a 2x2 matrix representing the observed sample,
    where each column stores the binomial experiment, as in the example
    below. Let's also define :math:`p_1, p_2` the theoretical binomial
    probabilities for :math:`x_{11}` and :math:`x_{12}`. When using
    Barnard exact test, we can assert three different null hypotheses:

    - :math:`H_0 : p_1 \geq p_2` versus :math:`H_1 : p_1 < p_2`,
      with `alternative` = "less"

    - :math:`H_0 : p_1 \leq p_2` versus :math:`H_1 : p_1 > p_2`,
      with `alternative` = "greater"

    - :math:`H_0 : p_1 = p_2` versus :math:`H_1 : p_1 \neq p_2`,
      with `alternative` = "two-sided" (the default)

    In order to compute Barnard's exact test, we are using the Wald
    statistic [3]_ with pooled or unpooled variance.
    Under the default assumption that both variances are equal
    (``pooled = True``), the statistic is computed as:

    .. math::

       T(X) = \frac{
           \hat{p}_1 - \hat{p}_2
       }{
           \sqrt{
               \hat{p}(1 - \hat{p})
               (\frac{1}{c_1} +
               \frac{1}{c_2})
           }
       }

    with :math:`\hat{p}_1, \hat{p}_2` and :math:`\hat{p}` the estimator of
    :math:`p_1, p_2` and :math:`p`, the latter being the combined
    probability, given the assumption that :math:`p_1 = p_2`.

    If this assumption is invalid (``pooled = False``), the statistic is:

    .. math::

       T(X) = \frac{
           \hat{p}_1 - \hat{p}_2
       }{
           \sqrt{
               \frac{\hat{p}_1 (1 - \hat{p}_1)}{c_1} +
               \frac{\hat{p}_2 (1 - \hat{p}_2)}{c_2}
           }
       }

    The p-value is then computed as:

    .. math::

       \sum
           \binom{c_1}{x_{11}}
           \binom{c_2}{x_{12}}
           \pi^{x_{11} + x_{12}}
           (1 - \pi)^{t - x_{11} - x_{12}}

    where the sum is over all 2x2 contingency tables :math:`X` such that:

    * :math:`T(X) \leq T(X_0)` when `alternative` = "less",
    * :math:`T(X) \geq T(X_0)` when `alternative` = "greater", or
    * :math:`|T(X)| \geq |T(X_0)|` when `alternative` = "two-sided".

    Above, :math:`c_1, c_2` are the sums of columns 1 and 2,
    and :math:`t` the total (the sum of the 4 elements of the table).

    The returned p-value is the maximum p-value taken over the nuisance
    parameter :math:`\pi`, where :math:`0 \leq \pi \leq 1`.

    This function's complexity is :math:`O(n c_1 c_2)`, where `n` is the
    number of sample points.

    References
    ----------
    .. [1] Barnard, G. A. "Significance Tests for 2x2 Tables". *Biometrika*.
       34.1/2 (1947): 123-138. :doi:`dpgkg3`
    .. [2] Mehta, Cyrus R., and Pralay Senchaudhuri. "Conditional versus
       unconditional exact tests for comparing two binomials."
       *Cytel Software Corporation* 675 (2003): 1-5.
    .. [3] "Wald Test". *Wikipedia*. https://en.wikipedia.org/wiki/Wald_test

    Examples
    --------
    An example use of Barnard's test is presented in [2]_.

    Consider the following example of a vaccine efficacy study
    (Chan, 1998). In a randomized clinical trial of 30 subjects, 15 were
    inoculated with a recombinant DNA influenza vaccine and the 15 were
    inoculated with a placebo. Twelve of the 15 subjects in the placebo
    group (80%) eventually became infected with influenza whereas for the
    vaccine group, only 7 of the 15 subjects (47%) became infected. The
    data are tabulated as a 2 x 2 table::

                Vaccine  Placebo
        Yes        7        12
        No         8         3

    When working with statistical hypothesis testing, we usually use a
    threshold probability or significance level upon which we decide
    to reject the null hypothesis :math:`H_0`. Suppose we choose the common
    significance level of 5%.

    Our alternative hypothesis is that the vaccine will lower the chance of
    becoming infected with the virus; that is, the probability :math:`p_1` of
    catching the virus with the vaccine will be *less than* the probability
    :math:`p_2` of catching the virus without the vaccine. Therefore, we call
    `barnard_exact` with the ``alternative="less"`` option:

    >>> import scipy.stats as stats
    >>> res = stats.barnard_exact([[7, 12], [8, 3]], alternative="less")
    >>> res.statistic
    -1.894...
    >>> res.pvalue
    0.03407...

    Under the null hypothesis that the vaccine will not lower the chance of
    becoming infected, the probability of obtaining test results at least as
    extreme as the observed data is approximately 3.4%. Since this p-value is
    less than our chosen significance level, we have evidence to reject
    :math:`H_0` in favor of the alternative.

    Suppose we had used Fisher's exact test instead:

    >>> _, pvalue = stats.fisher_exact([[7, 12], [8, 3]], alternative="less")
    >>> pvalue
    0.0640...

    With the same threshold significance of 5%, we would not have been able
    to reject the null hypothesis in favor of the alternative. As stated in
    [2]_, Barnard's test is uniformly more powerful than Fisher's exact test
    because Barnard's test does not condition on any margin. Fisher's test
    should only be used when both sets of marginals are fixed.
    """
    if n <= 0:
        raise ValueError(
            "Number of points `n` must be strictly positive, "
            f"found {n!r}"
        )

    table = np.asarray(table, dtype=np.int64)

    if not table.shape == (2, 2):
        raise ValueError("The input `table` must be of shape (2, 2).")

    if np.any(table < 0):
        raise ValueError("All values in `table` must be nonnegative.")

    if 0 in table.sum(axis=0):
        # If both values in a column are zero, the p-value is 1 and
        # the score's statistic is NaN.
        return BarnardExactResult(np.nan, 1.0)

    total_col_1, total_col_2 = table.sum(axis=0)

    x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(-1, 1)
    x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(1, -1)

    # We need to calculate the Wald statistic for each combination of x1 and
    # x2.
    p1, p2 = x1 / total_col_1, x2 / total_col_2

    if pooled:
        p = (x1 + x2) / (total_col_1 + total_col_2)
        variances = p * (1 - p) * (1 / total_col_1 + 1 / total_col_2)
    else:
        variances = p1 * (1 - p1) / total_col_1 + p2 * (1 - p2) / total_col_2

    # To avoid warning when dividing by 0
    with np.errstate(divide="ignore", invalid="ignore"):
        wald_statistic = np.divide((p1 - p2), np.sqrt(variances))

    wald_statistic[p1 == p2] = 0  # Removing NaN values

    wald_stat_obs = wald_statistic[table[0, 0], table[0, 1]]

    if alternative == "two-sided":
        index_arr = np.abs(wald_statistic) >= abs(wald_stat_obs)
    elif alternative == "less":
        index_arr = wald_statistic <= wald_stat_obs
    elif alternative == "greater":
        index_arr = wald_statistic >= wald_stat_obs
    else:
        msg = (
            "`alternative` should be one of {'two-sided', 'less', 'greater'},"
            f" found {alternative!r}"
        )
        raise ValueError(msg)

    x1_sum_x2 = x1 + x2

    x1_log_comb = _compute_log_combinations(total_col_1)
    x2_log_comb = _compute_log_combinations(total_col_2)
    x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]

    result = shgo(
        _get_binomial_log_p_value_with_nuisance_param,
        args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),
        bounds=((0, 1),),
        n=n,
        sampling_method="sobol",
    )

    # result.fun is the negative log pvalue and therefore needs to be
    # changed before return
    p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)
    return BarnardExactResult(wald_stat_obs, p_value)
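

# The call to `shgo` above maximizes the p-value over the nuisance parameter
# pi in [0, 1] (by minimizing the negative log p-value). Below is a hedged,
# much cruder sketch of the same idea using a plain grid search. The names
# `_barnard_pvalue_grid` and `grid_size` are invented here, and the sketch
# assumes the call signature of
# `_get_binomial_log_p_value_with_nuisance_param` (defined later in this
# module) matches its use in the `shgo` call above, where the first argument
# is a one-element array.
def _barnard_pvalue_grid(x1_sum_x2, x1_sum_x2_log_comb, index_arr,
                         grid_size=101):
    best = np.inf  # smallest negative log p-value found so far
    for pi in np.linspace(0, 1, grid_size):
        neg_log_p = _get_binomial_log_p_value_with_nuisance_param(
            np.array([pi]), x1_sum_x2, x1_sum_x2_log_comb, index_arr)
        best = min(best, neg_log_p)
    return np.clip(np.exp(-best), 0, 1)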


BoschlooExactResult = make_dataclass(
    "BoschlooExactResult", [("statistic", float), ("pvalue", float)]
)


def boschloo_exact(table, alternative="two-sided", n=32):
    r"""Perform Boschloo's exact test on a 2x2 contingency table.

    Parameters
    ----------
    table : array_like of ints
        A 2x2 contingency table. Elements should be non-negative integers.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the null and alternative hypotheses. Default is 'two-sided'.
        Please see explanations in the Notes section below.
    n : int, optional
        Number of sampling points used in the construction of the sampling
        method. Note that this argument will automatically be converted to
        the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to
        select sample points. Default is 32. Must be positive. In most cases,
        32 points is enough to reach good precision. More points come at a
        performance cost.

    Returns
    -------
    ber : BoschlooExactResult
        A result object with the following attributes.

        statistic : float
            The statistic used in Boschloo's test; that is, the p-value
            from Fisher's exact test.
        pvalue : float
            P-value, the probability of obtaining a distribution at least as
            extreme as the one that was actually observed, assuming that the
            null hypothesis is true.

    See Also
    --------
    chi2_contingency : Chi-square test of independence of variables in a
        contingency table.
    fisher_exact : Fisher exact test on a 2x2 contingency table.
    barnard_exact : Barnard's exact test, which is a more powerful
        alternative than Fisher's exact test for 2x2 contingency tables.

    Notes
    -----
    Boschloo's test is an exact test used in the analysis of contingency
    tables. It examines the association of two categorical variables, and
    is a uniformly more powerful alternative to Fisher's exact test
    for 2x2 contingency tables.

    Boschloo's exact test uses the p-value of Fisher's exact test as a
    statistic, and Boschloo's p-value is the probability under the null
    hypothesis of observing such an extreme value of this statistic.

    Let's define :math:`X_0` a 2x2 matrix representing the observed sample,
    where each column stores the binomial experiment, as in the example
    below. Let's also define :math:`p_1, p_2` the theoretical binomial
    probabilities for :math:`x_{11}` and :math:`x_{12}`. When using
    Boschloo's exact test, we can assert three different alternative
    hypotheses:

    - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 < p_2`,
      with `alternative` = "less"

    - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 > p_2`,
      with `alternative` = "greater"

    - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 \neq p_2`,
      with `alternative` = "two-sided" (default)

    There are multiple conventions for computing a two-sided p-value when the
    null distribution is asymmetric. Here, we apply the convention that the
    p-value of a two-sided test is twice the minimum of the p-values of the
    one-sided tests (clipped to 1.0). Note that `fisher_exact` follows a
    different convention, so for a given `table`, the statistic reported by
    `boschloo_exact` may differ from the p-value reported by `fisher_exact`
    when ``alternative='two-sided'``.

    .. versionadded:: 1.7.0

    References
    ----------
    .. [1] R.D. Boschloo. "Raised conditional level of significance for the
       2 x 2-table when testing the equality of two probabilities",
       Statistica Neerlandica, 24(1), 1970
    .. [2] "Boschloo's test", Wikipedia,
       https://en.wikipedia.org/wiki/Boschloo%27s_test
    .. [3] Lise M. Saari et al. "Employee attitudes and job satisfaction",
       Human Resource Management, 43(4), 395-407, 2004,
       :doi:`10.1002/hrm.20032`.

    Examples
    --------
    In the following example, we consider the article "Employee
    attitudes and job satisfaction" [3]_
    which reports the results of a survey from 63 scientists and 117 college
    professors. Of the 63 scientists, 31 said they were very satisfied with
    their jobs, whereas 74 of the college professors were very satisfied
    with their work. Is this significant evidence that college
    professors are happier with their work than scientists?
    The following table summarizes the data mentioned above::

                        college professors   scientists
        Very Satisfied          74               31
        Dissatisfied            43               32

    When working with statistical hypothesis testing, we usually use a
    threshold probability or significance level upon which we decide
    to reject the null hypothesis :math:`H_0`. Suppose we choose the common
    significance level of 5%.

    Our alternative hypothesis is that college professors are truly more
    satisfied with their work than scientists. Therefore, we expect
    :math:`p_1` the proportion of very satisfied college professors to be
    greater than :math:`p_2`, the proportion of very satisfied scientists.
    We thus call `boschloo_exact` with the ``alternative="greater"`` option:

    >>> import scipy.stats as stats
    >>> res = stats.boschloo_exact([[74, 31], [43, 32]], alternative="greater")
    >>> res.statistic
    0.0483...
    >>> res.pvalue
    0.0355...

    Under the null hypothesis that college professors are not more satisfied
    with their work than scientists, the probability of obtaining test
    results at least as extreme as the observed data is approximately 3.55%.
    Since this p-value is less than our chosen significance level, we have
    evidence to reject :math:`H_0` in favor of the alternative hypothesis.
    """
    hypergeom = distributions.hypergeom

    if n <= 0:
        raise ValueError(
            "Number of points `n` must be strictly positive,"
            f" found {n!r}"
        )

    table = np.asarray(table, dtype=np.int64)

    if not table.shape == (2, 2):
        raise ValueError("The input `table` must be of shape (2, 2).")

    if np.any(table < 0):
        raise ValueError("All values in `table` must be nonnegative.")

    if 0 in table.sum(axis=0):
        # If both values in a column are zero, the statistic and the
        # p-value are undefined, so NaN is returned for both.
        return BoschlooExactResult(np.nan, np.nan)

    total_col_1, total_col_2 = table.sum(axis=0)
    total = total_col_1 + total_col_2
    x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(1, -1)
    x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(-1, 1)
    x1_sum_x2 = x1 + x2

    if alternative == 'less':
        pvalues = hypergeom.cdf(x1, total, x1_sum_x2, total_col_1).T
    elif alternative == 'greater':
        # Same formula as the 'less' case, but with the second column.
        pvalues = hypergeom.cdf(x2, total, x1_sum_x2, total_col_2).T
    elif alternative == 'two-sided':
        boschloo_less = boschloo_exact(table, alternative="less", n=n)
        boschloo_greater = boschloo_exact(table, alternative="greater", n=n)

        res = (
            boschloo_less if boschloo_less.pvalue < boschloo_greater.pvalue
            else boschloo_greater
        )

        # Two-sided p-value is defined as twice the minimum of the
        # one-sided p-values, clipped to [0, 1].
        pvalue = np.clip(2 * res.pvalue, a_min=0, a_max=1)
        return BoschlooExactResult(res.statistic, pvalue)
    else:
        msg = (
            f"`alternative` should be one of {'two-sided', 'less', 'greater'},"
            f" found {alternative!r}"
        )
        raise ValueError(msg)

    fisher_stat = pvalues[table[0, 0], table[0, 1]]

    # fisher_stat * (1+1e-13) guards us from small numerical error. It is
    # equivalent to np.isclose with relative tol of 1e-13 and absolute tol
    # of 0. For more thorough explanations, see gh-14178.
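    # For example (illustrative): two tables whose Fisher p-values are
    # mathematically equal can differ by a few ulps because they are
    # computed through different sequences of floating-point operations;
    # a strict `<=` could then wrongly exclude such tables from the tail.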
    index_arr = pvalues <= fisher_stat * (1+1e-13)

    x1, x2, x1_sum_x2 = x1.T, x2.T, x1_sum_x2.T
    x1_log_comb = _compute_log_combinations(total_col_1)
    x2_log_comb = _compute_log_combinations(total_col_2)
    x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]

    result = shgo(
        _get_binomial_log_p_value_with_nuisance_param,
        args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),
        bounds=((0, 1),),
        n=n,
        sampling_method="sobol",
    )

    # result.fun is the negative log p-value and therefore needs to be
    # transformed before return.
    p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)
    return BoschlooExactResult(fisher_stat, p_value)


def _get_binomial_log_p_value_with_nuisance_param(
    nuisance_param, x1_sum_x2, x1_sum_x2_log_comb, index_arr
):
    r"""
    Compute the log p-value with respect to a nuisance parameter over a
    2x2 sample space.

    Parameters
    ----------
    nuisance_param : float
        Nuisance parameter used in the maximization of the p-value. Must be
        between 0 and 1.
    x1_sum_x2 : ndarray
        Sum of x1 and x2 over the sample space, as computed inside
        `barnard_exact` and `boschloo_exact`.
    x1_sum_x2_log_comb : ndarray
        Sum of the log combinations of x1 and x2.
    index_arr : ndarray of boolean
        Mask selecting the tables of the sample space that are at least as
        extreme as the observed one.

    Returns
    -------
    p_value : float
        The negative log p-value evaluated at `nuisance_param`. Minimizing
        this value over the nuisance parameter (as `shgo` does) yields the
        maximum p-value.

    Notes
    -----
    Both Barnard's test and Boschloo's test iterate over a nuisance
    parameter :math:`\pi \in [0, 1]` to find the maximum p-value. To search
    for this maximum, this function returns the negative log p-value with
    respect to the nuisance parameter passed in as `nuisance_param`. The
    negative log p-value is then minimized by `shgo`; the minimum negative
    p-value corresponds to our maximum p-value.

    Also, to compute the combinations used in the p-value formula, this
    function uses `gammaln`, which handles large values much better than
    `scipy.special.comb`; `gammaln` gives a log combination. The small loss
    of precision is outweighed by a large gain in performance.
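
    For example, the log of a binomial coefficient can be computed from
    `gammaln` as follows (an illustrative check, not part of the
    algorithm):

    >>> import numpy as np
    >>> from scipy.special import comb, gammaln
    >>> n, k = 50, 20
    >>> log_comb = gammaln(n + 1) - gammaln(k + 1) - gammaln(n - k + 1)
    >>> np.isclose(log_comb, np.log(comb(n, k)))
    True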
  1153. """
  1154. t1, t2 = x1_sum_x2.shape
  1155. n = t1 + t2 - 2
  1156. with np.errstate(divide="ignore", invalid="ignore"):
  1157. log_nuisance = np.log(
  1158. nuisance_param,
  1159. out=np.zeros_like(nuisance_param),
  1160. where=nuisance_param >= 0,
  1161. )
  1162. log_1_minus_nuisance = np.log(
  1163. 1 - nuisance_param,
  1164. out=np.zeros_like(nuisance_param),
  1165. where=1 - nuisance_param >= 0,
  1166. )
  1167. nuisance_power_x1_x2 = log_nuisance * x1_sum_x2
  1168. nuisance_power_x1_x2[(x1_sum_x2 == 0)[:, :]] = 0
  1169. nuisance_power_n_minus_x1_x2 = log_1_minus_nuisance * (n - x1_sum_x2)
  1170. nuisance_power_n_minus_x1_x2[(x1_sum_x2 == n)[:, :]] = 0
  1171. tmp_log_values_arr = (
  1172. x1_sum_x2_log_comb
  1173. + nuisance_power_x1_x2
  1174. + nuisance_power_n_minus_x1_x2
  1175. )
  1176. tmp_values_from_index = tmp_log_values_arr[index_arr]
  1177. # To avoid dividing by zero in log function and getting inf value,
  1178. # values are centered according to the max
  1179. max_value = tmp_values_from_index.max()
  1180. # To have better result's precision, the log pvalue is taken here.
  1181. # Indeed, pvalue is included inside [0, 1] interval. Passing the
  1182. # pvalue to log makes the interval a lot bigger ([-inf, 0]), and thus
  1183. # help us to achieve better precision
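    # For example (assumed values): with centered terms [-1000.0, -1001.0],
    # a naive np.log(np.sum(np.exp(...))) would underflow to -inf, whereas
    # max_value + np.log(np.sum(np.exp(values - max_value))) evaluates to
    # -1000 + log(1 + exp(-1)), which stays finite -- the usual
    # log-sum-exp trick.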
    with np.errstate(divide="ignore", invalid="ignore"):
        log_probs = np.exp(tmp_values_from_index - max_value).sum()
        log_pvalue = max_value + np.log(
            log_probs,
            out=np.full_like(log_probs, -np.inf),
            where=log_probs > 0,
        )

    # Since shgo searches for the minimum, the negative log p-value is
    # returned.
    return -log_pvalue


def _pval_cvm_2samp_exact(s, m, n):
    """
    Compute the exact p-value of the Cramer-von Mises two-sample test
    for a given value s of the statistic U (eq. 10 in [2]).
    m and n are the sizes of the samples.

    [1] Y. Xiao, A. Gordon, and A. Yakovlev, "A C++ Program for
        the Cramér-Von Mises Two-Sample Test", J. Stat. Soft.,
        vol. 17, no. 8, pp. 1-15, Dec. 2006.
    [2] T. W. Anderson "On the Distribution of the Two-Sample Cramer-von
        Mises Criterion," The Annals of Mathematical Statistics,
        Ann. Math. Statist. 33(3), 1148-1159, (September, 1962)
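
    For small samples, the result can be cross-checked by brute force:
    under the null hypothesis, all orderings of the pooled ranks are
    equally likely, so the p-value of an attainable statistic value is the
    fraction of orderings whose statistic is at least that large (an
    illustrative check, not how this function works):

    >>> from itertools import combinations
    >>> import numpy as np
    >>> m, n = 4, 3
    >>> ranks = range(1, m + n + 1)
    >>> us = []
    >>> for rx in combinations(ranks, m):
    ...     ry = [r for r in ranks if r not in rx]
    ...     u = (m * sum((r - i) ** 2 for i, r in enumerate(rx, 1))
    ...          + n * sum((r - j) ** 2 for j, r in enumerate(ry, 1)))
    ...     us.append(u)
    >>> s = us[0]
    >>> np.isclose(_pval_cvm_2samp_exact(s, m, n),
    ...            np.mean([u >= s for u in us]))
    True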
  1204. """
  1205. # [1, p. 3]
  1206. lcm = np.lcm(m, n)
  1207. # [1, p. 4], below eq. 3
  1208. a = lcm // m
  1209. b = lcm // n
  1210. # Combine Eq. 9 in [2] with Eq. 2 in [1] and solve for $\zeta$
  1211. # Hint: `s` is $U$ in [2], and $T_2$ in [1] is $T$ in [2]
  1212. mn = m * n
  1213. zeta = lcm ** 2 * (m + n) * (6 * s - mn * (4 * mn - 1)) // (6 * mn ** 2)
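    # Equivalently (derived from the two equations above):
    # zeta = a * b * (m + n)**2 * T, i.e. an integer rescaling of the
    # statistic T that can be handled in exact integer arithmetic.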
    # bound maximum value that may appear in `gs` (remember both rows!)
    zeta_bound = lcm**2 * (m + n)  # bound elements in row 1
    combinations = comb(m + n, m)  # sum of row 2
    max_gs = max(zeta_bound, combinations)
    dtype = np.min_scalar_type(max_gs)

    # the frequency table of $g_{u, v}^+$ defined in [1, p. 6]
    gs = ([np.array([[0], [1]], dtype=dtype)]
          + [np.empty((2, 0), dtype=dtype) for _ in range(m)])
    for u in range(n + 1):
        next_gs = []
        tmp = np.empty((2, 0), dtype=dtype)
        for v, g in enumerate(gs):
            # Calculate g recursively with eq. 11 in [1]. Even though it
            # doesn't look like it, this also does 12/13 (all of Algorithm 1).
            vi, i0, i1 = np.intersect1d(tmp[0], g[0], return_indices=True)
            tmp = np.concatenate([
                np.stack([vi, tmp[1, i0] + g[1, i1]]),
                np.delete(tmp, i0, 1),
                np.delete(g, i1, 1)
            ], 1)
            tmp[0] += (a * v - b * u) ** 2
            next_gs.append(tmp)
        gs = next_gs
    value, freq = gs[m]
    return np.float64(np.sum(freq[value >= zeta]) / combinations)


def cramervonmises_2samp(x, y, method='auto'):
    r"""Perform the two-sample Cramér-von Mises test for goodness of fit.

    This is the two-sample version of the Cramér-von Mises test ([1]_):
    for two independent samples :math:`X_1, ..., X_n` and
    :math:`Y_1, ..., Y_m`, the null hypothesis is that the samples
    come from the same (unspecified) continuous distribution.

    Parameters
    ----------
    x : array_like
        A 1-D array of observed values of the random variables :math:`X_i`.
    y : array_like
        A 1-D array of observed values of the random variables :math:`Y_i`.
    method : {'auto', 'asymptotic', 'exact'}, optional
        The method used to compute the p-value, see Notes for details.
        The default is 'auto'.

    Returns
    -------
    res : object with attributes
        statistic : float
            Cramér-von Mises statistic.
        pvalue : float
            The p-value.

    See Also
    --------
    cramervonmises, anderson_ksamp, epps_singleton_2samp, ks_2samp

    Notes
    -----
    .. versionadded:: 1.7.0

    The statistic is computed according to equation 9 in [2]_. The
    calculation of the p-value depends on the keyword `method`:

    - ``asymptotic``: The p-value is approximated by using the limiting
      distribution of the test statistic.
    - ``exact``: The exact p-value is computed by enumerating all
      possible combinations of the test statistic, see [2]_.

    If ``method='auto'``, the exact approach is used
    if both samples contain no more than 20 observations;
    otherwise, the asymptotic distribution is used.

    If the underlying distribution is not continuous, the p-value is likely
    to be conservative (Section 6.2 in [3]_). When ranking the data to
    compute the test statistic, midranks are used if there are ties.
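
    For reference, the statistic computed here can be written out explicitly
    (following equations 9 and 10 of [2]_, in the notation of this
    implementation): with :math:`r_i` and :math:`s_j` the pooled-sample
    ranks of the sorted observations of `x` and `y`,

    .. math::

        U = n \sum_{i=1}^{n} (r_i - i)^2 + m \sum_{j=1}^{m} (s_j - j)^2,
        \qquad
        T = \frac{U}{n m (n + m)} - \frac{4 n m - 1}{6 (n + m)}.
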
    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Cramer-von_Mises_criterion
    .. [2] Anderson, T.W. (1962). On the distribution of the two-sample
           Cramer-von-Mises criterion. The Annals of Mathematical
           Statistics, pp. 1148-1159.
    .. [3] Conover, W.J., Practical Nonparametric Statistics, 1971.

    Examples
    --------
    Suppose we wish to test whether two samples generated by
    ``scipy.stats.norm.rvs`` have the same distribution. We choose a
    significance level of alpha=0.05.

    >>> import numpy as np
    >>> from scipy import stats
    >>> rng = np.random.default_rng()
    >>> x = stats.norm.rvs(size=100, random_state=rng)
    >>> y = stats.norm.rvs(size=70, random_state=rng)
    >>> res = stats.cramervonmises_2samp(x, y)
    >>> res.statistic, res.pvalue
    (0.29376470588235293, 0.1412873014573014)

    The p-value exceeds our chosen significance level, so we do not
    reject the null hypothesis that the observed samples are drawn from the
    same distribution.

    For small sample sizes, one can compute the exact p-values:

    >>> x = stats.norm.rvs(size=7, random_state=rng)
    >>> y = stats.t.rvs(df=2, size=6, random_state=rng)
    >>> res = stats.cramervonmises_2samp(x, y, method='exact')
    >>> res.statistic, res.pvalue
    (0.197802197802198, 0.31643356643356646)

    The p-value based on the asymptotic distribution is a good approximation
    even though the sample size is small.

    >>> res = stats.cramervonmises_2samp(x, y, method='asymptotic')
    >>> res.statistic, res.pvalue
    (0.197802197802198, 0.2966041181527128)

    Independent of the method, one would not reject the null hypothesis at
    the chosen significance level in this example.
    """
    xa = np.sort(np.asarray(x))
    ya = np.sort(np.asarray(y))

    if xa.size <= 1 or ya.size <= 1:
        raise ValueError('x and y must contain at least two observations.')
    if xa.ndim > 1 or ya.ndim > 1:
        raise ValueError('The samples must be one-dimensional.')
    if method not in ['auto', 'exact', 'asymptotic']:
        raise ValueError('method must be either auto, exact or asymptotic.')

    nx = len(xa)
    ny = len(ya)

    if method == 'auto':
        if max(nx, ny) > 20:
            method = 'asymptotic'
        else:
            method = 'exact'

    # get ranks of x and y in the pooled sample
    z = np.concatenate([xa, ya])
    # in case of ties, use midrank (see [1])
    r = scipy.stats.rankdata(z, method='average')
    rx = r[:nx]
    ry = r[nx:]

    # compute U (eq. 10 in [2])
    u = nx * np.sum((rx - np.arange(1, nx+1))**2)
    u += ny * np.sum((ry - np.arange(1, ny+1))**2)

    # compute T (eq. 9 in [2])
    k, N = nx*ny, nx + ny
    t = u / (k*N) - (4*k - 1)/(6*N)

    if method == 'exact':
        p = _pval_cvm_2samp_exact(u, nx, ny)
    else:
        # compute expected value and variance of T (eq. 11 and 14 in [2])
        et = (1 + 1/N)/6
        vt = (N+1) * (4*k*N - 3*(nx**2 + ny**2) - 2*k)
        vt = vt / (45 * N**2 * 4 * k)

        # compute the normalized statistic (eq. 15 in [2])
        tn = 1/6 + (t - et) / np.sqrt(45 * vt)

        # approximate distribution of tn with limiting distribution
        # of the one-sample test statistic
        # if tn < 0.003, then _cdf_cvm_inf(tn) < 1.28e-18; return 1.0 directly
        if tn < 0.003:
            p = 1.0
        else:
            p = max(0, 1. - _cdf_cvm_inf(tn))

    return CramerVonMisesResult(statistic=t, pvalue=p)


class TukeyHSDResult:
    """Result of `scipy.stats.tukey_hsd`.

    Attributes
    ----------
    statistic : float ndarray
        The computed statistic of the test for each comparison. The element
        at index ``(i, j)`` is the statistic for the comparison between
        groups ``i`` and ``j``.
    pvalue : float ndarray
        The associated p-value from the studentized range distribution. The
        element at index ``(i, j)`` is the p-value for the comparison
        between groups ``i`` and ``j``.

    Notes
    -----
    The string representation of this object displays the most recently
    calculated confidence interval, and if none have been previously
    calculated, it will evaluate ``confidence_interval()``.

    References
    ----------
    .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1.
           Tukey's Method."
           https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
           28 November 2020.
    """

    def __init__(self, statistic, pvalue, _ntreatments, _nobs, _stand_err):
        self.statistic = statistic
        self.pvalue = pvalue
        self._ntreatments = _ntreatments
        self._nobs = _nobs
        self._stand_err = _stand_err
        self._ci = None
        self._ci_cl = None

    def __str__(self):
        # Note: `__str__` prints the confidence intervals from the most
        # recent call to `confidence_interval`. If it has not been called,
        # it will be called with the default CL of .95.
        if self._ci is None:
            self.confidence_interval(confidence_level=.95)
        s = ("Tukey's HSD Pairwise Group Comparisons"
             f" ({self._ci_cl*100:.1f}% Confidence Interval)\n")
        s += "Comparison  Statistic  p-value  Lower CI  Upper CI\n"
        for i in range(self.pvalue.shape[0]):
            for j in range(self.pvalue.shape[0]):
                if i != j:
                    s += (f" ({i} - {j}) {self.statistic[i, j]:>10.3f}"
                          f"{self.pvalue[i, j]:>10.3f}"
                          f"{self._ci.low[i, j]:>10.3f}"
                          f"{self._ci.high[i, j]:>10.3f}\n")
        return s

    def confidence_interval(self, confidence_level=.95):
        """Compute the confidence interval for the specified confidence level.

        Parameters
        ----------
        confidence_level : float, optional
            Confidence level for the computed confidence interval of the
            estimated differences between the group means. Default is .95.

        Returns
        -------
        ci : ``ConfidenceInterval`` object
            The object has attributes ``low`` and ``high`` that hold the
            lower and upper bounds of the confidence intervals for each
            comparison. The high and low values are accessible for each
            comparison at index ``(i, j)`` between groups ``i`` and ``j``.

        References
        ----------
        .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1.
               Tukey's Method."
               https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
               28 November 2020.

        Examples
        --------
        >>> from scipy.stats import tukey_hsd
        >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
        >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
        >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
        >>> result = tukey_hsd(group0, group1, group2)
        >>> ci = result.confidence_interval()
        >>> ci.low
        array([[-3.649159, -8.249159, -3.909159],
               [ 0.950841, -3.649159,  0.690841],
               [-3.389159, -7.989159, -3.649159]])
        >>> ci.high
        array([[ 3.649159, -0.950841,  3.389159],
               [ 8.249159,  3.649159,  7.989159],
               [ 3.909159, -0.690841,  3.649159]])
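
        The bounds are the statistic plus or minus the Tukey criterion,
        i.e. the studentized range critical value times the standard error.
        For these equal-sized groups this can be cross-checked directly
        (an illustrative sketch, not part of the implementation):

        >>> import numpy as np
        >>> from scipy.stats import studentized_range
        >>> groups = [group0, group1, group2]
        >>> mse = np.mean([np.var(g, ddof=1) for g in groups])
        >>> q = studentized_range.ppf(0.95, 3, 12)  # k=3 groups, df=15-3
        >>> np.allclose(ci.high, result.statistic + q * np.sqrt(mse / 5))
        True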
  1445. """
  1446. # check to see if the supplied confidence level matches that of the
  1447. # previously computed CI.
  1448. if (self._ci is not None and self._ci_cl is not None and
  1449. confidence_level == self._ci_cl):
  1450. return self._ci
  1451. if not 0 < confidence_level < 1:
  1452. raise ValueError("Confidence level must be between 0 and 1.")
  1453. # determine the critical value of the studentized range using the
  1454. # appropriate confidence level, number of treatments, and degrees
  1455. # of freedom as determined by the number of data less the number of
  1456. # treatments. ("Confidence limits for Tukey's method")[1]. Note that
  1457. # in the cases of unequal sample sizes there will be a criterion for
  1458. # each group comparison.
  1459. params = (confidence_level, self._nobs, self._ntreatments - self._nobs)
  1460. srd = distributions.studentized_range.ppf(*params)
  1461. # also called maximum critical value, the Tukey criterion is the
  1462. # studentized range critical value * the square root of mean square
  1463. # error over the sample size.
  1464. tukey_criterion = srd * self._stand_err
  1465. # the confidence levels are determined by the
  1466. # `mean_differences` +- `tukey_criterion`
  1467. upper_conf = self.statistic + tukey_criterion
  1468. lower_conf = self.statistic - tukey_criterion
  1469. self._ci = ConfidenceInterval(low=lower_conf, high=upper_conf)
  1470. self._ci_cl = confidence_level
  1471. return self._ci


def _tukey_hsd_iv(args):
    if (len(args)) < 2:
        raise ValueError("There must be more than 1 treatment.")
    args = [np.asarray(arg) for arg in args]
    for arg in args:
        if arg.ndim != 1:
            raise ValueError("Input samples must be one-dimensional.")
        if arg.size <= 1:
            raise ValueError("Input sample size must be greater than one.")
        if np.isinf(arg).any():
            raise ValueError("Input samples must be finite.")
    return args


def tukey_hsd(*args):
    """Perform Tukey's HSD test for equality of means over multiple treatments.

    Tukey's honestly significant difference (HSD) test performs pairwise
    comparison of means for a set of samples. Whereas ANOVA (e.g. `f_oneway`)
    assesses whether the true means underlying each sample are identical,
    Tukey's HSD is a post hoc test used to compare the mean of each sample
    to the mean of each other sample.

    The null hypothesis is that the distributions underlying the samples all
    have the same mean. The test statistic, which is computed for every
    possible pairing of samples, is simply the difference between the sample
    means. For each pair, the p-value is the probability under the null
    hypothesis (and other assumptions; see notes) of observing such an
    extreme value of the statistic, considering that many pairwise
    comparisons are being performed. Confidence intervals for the difference
    between each pair of means are also available. (A direct check of how
    the p-values are obtained from the studentized range distribution is
    shown at the end of the Examples section.)
    Parameters
    ----------
    sample1, sample2, ... : array_like
        The sample measurements for each group. There must be at least
        two arguments.

    Returns
    -------
    result : `~scipy.stats._result_classes.TukeyHSDResult` instance
        The return value is an object with the following attributes:

        statistic : float ndarray
            The computed statistic of the test for each comparison. The
            element at index ``(i, j)`` is the statistic for the comparison
            between groups ``i`` and ``j``.
        pvalue : float ndarray
            The computed p-value of the test for each comparison. The
            element at index ``(i, j)`` is the p-value for the comparison
            between groups ``i`` and ``j``.

        The object has the following methods:

        confidence_interval(confidence_level=0.95):
            Compute the confidence interval for the specified confidence
            level.

    Notes
    -----
    The use of this test relies on several assumptions.

    1. The observations are independent within and among groups.
    2. The observations within each group are normally distributed.
    3. The distributions from which the samples are drawn have the same
       finite variance.

    The original formulation of the test was for samples of equal size [6]_.
    In case of unequal sample sizes, the test uses the Tukey-Kramer method
    [4]_.

    References
    ----------
    .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's
           Method."
           https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
           28 November 2020.
    .. [2] Abdi, Herve & Williams, Lynne. (2021). "Tukey's Honestly
           Significant Difference (HSD) Test."
           https://personal.utdallas.edu/~herve/abdi-HSD2010-pretty.pdf
    .. [3] "One-Way ANOVA Using SAS PROC ANOVA & PROC GLM." SAS
           Tutorials, 2007, www.stattutorials.com/SAS/TUTORIAL-PROC-GLM.htm.
    .. [4] Kramer, Clyde Young. "Extension of Multiple Range Tests to Group
           Means with Unequal Numbers of Replications." Biometrics, vol. 12,
           no. 3, 1956, pp. 307-310. JSTOR, www.jstor.org/stable/3001469.
           Accessed 25 May 2021.
    .. [5] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.3.3.
           The ANOVA table and tests of hypotheses about means"
           https://www.itl.nist.gov/div898/handbook/prc/section4/prc433.htm,
           2 June 2021.
    .. [6] Tukey, John W. "Comparing Individual Means in the Analysis of
           Variance." Biometrics, vol. 5, no. 2, 1949, pp. 99-114. JSTOR,
           www.jstor.org/stable/3001913. Accessed 14 June 2021.
    Examples
    --------
    Here are some data comparing the time to relief of three brands of
    headache medicine, reported in minutes. Data adapted from [3]_.

    >>> import numpy as np
    >>> from scipy.stats import tukey_hsd
    >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
    >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
    >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]

    We would like to see if the means between any of the groups are
    significantly different. First, visually examine a box and whisker plot.

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots(1, 1)
    >>> ax.boxplot([group0, group1, group2])  # doctest: +SKIP
    >>> ax.set_xticklabels(["group0", "group1", "group2"])  # doctest: +SKIP
    >>> ax.set_ylabel("mean")  # doctest: +SKIP
    >>> plt.show()

    From the box and whisker plot, we can see that the interquartile ranges
    of the groups overlap, so we apply the ``tukey_hsd`` test to determine
    whether the differences between means are significant. We set a
    significance level of .05 to reject the null hypothesis.

    >>> res = tukey_hsd(group0, group1, group2)
    >>> print(res)
    Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
    Comparison  Statistic  p-value  Lower CI  Upper CI
     (0 - 1)     -4.600     0.014    -8.249    -0.951
     (0 - 2)     -0.260     0.980    -3.909     3.389
     (1 - 0)      4.600     0.014     0.951     8.249
     (1 - 2)      4.340     0.020     0.691     7.989
     (2 - 0)      0.260     0.980    -3.389     3.909
     (2 - 1)     -4.340     0.020    -7.989    -0.691

    The null hypothesis is that each group has the same mean. The p-values
    for the comparisons between ``group0`` and ``group1`` as well as
    ``group1`` and ``group2`` do not exceed .05, so we reject the null
    hypothesis that they have the same means. The p-value of the comparison
    between ``group0`` and ``group2`` exceeds .05, so we fail to reject the
    null hypothesis: there is not significant evidence that their means
    differ.

    We can also compute the confidence interval associated with our chosen
    confidence level.

    >>> conf = res.confidence_interval(confidence_level=.99)
    >>> for ((i, j), l) in np.ndenumerate(conf.low):
    ...     # filter out self comparisons
    ...     if i != j:
    ...         h = conf.high[i,j]
    ...         print(f"({i} - {j}) {l:>6.3f} {h:>6.3f}")
    (0 - 1) -9.480  0.280
    (0 - 2) -5.140  4.620
    (1 - 0) -0.280  9.480
    (1 - 2) -0.540  9.220
    (2 - 0) -4.620  5.140
    (2 - 1) -9.220  0.540
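
    As noted above, the statistic is the difference between group means and
    the p-value comes from the studentized range distribution. For these
    equal-sized groups, one entry can be reproduced directly (an
    illustrative sketch, not the required way to call `tukey_hsd`):

    >>> from scipy.stats import studentized_range
    >>> groups = [group0, group1, group2]
    >>> mse = np.mean([np.var(g, ddof=1) for g in groups])
    >>> t = np.abs(np.mean(group0) - np.mean(group1)) / np.sqrt(mse / 5)
    >>> p = studentized_range.sf(t, 3, 15 - 3)  # k=3 groups, df=N-k
    >>> np.isclose(p, res.pvalue[0, 1])
    True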
  1606. """
  1607. args = _tukey_hsd_iv(args)
  1608. ntreatments = len(args)
  1609. means = np.asarray([np.mean(arg) for arg in args])
  1610. nsamples_treatments = np.asarray([a.size for a in args])
  1611. nobs = np.sum(nsamples_treatments)
  1612. # determine mean square error [5]. Note that this is sometimes called
  1613. # mean square error within.
  1614. mse = (np.sum([np.var(arg, ddof=1) for arg in args] *
  1615. (nsamples_treatments - 1)) / (nobs - ntreatments))
  1616. # The calculation of the standard error differs when treatments differ in
  1617. # size. See ("Unequal sample sizes")[1].
  1618. if np.unique(nsamples_treatments).size == 1:
  1619. # all input groups are the same length, so only one value needs to be
  1620. # calculated [1].
  1621. normalize = 2 / nsamples_treatments[0]
  1622. else:
  1623. # to compare groups of differing sizes, we must compute a variance
  1624. # value for each individual comparison. Use broadcasting to get the
  1625. # resulting matrix. [3], verified against [4] (page 308).
  1626. normalize = 1 / nsamples_treatments + 1 / nsamples_treatments[None].T
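        # For example (illustrative): sizes (4, 5, 6) yield a 3x3 matrix
        # whose (i, j) entry is 1/n_i + 1/n_j, e.g. entry (0, 1) is
        # 1/4 + 1/5 = 0.45.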

    # the standard error is used in the computation of the Tukey criterion
    # and finding the p-values.
    stand_err = np.sqrt(normalize * mse / 2)

    # the mean difference is the test statistic.
    mean_differences = means[None].T - means

    # Calculate the t-statistic to use within the survival function of the
    # studentized range to get the p-value.
    t_stat = np.abs(mean_differences) / stand_err

    params = t_stat, ntreatments, nobs - ntreatments
    pvalues = distributions.studentized_range.sf(*params)

    return TukeyHSDResult(mean_differences, pvalues, ntreatments,
                          nobs, stand_err)