12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006 |
- from collections import namedtuple
- from dataclasses import make_dataclass
- from math import comb
- import numpy as np
- import warnings
- from itertools import combinations
- import scipy.stats
- from scipy.optimize import shgo
- from . import distributions
- from ._common import ConfidenceInterval
- from ._continuous_distns import chi2, norm
- from scipy.special import gamma, kv, gammaln
- from scipy.fft import ifft
- from ._stats_pythran import _a_ij_Aij_Dij2
- from ._stats_pythran import (
- _concordant_pairs as _P, _discordant_pairs as _Q
- )
- from scipy.stats import _stats_py
- __all__ = ['epps_singleton_2samp', 'cramervonmises', 'somersd',
- 'barnard_exact', 'boschloo_exact', 'cramervonmises_2samp',
- 'tukey_hsd', 'poisson_means_test']
- Epps_Singleton_2sampResult = namedtuple('Epps_Singleton_2sampResult',
- ('statistic', 'pvalue'))
- def epps_singleton_2samp(x, y, t=(0.4, 0.8)):
- """Compute the Epps-Singleton (ES) test statistic.
- Test the null hypothesis that two samples have the same underlying
- probability distribution.
- Parameters
- ----------
- x, y : array-like
- The two samples of observations to be tested. Input must not have more
- than one dimension. Samples can have different lengths.
- t : array-like, optional
- The points (t1, ..., tn) where the empirical characteristic function is
- to be evaluated. It should be positive distinct numbers. The default
- value (0.4, 0.8) is proposed in [1]_. Input must not have more than
- one dimension.
- Returns
- -------
- statistic : float
- The test statistic.
- pvalue : float
- The associated p-value based on the asymptotic chi2-distribution.
- See Also
- --------
- ks_2samp, anderson_ksamp
- Notes
- -----
- Testing whether two samples are generated by the same underlying
- distribution is a classical question in statistics. A widely used test is
- the Kolmogorov-Smirnov (KS) test which relies on the empirical
- distribution function. Epps and Singleton introduce a test based on the
- empirical characteristic function in [1]_.
- One advantage of the ES test compared to the KS test is that is does
- not assume a continuous distribution. In [1]_, the authors conclude
- that the test also has a higher power than the KS test in many
- examples. They recommend the use of the ES test for discrete samples as
- well as continuous samples with at least 25 observations each, whereas
- `anderson_ksamp` is recommended for smaller sample sizes in the
- continuous case.
- The p-value is computed from the asymptotic distribution of the test
- statistic which follows a `chi2` distribution. If the sample size of both
- `x` and `y` is below 25, the small sample correction proposed in [1]_ is
- applied to the test statistic.
- The default values of `t` are determined in [1]_ by considering
- various distributions and finding good values that lead to a high power
- of the test in general. Table III in [1]_ gives the optimal values for
- the distributions tested in that study. The values of `t` are scaled by
- the semi-interquartile range in the implementation, see [1]_.
- References
- ----------
- .. [1] T. W. Epps and K. J. Singleton, "An omnibus test for the two-sample
- problem using the empirical characteristic function", Journal of
- Statistical Computation and Simulation 26, p. 177--203, 1986.
- .. [2] S. J. Goerg and J. Kaiser, "Nonparametric testing of distributions
- - the Epps-Singleton two-sample test using the empirical characteristic
- function", The Stata Journal 9(3), p. 454--465, 2009.
- """
- x, y, t = np.asarray(x), np.asarray(y), np.asarray(t)
- # check if x and y are valid inputs
- if x.ndim > 1:
- raise ValueError('x must be 1d, but x.ndim equals {}.'.format(x.ndim))
- if y.ndim > 1:
- raise ValueError('y must be 1d, but y.ndim equals {}.'.format(y.ndim))
- nx, ny = len(x), len(y)
- if (nx < 5) or (ny < 5):
- raise ValueError('x and y should have at least 5 elements, but len(x) '
- '= {} and len(y) = {}.'.format(nx, ny))
- if not np.isfinite(x).all():
- raise ValueError('x must not contain nonfinite values.')
- if not np.isfinite(y).all():
- raise ValueError('y must not contain nonfinite values.')
- n = nx + ny
- # check if t is valid
- if t.ndim > 1:
- raise ValueError('t must be 1d, but t.ndim equals {}.'.format(t.ndim))
- if np.less_equal(t, 0).any():
- raise ValueError('t must contain positive elements only.')
- # rescale t with semi-iqr as proposed in [1]; import iqr here to avoid
- # circular import
- from scipy.stats import iqr
- sigma = iqr(np.hstack((x, y))) / 2
- ts = np.reshape(t, (-1, 1)) / sigma
- # covariance estimation of ES test
- gx = np.vstack((np.cos(ts*x), np.sin(ts*x))).T # shape = (nx, 2*len(t))
- gy = np.vstack((np.cos(ts*y), np.sin(ts*y))).T
- cov_x = np.cov(gx.T, bias=True) # the test uses biased cov-estimate
- cov_y = np.cov(gy.T, bias=True)
- est_cov = (n/nx)*cov_x + (n/ny)*cov_y
- est_cov_inv = np.linalg.pinv(est_cov)
- r = np.linalg.matrix_rank(est_cov_inv)
- if r < 2*len(t):
- warnings.warn('Estimated covariance matrix does not have full rank. '
- 'This indicates a bad choice of the input t and the '
- 'test might not be consistent.') # see p. 183 in [1]_
- # compute test statistic w distributed asympt. as chisquare with df=r
- g_diff = np.mean(gx, axis=0) - np.mean(gy, axis=0)
- w = n*np.dot(g_diff.T, np.dot(est_cov_inv, g_diff))
- # apply small-sample correction
- if (max(nx, ny) < 25):
- corr = 1.0/(1.0 + n**(-0.45) + 10.1*(nx**(-1.7) + ny**(-1.7)))
- w = corr * w
- p = chi2.sf(w, r)
- return Epps_Singleton_2sampResult(w, p)
- def poisson_means_test(k1, n1, k2, n2, *, diff=0, alternative='two-sided'):
- r"""
- Performs the Poisson means test, AKA the "E-test".
- This is a test of the null hypothesis that the difference between means of
- two Poisson distributions is `diff`. The samples are provided as the
- number of events `k1` and `k2` observed within measurement intervals
- (e.g. of time, space, number of observations) of sizes `n1` and `n2`.
- Parameters
- ----------
- k1 : int
- Number of events observed from distribution 1.
- n1: float
- Size of sample from distribution 1.
- k2 : int
- Number of events observed from distribution 2.
- n2 : float
- Size of sample from distribution 2.
- diff : float, default=0
- The hypothesized difference in means between the distributions
- underlying the samples.
- alternative : {'two-sided', 'less', 'greater'}, optional
- Defines the alternative hypothesis.
- The following options are available (default is 'two-sided'):
- * 'two-sided': the difference between distribution means is not
- equal to `diff`
- * 'less': the difference between distribution means is less than
- `diff`
- * 'greater': the difference between distribution means is greater
- than `diff`
- Returns
- -------
- statistic : float
- The test statistic (see [1]_ equation 3.3).
- pvalue : float
- The probability of achieving such an extreme value of the test
- statistic under the null hypothesis.
- Notes
- -----
- Let:
- .. math:: X_1 \sim \mbox{Poisson}(\mathtt{n1}\lambda_1)
- be a random variable independent of
- .. math:: X_2 \sim \mbox{Poisson}(\mathtt{n2}\lambda_2)
- and let ``k1`` and ``k2`` be the observed values of :math:`X_1`
- and :math:`X_2`, respectively. Then `poisson_means_test` uses the number
- of observed events ``k1`` and ``k2`` from samples of size ``n1`` and
- ``n2``, respectively, to test the null hypothesis that
- .. math::
- H_0: \lambda_1 - \lambda_2 = \mathtt{diff}
- A benefit of the E-test is that it has good power for small sample sizes,
- which can reduce sampling costs [1]_. It has been evaluated and determined
- to be more powerful than the comparable C-test, sometimes referred to as
- the Poisson exact test.
- References
- ----------
- .. [1] Krishnamoorthy, K., & Thomson, J. (2004). A more powerful test for
- comparing two Poisson means. Journal of Statistical Planning and
- Inference, 119(1), 23-35.
- .. [2] Przyborowski, J., & Wilenski, H. (1940). Homogeneity of results in
- testing samples from Poisson series: With an application to testing
- clover seed for dodder. Biometrika, 31(3/4), 313-323.
- Examples
- --------
- Suppose that a gardener wishes to test the number of dodder (weed) seeds
- in a sack of clover seeds that they buy from a seed company. It has
- previously been established that the number of dodder seeds in clover
- follows the Poisson distribution.
- A 100 gram sample is drawn from the sack before being shipped to the
- gardener. The sample is analyzed, and it is found to contain no dodder
- seeds; that is, `k1` is 0. However, upon arrival, the gardener draws
- another 100 gram sample from the sack. This time, three dodder seeds are
- found in the sample; that is, `k2` is 3. The gardener would like to
- know if the difference is significant and not due to chance. The
- null hypothesis is that the difference between the two samples is merely
- due to chance, or that :math:`\lambda_1 - \lambda_2 = \mathtt{diff}`
- where :math:`\mathtt{diff} = 0`. The alternative hypothesis is that the
- difference is not due to chance, or :math:`\lambda_1 - \lambda_2 \ne 0`.
- The gardener selects a significance level of 5% to reject the null
- hypothesis in favor of the alternative [2]_.
- >>> import scipy.stats as stats
- >>> res = stats.poisson_means_test(0, 100, 3, 100)
- >>> res.statistic, res.pvalue
- (-1.7320508075688772, 0.08837900929018157)
- The p-value is .088, indicating a near 9% chance of observing a value of
- the test statistic under the null hypothesis. This exceeds 5%, so the
- gardener does not reject the null hypothesis as the difference cannot be
- regarded as significant at this level.
- """
- _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative)
- # "for a given k_1 and k_2, an estimate of \lambda_2 is given by" [1] (3.4)
- lmbd_hat2 = ((k1 + k2) / (n1 + n2) - diff * n1 / (n1 + n2))
- # "\hat{\lambda_{2k}} may be less than or equal to zero ... and in this
- # case the null hypothesis cannot be rejected ... [and] it is not necessary
- # to compute the p-value". [1] page 26 below eq. (3.6).
- if lmbd_hat2 <= 0:
- return _stats_py.SignificanceResult(0, 1)
- # The unbiased variance estimate [1] (3.2)
- var = k1 / (n1 ** 2) + k2 / (n2 ** 2)
- # The _observed_ pivot statistic from the input. It follows the
- # unnumbered equation following equation (3.3) This is used later in
- # comparison with the computed pivot statistics in an indicator function.
- t_k1k2 = (k1 / n1 - k2 / n2 - diff) / np.sqrt(var)
- # Equation (3.5) of [1] is lengthy, so it is broken into several parts,
- # beginning here. Note that the probability mass function of poisson is
- # exp^(-\mu)*\mu^k/k!, so and this is called with shape \mu, here noted
- # here as nlmbd_hat*. The strategy for evaluating the double summation in
- # (3.5) is to create two arrays of the values of the two products inside
- # the summation and then broadcast them together into a matrix, and then
- # sum across the entire matrix.
- # Compute constants (as seen in the first and second separated products in
- # (3.5).). (This is the shape (\mu) parameter of the poisson distribution.)
- nlmbd_hat1 = n1 * (lmbd_hat2 + diff)
- nlmbd_hat2 = n2 * lmbd_hat2
- # Determine summation bounds for tail ends of distribution rather than
- # summing to infinity. `x1*` is for the outer sum and `x2*` is the inner
- # sum.
- x1_lb, x1_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat1)
- x2_lb, x2_ub = distributions.poisson.ppf([1e-10, 1 - 1e-16], nlmbd_hat2)
- # Construct arrays to function as the x_1 and x_2 counters on the summation
- # in (3.5). `x1` is in columns and `x2` is in rows to allow for
- # broadcasting.
- x1 = np.arange(x1_lb, x1_ub + 1)
- x2 = np.arange(x2_lb, x2_ub + 1)[:, None]
- # These are the two products in equation (3.5) with `prob_x1` being the
- # first (left side) and `prob_x2` being the second (right side). (To
- # make as clear as possible: the 1st contains a "+ d" term, the 2nd does
- # not.)
- prob_x1 = distributions.poisson.pmf(x1, nlmbd_hat1)
- prob_x2 = distributions.poisson.pmf(x2, nlmbd_hat2)
- # compute constants for use in the "pivot statistic" per the
- # unnumbered equation following (3.3).
- lmbd_x1 = x1 / n1
- lmbd_x2 = x2 / n2
- lmbds_diff = lmbd_x1 - lmbd_x2 - diff
- var_x1x2 = lmbd_x1 / n1 + lmbd_x2 / n2
- # This is the 'pivot statistic' for use in the indicator of the summation
- # (left side of "I[.]").
- with np.errstate(invalid='ignore', divide='ignore'):
- t_x1x2 = lmbds_diff / np.sqrt(var_x1x2)
- # `[indicator]` implements the "I[.] ... the indicator function" per
- # the paragraph following equation (3.5).
- if alternative == 'two-sided':
- indicator = np.abs(t_x1x2) >= np.abs(t_k1k2)
- elif alternative == 'less':
- indicator = t_x1x2 <= t_k1k2
- else:
- indicator = t_x1x2 >= t_k1k2
- # Multiply all combinations of the products together, exclude terms
- # based on the `indicator` and then sum. (3.5)
- pvalue = np.sum((prob_x1 * prob_x2)[indicator])
- return _stats_py.SignificanceResult(t_k1k2, pvalue)
- def _poisson_means_test_iv(k1, n1, k2, n2, diff, alternative):
- # """check for valid types and values of input to `poisson_mean_test`."""
- if k1 != int(k1) or k2 != int(k2):
- raise TypeError('`k1` and `k2` must be integers.')
- count_err = '`k1` and `k2` must be greater than or equal to 0.'
- if k1 < 0 or k2 < 0:
- raise ValueError(count_err)
- if n1 <= 0 or n2 <= 0:
- raise ValueError('`n1` and `n2` must be greater than 0.')
- if diff < 0:
- raise ValueError('diff must be greater than or equal to 0.')
- alternatives = {'two-sided', 'less', 'greater'}
- if alternative.lower() not in alternatives:
- raise ValueError(f"Alternative must be one of '{alternatives}'.")
- class CramerVonMisesResult:
- def __init__(self, statistic, pvalue):
- self.statistic = statistic
- self.pvalue = pvalue
- def __repr__(self):
- return (f"{self.__class__.__name__}(statistic={self.statistic}, "
- f"pvalue={self.pvalue})")
- def _psi1_mod(x):
- """
- psi1 is defined in equation 1.10 in Csörgő, S. and Faraway, J. (1996).
- This implements a modified version by excluding the term V(x) / 12
- (here: _cdf_cvm_inf(x) / 12) to avoid evaluating _cdf_cvm_inf(x)
- twice in _cdf_cvm.
- Implementation based on MAPLE code of Julian Faraway and R code of the
- function pCvM in the package goftest (v1.1.1), permission granted
- by Adrian Baddeley. Main difference in the implementation: the code
- here keeps adding terms of the series until the terms are small enough.
- """
- def _ed2(y):
- z = y**2 / 4
- b = kv(1/4, z) + kv(3/4, z)
- return np.exp(-z) * (y/2)**(3/2) * b / np.sqrt(np.pi)
- def _ed3(y):
- z = y**2 / 4
- c = np.exp(-z) / np.sqrt(np.pi)
- return c * (y/2)**(5/2) * (2*kv(1/4, z) + 3*kv(3/4, z) - kv(5/4, z))
- def _Ak(k, x):
- m = 2*k + 1
- sx = 2 * np.sqrt(x)
- y1 = x**(3/4)
- y2 = x**(5/4)
- e1 = m * gamma(k + 1/2) * _ed2((4 * k + 3)/sx) / (9 * y1)
- e2 = gamma(k + 1/2) * _ed3((4 * k + 1) / sx) / (72 * y2)
- e3 = 2 * (m + 2) * gamma(k + 3/2) * _ed3((4 * k + 5) / sx) / (12 * y2)
- e4 = 7 * m * gamma(k + 1/2) * _ed2((4 * k + 1) / sx) / (144 * y1)
- e5 = 7 * m * gamma(k + 1/2) * _ed2((4 * k + 5) / sx) / (144 * y1)
- return e1 + e2 + e3 + e4 + e5
- x = np.asarray(x)
- tot = np.zeros_like(x, dtype='float')
- cond = np.ones_like(x, dtype='bool')
- k = 0
- while np.any(cond):
- z = -_Ak(k, x[cond]) / (np.pi * gamma(k + 1))
- tot[cond] = tot[cond] + z
- cond[cond] = np.abs(z) >= 1e-7
- k += 1
- return tot
- def _cdf_cvm_inf(x):
- """
- Calculate the cdf of the Cramér-von Mises statistic (infinite sample size).
- See equation 1.2 in Csörgő, S. and Faraway, J. (1996).
- Implementation based on MAPLE code of Julian Faraway and R code of the
- function pCvM in the package goftest (v1.1.1), permission granted
- by Adrian Baddeley. Main difference in the implementation: the code
- here keeps adding terms of the series until the terms are small enough.
- The function is not expected to be accurate for large values of x, say
- x > 4, when the cdf is very close to 1.
- """
- x = np.asarray(x)
- def term(x, k):
- # this expression can be found in [2], second line of (1.3)
- u = np.exp(gammaln(k + 0.5) - gammaln(k+1)) / (np.pi**1.5 * np.sqrt(x))
- y = 4*k + 1
- q = y**2 / (16*x)
- b = kv(0.25, q)
- return u * np.sqrt(y) * np.exp(-q) * b
- tot = np.zeros_like(x, dtype='float')
- cond = np.ones_like(x, dtype='bool')
- k = 0
- while np.any(cond):
- z = term(x[cond], k)
- tot[cond] = tot[cond] + z
- cond[cond] = np.abs(z) >= 1e-7
- k += 1
- return tot
- def _cdf_cvm(x, n=None):
- """
- Calculate the cdf of the Cramér-von Mises statistic for a finite sample
- size n. If N is None, use the asymptotic cdf (n=inf).
- See equation 1.8 in Csörgő, S. and Faraway, J. (1996) for finite samples,
- 1.2 for the asymptotic cdf.
- The function is not expected to be accurate for large values of x, say
- x > 2, when the cdf is very close to 1 and it might return values > 1
- in that case, e.g. _cdf_cvm(2.0, 12) = 1.0000027556716846. Moreover, it
- is not accurate for small values of n, especially close to the bounds of
- the distribution's domain, [1/(12*n), n/3], where the value jumps to 0
- and 1, respectively. These are limitations of the approximation by Csörgő
- and Faraway (1996) implemented in this function.
- """
- x = np.asarray(x)
- if n is None:
- y = _cdf_cvm_inf(x)
- else:
- # support of the test statistic is [12/n, n/3], see 1.1 in [2]
- y = np.zeros_like(x, dtype='float')
- sup = (1./(12*n) < x) & (x < n/3.)
- # note: _psi1_mod does not include the term _cdf_cvm_inf(x) / 12
- # therefore, we need to add it here
- y[sup] = _cdf_cvm_inf(x[sup]) * (1 + 1./(12*n)) + _psi1_mod(x[sup]) / n
- y[x >= n/3] = 1
- if y.ndim == 0:
- return y[()]
- return y
- def cramervonmises(rvs, cdf, args=()):
- """Perform the one-sample Cramér-von Mises test for goodness of fit.
- This performs a test of the goodness of fit of a cumulative distribution
- function (cdf) :math:`F` compared to the empirical distribution function
- :math:`F_n` of observed random variates :math:`X_1, ..., X_n` that are
- assumed to be independent and identically distributed ([1]_).
- The null hypothesis is that the :math:`X_i` have cumulative distribution
- :math:`F`.
- Parameters
- ----------
- rvs : array_like
- A 1-D array of observed values of the random variables :math:`X_i`.
- cdf : str or callable
- The cumulative distribution function :math:`F` to test the
- observations against. If a string, it should be the name of a
- distribution in `scipy.stats`. If a callable, that callable is used
- to calculate the cdf: ``cdf(x, *args) -> float``.
- args : tuple, optional
- Distribution parameters. These are assumed to be known; see Notes.
- Returns
- -------
- res : object with attributes
- statistic : float
- Cramér-von Mises statistic.
- pvalue : float
- The p-value.
- See Also
- --------
- kstest, cramervonmises_2samp
- Notes
- -----
- .. versionadded:: 1.6.0
- The p-value relies on the approximation given by equation 1.8 in [2]_.
- It is important to keep in mind that the p-value is only accurate if
- one tests a simple hypothesis, i.e. the parameters of the reference
- distribution are known. If the parameters are estimated from the data
- (composite hypothesis), the computed p-value is not reliable.
- References
- ----------
- .. [1] Cramér-von Mises criterion, Wikipedia,
- https://en.wikipedia.org/wiki/Cram%C3%A9r%E2%80%93von_Mises_criterion
- .. [2] Csörgő, S. and Faraway, J. (1996). The Exact and Asymptotic
- Distribution of Cramér-von Mises Statistics. Journal of the
- Royal Statistical Society, pp. 221-234.
- Examples
- --------
- Suppose we wish to test whether data generated by ``scipy.stats.norm.rvs``
- were, in fact, drawn from the standard normal distribution. We choose a
- significance level of alpha=0.05.
- >>> import numpy as np
- >>> from scipy import stats
- >>> rng = np.random.default_rng()
- >>> x = stats.norm.rvs(size=500, random_state=rng)
- >>> res = stats.cramervonmises(x, 'norm')
- >>> res.statistic, res.pvalue
- (0.49121480855028343, 0.04189256516661377)
- The p-value 0.79 exceeds our chosen significance level, so we do not
- reject the null hypothesis that the observed sample is drawn from the
- standard normal distribution.
- Now suppose we wish to check whether the same samples shifted by 2.1 is
- consistent with being drawn from a normal distribution with a mean of 2.
- >>> y = x + 2.1
- >>> res = stats.cramervonmises(y, 'norm', args=(2,))
- >>> res.statistic, res.pvalue
- (0.07400330012187435, 0.7274595666160468)
- Here we have used the `args` keyword to specify the mean (``loc``)
- of the normal distribution to test the data against. This is equivalent
- to the following, in which we create a frozen normal distribution with
- mean 2.1, then pass its ``cdf`` method as an argument.
- >>> frozen_dist = stats.norm(loc=2)
- >>> res = stats.cramervonmises(y, frozen_dist.cdf)
- >>> res.statistic, res.pvalue
- (0.07400330012187435, 0.7274595666160468)
- In either case, we would reject the null hypothesis that the observed
- sample is drawn from a normal distribution with a mean of 2 (and default
- variance of 1) because the p-value 0.04 is less than our chosen
- significance level.
- """
- if isinstance(cdf, str):
- cdf = getattr(distributions, cdf).cdf
- vals = np.sort(np.asarray(rvs))
- if vals.size <= 1:
- raise ValueError('The sample must contain at least two observations.')
- if vals.ndim > 1:
- raise ValueError('The sample must be one-dimensional.')
- n = len(vals)
- cdfvals = cdf(vals, *args)
- u = (2*np.arange(1, n+1) - 1)/(2*n)
- w = 1/(12*n) + np.sum((u - cdfvals)**2)
- # avoid small negative values that can occur due to the approximation
- p = max(0, 1. - _cdf_cvm(w, n))
- return CramerVonMisesResult(statistic=w, pvalue=p)
- def _get_wilcoxon_distr(n):
- """
- Distribution of probability of the Wilcoxon ranksum statistic r_plus (sum
- of ranks of positive differences).
- Returns an array with the probabilities of all the possible ranks
- r = 0, ..., n*(n+1)/2
- """
- c = np.ones(1, dtype=np.double)
- for k in range(1, n + 1):
- prev_c = c
- c = np.zeros(k * (k + 1) // 2 + 1, dtype=np.double)
- m = len(prev_c)
- c[:m] = prev_c * 0.5
- c[-m:] += prev_c * 0.5
- return c
- def _get_wilcoxon_distr2(n):
- """
- Distribution of probability of the Wilcoxon ranksum statistic r_plus (sum
- of ranks of positive differences).
- Returns an array with the probabilities of all the possible ranks
- r = 0, ..., n*(n+1)/2
- This is a slower reference function
- References
- ----------
- .. [1] 1. Harris T, Hardin JW. Exact Wilcoxon Signed-Rank and Wilcoxon
- Mann-Whitney Ranksum Tests. The Stata Journal. 2013;13(2):337-343.
- """
- ai = np.arange(1, n+1)[:, None]
- t = n*(n+1)/2
- q = 2*t
- j = np.arange(q)
- theta = 2*np.pi/q*j
- phi_sp = np.prod(np.cos(theta*ai), axis=0)
- phi_s = np.exp(1j*theta*t) * phi_sp
- p = np.real(ifft(phi_s))
- res = np.zeros(int(t)+1)
- res[:-1:] = p[::2]
- res[0] /= 2
- res[-1] = res[0]
- return res
- def _tau_b(A):
- """Calculate Kendall's tau-b and p-value from contingency table."""
- # See [2] 2.2 and 4.2
- # contingency table must be truly 2D
- if A.shape[0] == 1 or A.shape[1] == 1:
- return np.nan, np.nan
- NA = A.sum()
- PA = _P(A)
- QA = _Q(A)
- Sri2 = (A.sum(axis=1)**2).sum()
- Scj2 = (A.sum(axis=0)**2).sum()
- denominator = (NA**2 - Sri2)*(NA**2 - Scj2)
- tau = (PA-QA)/(denominator)**0.5
- numerator = 4*(_a_ij_Aij_Dij2(A) - (PA - QA)**2 / NA)
- s02_tau_b = numerator/denominator
- if s02_tau_b == 0: # Avoid divide by zero
- return tau, 0
- Z = tau/s02_tau_b**0.5
- p = 2*norm.sf(abs(Z)) # 2-sided p-value
- return tau, p
- def _somers_d(A, alternative='two-sided'):
- """Calculate Somers' D and p-value from contingency table."""
- # See [3] page 1740
- # contingency table must be truly 2D
- if A.shape[0] <= 1 or A.shape[1] <= 1:
- return np.nan, np.nan
- NA = A.sum()
- NA2 = NA**2
- PA = _P(A)
- QA = _Q(A)
- Sri2 = (A.sum(axis=1)**2).sum()
- d = (PA - QA)/(NA2 - Sri2)
- S = _a_ij_Aij_Dij2(A) - (PA-QA)**2/NA
- with np.errstate(divide='ignore'):
- Z = (PA - QA)/(4*(S))**0.5
- _, p = scipy.stats._stats_py._normtest_finish(Z, alternative)
- return d, p
- SomersDResult = make_dataclass("SomersDResult",
- ("statistic", "pvalue", "table"))
- def somersd(x, y=None, alternative='two-sided'):
- r"""Calculates Somers' D, an asymmetric measure of ordinal association.
- Like Kendall's :math:`\tau`, Somers' :math:`D` is a measure of the
- correspondence between two rankings. Both statistics consider the
- difference between the number of concordant and discordant pairs in two
- rankings :math:`X` and :math:`Y`, and both are normalized such that values
- close to 1 indicate strong agreement and values close to -1 indicate
- strong disagreement. They differ in how they are normalized. To show the
- relationship, Somers' :math:`D` can be defined in terms of Kendall's
- :math:`\tau_a`:
- .. math::
- D(Y|X) = \frac{\tau_a(X, Y)}{\tau_a(X, X)}
- Suppose the first ranking :math:`X` has :math:`r` distinct ranks and the
- second ranking :math:`Y` has :math:`s` distinct ranks. These two lists of
- :math:`n` rankings can also be viewed as an :math:`r \times s` contingency
- table in which element :math:`i, j` is the number of rank pairs with rank
- :math:`i` in ranking :math:`X` and rank :math:`j` in ranking :math:`Y`.
- Accordingly, `somersd` also allows the input data to be supplied as a
- single, 2D contingency table instead of as two separate, 1D rankings.
- Note that the definition of Somers' :math:`D` is asymmetric: in general,
- :math:`D(Y|X) \neq D(X|Y)`. ``somersd(x, y)`` calculates Somers'
- :math:`D(Y|X)`: the "row" variable :math:`X` is treated as an independent
- variable, and the "column" variable :math:`Y` is dependent. For Somers'
- :math:`D(X|Y)`, swap the input lists or transpose the input table.
- Parameters
- ----------
- x : array_like
- 1D array of rankings, treated as the (row) independent variable.
- Alternatively, a 2D contingency table.
- y : array_like, optional
- If `x` is a 1D array of rankings, `y` is a 1D array of rankings of the
- same length, treated as the (column) dependent variable.
- If `x` is 2D, `y` is ignored.
- alternative : {'two-sided', 'less', 'greater'}, optional
- Defines the alternative hypothesis. Default is 'two-sided'.
- The following options are available:
- * 'two-sided': the rank correlation is nonzero
- * 'less': the rank correlation is negative (less than zero)
- * 'greater': the rank correlation is positive (greater than zero)
- Returns
- -------
- res : SomersDResult
- A `SomersDResult` object with the following fields:
- statistic : float
- The Somers' :math:`D` statistic.
- pvalue : float
- The p-value for a hypothesis test whose null
- hypothesis is an absence of association, :math:`D=0`.
- See notes for more information.
- table : 2D array
- The contingency table formed from rankings `x` and `y` (or the
- provided contingency table, if `x` is a 2D array)
- See Also
- --------
- kendalltau : Calculates Kendall's tau, another correlation measure.
- weightedtau : Computes a weighted version of Kendall's tau.
- spearmanr : Calculates a Spearman rank-order correlation coefficient.
- pearsonr : Calculates a Pearson correlation coefficient.
- Notes
- -----
- This function follows the contingency table approach of [2]_ and
- [3]_. *p*-values are computed based on an asymptotic approximation of
- the test statistic distribution under the null hypothesis :math:`D=0`.
- Theoretically, hypothesis tests based on Kendall's :math:`tau` and Somers'
- :math:`D` should be identical.
- However, the *p*-values returned by `kendalltau` are based
- on the null hypothesis of *independence* between :math:`X` and :math:`Y`
- (i.e. the population from which pairs in :math:`X` and :math:`Y` are
- sampled contains equal numbers of all possible pairs), which is more
- specific than the null hypothesis :math:`D=0` used here. If the null
- hypothesis of independence is desired, it is acceptable to use the
- *p*-value returned by `kendalltau` with the statistic returned by
- `somersd` and vice versa. For more information, see [2]_.
- Contingency tables are formatted according to the convention used by
- SAS and R: the first ranking supplied (``x``) is the "row" variable, and
- the second ranking supplied (``y``) is the "column" variable. This is
- opposite the convention of Somers' original paper [1]_.
- References
- ----------
- .. [1] Robert H. Somers, "A New Asymmetric Measure of Association for
- Ordinal Variables", *American Sociological Review*, Vol. 27, No. 6,
- pp. 799--811, 1962.
- .. [2] Morton B. Brown and Jacqueline K. Benedetti, "Sampling Behavior of
- Tests for Correlation in Two-Way Contingency Tables", *Journal of
- the American Statistical Association* Vol. 72, No. 358, pp.
- 309--315, 1977.
- .. [3] SAS Institute, Inc., "The FREQ Procedure (Book Excerpt)",
- *SAS/STAT 9.2 User's Guide, Second Edition*, SAS Publishing, 2009.
- .. [4] Laerd Statistics, "Somers' d using SPSS Statistics", *SPSS
- Statistics Tutorials and Statistical Guides*,
- https://statistics.laerd.com/spss-tutorials/somers-d-using-spss-statistics.php,
- Accessed July 31, 2020.
- Examples
- --------
- We calculate Somers' D for the example given in [4]_, in which a hotel
- chain owner seeks to determine the association between hotel room
- cleanliness and customer satisfaction. The independent variable, hotel
- room cleanliness, is ranked on an ordinal scale: "below average (1)",
- "average (2)", or "above average (3)". The dependent variable, customer
- satisfaction, is ranked on a second scale: "very dissatisfied (1)",
- "moderately dissatisfied (2)", "neither dissatisfied nor satisfied (3)",
- "moderately satisfied (4)", or "very satisfied (5)". 189 customers
- respond to the survey, and the results are cast into a contingency table
- with the hotel room cleanliness as the "row" variable and customer
- satisfaction as the "column" variable.
- +-----+-----+-----+-----+-----+-----+
- | | (1) | (2) | (3) | (4) | (5) |
- +=====+=====+=====+=====+=====+=====+
- | (1) | 27 | 25 | 14 | 7 | 0 |
- +-----+-----+-----+-----+-----+-----+
- | (2) | 7 | 14 | 18 | 35 | 12 |
- +-----+-----+-----+-----+-----+-----+
- | (3) | 1 | 3 | 2 | 7 | 17 |
- +-----+-----+-----+-----+-----+-----+
- For example, 27 customers assigned their room a cleanliness ranking of
- "below average (1)" and a corresponding satisfaction of "very
- dissatisfied (1)". We perform the analysis as follows.
- >>> from scipy.stats import somersd
- >>> table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]
- >>> res = somersd(table)
- >>> res.statistic
- 0.6032766111513396
- >>> res.pvalue
- 1.0007091191074533e-27
- The value of the Somers' D statistic is approximately 0.6, indicating
- a positive correlation between room cleanliness and customer satisfaction
- in the sample.
- The *p*-value is very small, indicating a very small probability of
- observing such an extreme value of the statistic under the null
- hypothesis that the statistic of the entire population (from which
- our sample of 189 customers is drawn) is zero. This supports the
- alternative hypothesis that the true value of Somers' D for the population
- is nonzero.
- """
- x, y = np.array(x), np.array(y)
- if x.ndim == 1:
- if x.size != y.size:
- raise ValueError("Rankings must be of equal length.")
- table = scipy.stats.contingency.crosstab(x, y)[1]
- elif x.ndim == 2:
- if np.any(x < 0):
- raise ValueError("All elements of the contingency table must be "
- "non-negative.")
- if np.any(x != x.astype(int)):
- raise ValueError("All elements of the contingency table must be "
- "integer.")
- if x.nonzero()[0].size < 2:
- raise ValueError("At least two elements of the contingency table "
- "must be nonzero.")
- table = x
- else:
- raise ValueError("x must be either a 1D or 2D array")
- d, p = _somers_d(table, alternative)
- # add alias for consistency with other correlation functions
- res = SomersDResult(d, p, table)
- res.correlation = d
- return res
- # This could be combined with `_all_partitions` in `_resampling.py`
- def _all_partitions(nx, ny):
- """
- Partition a set of indices into two fixed-length sets in all possible ways
- Partition a set of indices 0 ... nx + ny - 1 into two sets of length nx and
- ny in all possible ways (ignoring order of elements).
- """
- z = np.arange(nx+ny)
- for c in combinations(z, nx):
- x = np.array(c)
- mask = np.ones(nx+ny, bool)
- mask[x] = False
- y = z[mask]
- yield x, y
- def _compute_log_combinations(n):
- """Compute all log combination of C(n, k)."""
- gammaln_arr = gammaln(np.arange(n + 1) + 1)
- return gammaln(n + 1) - gammaln_arr - gammaln_arr[::-1]
- BarnardExactResult = make_dataclass(
- "BarnardExactResult", [("statistic", float), ("pvalue", float)]
- )
- def barnard_exact(table, alternative="two-sided", pooled=True, n=32):
- r"""Perform a Barnard exact test on a 2x2 contingency table.
- Parameters
- ----------
- table : array_like of ints
- A 2x2 contingency table. Elements should be non-negative integers.
- alternative : {'two-sided', 'less', 'greater'}, optional
- Defines the null and alternative hypotheses. Default is 'two-sided'.
- Please see explanations in the Notes section below.
- pooled : bool, optional
- Whether to compute score statistic with pooled variance (as in
- Student's t-test, for example) or unpooled variance (as in Welch's
- t-test). Default is ``True``.
- n : int, optional
- Number of sampling points used in the construction of the sampling
- method. Note that this argument will automatically be converted to
- the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to
- select sample points. Default is 32. Must be positive. In most cases,
- 32 points is enough to reach good precision. More points comes at
- performance cost.
- Returns
- -------
- ber : BarnardExactResult
- A result object with the following attributes.
- statistic : float
- The Wald statistic with pooled or unpooled variance, depending
- on the user choice of `pooled`.
- pvalue : float
- P-value, the probability of obtaining a distribution at least as
- extreme as the one that was actually observed, assuming that the
- null hypothesis is true.
- See Also
- --------
- chi2_contingency : Chi-square test of independence of variables in a
- contingency table.
- fisher_exact : Fisher exact test on a 2x2 contingency table.
- boschloo_exact : Boschloo's exact test on a 2x2 contingency table,
- which is an uniformly more powerful alternative to Fisher's exact test.
- Notes
- -----
- Barnard's test is an exact test used in the analysis of contingency
- tables. It examines the association of two categorical variables, and
- is a more powerful alternative than Fisher's exact test
- for 2x2 contingency tables.
- Let's define :math:`X_0` a 2x2 matrix representing the observed sample,
- where each column stores the binomial experiment, as in the example
- below. Let's also define :math:`p_1, p_2` the theoretical binomial
- probabilities for :math:`x_{11}` and :math:`x_{12}`. When using
- Barnard exact test, we can assert three different null hypotheses :
- - :math:`H_0 : p_1 \geq p_2` versus :math:`H_1 : p_1 < p_2`,
- with `alternative` = "less"
- - :math:`H_0 : p_1 \leq p_2` versus :math:`H_1 : p_1 > p_2`,
- with `alternative` = "greater"
- - :math:`H_0 : p_1 = p_2` versus :math:`H_1 : p_1 \neq p_2`,
- with `alternative` = "two-sided" (default one)
- In order to compute Barnard's exact test, we are using the Wald
- statistic [3]_ with pooled or unpooled variance.
- Under the default assumption that both variances are equal
- (``pooled = True``), the statistic is computed as:
- .. math::
- T(X) = \frac{
- \hat{p}_1 - \hat{p}_2
- }{
- \sqrt{
- \hat{p}(1 - \hat{p})
- (\frac{1}{c_1} +
- \frac{1}{c_2})
- }
- }
- with :math:`\hat{p}_1, \hat{p}_2` and :math:`\hat{p}` the estimator of
- :math:`p_1, p_2` and :math:`p`, the latter being the combined probability,
- given the assumption that :math:`p_1 = p_2`.
- If this assumption is invalid (``pooled = False``), the statistic is:
- .. math::
- T(X) = \frac{
- \hat{p}_1 - \hat{p}_2
- }{
- \sqrt{
- \frac{\hat{p}_1 (1 - \hat{p}_1)}{c_1} +
- \frac{\hat{p}_2 (1 - \hat{p}_2)}{c_2}
- }
- }
- The p-value is then computed as:
- .. math::
- \sum
- \binom{c_1}{x_{11}}
- \binom{c_2}{x_{12}}
- \pi^{x_{11} + x_{12}}
- (1 - \pi)^{t - x_{11} - x_{12}}
- where the sum is over all 2x2 contingency tables :math:`X` such that:
- * :math:`T(X) \leq T(X_0)` when `alternative` = "less",
- * :math:`T(X) \geq T(X_0)` when `alternative` = "greater", or
- * :math:`T(X) \geq |T(X_0)|` when `alternative` = "two-sided".
- Above, :math:`c_1, c_2` are the sum of the columns 1 and 2,
- and :math:`t` the total (sum of the 4 sample's element).
- The returned p-value is the maximum p-value taken over the nuisance
- parameter :math:`\pi`, where :math:`0 \leq \pi \leq 1`.
- This function's complexity is :math:`O(n c_1 c_2)`, where `n` is the
- number of sample points.
- References
- ----------
- .. [1] Barnard, G. A. "Significance Tests for 2x2 Tables". *Biometrika*.
- 34.1/2 (1947): 123-138. :doi:`dpgkg3`
- .. [2] Mehta, Cyrus R., and Pralay Senchaudhuri. "Conditional versus
- unconditional exact tests for comparing two binomials."
- *Cytel Software Corporation* 675 (2003): 1-5.
- .. [3] "Wald Test". *Wikipedia*. https://en.wikipedia.org/wiki/Wald_test
- Examples
- --------
- An example use of Barnard's test is presented in [2]_.
- Consider the following example of a vaccine efficacy study
- (Chan, 1998). In a randomized clinical trial of 30 subjects, 15 were
- inoculated with a recombinant DNA influenza vaccine and the 15 were
- inoculated with a placebo. Twelve of the 15 subjects in the placebo
- group (80%) eventually became infected with influenza whereas for the
- vaccine group, only 7 of the 15 subjects (47%) became infected. The
- data are tabulated as a 2 x 2 table::
- Vaccine Placebo
- Yes 7 12
- No 8 3
- When working with statistical hypothesis testing, we usually use a
- threshold probability or significance level upon which we decide
- to reject the null hypothesis :math:`H_0`. Suppose we choose the common
- significance level of 5%.
- Our alternative hypothesis is that the vaccine will lower the chance of
- becoming infected with the virus; that is, the probability :math:`p_1` of
- catching the virus with the vaccine will be *less than* the probability
- :math:`p_2` of catching the virus without the vaccine. Therefore, we call
- `barnard_exact` with the ``alternative="less"`` option:
- >>> import scipy.stats as stats
- >>> res = stats.barnard_exact([[7, 12], [8, 3]], alternative="less")
- >>> res.statistic
- -1.894...
- >>> res.pvalue
- 0.03407...
- Under the null hypothesis that the vaccine will not lower the chance of
- becoming infected, the probability of obtaining test results at least as
- extreme as the observed data is approximately 3.4%. Since this p-value is
- less than our chosen significance level, we have evidence to reject
- :math:`H_0` in favor of the alternative.
- Suppose we had used Fisher's exact test instead:
- >>> _, pvalue = stats.fisher_exact([[7, 12], [8, 3]], alternative="less")
- >>> pvalue
- 0.0640...
- With the same threshold significance of 5%, we would not have been able
- to reject the null hypothesis in favor of the alternative. As stated in
- [2]_, Barnard's test is uniformly more powerful than Fisher's exact test
- because Barnard's test does not condition on any margin. Fisher's test
- should only be used when both sets of marginals are fixed.
- """
- if n <= 0:
- raise ValueError(
- "Number of points `n` must be strictly positive, "
- f"found {n!r}"
- )
- table = np.asarray(table, dtype=np.int64)
- if not table.shape == (2, 2):
- raise ValueError("The input `table` must be of shape (2, 2).")
- if np.any(table < 0):
- raise ValueError("All values in `table` must be nonnegative.")
- if 0 in table.sum(axis=0):
- # If both values in column are zero, the p-value is 1 and
- # the score's statistic is NaN.
- return BarnardExactResult(np.nan, 1.0)
- total_col_1, total_col_2 = table.sum(axis=0)
- x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(-1, 1)
- x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(1, -1)
- # We need to calculate the wald statistics for each combination of x1 and
- # x2.
- p1, p2 = x1 / total_col_1, x2 / total_col_2
- if pooled:
- p = (x1 + x2) / (total_col_1 + total_col_2)
- variances = p * (1 - p) * (1 / total_col_1 + 1 / total_col_2)
- else:
- variances = p1 * (1 - p1) / total_col_1 + p2 * (1 - p2) / total_col_2
- # To avoid warning when dividing by 0
- with np.errstate(divide="ignore", invalid="ignore"):
- wald_statistic = np.divide((p1 - p2), np.sqrt(variances))
- wald_statistic[p1 == p2] = 0 # Removing NaN values
- wald_stat_obs = wald_statistic[table[0, 0], table[0, 1]]
- if alternative == "two-sided":
- index_arr = np.abs(wald_statistic) >= abs(wald_stat_obs)
- elif alternative == "less":
- index_arr = wald_statistic <= wald_stat_obs
- elif alternative == "greater":
- index_arr = wald_statistic >= wald_stat_obs
- else:
- msg = (
- "`alternative` should be one of {'two-sided', 'less', 'greater'},"
- f" found {alternative!r}"
- )
- raise ValueError(msg)
- x1_sum_x2 = x1 + x2
- x1_log_comb = _compute_log_combinations(total_col_1)
- x2_log_comb = _compute_log_combinations(total_col_2)
- x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]
- result = shgo(
- _get_binomial_log_p_value_with_nuisance_param,
- args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),
- bounds=((0, 1),),
- n=n,
- sampling_method="sobol",
- )
- # result.fun is the negative log pvalue and therefore needs to be
- # changed before return
- p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)
- return BarnardExactResult(wald_stat_obs, p_value)
- BoschlooExactResult = make_dataclass(
- "BoschlooExactResult", [("statistic", float), ("pvalue", float)]
- )
- def boschloo_exact(table, alternative="two-sided", n=32):
- r"""Perform Boschloo's exact test on a 2x2 contingency table.
- Parameters
- ----------
- table : array_like of ints
- A 2x2 contingency table. Elements should be non-negative integers.
- alternative : {'two-sided', 'less', 'greater'}, optional
- Defines the null and alternative hypotheses. Default is 'two-sided'.
- Please see explanations in the Notes section below.
- n : int, optional
- Number of sampling points used in the construction of the sampling
- method. Note that this argument will automatically be converted to
- the next higher power of 2 since `scipy.stats.qmc.Sobol` is used to
- select sample points. Default is 32. Must be positive. In most cases,
- 32 points is enough to reach good precision. More points comes at
- performance cost.
- Returns
- -------
- ber : BoschlooExactResult
- A result object with the following attributes.
- statistic : float
- The statistic used in Boschloo's test; that is, the p-value
- from Fisher's exact test.
- pvalue : float
- P-value, the probability of obtaining a distribution at least as
- extreme as the one that was actually observed, assuming that the
- null hypothesis is true.
- See Also
- --------
- chi2_contingency : Chi-square test of independence of variables in a
- contingency table.
- fisher_exact : Fisher exact test on a 2x2 contingency table.
- barnard_exact : Barnard's exact test, which is a more powerful alternative
- than Fisher's exact test for 2x2 contingency tables.
- Notes
- -----
- Boschloo's test is an exact test used in the analysis of contingency
- tables. It examines the association of two categorical variables, and
- is a uniformly more powerful alternative to Fisher's exact test
- for 2x2 contingency tables.
- Boschloo's exact test uses the p-value of Fisher's exact test as a
- statistic, and Boschloo's p-value is the probability under the null
- hypothesis of observing such an extreme value of this statistic.
- Let's define :math:`X_0` a 2x2 matrix representing the observed sample,
- where each column stores the binomial experiment, as in the example
- below. Let's also define :math:`p_1, p_2` the theoretical binomial
- probabilities for :math:`x_{11}` and :math:`x_{12}`. When using
- Boschloo exact test, we can assert three different alternative hypotheses:
- - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 < p_2`,
- with `alternative` = "less"
- - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 > p_2`,
- with `alternative` = "greater"
- - :math:`H_0 : p_1=p_2` versus :math:`H_1 : p_1 \neq p_2`,
- with `alternative` = "two-sided" (default)
- There are multiple conventions for computing a two-sided p-value when the
- null distribution is asymmetric. Here, we apply the convention that the
- p-value of a two-sided test is twice the minimum of the p-values of the
- one-sided tests (clipped to 1.0). Note that `fisher_exact` follows a
- different convention, so for a given `table`, the statistic reported by
- `boschloo_exact` may differ from the p-value reported by `fisher_exact`
- when ``alternative='two-sided'``.
- .. versionadded:: 1.7.0
- References
- ----------
- .. [1] R.D. Boschloo. "Raised conditional level of significance for the
- 2 x 2-table when testing the equality of two probabilities",
- Statistica Neerlandica, 24(1), 1970
- .. [2] "Boschloo's test", Wikipedia,
- https://en.wikipedia.org/wiki/Boschloo%27s_test
- .. [3] Lise M. Saari et al. "Employee attitudes and job satisfaction",
- Human Resource Management, 43(4), 395-407, 2004,
- :doi:`10.1002/hrm.20032`.
- Examples
- --------
- In the following example, we consider the article "Employee
- attitudes and job satisfaction" [3]_
- which reports the results of a survey from 63 scientists and 117 college
- professors. Of the 63 scientists, 31 said they were very satisfied with
- their jobs, whereas 74 of the college professors were very satisfied
- with their work. Is this significant evidence that college
- professors are happier with their work than scientists?
- The following table summarizes the data mentioned above::
- college professors scientists
- Very Satisfied 74 31
- Dissatisfied 43 32
- When working with statistical hypothesis testing, we usually use a
- threshold probability or significance level upon which we decide
- to reject the null hypothesis :math:`H_0`. Suppose we choose the common
- significance level of 5%.
- Our alternative hypothesis is that college professors are truly more
- satisfied with their work than scientists. Therefore, we expect
- :math:`p_1` the proportion of very satisfied college professors to be
- greater than :math:`p_2`, the proportion of very satisfied scientists.
- We thus call `boschloo_exact` with the ``alternative="greater"`` option:
- >>> import scipy.stats as stats
- >>> res = stats.boschloo_exact([[74, 31], [43, 32]], alternative="greater")
- >>> res.statistic
- 0.0483...
- >>> res.pvalue
- 0.0355...
- Under the null hypothesis that scientists are happier in their work than
- college professors, the probability of obtaining test
- results at least as extreme as the observed data is approximately 3.55%.
- Since this p-value is less than our chosen significance level, we have
- evidence to reject :math:`H_0` in favor of the alternative hypothesis.
- """
- hypergeom = distributions.hypergeom
- if n <= 0:
- raise ValueError(
- "Number of points `n` must be strictly positive,"
- f" found {n!r}"
- )
- table = np.asarray(table, dtype=np.int64)
- if not table.shape == (2, 2):
- raise ValueError("The input `table` must be of shape (2, 2).")
- if np.any(table < 0):
- raise ValueError("All values in `table` must be nonnegative.")
- if 0 in table.sum(axis=0):
- # If both values in column are zero, the p-value is 1 and
- # the score's statistic is NaN.
- return BoschlooExactResult(np.nan, np.nan)
- total_col_1, total_col_2 = table.sum(axis=0)
- total = total_col_1 + total_col_2
- x1 = np.arange(total_col_1 + 1, dtype=np.int64).reshape(1, -1)
- x2 = np.arange(total_col_2 + 1, dtype=np.int64).reshape(-1, 1)
- x1_sum_x2 = x1 + x2
- if alternative == 'less':
- pvalues = hypergeom.cdf(x1, total, x1_sum_x2, total_col_1).T
- elif alternative == 'greater':
- # Same formula as the 'less' case, but with the second column.
- pvalues = hypergeom.cdf(x2, total, x1_sum_x2, total_col_2).T
- elif alternative == 'two-sided':
- boschloo_less = boschloo_exact(table, alternative="less", n=n)
- boschloo_greater = boschloo_exact(table, alternative="greater", n=n)
- res = (
- boschloo_less if boschloo_less.pvalue < boschloo_greater.pvalue
- else boschloo_greater
- )
- # Two-sided p-value is defined as twice the minimum of the one-sided
- # p-values
- pvalue = np.clip(2 * res.pvalue, a_min=0, a_max=1)
- return BoschlooExactResult(res.statistic, pvalue)
- else:
- msg = (
- f"`alternative` should be one of {'two-sided', 'less', 'greater'},"
- f" found {alternative!r}"
- )
- raise ValueError(msg)
- fisher_stat = pvalues[table[0, 0], table[0, 1]]
- # fisher_stat * (1+1e-13) guards us from small numerical error. It is
- # equivalent to np.isclose with relative tol of 1e-13 and absolute tol of 0
- # For more throughout explanations, see gh-14178
- index_arr = pvalues <= fisher_stat * (1+1e-13)
- x1, x2, x1_sum_x2 = x1.T, x2.T, x1_sum_x2.T
- x1_log_comb = _compute_log_combinations(total_col_1)
- x2_log_comb = _compute_log_combinations(total_col_2)
- x1_sum_x2_log_comb = x1_log_comb[x1] + x2_log_comb[x2]
- result = shgo(
- _get_binomial_log_p_value_with_nuisance_param,
- args=(x1_sum_x2, x1_sum_x2_log_comb, index_arr),
- bounds=((0, 1),),
- n=n,
- sampling_method="sobol",
- )
- # result.fun is the negative log pvalue and therefore needs to be
- # changed before return
- p_value = np.clip(np.exp(-result.fun), a_min=0, a_max=1)
- return BoschlooExactResult(fisher_stat, p_value)
- def _get_binomial_log_p_value_with_nuisance_param(
- nuisance_param, x1_sum_x2, x1_sum_x2_log_comb, index_arr
- ):
- r"""
- Compute the log pvalue in respect of a nuisance parameter considering
- a 2x2 sample space.
- Parameters
- ----------
- nuisance_param : float
- nuisance parameter used in the computation of the maximisation of
- the p-value. Must be between 0 and 1
- x1_sum_x2 : ndarray
- Sum of x1 and x2 inside barnard_exact
- x1_sum_x2_log_comb : ndarray
- sum of the log combination of x1 and x2
- index_arr : ndarray of boolean
- Returns
- -------
- p_value : float
- Return the maximum p-value considering every nuisance paramater
- between 0 and 1
- Notes
- -----
- Both Barnard's test and Boschloo's test iterate over a nuisance parameter
- :math:`\pi \in [0, 1]` to find the maximum p-value. To search this
- maxima, this function return the negative log pvalue with respect to the
- nuisance parameter passed in params. This negative log p-value is then
- used in `shgo` to find the minimum negative pvalue which is our maximum
- pvalue.
- Also, to compute the different combination used in the
- p-values' computation formula, this function uses `gammaln` which is
- more tolerant for large value than `scipy.special.comb`. `gammaln` gives
- a log combination. For the little precision loss, performances are
- improved a lot.
- """
- t1, t2 = x1_sum_x2.shape
- n = t1 + t2 - 2
- with np.errstate(divide="ignore", invalid="ignore"):
- log_nuisance = np.log(
- nuisance_param,
- out=np.zeros_like(nuisance_param),
- where=nuisance_param >= 0,
- )
- log_1_minus_nuisance = np.log(
- 1 - nuisance_param,
- out=np.zeros_like(nuisance_param),
- where=1 - nuisance_param >= 0,
- )
- nuisance_power_x1_x2 = log_nuisance * x1_sum_x2
- nuisance_power_x1_x2[(x1_sum_x2 == 0)[:, :]] = 0
- nuisance_power_n_minus_x1_x2 = log_1_minus_nuisance * (n - x1_sum_x2)
- nuisance_power_n_minus_x1_x2[(x1_sum_x2 == n)[:, :]] = 0
- tmp_log_values_arr = (
- x1_sum_x2_log_comb
- + nuisance_power_x1_x2
- + nuisance_power_n_minus_x1_x2
- )
- tmp_values_from_index = tmp_log_values_arr[index_arr]
- # To avoid dividing by zero in log function and getting inf value,
- # values are centered according to the max
- max_value = tmp_values_from_index.max()
- # To have better result's precision, the log pvalue is taken here.
- # Indeed, pvalue is included inside [0, 1] interval. Passing the
- # pvalue to log makes the interval a lot bigger ([-inf, 0]), and thus
- # help us to achieve better precision
- with np.errstate(divide="ignore", invalid="ignore"):
- log_probs = np.exp(tmp_values_from_index - max_value).sum()
- log_pvalue = max_value + np.log(
- log_probs,
- out=np.full_like(log_probs, -np.inf),
- where=log_probs > 0,
- )
- # Since shgo find the minima, minus log pvalue is returned
- return -log_pvalue
- def _pval_cvm_2samp_exact(s, m, n):
- """
- Compute the exact p-value of the Cramer-von Mises two-sample test
- for a given value s of the test statistic.
- m and n are the sizes of the samples.
- [1] Y. Xiao, A. Gordon, and A. Yakovlev, "A C++ Program for
- the Cramér-Von Mises Two-Sample Test", J. Stat. Soft.,
- vol. 17, no. 8, pp. 1-15, Dec. 2006.
- [2] T. W. Anderson "On the Distribution of the Two-Sample Cramer-von Mises
- Criterion," The Annals of Mathematical Statistics, Ann. Math. Statist.
- 33(3), 1148-1159, (September, 1962)
- """
- # [1, p. 3]
- lcm = np.lcm(m, n)
- # [1, p. 4], below eq. 3
- a = lcm // m
- b = lcm // n
- # Combine Eq. 9 in [2] with Eq. 2 in [1] and solve for $\zeta$
- # Hint: `s` is $U$ in [2], and $T_2$ in [1] is $T$ in [2]
- mn = m * n
- zeta = lcm ** 2 * (m + n) * (6 * s - mn * (4 * mn - 1)) // (6 * mn ** 2)
- # bound maximum value that may appear in `gs` (remember both rows!)
- zeta_bound = lcm**2 * (m + n) # bound elements in row 1
- combinations = comb(m + n, m) # sum of row 2
- max_gs = max(zeta_bound, combinations)
- dtype = np.min_scalar_type(max_gs)
- # the frequency table of $g_{u, v}^+$ defined in [1, p. 6]
- gs = ([np.array([[0], [1]], dtype=dtype)]
- + [np.empty((2, 0), dtype=dtype) for _ in range(m)])
- for u in range(n + 1):
- next_gs = []
- tmp = np.empty((2, 0), dtype=dtype)
- for v, g in enumerate(gs):
- # Calculate g recursively with eq. 11 in [1]. Even though it
- # doesn't look like it, this also does 12/13 (all of Algorithm 1).
- vi, i0, i1 = np.intersect1d(tmp[0], g[0], return_indices=True)
- tmp = np.concatenate([
- np.stack([vi, tmp[1, i0] + g[1, i1]]),
- np.delete(tmp, i0, 1),
- np.delete(g, i1, 1)
- ], 1)
- tmp[0] += (a * v - b * u) ** 2
- next_gs.append(tmp)
- gs = next_gs
- value, freq = gs[m]
- return np.float64(np.sum(freq[value >= zeta]) / combinations)
- def cramervonmises_2samp(x, y, method='auto'):
- """Perform the two-sample Cramér-von Mises test for goodness of fit.
- This is the two-sample version of the Cramér-von Mises test ([1]_):
- for two independent samples :math:`X_1, ..., X_n` and
- :math:`Y_1, ..., Y_m`, the null hypothesis is that the samples
- come from the same (unspecified) continuous distribution.
- Parameters
- ----------
- x : array_like
- A 1-D array of observed values of the random variables :math:`X_i`.
- y : array_like
- A 1-D array of observed values of the random variables :math:`Y_i`.
- method : {'auto', 'asymptotic', 'exact'}, optional
- The method used to compute the p-value, see Notes for details.
- The default is 'auto'.
- Returns
- -------
- res : object with attributes
- statistic : float
- Cramér-von Mises statistic.
- pvalue : float
- The p-value.
- See Also
- --------
- cramervonmises, anderson_ksamp, epps_singleton_2samp, ks_2samp
- Notes
- -----
- .. versionadded:: 1.7.0
- The statistic is computed according to equation 9 in [2]_. The
- calculation of the p-value depends on the keyword `method`:
- - ``asymptotic``: The p-value is approximated by using the limiting
- distribution of the test statistic.
- - ``exact``: The exact p-value is computed by enumerating all
- possible combinations of the test statistic, see [2]_.
- If ``method='auto'``, the exact approach is used
- if both samples contain equal to or less than 20 observations,
- otherwise the asymptotic distribution is used.
- If the underlying distribution is not continuous, the p-value is likely to
- be conservative (Section 6.2 in [3]_). When ranking the data to compute
- the test statistic, midranks are used if there are ties.
- References
- ----------
- .. [1] https://en.wikipedia.org/wiki/Cramer-von_Mises_criterion
- .. [2] Anderson, T.W. (1962). On the distribution of the two-sample
- Cramer-von-Mises criterion. The Annals of Mathematical
- Statistics, pp. 1148-1159.
- .. [3] Conover, W.J., Practical Nonparametric Statistics, 1971.
- Examples
- --------
- Suppose we wish to test whether two samples generated by
- ``scipy.stats.norm.rvs`` have the same distribution. We choose a
- significance level of alpha=0.05.
- >>> import numpy as np
- >>> from scipy import stats
- >>> rng = np.random.default_rng()
- >>> x = stats.norm.rvs(size=100, random_state=rng)
- >>> y = stats.norm.rvs(size=70, random_state=rng)
- >>> res = stats.cramervonmises_2samp(x, y)
- >>> res.statistic, res.pvalue
- (0.29376470588235293, 0.1412873014573014)
- The p-value exceeds our chosen significance level, so we do not
- reject the null hypothesis that the observed samples are drawn from the
- same distribution.
- For small sample sizes, one can compute the exact p-values:
- >>> x = stats.norm.rvs(size=7, random_state=rng)
- >>> y = stats.t.rvs(df=2, size=6, random_state=rng)
- >>> res = stats.cramervonmises_2samp(x, y, method='exact')
- >>> res.statistic, res.pvalue
- (0.197802197802198, 0.31643356643356646)
- The p-value based on the asymptotic distribution is a good approximation
- even though the sample size is small.
- >>> res = stats.cramervonmises_2samp(x, y, method='asymptotic')
- >>> res.statistic, res.pvalue
- (0.197802197802198, 0.2966041181527128)
- Independent of the method, one would not reject the null hypothesis at the
- chosen significance level in this example.
- """
- xa = np.sort(np.asarray(x))
- ya = np.sort(np.asarray(y))
- if xa.size <= 1 or ya.size <= 1:
- raise ValueError('x and y must contain at least two observations.')
- if xa.ndim > 1 or ya.ndim > 1:
- raise ValueError('The samples must be one-dimensional.')
- if method not in ['auto', 'exact', 'asymptotic']:
- raise ValueError('method must be either auto, exact or asymptotic.')
- nx = len(xa)
- ny = len(ya)
- if method == 'auto':
- if max(nx, ny) > 20:
- method = 'asymptotic'
- else:
- method = 'exact'
- # get ranks of x and y in the pooled sample
- z = np.concatenate([xa, ya])
- # in case of ties, use midrank (see [1])
- r = scipy.stats.rankdata(z, method='average')
- rx = r[:nx]
- ry = r[nx:]
- # compute U (eq. 10 in [2])
- u = nx * np.sum((rx - np.arange(1, nx+1))**2)
- u += ny * np.sum((ry - np.arange(1, ny+1))**2)
- # compute T (eq. 9 in [2])
- k, N = nx*ny, nx + ny
- t = u / (k*N) - (4*k - 1)/(6*N)
- if method == 'exact':
- p = _pval_cvm_2samp_exact(u, nx, ny)
- else:
- # compute expected value and variance of T (eq. 11 and 14 in [2])
- et = (1 + 1/N)/6
- vt = (N+1) * (4*k*N - 3*(nx**2 + ny**2) - 2*k)
- vt = vt / (45 * N**2 * 4 * k)
- # computed the normalized statistic (eq. 15 in [2])
- tn = 1/6 + (t - et) / np.sqrt(45 * vt)
- # approximate distribution of tn with limiting distribution
- # of the one-sample test statistic
- # if tn < 0.003, the _cdf_cvm_inf(tn) < 1.28*1e-18, return 1.0 directly
- if tn < 0.003:
- p = 1.0
- else:
- p = max(0, 1. - _cdf_cvm_inf(tn))
- return CramerVonMisesResult(statistic=t, pvalue=p)
- class TukeyHSDResult:
- """Result of `scipy.stats.tukey_hsd`.
- Attributes
- ----------
- statistic : float ndarray
- The computed statistic of the test for each comparison. The element
- at index ``(i, j)`` is the statistic for the comparison between groups
- ``i`` and ``j``.
- pvalue : float ndarray
- The associated p-value from the studentized range distribution. The
- element at index ``(i, j)`` is the p-value for the comparison
- between groups ``i`` and ``j``.
- Notes
- -----
- The string representation of this object displays the most recently
- calculated confidence interval, and if none have been previously
- calculated, it will evaluate ``confidence_interval()``.
- References
- ----------
- .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's
- Method."
- https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
- 28 November 2020.
- """
- def __init__(self, statistic, pvalue, _nobs, _ntreatments, _stand_err):
- self.statistic = statistic
- self.pvalue = pvalue
- self._ntreatments = _ntreatments
- self._nobs = _nobs
- self._stand_err = _stand_err
- self._ci = None
- self._ci_cl = None
- def __str__(self):
- # Note: `__str__` prints the confidence intervals from the most
- # recent call to `confidence_interval`. If it has not been called,
- # it will be called with the default CL of .95.
- if self._ci is None:
- self.confidence_interval(confidence_level=.95)
- s = ("Tukey's HSD Pairwise Group Comparisons"
- f" ({self._ci_cl*100:.1f}% Confidence Interval)\n")
- s += "Comparison Statistic p-value Lower CI Upper CI\n"
- for i in range(self.pvalue.shape[0]):
- for j in range(self.pvalue.shape[0]):
- if i != j:
- s += (f" ({i} - {j}) {self.statistic[i, j]:>10.3f}"
- f"{self.pvalue[i, j]:>10.3f}"
- f"{self._ci.low[i, j]:>10.3f}"
- f"{self._ci.high[i, j]:>10.3f}\n")
- return s
- def confidence_interval(self, confidence_level=.95):
- """Compute the confidence interval for the specified confidence level.
- Parameters
- ----------
- confidence_level : float, optional
- Confidence level for the computed confidence interval
- of the estimated proportion. Default is .95.
- Returns
- -------
- ci : ``ConfidenceInterval`` object
- The object has attributes ``low`` and ``high`` that hold the
- lower and upper bounds of the confidence intervals for each
- comparison. The high and low values are accessible for each
- comparison at index ``(i, j)`` between groups ``i`` and ``j``.
- References
- ----------
- .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1.
- Tukey's Method."
- https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
- 28 November 2020.
- Examples
- --------
- >>> from scipy.stats import tukey_hsd
- >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
- >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
- >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
- >>> result = tukey_hsd(group0, group1, group2)
- >>> ci = result.confidence_interval()
- >>> ci.low
- array([[-3.649159, -8.249159, -3.909159],
- [ 0.950841, -3.649159, 0.690841],
- [-3.389159, -7.989159, -3.649159]])
- >>> ci.high
- array([[ 3.649159, -0.950841, 3.389159],
- [ 8.249159, 3.649159, 7.989159],
- [ 3.909159, -0.690841, 3.649159]])
- """
- # check to see if the supplied confidence level matches that of the
- # previously computed CI.
- if (self._ci is not None and self._ci_cl is not None and
- confidence_level == self._ci_cl):
- return self._ci
- if not 0 < confidence_level < 1:
- raise ValueError("Confidence level must be between 0 and 1.")
- # determine the critical value of the studentized range using the
- # appropriate confidence level, number of treatments, and degrees
- # of freedom as determined by the number of data less the number of
- # treatments. ("Confidence limits for Tukey's method")[1]. Note that
- # in the cases of unequal sample sizes there will be a criterion for
- # each group comparison.
- params = (confidence_level, self._nobs, self._ntreatments - self._nobs)
- srd = distributions.studentized_range.ppf(*params)
- # also called maximum critical value, the Tukey criterion is the
- # studentized range critical value * the square root of mean square
- # error over the sample size.
- tukey_criterion = srd * self._stand_err
- # the confidence levels are determined by the
- # `mean_differences` +- `tukey_criterion`
- upper_conf = self.statistic + tukey_criterion
- lower_conf = self.statistic - tukey_criterion
- self._ci = ConfidenceInterval(low=lower_conf, high=upper_conf)
- self._ci_cl = confidence_level
- return self._ci
- def _tukey_hsd_iv(args):
- if (len(args)) < 2:
- raise ValueError("There must be more than 1 treatment.")
- args = [np.asarray(arg) for arg in args]
- for arg in args:
- if arg.ndim != 1:
- raise ValueError("Input samples must be one-dimensional.")
- if arg.size <= 1:
- raise ValueError("Input sample size must be greater than one.")
- if np.isinf(arg).any():
- raise ValueError("Input samples must be finite.")
- return args
- def tukey_hsd(*args):
- """Perform Tukey's HSD test for equality of means over multiple treatments.
- Tukey's honestly significant difference (HSD) test performs pairwise
- comparison of means for a set of samples. Whereas ANOVA (e.g. `f_oneway`)
- assesses whether the true means underlying each sample are identical,
- Tukey's HSD is a post hoc test used to compare the mean of each sample
- to the mean of each other sample.
- The null hypothesis is that the distributions underlying the samples all
- have the same mean. The test statistic, which is computed for every
- possible pairing of samples, is simply the difference between the sample
- means. For each pair, the p-value is the probability under the null
- hypothesis (and other assumptions; see notes) of observing such an extreme
- value of the statistic, considering that many pairwise comparisons are
- being performed. Confidence intervals for the difference between each pair
- of means are also available.
- Parameters
- ----------
- sample1, sample2, ... : array_like
- The sample measurements for each group. There must be at least
- two arguments.
- Returns
- -------
- result : `~scipy.stats._result_classes.TukeyHSDResult` instance
- The return value is an object with the following attributes:
- statistic : float ndarray
- The computed statistic of the test for each comparison. The element
- at index ``(i, j)`` is the statistic for the comparison between
- groups ``i`` and ``j``.
- pvalue : float ndarray
- The computed p-value of the test for each comparison. The element
- at index ``(i, j)`` is the p-value for the comparison between
- groups ``i`` and ``j``.
- The object has the following methods:
- confidence_interval(confidence_level=0.95):
- Compute the confidence interval for the specified confidence level.
- Notes
- -----
- The use of this test relies on several assumptions.
- 1. The observations are independent within and among groups.
- 2. The observations within each group are normally distributed.
- 3. The distributions from which the samples are drawn have the same finite
- variance.
- The original formulation of the test was for samples of equal size [6]_.
- In case of unequal sample sizes, the test uses the Tukey-Kramer method
- [4]_.
- References
- ----------
- .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.7.1. Tukey's
- Method."
- https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm,
- 28 November 2020.
- .. [2] Abdi, Herve & Williams, Lynne. (2021). "Tukey's Honestly Significant
- Difference (HSD) Test."
- https://personal.utdallas.edu/~herve/abdi-HSD2010-pretty.pdf
- .. [3] "One-Way ANOVA Using SAS PROC ANOVA & PROC GLM." SAS
- Tutorials, 2007, www.stattutorials.com/SAS/TUTORIAL-PROC-GLM.htm.
- .. [4] Kramer, Clyde Young. "Extension of Multiple Range Tests to Group
- Means with Unequal Numbers of Replications." Biometrics, vol. 12,
- no. 3, 1956, pp. 307-310. JSTOR, www.jstor.org/stable/3001469.
- Accessed 25 May 2021.
- .. [5] NIST/SEMATECH e-Handbook of Statistical Methods, "7.4.3.3.
- The ANOVA table and tests of hypotheses about means"
- https://www.itl.nist.gov/div898/handbook/prc/section4/prc433.htm,
- 2 June 2021.
- .. [6] Tukey, John W. "Comparing Individual Means in the Analysis of
- Variance." Biometrics, vol. 5, no. 2, 1949, pp. 99-114. JSTOR,
- www.jstor.org/stable/3001913. Accessed 14 June 2021.
- Examples
- --------
- Here are some data comparing the time to relief of three brands of
- headache medicine, reported in minutes. Data adapted from [3]_.
- >>> import numpy as np
- >>> from scipy.stats import tukey_hsd
- >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
- >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
- >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
- We would like to see if the means between any of the groups are
- significantly different. First, visually examine a box and whisker plot.
- >>> import matplotlib.pyplot as plt
- >>> fig, ax = plt.subplots(1, 1)
- >>> ax.boxplot([group0, group1, group2])
- >>> ax.set_xticklabels(["group0", "group1", "group2"]) # doctest: +SKIP
- >>> ax.set_ylabel("mean") # doctest: +SKIP
- >>> plt.show()
- From the box and whisker plot, we can see overlap in the interquartile
- ranges group 1 to group 2 and group 3, but we can apply the ``tukey_hsd``
- test to determine if the difference between means is significant. We
- set a significance level of .05 to reject the null hypothesis.
- >>> res = tukey_hsd(group0, group1, group2)
- >>> print(res)
- Tukey's HSD Pairwise Group Comparisons (95.0% Confidence Interval)
- Comparison Statistic p-value Lower CI Upper CI
- (0 - 1) -4.600 0.014 -8.249 -0.951
- (0 - 2) -0.260 0.980 -3.909 3.389
- (1 - 0) 4.600 0.014 0.951 8.249
- (1 - 2) 4.340 0.020 0.691 7.989
- (2 - 0) 0.260 0.980 -3.389 3.909
- (2 - 1) -4.340 0.020 -7.989 -0.691
- The null hypothesis is that each group has the same mean. The p-value for
- comparisons between ``group0`` and ``group1`` as well as ``group1`` and
- ``group2`` do not exceed .05, so we reject the null hypothesis that they
- have the same means. The p-value of the comparison between ``group0``
- and ``group2`` exceeds .05, so we accept the null hypothesis that there
- is not a significant difference between their means.
- We can also compute the confidence interval associated with our chosen
- confidence level.
- >>> group0 = [24.5, 23.5, 26.4, 27.1, 29.9]
- >>> group1 = [28.4, 34.2, 29.5, 32.2, 30.1]
- >>> group2 = [26.1, 28.3, 24.3, 26.2, 27.8]
- >>> result = tukey_hsd(group0, group1, group2)
- >>> conf = res.confidence_interval(confidence_level=.99)
- >>> for ((i, j), l) in np.ndenumerate(conf.low):
- ... # filter out self comparisons
- ... if i != j:
- ... h = conf.high[i,j]
- ... print(f"({i} - {j}) {l:>6.3f} {h:>6.3f}")
- (0 - 1) -9.480 0.280
- (0 - 2) -5.140 4.620
- (1 - 0) -0.280 9.480
- (1 - 2) -0.540 9.220
- (2 - 0) -4.620 5.140
- (2 - 1) -9.220 0.540
- """
- args = _tukey_hsd_iv(args)
- ntreatments = len(args)
- means = np.asarray([np.mean(arg) for arg in args])
- nsamples_treatments = np.asarray([a.size for a in args])
- nobs = np.sum(nsamples_treatments)
- # determine mean square error [5]. Note that this is sometimes called
- # mean square error within.
- mse = (np.sum([np.var(arg, ddof=1) for arg in args] *
- (nsamples_treatments - 1)) / (nobs - ntreatments))
- # The calculation of the standard error differs when treatments differ in
- # size. See ("Unequal sample sizes")[1].
- if np.unique(nsamples_treatments).size == 1:
- # all input groups are the same length, so only one value needs to be
- # calculated [1].
- normalize = 2 / nsamples_treatments[0]
- else:
- # to compare groups of differing sizes, we must compute a variance
- # value for each individual comparison. Use broadcasting to get the
- # resulting matrix. [3], verified against [4] (page 308).
- normalize = 1 / nsamples_treatments + 1 / nsamples_treatments[None].T
- # the standard error is used in the computation of the tukey criterion and
- # finding the p-values.
- stand_err = np.sqrt(normalize * mse / 2)
- # the mean difference is the test statistic.
- mean_differences = means[None].T - means
- # Calculate the t-statistic to use within the survival function of the
- # studentized range to get the p-value.
- t_stat = np.abs(mean_differences) / stand_err
- params = t_stat, ntreatments, nobs - ntreatments
- pvalues = distributions.studentized_range.sf(*params)
- return TukeyHSDResult(mean_differences, pvalues, ntreatments,
- nobs, stand_err)
|