# Tests for hypothesis tests in scipy.stats (test_hypotests.py)
from itertools import product
import functools

import numpy as np
import pytest
from numpy.testing import (assert_, assert_equal, assert_allclose,
                           assert_almost_equal)  # avoid new uses
from pytest import raises as assert_raises

import scipy.stats as stats
from scipy.stats import distributions
from scipy.stats._hypotests import (epps_singleton_2samp, cramervonmises,
                                    _cdf_cvm, cramervonmises_2samp,
                                    _pval_cvm_2samp_exact, barnard_exact,
                                    boschloo_exact)
from scipy.stats._mannwhitneyu import mannwhitneyu, _mwu_state

from .common_tests import check_named_results
from scipy._lib._testutils import _TestPythranFunc
  17. class TestEppsSingleton:
  18. def test_statistic_1(self):
  19. # first example in Goerg & Kaiser, also in original paper of
  20. # Epps & Singleton. Note: values do not match exactly, the
  21. # value of the interquartile range varies depending on how
  22. # quantiles are computed
  23. x = np.array([-0.35, 2.55, 1.73, 0.73, 0.35,
  24. 2.69, 0.46, -0.94, -0.37, 12.07])
  25. y = np.array([-1.15, -0.15, 2.48, 3.25, 3.71,
  26. 4.29, 5.00, 7.74, 8.38, 8.60])
  27. w, p = epps_singleton_2samp(x, y)
  28. assert_almost_equal(w, 15.14, decimal=1)
  29. assert_almost_equal(p, 0.00442, decimal=3)
  30. def test_statistic_2(self):
  31. # second example in Goerg & Kaiser, again not a perfect match
  32. x = np.array((0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 10,
  33. 10, 10, 10))
  34. y = np.array((10, 4, 0, 5, 10, 10, 0, 5, 6, 7, 10, 3, 1, 7, 0, 8, 1,
  35. 5, 8, 10))
  36. w, p = epps_singleton_2samp(x, y)
  37. assert_allclose(w, 8.900, atol=0.001)
  38. assert_almost_equal(p, 0.06364, decimal=3)
  39. def test_epps_singleton_array_like(self):
  40. np.random.seed(1234)
  41. x, y = np.arange(30), np.arange(28)
  42. w1, p1 = epps_singleton_2samp(list(x), list(y))
  43. w2, p2 = epps_singleton_2samp(tuple(x), tuple(y))
  44. w3, p3 = epps_singleton_2samp(x, y)
  45. assert_(w1 == w2 == w3)
  46. assert_(p1 == p2 == p3)
  47. def test_epps_singleton_size(self):
  48. # raise error if less than 5 elements
  49. x, y = (1, 2, 3, 4), np.arange(10)
  50. assert_raises(ValueError, epps_singleton_2samp, x, y)
  51. def test_epps_singleton_nonfinite(self):
  52. # raise error if there are non-finite values
  53. x, y = (1, 2, 3, 4, 5, np.inf), np.arange(10)
  54. assert_raises(ValueError, epps_singleton_2samp, x, y)
  55. x, y = np.arange(10), (1, 2, 3, 4, 5, np.nan)
  56. assert_raises(ValueError, epps_singleton_2samp, x, y)
  57. def test_epps_singleton_1d_input(self):
  58. x = np.arange(100).reshape(-1, 1)
  59. assert_raises(ValueError, epps_singleton_2samp, x, x)
  60. def test_names(self):
  61. x, y = np.arange(20), np.arange(30)
  62. res = epps_singleton_2samp(x, y)
  63. attributes = ('statistic', 'pvalue')
  64. check_named_results(res, attributes)
  65. class TestCvm:
  66. # the expected values of the cdfs are taken from Table 1 in
  67. # Csorgo / Faraway: The Exact and Asymptotic Distribution of
  68. # Cramér-von Mises Statistics, 1996.
  69. def test_cdf_4(self):
  70. assert_allclose(
  71. _cdf_cvm([0.02983, 0.04111, 0.12331, 0.94251], 4),
  72. [0.01, 0.05, 0.5, 0.999],
  73. atol=1e-4)
  74. def test_cdf_10(self):
  75. assert_allclose(
  76. _cdf_cvm([0.02657, 0.03830, 0.12068, 0.56643], 10),
  77. [0.01, 0.05, 0.5, 0.975],
  78. atol=1e-4)
  79. def test_cdf_1000(self):
  80. assert_allclose(
  81. _cdf_cvm([0.02481, 0.03658, 0.11889, 1.16120], 1000),
  82. [0.01, 0.05, 0.5, 0.999],
  83. atol=1e-4)
  84. def test_cdf_inf(self):
  85. assert_allclose(
  86. _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204]),
  87. [0.01, 0.05, 0.5, 0.999],
  88. atol=1e-4)
  89. def test_cdf_support(self):
  90. # cdf has support on [1/(12*n), n/3]
  91. assert_equal(_cdf_cvm([1/(12*533), 533/3], 533), [0, 1])
  92. assert_equal(_cdf_cvm([1/(12*(27 + 1)), (27 + 1)/3], 27), [0, 1])
  93. def test_cdf_large_n(self):
  94. # test that asymptotic cdf and cdf for large samples are close
  95. assert_allclose(
  96. _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204, 100], 10000),
  97. _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204, 100]),
  98. atol=1e-4)
  99. def test_large_x(self):
  100. # for large values of x and n, the series used to compute the cdf
  101. # converges slowly.
  102. # this leads to bug in R package goftest and MAPLE code that is
  103. # the basis of the implemenation in scipy
  104. # note: cdf = 1 for x >= 1000/3 and n = 1000
  105. assert_(0.99999 < _cdf_cvm(333.3, 1000) < 1.0)
  106. assert_(0.99999 < _cdf_cvm(333.3) < 1.0)
  107. def test_low_p(self):
  108. # _cdf_cvm can return values larger than 1. In that case, we just
  109. # return a p-value of zero.
  110. n = 12
  111. res = cramervonmises(np.ones(n)*0.8, 'norm')
  112. assert_(_cdf_cvm(res.statistic, n) > 1.0)
  113. assert_equal(res.pvalue, 0)
  114. def test_invalid_input(self):
  115. x = np.arange(10).reshape((2, 5))
  116. assert_raises(ValueError, cramervonmises, x, "norm")
  117. assert_raises(ValueError, cramervonmises, [1.5], "norm")
  118. assert_raises(ValueError, cramervonmises, (), "norm")
  119. def test_values_R(self):
  120. # compared against R package goftest, version 1.1.1
  121. # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm")
  122. res = cramervonmises([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], "norm")
  123. assert_allclose(res.statistic, 0.288156, atol=1e-6)
  124. assert_allclose(res.pvalue, 0.1453465, atol=1e-6)
  125. # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6),
  126. # "pnorm", mean = 3, sd = 1.5)
  127. res = cramervonmises([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], "norm", (3, 1.5))
  128. assert_allclose(res.statistic, 0.9426685, atol=1e-6)
  129. assert_allclose(res.pvalue, 0.002026417, atol=1e-6)
  130. # goftest::cvm.test(c(1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5), "pexp")
  131. res = cramervonmises([1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5], "expon")
  132. assert_allclose(res.statistic, 0.8421854, atol=1e-6)
  133. assert_allclose(res.pvalue, 0.004433406, atol=1e-6)
  134. def test_callable_cdf(self):
  135. x, args = np.arange(5), (1.4, 0.7)
  136. r1 = cramervonmises(x, distributions.expon.cdf)
  137. r2 = cramervonmises(x, "expon")
  138. assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue))
  139. r1 = cramervonmises(x, distributions.beta.cdf, args)
  140. r2 = cramervonmises(x, "beta", args)
  141. assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue))
class TestMannWhitneyU:
    """Tests for `mannwhitneyu` and the exact-distribution helper
    `_mwu_state`, run with the recursive implementation forced; the
    subclass below re-runs the suite with the iterative implementation."""

    def setup_method(self):
        # force the recursive implementation of the exact distribution
        _mwu_state._recursive = True

    # All magic numbers are from R wilcox.test unless otherwise specified
    # https://rdrr.io/r/stats/wilcox.test.html

    # --- Test Input Validation ---

    def test_input_validation(self):
        x = np.array([1, 2])  # generic, valid inputs
        y = np.array([3, 4])
        with assert_raises(ValueError, match="`x` and `y` must be of nonzero"):
            mannwhitneyu([], y)
        with assert_raises(ValueError, match="`x` and `y` must be of nonzero"):
            mannwhitneyu(x, [])
        with assert_raises(ValueError, match="`use_continuity` must be one"):
            mannwhitneyu(x, y, use_continuity='ekki')
        with assert_raises(ValueError, match="`alternative` must be one of"):
            mannwhitneyu(x, y, alternative='ekki')
        with assert_raises(ValueError, match="`axis` must be an integer"):
            mannwhitneyu(x, y, axis=1.5)
        with assert_raises(ValueError, match="`method` must be one of"):
            mannwhitneyu(x, y, method='ekki')

    def test_auto(self):
        # Test that default method ('auto') chooses intended method
        np.random.seed(1)
        n = 8  # threshold to switch from exact to asymptotic

        # both inputs are smaller than threshold; should use exact
        x = np.random.rand(n-1)
        y = np.random.rand(n-1)
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue == exact.pvalue
        assert auto.pvalue != asymptotic.pvalue

        # one input is smaller than threshold; should use exact
        x = np.random.rand(n-1)
        y = np.random.rand(n+1)
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue == exact.pvalue
        assert auto.pvalue != asymptotic.pvalue

        # other input is smaller than threshold; should use exact
        auto = mannwhitneyu(y, x)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue == exact.pvalue
        assert auto.pvalue != asymptotic.pvalue

        # both inputs are larger than threshold; should use asymptotic
        x = np.random.rand(n+1)
        y = np.random.rand(n+1)
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue != exact.pvalue
        assert auto.pvalue == asymptotic.pvalue

        # both inputs are smaller than threshold, but there is a tie
        # should use asymptotic
        x = np.random.rand(n-1)
        y = np.random.rand(n-1)
        y[3] = x[3]
        auto = mannwhitneyu(x, y)
        asymptotic = mannwhitneyu(x, y, method='asymptotic')
        exact = mannwhitneyu(x, y, method='exact')
        assert auto.pvalue != exact.pvalue
        assert auto.pvalue == asymptotic.pvalue

    # --- Test Basic Functionality ---
    # class-level samples shared by test_basic / test_continuity (self.x/y)
    x = [210.052110, 110.190630, 307.918612]
    y = [436.08811482466416, 416.37397329768191, 179.96975939463582,
         197.8118754228619, 34.038757281225756, 138.54220550921517,
         128.7769351470246, 265.92721427951852, 275.6617533155341,
         592.34083395416258, 448.73177590617018, 300.61495185038905,
         187.97508449019588]

    # This test was written for mann_whitney_u in gh-4933.
    # Originally, the p-values for alternatives were swapped;
    # this has been corrected and the tests have been refactored for
    # compactness, but otherwise the tests are unchanged.
    # R code for comparison, e.g.:
    # options(digits = 16)
    # x = c(210.052110, 110.190630, 307.918612)
    # y = c(436.08811482466416, 416.37397329768191, 179.96975939463582,
    #       197.8118754228619, 34.038757281225756, 138.54220550921517,
    #       128.7769351470246, 265.92721427951852, 275.6617533155341,
    #       592.34083395416258, 448.73177590617018, 300.61495185038905,
    #       187.97508449019588)
    # wilcox.test(x, y, alternative="g", exact=TRUE)
    cases_basic = [[{"alternative": 'two-sided', "method": "asymptotic"},
                    (16, 0.6865041817876)],
                   [{"alternative": 'less', "method": "asymptotic"},
                    (16, 0.3432520908938)],
                   [{"alternative": 'greater', "method": "asymptotic"},
                    (16, 0.7047591913255)],
                   [{"alternative": 'two-sided', "method": "exact"},
                    (16, 0.7035714285714)],
                   [{"alternative": 'less', "method": "exact"},
                    (16, 0.3517857142857)],
                   [{"alternative": 'greater', "method": "exact"},
                    (16, 0.6946428571429)]]

    @pytest.mark.parametrize(("kwds", "expected"), cases_basic)
    def test_basic(self, kwds, expected):
        res = mannwhitneyu(self.x, self.y, **kwds)
        assert_allclose(res, expected)

    cases_continuity = [[{"alternative": 'two-sided', "use_continuity": True},
                         (23, 0.6865041817876)],
                        [{"alternative": 'less', "use_continuity": True},
                         (23, 0.7047591913255)],
                        [{"alternative": 'greater', "use_continuity": True},
                         (23, 0.3432520908938)],
                        [{"alternative": 'two-sided', "use_continuity": False},
                         (23, 0.6377328900502)],
                        [{"alternative": 'less', "use_continuity": False},
                         (23, 0.6811335549749)],
                        [{"alternative": 'greater', "use_continuity": False},
                         (23, 0.3188664450251)]]

    @pytest.mark.parametrize(("kwds", "expected"), cases_continuity)
    def test_continuity(self, kwds, expected):
        # When x and y are interchanged, less and greater p-values should
        # swap (compare to above). This wouldn't happen if the continuity
        # correction were applied in the wrong direction. Note that less and
        # greater p-values do not sum to 1 when continuity correction is on,
        # which is what we'd expect. Also check that results match R when
        # continuity correction is turned off.
        # Note that method='asymptotic' -> exact=FALSE
        # and use_continuity=False -> correct=FALSE, e.g.:
        # wilcox.test(x, y, alternative="t", exact=FALSE, correct=FALSE)
        res = mannwhitneyu(self.y, self.x, method='asymptotic', **kwds)
        assert_allclose(res, expected)

    def test_tie_correct(self):
        # Test tie correction against R's wilcox.test
        # options(digits = 16)
        # x = c(1, 2, 3, 4)
        # y = c(1, 2, 3, 4, 5)
        # wilcox.test(x, y, exact=FALSE)
        x = [1, 2, 3, 4]
        y0 = np.array([1, 2, 3, 4, 5])
        dy = np.array([0, 1, 0, 1, 0])*0.01
        dy2 = np.array([0, 0, 1, 0, 0])*0.01
        y = [y0-0.01, y0-dy, y0-dy2, y0, y0+dy2, y0+dy, y0+0.01]
        res = mannwhitneyu(x, y, axis=-1, method="asymptotic")
        U_expected = [10, 9, 8.5, 8, 7.5, 7, 6]
        p_expected = [1, 0.9017048037317, 0.804080657472, 0.7086240584439,
                      0.6197963884941, 0.5368784563079, 0.3912672792826]
        assert_equal(res.statistic, U_expected)
        assert_allclose(res.pvalue, p_expected)

    # --- Test Exact Distribution of U ---

    # These are tabulated values of the CDF of the exact distribution of
    # the test statistic from pg 52 of reference [1] (Mann-Whitney Original)
    pn3 = {1: [0.25, 0.5, 0.75], 2: [0.1, 0.2, 0.4, 0.6],
           3: [0.05, .1, 0.2, 0.35, 0.5, 0.65]}
    pn4 = {1: [0.2, 0.4, 0.6], 2: [0.067, 0.133, 0.267, 0.4, 0.6],
           3: [0.028, 0.057, 0.114, 0.2, .314, 0.429, 0.571],
           4: [0.014, 0.029, 0.057, 0.1, 0.171, 0.243, 0.343, 0.443, 0.557]}
    pm5 = {1: [0.167, 0.333, 0.5, 0.667],
           2: [0.047, 0.095, 0.19, 0.286, 0.429, 0.571],
           3: [0.018, 0.036, 0.071, 0.125, 0.196, 0.286, 0.393, 0.5, 0.607],
           4: [0.008, 0.016, 0.032, 0.056, 0.095, 0.143,
               0.206, 0.278, 0.365, 0.452, 0.548],
           5: [0.004, 0.008, 0.016, 0.028, 0.048, 0.075, 0.111,
               0.155, 0.21, 0.274, 0.345, .421, 0.5, 0.579]}
    pm6 = {1: [0.143, 0.286, 0.428, 0.571],
           2: [0.036, 0.071, 0.143, 0.214, 0.321, 0.429, 0.571],
           3: [0.012, 0.024, 0.048, 0.083, 0.131,
               0.19, 0.274, 0.357, 0.452, 0.548],
           4: [0.005, 0.01, 0.019, 0.033, 0.057, 0.086, 0.129,
               0.176, 0.238, 0.305, 0.381, 0.457, 0.543],  # the last element
           # of the previous list, 0.543, has been modified from 0.545;
           # I assume it was a typo
           5: [0.002, 0.004, 0.009, 0.015, 0.026, 0.041, 0.063, 0.089,
               0.123, 0.165, 0.214, 0.268, 0.331, 0.396, 0.465, 0.535],
           6: [0.001, 0.002, 0.004, 0.008, 0.013, 0.021, 0.032, 0.047,
               0.066, 0.09, 0.12, 0.155, 0.197, 0.242, 0.294, 0.350,
               0.409, 0.469, 0.531]}

    def test_exact_distribution(self):
        # I considered parametrize. I decided against it.
        p_tables = {3: self.pn3, 4: self.pn4, 5: self.pm5, 6: self.pm6}
        for n, table in p_tables.items():
            for m, p in table.items():
                # check p-value against table
                u = np.arange(0, len(p))
                assert_allclose(_mwu_state.cdf(k=u, m=m, n=n), p, atol=1e-3)

                # check identity CDF + SF - PMF = 1
                # ( In this implementation, SF(U) includes PMF(U) )
                u2 = np.arange(0, m*n+1)
                assert_allclose(_mwu_state.cdf(k=u2, m=m, n=n)
                                + _mwu_state.sf(k=u2, m=m, n=n)
                                - _mwu_state.pmf(k=u2, m=m, n=n), 1)

                # check symmetry about mean of U, i.e. pmf(U) = pmf(m*n-U)
                pmf = _mwu_state.pmf(k=u2, m=m, n=n)
                assert_allclose(pmf, pmf[::-1])

                # check symmetry w.r.t. interchange of m, n
                pmf2 = _mwu_state.pmf(k=u2, m=n, n=m)
                assert_allclose(pmf, pmf2)

    def test_asymptotic_behavior(self):
        np.random.seed(0)

        # for small samples, the asymptotic test is not very accurate
        x = np.random.rand(5)
        y = np.random.rand(5)
        res1 = mannwhitneyu(x, y, method="exact")
        res2 = mannwhitneyu(x, y, method="asymptotic")
        assert res1.statistic == res2.statistic
        assert np.abs(res1.pvalue - res2.pvalue) > 1e-2

        # for large samples, they agree reasonably well
        x = np.random.rand(40)
        y = np.random.rand(40)
        res1 = mannwhitneyu(x, y, method="exact")
        res2 = mannwhitneyu(x, y, method="asymptotic")
        assert res1.statistic == res2.statistic
        assert np.abs(res1.pvalue - res2.pvalue) < 1e-3

    # --- Test Corner Cases ---

    def test_exact_U_equals_mean(self):
        # Test U == m*n/2 with exact method
        # Without special treatment, two-sided p-value > 1 because both
        # one-sided p-values are > 0.5
        res_l = mannwhitneyu([1, 2, 3], [1.5, 2.5], alternative="less",
                             method="exact")
        res_g = mannwhitneyu([1, 2, 3], [1.5, 2.5], alternative="greater",
                             method="exact")
        assert_equal(res_l.pvalue, res_g.pvalue)
        assert res_l.pvalue > 0.5

        res = mannwhitneyu([1, 2, 3], [1.5, 2.5], alternative="two-sided",
                           method="exact")
        assert_equal(res, (3, 1))
        # U == m*n/2 for asymptotic case tested in test_gh_2118
        # The reason it's tricky for the asymptotic test has to do with
        # continuity correction.

    cases_scalar = [[{"alternative": 'two-sided', "method": "asymptotic"},
                     (0, 1)],
                    [{"alternative": 'less', "method": "asymptotic"},
                     (0, 0.5)],
                    [{"alternative": 'greater', "method": "asymptotic"},
                     (0, 0.977249868052)],
                    [{"alternative": 'two-sided', "method": "exact"}, (0, 1)],
                    [{"alternative": 'less', "method": "exact"}, (0, 0.5)],
                    [{"alternative": 'greater', "method": "exact"}, (0, 1)]]

    @pytest.mark.parametrize(("kwds", "result"), cases_scalar)
    def test_scalar_data(self, kwds, result):
        # just making sure scalars work
        assert_allclose(mannwhitneyu(1, 2, **kwds), result)

    def test_equal_scalar_data(self):
        # when two scalars are equal, there is an -0.5/0 in the asymptotic
        # approximation. R gives pvalue=1.0 for alternatives 'less' and
        # 'greater' but NA for 'two-sided'. I don't see why, so I don't
        # see a need for a special case to match that behavior.
        assert_equal(mannwhitneyu(1, 1, method="exact"), (0.5, 1))
        assert_equal(mannwhitneyu(1, 1, method="asymptotic"), (0.5, 1))

        # without continuity correction, this becomes 0/0, which really
        # is undefined
        assert_equal(mannwhitneyu(1, 1, method="asymptotic",
                                  use_continuity=False), (0.5, np.nan))

    # --- Test Enhancements / Bug Reports ---

    @pytest.mark.parametrize("method", ["asymptotic", "exact"])
    def test_gh_12837_11113(self, method):
        # Test that behavior for broadcastable nd arrays is appropriate:
        # output shape is correct and all values are equal to when the test
        # is performed on one pair of samples at a time.
        # Tests that gh-12837 and gh-11113 (requests for n-d input)
        # are resolved
        np.random.seed(0)

        # arrays are broadcastable except for axis = -3
        axis = -3
        m, n = 7, 10  # sample sizes
        x = np.random.rand(m, 3, 8)
        y = np.random.rand(6, n, 1, 8) + 0.1
        res = mannwhitneyu(x, y, method=method, axis=axis)

        shape = (6, 3, 8)  # appropriate shape of outputs, given inputs
        assert res.pvalue.shape == shape
        assert res.statistic.shape == shape

        # move axis of test to end for simplicity
        x, y = np.moveaxis(x, axis, -1), np.moveaxis(y, axis, -1)

        x = x[None, ...]  # give x a zeroth dimension
        assert x.ndim == y.ndim

        x = np.broadcast_to(x, shape + (m,))
        y = np.broadcast_to(y, shape + (n,))
        assert x.shape[:-1] == shape
        assert y.shape[:-1] == shape

        # loop over pairs of samples
        statistics = np.zeros(shape)
        pvalues = np.zeros(shape)
        for indices in product(*[range(i) for i in shape]):
            xi = x[indices]
            yi = y[indices]
            temp = mannwhitneyu(xi, yi, method=method)
            statistics[indices] = temp.statistic
            pvalues[indices] = temp.pvalue

        np.testing.assert_equal(res.pvalue, pvalues)
        np.testing.assert_equal(res.statistic, statistics)

    def test_gh_11355(self):
        # Test for correct behavior with NaN/Inf in input
        x = [1, 2, 3, 4]
        y = [3, 6, 7, 8, 9, 3, 2, 1, 4, 4, 5]
        res1 = mannwhitneyu(x, y)

        # Inf is not a problem. This is a rank test, and it's the largest value
        y[4] = np.inf
        res2 = mannwhitneyu(x, y)

        assert_equal(res1.statistic, res2.statistic)
        assert_equal(res1.pvalue, res2.pvalue)

        # NaNs should propagate by default.
        y[4] = np.nan
        res3 = mannwhitneyu(x, y)
        assert_equal(res3.statistic, np.nan)
        assert_equal(res3.pvalue, np.nan)

    cases_11355 = [([1, 2, 3, 4],
                    [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5],
                    10, 0.1297704873477),
                   ([1, 2, 3, 4],
                    [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
                    8.5, 0.08735617507695),
                   ([1, 2, np.inf, 4],
                    [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5],
                    17.5, 0.5988856695752),
                   ([1, 2, np.inf, 4],
                    [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
                    16, 0.4687165824462),
                   ([1, np.inf, np.inf, 4],
                    [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5],
                    24.5, 0.7912517950119)]

    @pytest.mark.parametrize(("x", "y", "statistic", "pvalue"), cases_11355)
    def test_gh_11355b(self, x, y, statistic, pvalue):
        # Test for correct behavior with NaN/Inf in input
        res = mannwhitneyu(x, y, method='asymptotic')
        assert_allclose(res.statistic, statistic, atol=1e-12)
        assert_allclose(res.pvalue, pvalue, atol=1e-12)

    cases_9184 = [[True, "less", "asymptotic", 0.900775348204],
                  [True, "greater", "asymptotic", 0.1223118025635],
                  [True, "two-sided", "asymptotic", 0.244623605127],
                  [False, "less", "asymptotic", 0.8896643190401],
                  [False, "greater", "asymptotic", 0.1103356809599],
                  [False, "two-sided", "asymptotic", 0.2206713619198],
                  [True, "less", "exact", 0.8967698967699],
                  [True, "greater", "exact", 0.1272061272061],
                  [True, "two-sided", "exact", 0.2544122544123]]

    @pytest.mark.parametrize(("use_continuity", "alternative",
                              "method", "pvalue_exp"), cases_9184)
    def test_gh_9184(self, use_continuity, alternative, method, pvalue_exp):
        # gh-9184 might be considered a doc-only bug. Please see the
        # documentation to confirm that mannwhitneyu correctly notes
        # that the output statistic is that of the first sample (x). In any
        # case, check the case provided there against output from R.
        # R code:
        # options(digits=16)
        # x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
        # y <- c(1.15, 0.88, 0.90, 0.74, 1.21)
        # wilcox.test(x, y, alternative = "less", exact = FALSE)
        # wilcox.test(x, y, alternative = "greater", exact = FALSE)
        # wilcox.test(x, y, alternative = "two.sided", exact = FALSE)
        # wilcox.test(x, y, alternative = "less", exact = FALSE,
        #             correct=FALSE)
        # wilcox.test(x, y, alternative = "greater", exact = FALSE,
        #             correct=FALSE)
        # wilcox.test(x, y, alternative = "two.sided", exact = FALSE,
        #             correct=FALSE)
        # wilcox.test(x, y, alternative = "less", exact = TRUE)
        # wilcox.test(x, y, alternative = "greater", exact = TRUE)
        # wilcox.test(x, y, alternative = "two.sided", exact = TRUE)
        statistic_exp = 35
        x = (0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46)
        y = (1.15, 0.88, 0.90, 0.74, 1.21)
        res = mannwhitneyu(x, y, use_continuity=use_continuity,
                           alternative=alternative, method=method)
        assert_equal(res.statistic, statistic_exp)
        assert_allclose(res.pvalue, pvalue_exp)

    def test_gh_6897(self):
        # Test for correct behavior with empty input
        with assert_raises(ValueError, match="`x` and `y` must be of nonzero"):
            mannwhitneyu([], [])

    def test_gh_4067(self):
        # Test for correct behavior with all NaN input - default is propagate
        a = np.array([np.nan, np.nan, np.nan, np.nan, np.nan])
        b = np.array([np.nan, np.nan, np.nan, np.nan, np.nan])
        res = mannwhitneyu(a, b)
        assert_equal(res.statistic, np.nan)
        assert_equal(res.pvalue, np.nan)

    # All cases checked against R wilcox.test, e.g.
    # options(digits=16)
    # x = c(1, 2, 3)
    # y = c(1.5, 2.5)
    # wilcox.test(x, y, exact=FALSE, alternative='less')
    cases_2118 = [[[1, 2, 3], [1.5, 2.5], "greater", (3, 0.6135850036578)],
                  [[1, 2, 3], [1.5, 2.5], "less", (3, 0.6135850036578)],
                  [[1, 2, 3], [1.5, 2.5], "two-sided", (3, 1.0)],
                  [[1, 2, 3], [2], "greater", (1.5, 0.681324055883)],
                  [[1, 2, 3], [2], "less", (1.5, 0.681324055883)],
                  [[1, 2, 3], [2], "two-sided", (1.5, 1)],
                  [[1, 2], [1, 2], "greater", (2, 0.667497228949)],
                  [[1, 2], [1, 2], "less", (2, 0.667497228949)],
                  [[1, 2], [1, 2], "two-sided", (2, 1)]]

    @pytest.mark.parametrize(["x", "y", "alternative", "expected"], cases_2118)
    def test_gh_2118(self, x, y, alternative, expected):
        # test cases in which U == m*n/2 when method is asymptotic
        # applying continuity correction could result in p-value > 1
        res = mannwhitneyu(x, y, use_continuity=True, alternative=alternative,
                           method="asymptotic")
        assert_allclose(res, expected, rtol=1e-12)

    def teardown_method(self):
        # restore the default: implementation is chosen automatically
        _mwu_state._recursive = None
  536. class TestMannWhitneyU_iterative(TestMannWhitneyU):
  537. def setup_method(self):
  538. _mwu_state._recursive = False
  539. def teardown_method(self):
  540. _mwu_state._recursive = None
  541. @pytest.mark.xslow
  542. def test_mann_whitney_u_switch():
  543. # Check that mannwhiteneyu switches between recursive and iterative
  544. # implementations at n = 500
  545. # ensure that recursion is not enforced
  546. _mwu_state._recursive = None
  547. _mwu_state._fmnks = -np.ones((1, 1, 1))
  548. rng = np.random.default_rng(9546146887652)
  549. x = rng.random(5)
  550. # use iterative algorithm because n > 500
  551. y = rng.random(501)
  552. stats.mannwhitneyu(x, y, method='exact')
  553. # iterative algorithm doesn't modify _mwu_state._fmnks
  554. assert np.all(_mwu_state._fmnks == -1)
  555. # use recursive algorithm because n <= 500
  556. y = rng.random(500)
  557. stats.mannwhitneyu(x, y, method='exact')
  558. # recursive algorithm has modified _mwu_state._fmnks
  559. assert not np.all(_mwu_state._fmnks == -1)
class TestSomersD(_TestPythranFunc):
    """Tests for `scipy.stats.somersd`.

    Reference values come from SAS PROC FREQ, Somers' original paper,
    and internal-consistency arguments.  The `_TestPythranFunc` base
    class supplies machinery that exercises the function over the dtype
    combinations declared in `setup_method` (see base class for details).
    """

    def setup_method(self):
        # Test both arguments over all integer and float dtypes.
        self.dtypes = self.ALL_INTEGER + self.ALL_FLOAT
        self.arguments = {0: (np.arange(10),
                              self.ALL_INTEGER + self.ALL_FLOAT),
                          1: (np.arange(10),
                              self.ALL_INTEGER + self.ALL_FLOAT)}
        input_array = [self.arguments[idx][0] for idx in self.arguments]
        # In this case, self.partialfunc can simply be stats.somersd,
        # since `alternative` is an optional argument. If it is required,
        # we can use functools.partial to freeze the value, because
        # we only mainly test various array inputs, not str, etc.
        self.partialfunc = functools.partial(stats.somersd,
                                             alternative='two-sided')
        self.expected = self.partialfunc(*input_array)

    def pythranfunc(self, *args):
        # Hook called by the base class: result for any dtype combination
        # must match the reference result computed in setup_method.
        res = self.partialfunc(*args)
        assert_allclose(res.statistic, self.expected.statistic, atol=1e-15)
        assert_allclose(res.pvalue, self.expected.pvalue, atol=1e-15)

    def test_pythranfunc_keywords(self):
        # Not specifying the optional keyword args
        table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]]
        res1 = stats.somersd(table)
        # Specifying the optional keyword args with default value
        optional_args = self.get_optional_args(stats.somersd)
        res2 = stats.somersd(table, **optional_args)
        # Check if the results are the same in two cases
        assert_allclose(res1.statistic, res2.statistic, atol=1e-15)
        assert_allclose(res1.pvalue, res2.pvalue, atol=1e-15)

    def test_like_kendalltau(self):
        # All tests correspond with one in test_stats.py `test_kendalltau`
        # case without ties, con-dis equal zero
        x = [5, 2, 1, 3, 6, 4, 7, 8]
        y = [5, 2, 6, 3, 1, 8, 7, 4]
        # Cross-check with result from SAS FREQ:
        expected = (0.000000000000000, 1.000000000000000)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # case without ties, con-dis equal zero
        x = [0, 5, 2, 1, 3, 6, 4, 7, 8]
        y = [5, 2, 0, 6, 3, 1, 8, 7, 4]
        # Cross-check with result from SAS FREQ:
        expected = (0.000000000000000, 1.000000000000000)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # case without ties, con-dis close to zero
        x = [5, 2, 1, 3, 6, 4, 7]
        y = [5, 2, 6, 3, 1, 7, 4]
        # Cross-check with result from SAS FREQ:
        expected = (-0.142857142857140, 0.630326953157670)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # simple case without ties
        x = np.arange(10)
        y = np.arange(10)
        # Cross-check with result from SAS FREQ:
        # SAS p value is not provided.
        expected = (1.000000000000000, 0)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # swap a couple values and a couple more
        x = np.arange(10)
        y = np.array([0, 2, 1, 3, 4, 6, 5, 7, 8, 9])
        # Cross-check with result from SAS FREQ:
        expected = (0.911111111111110, 0.000000000000000)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # same in opposite direction
        x = np.arange(10)
        y = np.arange(10)[::-1]
        # Cross-check with result from SAS FREQ:
        # SAS p value is not provided.
        expected = (-1.000000000000000, 0)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # swap a couple values and a couple more
        x = np.arange(10)
        y = np.array([9, 7, 8, 6, 5, 3, 4, 2, 1, 0])
        # Cross-check with result from SAS FREQ:
        expected = (-0.9111111111111111, 0.000000000000000)
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # with some ties
        x1 = [12, 2, 1, 12, 2]
        x2 = [1, 4, 7, 1, 0]
        # Cross-check with result from SAS FREQ:
        expected = (-0.500000000000000, 0.304901788178780)
        res = stats.somersd(x1, x2)
        assert_allclose(res.statistic, expected[0], atol=1e-15)
        assert_allclose(res.pvalue, expected[1], atol=1e-15)
        # with only ties in one or both inputs
        # SAS will not produce an output for these:
        # NOTE: No statistics are computed for x * y because x has fewer
        # than 2 nonmissing levels.
        # WARNING: No OUTPUT data set is produced for this table because a
        # row or column variable has fewer than 2 nonmissing levels and no
        # statistics are computed.
        res = stats.somersd([2, 2, 2], [2, 2, 2])
        assert_allclose(res.statistic, np.nan)
        assert_allclose(res.pvalue, np.nan)
        res = stats.somersd([2, 0, 2], [2, 2, 2])
        assert_allclose(res.statistic, np.nan)
        assert_allclose(res.pvalue, np.nan)
        res = stats.somersd([2, 2, 2], [2, 0, 2])
        assert_allclose(res.statistic, np.nan)
        assert_allclose(res.pvalue, np.nan)
        res = stats.somersd([0], [0])
        assert_allclose(res.statistic, np.nan)
        assert_allclose(res.pvalue, np.nan)
        # empty arrays provided as input
        res = stats.somersd([], [])
        assert_allclose(res.statistic, np.nan)
        assert_allclose(res.pvalue, np.nan)
        # test unequal length inputs
        x = np.arange(10.)
        y = np.arange(20.)
        assert_raises(ValueError, stats.somersd, x, y)

    def test_asymmetry(self):
        # test that somersd is asymmetric w.r.t. input order and that
        # convention is as described: first input is row variable & independent
        # data is from Wikipedia:
        # https://en.wikipedia.org/wiki/Somers%27_D
        # but currently that example contradicts itself - it says X is
        # independent yet take D_XY
        x = [1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 1, 2,
             2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3]
        y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
        # Cross-check with result from SAS FREQ:
        d_cr = 0.272727272727270
        d_rc = 0.342857142857140
        p = 0.092891940883700  # same p-value for either direction
        res = stats.somersd(x, y)
        assert_allclose(res.statistic, d_cr, atol=1e-15)
        assert_allclose(res.pvalue, p, atol=1e-4)
        assert_equal(res.table.shape, (3, 2))
        res = stats.somersd(y, x)
        assert_allclose(res.statistic, d_rc, atol=1e-15)
        assert_allclose(res.pvalue, p, atol=1e-15)
        assert_equal(res.table.shape, (2, 3))

    def test_somers_original(self):
        # test against Somers' original paper [1]
        # Table 5A
        # Somers' convention was column IV
        table = np.array([[8, 2], [6, 5], [3, 4], [1, 3], [2, 3]])
        # Our convention (and that of SAS FREQ) is row IV
        table = table.T
        dyx = 129/340
        assert_allclose(stats.somersd(table).statistic, dyx)
        # table 7A - d_yx = 1
        table = np.array([[25, 0], [85, 0], [0, 30]])
        dxy, dyx = 3300/5425, 3300/3300
        assert_allclose(stats.somersd(table).statistic, dxy)
        assert_allclose(stats.somersd(table.T).statistic, dyx)
        # table 7B - d_yx < 0
        table = np.array([[25, 0], [0, 30], [85, 0]])
        dyx = -1800/3300
        assert_allclose(stats.somersd(table.T).statistic, dyx)

    def test_contingency_table_with_zero_rows_cols(self):
        # test that zero rows/cols in contingency table don't affect result
        N = 100
        shape = 4, 6
        size = np.prod(shape)
        np.random.seed(0)
        s = stats.multinomial.rvs(N, p=np.ones(size)/size).reshape(shape)
        res = stats.somersd(s)
        # insert a zero row, a zero column, and both
        s2 = np.insert(s, 2, np.zeros(shape[1]), axis=0)
        res2 = stats.somersd(s2)
        s3 = np.insert(s, 2, np.zeros(shape[0]), axis=1)
        res3 = stats.somersd(s3)
        s4 = np.insert(s2, 2, np.zeros(shape[0]+1), axis=1)
        res4 = stats.somersd(s4)
        # Cross-check with result from SAS FREQ:
        assert_allclose(res.statistic, -0.116981132075470, atol=1e-15)
        assert_allclose(res.statistic, res2.statistic)
        assert_allclose(res.statistic, res3.statistic)
        assert_allclose(res.statistic, res4.statistic)
        assert_allclose(res.pvalue, 0.156376448188150, atol=1e-15)
        assert_allclose(res.pvalue, res2.pvalue)
        assert_allclose(res.pvalue, res3.pvalue)
        assert_allclose(res.pvalue, res4.pvalue)

    def test_invalid_contingency_tables(self):
        N = 100
        shape = 4, 6
        size = np.prod(shape)
        np.random.seed(0)
        # start with a valid contingency table
        s = stats.multinomial.rvs(N, p=np.ones(size)/size).reshape(shape)
        s5 = s - 2
        message = "All elements of the contingency table must be non-negative"
        with assert_raises(ValueError, match=message):
            stats.somersd(s5)
        s6 = s + 0.01
        message = "All elements of the contingency table must be integer"
        with assert_raises(ValueError, match=message):
            stats.somersd(s6)
        message = ("At least two elements of the contingency "
                   "table must be nonzero.")
        with assert_raises(ValueError, match=message):
            stats.somersd([[]])
        with assert_raises(ValueError, match=message):
            stats.somersd([[1]])
        s7 = np.zeros((3, 3))
        with assert_raises(ValueError, match=message):
            stats.somersd(s7)
        s7[0, 1] = 1
        with assert_raises(ValueError, match=message):
            stats.somersd(s7)

    def test_only_ranks_matter(self):
        # only ranks of input data should matter
        x = [1, 2, 3]
        x2 = [-1, 2.1, np.inf]
        y = [3, 2, 1]
        y2 = [0, -0.5, -np.inf]
        res = stats.somersd(x, y)
        res2 = stats.somersd(x2, y2)
        assert_equal(res.statistic, res2.statistic)
        assert_equal(res.pvalue, res2.pvalue)

    def test_contingency_table_return(self):
        # check that contingency table is returned
        x = np.arange(10)
        y = np.arange(10)
        res = stats.somersd(x, y)
        assert_equal(res.table, np.eye(10))

    def test_somersd_alternative(self):
        # Test alternative parameter, asymptotic method (due to tie)
        # Based on scipy.stats.test_stats.TestCorrSpearman2::test_alternative
        x1 = [1, 2, 3, 4, 5]
        x2 = [5, 6, 7, 8, 7]
        # strong positive correlation
        expected = stats.somersd(x1, x2, alternative="two-sided")
        assert expected.statistic > 0
        # rank correlation > 0 -> large "less" p-value
        res = stats.somersd(x1, x2, alternative="less")
        assert_equal(res.statistic, expected.statistic)
        assert_allclose(res.pvalue, 1 - (expected.pvalue / 2))
        # rank correlation > 0 -> small "greater" p-value
        res = stats.somersd(x1, x2, alternative="greater")
        assert_equal(res.statistic, expected.statistic)
        assert_allclose(res.pvalue, expected.pvalue / 2)
        # reverse the direction of rank correlation (in-place!)
        x2.reverse()
        # strong negative correlation
        expected = stats.somersd(x1, x2, alternative="two-sided")
        assert expected.statistic < 0
        # rank correlation < 0 -> large "greater" p-value
        res = stats.somersd(x1, x2, alternative="greater")
        assert_equal(res.statistic, expected.statistic)
        assert_allclose(res.pvalue, 1 - (expected.pvalue / 2))
        # rank correlation < 0 -> small "less" p-value
        res = stats.somersd(x1, x2, alternative="less")
        assert_equal(res.statistic, expected.statistic)
        assert_allclose(res.pvalue, expected.pvalue / 2)
        with pytest.raises(ValueError, match="alternative must be 'less'..."):
            stats.somersd(x1, x2, alternative="ekki-ekki")

    @pytest.mark.parametrize("positive_correlation", (False, True))
    def test_somersd_perfect_correlation(self, positive_correlation):
        # Before the addition of `alternative`, perfect correlation was
        # treated as a special case. Now it is treated like any other case, but
        # make sure there are no divide by zero warnings or associated errors
        x1 = np.arange(10)
        x2 = x1 if positive_correlation else np.flip(x1)
        expected_statistic = 1 if positive_correlation else -1
        # perfect correlation -> small "two-sided" p-value (0)
        res = stats.somersd(x1, x2, alternative="two-sided")
        assert res.statistic == expected_statistic
        assert res.pvalue == 0
        # rank correlation > 0 -> large "less" p-value (1)
        res = stats.somersd(x1, x2, alternative="less")
        assert res.statistic == expected_statistic
        assert res.pvalue == (1 if positive_correlation else 0)
        # rank correlation > 0 -> small "greater" p-value (0)
        res = stats.somersd(x1, x2, alternative="greater")
        assert res.statistic == expected_statistic
        assert res.pvalue == (0 if positive_correlation else 1)
class TestBarnardExact:
    """Some tests to show that barnard_exact() works correctly.

    Expected (statistic, p-value) pairs were generated with the R
    `Barnard` package; the exact invocation is shown in each test's
    docstring.
    """

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[43, 40], [10, 39]], (3.555406779643, 0.000362832367)),
            ([[100, 2], [1000, 5]], (-1.776382925679, 0.135126970878)),
            ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)),
            ([[5, 1], [10, 10]], (1.449486150679, 0.156277546306)),
            ([[5, 15], [20, 20]], (-1.851640199545, 0.066363501421)),
            ([[5, 16], [20, 25]], (-1.609639949352, 0.116984852192)),
            ([[10, 5], [10, 1]], (-1.449486150679, 0.177536588915)),
            ([[5, 0], [1, 4]], (2.581988897472, 0.013671875000)),
            ([[0, 1], [3, 2]], (-1.095445115010, 0.509667991877)),
            ([[0, 2], [6, 4]], (-1.549193338483, 0.197019618792)),
            ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)),
        ],
    )
    def test_precise(self, input_sample, expected):
        """The expected values have been generated by R, using a resolution
        for the nuisance parameter of 1e-6 :
        ```R
        library(Barnard)
        options(digits=10)
        barnard.test(43, 40, 10, 39, dp=1e-6, pooled=TRUE)
        ```
        """
        res = barnard_exact(input_sample)
        statistic, pvalue = res.statistic, res.pvalue
        assert_allclose([statistic, pvalue], expected)

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[43, 40], [10, 39]], (3.920362887717, 0.000289470662)),
            ([[100, 2], [1000, 5]], (-1.139432816087, 0.950272080594)),
            ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)),
            ([[5, 1], [10, 10]], (1.622375939458, 0.150599922226)),
            ([[5, 15], [20, 20]], (-1.974771239528, 0.063038448651)),
            ([[5, 16], [20, 25]], (-1.722122973346, 0.133329494287)),
            ([[10, 5], [10, 1]], (-1.765469659009, 0.250566655215)),
            ([[5, 0], [1, 4]], (5.477225575052, 0.007812500000)),
            ([[0, 1], [3, 2]], (-1.224744871392, 0.509667991877)),
            ([[0, 2], [6, 4]], (-1.732050807569, 0.197019618792)),
            ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)),
        ],
    )
    def test_pooled_param(self, input_sample, expected):
        """The expected values have been generated by R, using a resolution
        for the nuisance parameter of 1e-6 :
        ```R
        library(Barnard)
        options(digits=10)
        barnard.test(43, 40, 10, 39, dp=1e-6, pooled=FALSE)
        ```
        """
        res = barnard_exact(input_sample, pooled=False)
        statistic, pvalue = res.statistic, res.pvalue
        assert_allclose([statistic, pvalue], expected)

    def test_raises(self):
        # test we raise an error for wrong input number of nuisances.
        error_msg = (
            "Number of points `n` must be strictly positive, found 0"
        )
        with assert_raises(ValueError, match=error_msg):
            barnard_exact([[1, 2], [3, 4]], n=0)

        # test we raise an error for wrong shape of input.
        error_msg = "The input `table` must be of shape \\(2, 2\\)."
        with assert_raises(ValueError, match=error_msg):
            barnard_exact(np.arange(6).reshape(2, 3))

        # Test all values must be positives
        error_msg = "All values in `table` must be nonnegative."
        with assert_raises(ValueError, match=error_msg):
            barnard_exact([[-1, 2], [3, 4]])

        # Test value error on wrong alternative param
        error_msg = (
            "`alternative` should be one of {'two-sided', 'less', 'greater'},"
            " found .*"
        )
        with assert_raises(ValueError, match=error_msg):
            barnard_exact([[1, 2], [3, 4]], "not-correct")

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            # NOTE: `expected` is ordered (pvalue, statistic) here; see the
            # assertions below.
            ([[0, 0], [4, 3]], (1.0, 0)),
        ],
    )
    def test_edge_cases(self, input_sample, expected):
        res = barnard_exact(input_sample)
        statistic, pvalue = res.statistic, res.pvalue
        assert_equal(pvalue, expected[0])
        assert_equal(statistic, expected[1])

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            # NOTE: `expected` is ordered (pvalue, statistic) here; see the
            # assertions below.
            ([[0, 5], [0, 10]], (1.0, np.nan)),
            ([[5, 0], [10, 0]], (1.0, np.nan)),
        ],
    )
    def test_row_or_col_zero(self, input_sample, expected):
        res = barnard_exact(input_sample)
        statistic, pvalue = res.statistic, res.pvalue
        assert_equal(pvalue, expected[0])
        assert_equal(statistic, expected[1])

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[2, 7], [8, 2]], (-2.518474945157, 0.009886140845)),
            ([[7, 200], [300, 8]], (-21.320036698460, 0.0)),
            ([[21, 28], [1957, 6]], (-30.489638143953, 0.0)),
        ],
    )
    @pytest.mark.parametrize("alternative", ["greater", "less"])
    def test_less_greater(self, input_sample, expected, alternative):
        """
        "The expected values have been generated by R, using a resolution
        for the nuisance parameter of 1e-6 :
        ```R
        library(Barnard)
        options(digits=10)
        a = barnard.test(2, 7, 8, 2, dp=1e-6, pooled=TRUE)
        a$p.value[1]
        ```
        In this test, we are using the "one-sided" return value `a$p.value[1]`
        to test our pvalue.
        """
        expected_stat, less_pvalue_expect = expected

        if alternative == "greater":
            # mirror the table columns to turn a "less" case into the
            # equivalent "greater" case; the statistic changes sign
            input_sample = np.array(input_sample)[:, ::-1]
            expected_stat = -expected_stat

        res = barnard_exact(input_sample, alternative=alternative)
        statistic, pvalue = res.statistic, res.pvalue
        assert_allclose(
            [statistic, pvalue], [expected_stat, less_pvalue_expect], atol=1e-7
        )
class TestBoschlooExact:
    """Some tests to show that boschloo_exact() works correctly.

    Expected (statistic, p-value) pairs were generated with the R
    `Exact` package; the exact invocation is shown in each test's
    docstring.
    """

    # Absolute tolerance for comparison against the R reference values.
    ATOL = 1e-7

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[2, 7], [8, 2]], (0.01852173, 0.009886142)),
            ([[5, 1], [10, 10]], (0.9782609, 0.9450994)),
            ([[5, 16], [20, 25]], (0.08913823, 0.05827348)),
            ([[10, 5], [10, 1]], (0.1652174, 0.08565611)),
            ([[5, 0], [1, 4]], (1, 1)),
            ([[0, 1], [3, 2]], (0.5, 0.34375)),
            ([[2, 7], [8, 2]], (0.01852173, 0.009886142)),
            ([[7, 12], [8, 3]], (0.06406797, 0.03410916)),
            ([[10, 24], [25, 37]], (0.2009359, 0.1512882)),
        ],
    )
    def test_less(self, input_sample, expected):
        """The expected values have been generated by R, using a resolution
        for the nuisance parameter of 1e-8 :
        ```R
        library(Exact)
        options(digits=10)
        data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
        a = exact.test(data, method="Boschloo", alternative="less",
                       tsmethod="central", np.interval=TRUE, beta=1e-8)
        ```
        """
        res = boschloo_exact(input_sample, alternative="less")
        statistic, pvalue = res.statistic, res.pvalue
        assert_allclose([statistic, pvalue], expected, atol=self.ATOL)

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[43, 40], [10, 39]], (0.0002875544, 0.0001615562)),
            ([[2, 7], [8, 2]], (0.9990149, 0.9918327)),
            ([[5, 1], [10, 10]], (0.1652174, 0.09008534)),
            ([[5, 15], [20, 20]], (0.9849087, 0.9706997)),
            ([[5, 16], [20, 25]], (0.972349, 0.9524124)),
            ([[5, 0], [1, 4]], (0.02380952, 0.006865367)),
            ([[0, 1], [3, 2]], (1, 1)),
            ([[0, 2], [6, 4]], (1, 1)),
            ([[2, 7], [8, 2]], (0.9990149, 0.9918327)),
            ([[7, 12], [8, 3]], (0.9895302, 0.9771215)),
            ([[10, 24], [25, 37]], (0.9012936, 0.8633275)),
        ],
    )
    def test_greater(self, input_sample, expected):
        """The expected values have been generated by R, using a resolution
        for the nuisance parameter of 1e-8 :
        ```R
        library(Exact)
        options(digits=10)
        data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
        a = exact.test(data, method="Boschloo", alternative="greater",
                       tsmethod="central", np.interval=TRUE, beta=1e-8)
        ```
        """
        res = boschloo_exact(input_sample, alternative="greater")
        statistic, pvalue = res.statistic, res.pvalue
        assert_allclose([statistic, pvalue], expected, atol=self.ATOL)

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[43, 40], [10, 39]], (0.0002875544, 0.0003231115)),
            ([[2, 7], [8, 2]], (0.01852173, 0.01977228)),
            ([[5, 1], [10, 10]], (0.1652174, 0.1801707)),
            ([[5, 16], [20, 25]], (0.08913823, 0.116547)),
            ([[5, 0], [1, 4]], (0.02380952, 0.01373073)),
            ([[0, 1], [3, 2]], (0.5, 0.6875)),
            ([[2, 7], [8, 2]], (0.01852173, 0.01977228)),
            ([[7, 12], [8, 3]], (0.06406797, 0.06821831)),
        ],
    )
    def test_two_sided(self, input_sample, expected):
        """The expected values have been generated by R, using a resolution
        for the nuisance parameter of 1e-8 :
        ```R
        library(Exact)
        options(digits=10)
        data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE)
        a = exact.test(data, method="Boschloo", alternative="two.sided",
                       tsmethod="central", np.interval=TRUE, beta=1e-8)
        ```
        """
        res = boschloo_exact(input_sample, alternative="two-sided", n=64)
        # Need n = 64 for python 32-bit
        statistic, pvalue = res.statistic, res.pvalue
        assert_allclose([statistic, pvalue], expected, atol=self.ATOL)

    def test_raises(self):
        # test we raise an error for wrong input number of nuisances.
        error_msg = (
            "Number of points `n` must be strictly positive, found 0"
        )
        with assert_raises(ValueError, match=error_msg):
            boschloo_exact([[1, 2], [3, 4]], n=0)

        # test we raise an error for wrong shape of input.
        error_msg = "The input `table` must be of shape \\(2, 2\\)."
        with assert_raises(ValueError, match=error_msg):
            boschloo_exact(np.arange(6).reshape(2, 3))

        # Test all values must be positives
        error_msg = "All values in `table` must be nonnegative."
        with assert_raises(ValueError, match=error_msg):
            boschloo_exact([[-1, 2], [3, 4]])

        # Test value error on wrong alternative param
        error_msg = (
            r"`alternative` should be one of \('two-sided', 'less', "
            r"'greater'\), found .*"
        )
        with assert_raises(ValueError, match=error_msg):
            boschloo_exact([[1, 2], [3, 4]], "not-correct")

    @pytest.mark.parametrize(
        "input_sample,expected",
        [
            ([[0, 5], [0, 10]], (np.nan, np.nan)),
            ([[5, 0], [10, 0]], (np.nan, np.nan)),
        ],
    )
    def test_row_or_col_zero(self, input_sample, expected):
        res = boschloo_exact(input_sample)
        statistic, pvalue = res.statistic, res.pvalue
        assert_equal(pvalue, expected[0])
        assert_equal(statistic, expected[1])

    def test_two_sided_gt_1(self):
        # Check that returned p-value does not exceed 1 even when twice
        # the minimum of the one-sided p-values does. See gh-15345.
        tbl = [[1, 1], [13, 12]]
        pl = boschloo_exact(tbl, alternative='less').pvalue
        pg = boschloo_exact(tbl, alternative='greater').pvalue
        assert 2*min(pl, pg) > 1
        pt = boschloo_exact(tbl, alternative='two-sided').pvalue
        assert pt == 1.0

    @pytest.mark.parametrize("alternative", ("less", "greater"))
    def test_against_fisher_exact(self, alternative):
        # Check that the statistic of `boschloo_exact` is the same as the
        # p-value of `fisher_exact` (for one-sided tests). See gh-15345.
        tbl = [[2, 7], [8, 2]]
        boschloo_stat = boschloo_exact(tbl, alternative=alternative).statistic
        fisher_p = stats.fisher_exact(tbl, alternative=alternative)[1]
        assert_allclose(boschloo_stat, fisher_p)
class TestCvm_2samp:
    """Tests for the two-sample Cramer-von Mises test
    (`cramervonmises_2samp`), including its exact p-value helper
    `_pval_cvm_2samp_exact`."""

    def test_invalid_input(self):
        x = np.arange(10).reshape((2, 5))
        y = np.arange(5)
        msg = 'The samples must be one-dimensional'
        with pytest.raises(ValueError, match=msg):
            cramervonmises_2samp(x, y)
        with pytest.raises(ValueError, match=msg):
            cramervonmises_2samp(y, x)
        msg = 'x and y must contain at least two observations.'
        with pytest.raises(ValueError, match=msg):
            cramervonmises_2samp([], y)
        with pytest.raises(ValueError, match=msg):
            cramervonmises_2samp(y, [1])
        msg = 'method must be either auto, exact or asymptotic'
        with pytest.raises(ValueError, match=msg):
            cramervonmises_2samp(y, y, 'xyz')

    def test_list_input(self):
        # list and ndarray inputs must give identical results
        x = [2, 3, 4, 7, 6]
        y = [0.2, 0.7, 12, 18]
        r1 = cramervonmises_2samp(x, y)
        r2 = cramervonmises_2samp(np.array(x), np.array(y))
        assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue))

    def test_example_conover(self):
        # Example 2 in Section 6.2 of W.J. Conover: Practical Nonparametric
        # Statistics, 1971.
        x = [7.6, 8.4, 8.6, 8.7, 9.3, 9.9, 10.1, 10.6, 11.2]
        y = [5.2, 5.7, 5.9, 6.5, 6.8, 8.2, 9.1, 9.8, 10.8, 11.3, 11.5, 12.3,
             12.5, 13.4, 14.6]
        r = cramervonmises_2samp(x, y)
        assert_allclose(r.statistic, 0.262, atol=1e-3)
        assert_allclose(r.pvalue, 0.18, atol=1e-2)

    @pytest.mark.parametrize('statistic, m, n, pval',
                             [(710, 5, 6, 48./462),
                              (1897, 7, 7, 117./1716),
                              (576, 4, 6, 2./210),
                              (1764, 6, 7, 2./1716)])
    def test_exact_pvalue(self, statistic, m, n, pval):
        # the exact values are taken from Anderson: On the distribution of the
        # two-sample Cramer-von-Mises criterion, 1962.
        # The values are taken from Table 2, 3, 4 and 5
        assert_equal(_pval_cvm_2samp_exact(statistic, m, n), pval)

    def test_large_sample(self):
        # for large samples, the statistic U gets very large
        # do a sanity check that p-value is not 0, 1 or nan
        np.random.seed(4367)
        x = distributions.norm.rvs(size=1000000)
        y = distributions.norm.rvs(size=900000)
        r = cramervonmises_2samp(x, y)
        assert_(0 < r.pvalue < 1)
        r = cramervonmises_2samp(x, y+0.1)
        assert_(0 < r.pvalue < 1)

    def test_exact_vs_asymptotic(self):
        np.random.seed(0)
        x = np.random.rand(7)
        y = np.random.rand(8)
        r1 = cramervonmises_2samp(x, y, method='exact')
        r2 = cramervonmises_2samp(x, y, method='asymptotic')
        assert_equal(r1.statistic, r2.statistic)
        assert_allclose(r1.pvalue, r2.pvalue, atol=1e-2)

    def test_method_auto(self):
        x = np.arange(20)
        y = [0.5, 4.7, 13.1]
        r1 = cramervonmises_2samp(x, y, method='exact')
        r2 = cramervonmises_2samp(x, y, method='auto')
        assert_equal(r1.pvalue, r2.pvalue)
        # switch to asymptotic if one sample has more than 20 observations
        x = np.arange(21)
        r1 = cramervonmises_2samp(x, y, method='asymptotic')
        r2 = cramervonmises_2samp(x, y, method='auto')
        assert_equal(r1.pvalue, r2.pvalue)

    def test_same_input(self):
        # make sure trivial edge case can be handled
        # note that _cdf_cvm_inf(0) = nan. implementation avoids nan by
        # returning pvalue=1 for very small values of the statistic
        x = np.arange(15)
        res = cramervonmises_2samp(x, x)
        assert_equal((res.statistic, res.pvalue), (0.0, 1.0))
        # check exact p-value
        res = cramervonmises_2samp(x[:4], x[:4])
        assert_equal((res.statistic, res.pvalue), (0.0, 1.0))
  1197. class TestTukeyHSD:
# Fixture samples for tukey_hsd: three treatment groups each, with equal,
# unequal, and extremely unequal group sizes.
data_same_size = ([24.5, 23.5, 26.4, 27.1, 29.9],
                  [28.4, 34.2, 29.5, 32.2, 30.1],
                  [26.1, 28.3, 24.3, 26.2, 27.8])
data_diff_size = ([24.5, 23.5, 26.28, 26.4, 27.1, 29.9, 30.1, 30.1],
                  [28.4, 34.2, 29.5, 32.2, 30.1],
                  [26.1, 28.3, 24.3, 26.2, 27.8])
extreme_size = ([24.5, 23.5, 26.4],
                [28.4, 34.2, 29.5, 32.2, 30.1, 28.4, 34.2, 29.5, 32.2,
                 30.1],
                [26.1, 28.3, 24.3, 26.2, 27.8])

# SAS PROC ANOVA / TUKEY CLDIFF reference output for the samples above;
# parsed by test_compare_sas (one "i - j lower diff upper significant"
# row per pairwise comparison).
sas_same_size = """
Comparison LowerCL Difference UpperCL Significance
2 - 3 0.6908830568 4.34 7.989116943 1
2 - 1 0.9508830568 4.6 8.249116943 1
3 - 2 -7.989116943 -4.34 -0.6908830568 1
3 - 1 -3.389116943 0.26 3.909116943 0
1 - 2 -8.249116943 -4.6 -0.9508830568 1
1 - 3 -3.909116943 -0.26 3.389116943 0
"""

sas_diff_size = """
Comparison LowerCL Difference UpperCL Significance
2 - 1 0.2679292645 3.645 7.022070736 1
2 - 3 0.5934764007 4.34 8.086523599 1
1 - 2 -7.022070736 -3.645 -0.2679292645 1
1 - 3 -2.682070736 0.695 4.072070736 0
3 - 2 -8.086523599 -4.34 -0.5934764007 1
3 - 1 -4.072070736 -0.695 2.682070736 0
"""

sas_extreme = """
Comparison LowerCL Difference UpperCL Significance
2 - 3 1.561605075 4.34 7.118394925 1
2 - 1 2.740784879 6.08 9.419215121 1
3 - 2 -7.118394925 -4.34 -1.561605075 1
3 - 1 -1.964526566 1.74 5.444526566 0
1 - 2 -9.419215121 -6.08 -2.740784879 1
1 - 3 -5.444526566 -1.74 1.964526566 0
"""
@pytest.mark.parametrize("data,res_expect_str,atol",
                         ((data_same_size, sas_same_size, 1e-4),
                          (data_diff_size, sas_diff_size, 1e-4),
                          (extreme_size, sas_extreme, 1e-10),
                          ),
                         ids=["equal size sample",
                              "unequal sample size",
                              "extreme sample size differences"])
def test_compare_sas(self, data, res_expect_str, atol):
    '''
    Compare tukey_hsd confidence intervals, statistics, and significance
    flags against SAS PROC ANOVA output.

    SAS code used to generate results for each sample:
    DATA ACHE;
    INPUT BRAND RELIEF;
    CARDS;
    1 24.5
    ...
    3 27.8
    ;
    ods graphics on; ODS RTF;ODS LISTING CLOSE;
    PROC ANOVA DATA=ACHE;
    CLASS BRAND;
    MODEL RELIEF=BRAND;
    MEANS BRAND/TUKEY CLDIFF;
    TITLE 'COMPARE RELIEF ACROSS MEDICINES  - ANOVA EXAMPLE';
    ods output CLDiffs =tc;
    proc print data=tc;
    format LowerCL 17.16 UpperCL 17.16 Difference 17.16;
    title "Output with many digits";
    RUN;
    QUIT;
    ODS RTF close;
    ODS LISTING;
    '''
    # [5:] drops the five header tokens; each remaining row is
    # (i, j, lower, statistic, upper, significance)
    res_expect = np.asarray(res_expect_str.replace(" - ", " ").split()[5:],
                            dtype=float).reshape((6, 6))
    res_tukey = stats.tukey_hsd(*data)
    conf = res_tukey.confidence_interval()
    # loop over the comparisons
    for i, j, l, s, h, sig in res_expect:
        i, j = int(i) - 1, int(j) - 1  # SAS groups are 1-based
        assert_allclose(conf.low[i, j], l, atol=atol)
        assert_allclose(res_tukey.statistic[i, j], s, atol=atol)
        assert_allclose(conf.high[i, j], h, atol=atol)
        assert_allclose((res_tukey.pvalue[i, j] <= .05), sig == 1)
# Reference tables from MATLAB's `multcompare` with "CType","hsd" (the
# generating script is reproduced in the docstring of
# `test_compare_matlab`).  Row format: group i, group j, lower confidence
# limit, mean difference, upper confidence limit, p-value.  Parsed with
# `.split()`; do not edit the string contents.
matlab_sm_siz = """
1 2 -8.2491590248597 -4.6 -0.9508409751403 0.0144483269098
1 3 -3.9091590248597 -0.26 3.3891590248597 0.9803107240900
2 3 0.6908409751403 4.34 7.9891590248597 0.0203311368795
"""
# MATLAB table for the unequal-sample-size data.
matlab_diff_sz = """
1 2 -7.02207069748501 -3.645 -0.26792930251500 0.03371498443080
1 3 -2.68207069748500 0.695 4.07207069748500 0.85572267328807
2 3 0.59347644287720 4.34 8.08652355712281 0.02259047020620
"""
  1289. @pytest.mark.parametrize("data,res_expect_str,atol",
  1290. ((data_same_size, matlab_sm_siz, 1e-12),
  1291. (data_diff_size, matlab_diff_sz, 1e-7)),
  1292. ids=["equal size sample",
  1293. "unequal size sample"])
  1294. def test_compare_matlab(self, data, res_expect_str, atol):
  1295. """
  1296. vals = [24.5, 23.5, 26.4, 27.1, 29.9, 28.4, 34.2, 29.5, 32.2, 30.1,
  1297. 26.1, 28.3, 24.3, 26.2, 27.8]
  1298. names = {'zero', 'zero', 'zero', 'zero', 'zero', 'one', 'one', 'one',
  1299. 'one', 'one', 'two', 'two', 'two', 'two', 'two'}
  1300. [p,t,stats] = anova1(vals,names,"off");
  1301. [c,m,h,nms] = multcompare(stats, "CType","hsd");
  1302. """
  1303. res_expect = np.asarray(res_expect_str.split(),
  1304. dtype=float).reshape((3, 6))
  1305. res_tukey = stats.tukey_hsd(*data)
  1306. conf = res_tukey.confidence_interval()
  1307. # loop over the comparisons
  1308. for i, j, l, s, h, p in res_expect:
  1309. i, j = int(i) - 1, int(j) - 1
  1310. assert_allclose(conf.low[i, j], l, atol=atol)
  1311. assert_allclose(res_tukey.statistic[i, j], s, atol=atol)
  1312. assert_allclose(conf.high[i, j], h, atol=atol)
  1313. assert_allclose(res_tukey.pvalue[i, j], p, atol=atol)
  1314. def test_compare_r(self):
  1315. """
  1316. Testing against results and p-values from R:
  1317. from: https://www.rdocumentation.org/packages/stats/versions/3.6.2/
  1318. topics/TukeyHSD
  1319. > require(graphics)
  1320. > summary(fm1 <- aov(breaks ~ tension, data = warpbreaks))
  1321. > TukeyHSD(fm1, "tension", ordered = TRUE)
  1322. > plot(TukeyHSD(fm1, "tension"))
  1323. Tukey multiple comparisons of means
  1324. 95% family-wise confidence level
  1325. factor levels have been ordered
  1326. Fit: aov(formula = breaks ~ tension, data = warpbreaks)
  1327. $tension
  1328. """
  1329. str_res = """
  1330. diff lwr upr p adj
  1331. 2 - 3 4.722222 -4.8376022 14.28205 0.4630831
  1332. 1 - 3 14.722222 5.1623978 24.28205 0.0014315
  1333. 1 - 2 10.000000 0.4401756 19.55982 0.0384598
  1334. """
  1335. res_expect = np.asarray(str_res.replace(" - ", " ").split()[5:],
  1336. dtype=float).reshape((3, 6))
  1337. data = ([26, 30, 54, 25, 70, 52, 51, 26, 67,
  1338. 27, 14, 29, 19, 29, 31, 41, 20, 44],
  1339. [18, 21, 29, 17, 12, 18, 35, 30, 36,
  1340. 42, 26, 19, 16, 39, 28, 21, 39, 29],
  1341. [36, 21, 24, 18, 10, 43, 28, 15, 26,
  1342. 20, 21, 24, 17, 13, 15, 15, 16, 28])
  1343. res_tukey = stats.tukey_hsd(*data)
  1344. conf = res_tukey.confidence_interval()
  1345. # loop over the comparisons
  1346. for i, j, s, l, h, p in res_expect:
  1347. i, j = int(i) - 1, int(j) - 1
  1348. # atols are set to the number of digits present in the r result.
  1349. assert_allclose(conf.low[i, j], l, atol=1e-7)
  1350. assert_allclose(res_tukey.statistic[i, j], s, atol=1e-6)
  1351. assert_allclose(conf.high[i, j], h, atol=1e-5)
  1352. assert_allclose(res_tukey.pvalue[i, j], p, atol=1e-7)
  1353. def test_engineering_stat_handbook(self):
  1354. '''
  1355. Example sourced from:
  1356. https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm
  1357. '''
  1358. group1 = [6.9, 5.4, 5.8, 4.6, 4.0]
  1359. group2 = [8.3, 6.8, 7.8, 9.2, 6.5]
  1360. group3 = [8.0, 10.5, 8.1, 6.9, 9.3]
  1361. group4 = [5.8, 3.8, 6.1, 5.6, 6.2]
  1362. res = stats.tukey_hsd(group1, group2, group3, group4)
  1363. conf = res.confidence_interval()
  1364. lower = np.asarray([
  1365. [0, 0, 0, -2.25],
  1366. [.29, 0, -2.93, .13],
  1367. [1.13, 0, 0, .97],
  1368. [0, 0, 0, 0]])
  1369. upper = np.asarray([
  1370. [0, 0, 0, 1.93],
  1371. [4.47, 0, 1.25, 4.31],
  1372. [5.31, 0, 0, 5.15],
  1373. [0, 0, 0, 0]])
  1374. for (i, j) in [(1, 0), (2, 0), (0, 3), (1, 2), (2, 3)]:
  1375. assert_allclose(conf.low[i, j], lower[i, j], atol=1e-2)
  1376. assert_allclose(conf.high[i, j], upper[i, j], atol=1e-2)
  1377. def test_rand_symm(self):
  1378. # test some expected identities of the results
  1379. np.random.seed(1234)
  1380. data = np.random.rand(3, 100)
  1381. res = stats.tukey_hsd(*data)
  1382. conf = res.confidence_interval()
  1383. # the confidence intervals should be negated symmetric of each other
  1384. assert_equal(conf.low, -conf.high.T)
  1385. # the `high` and `low` center diagonals should be the same since the
  1386. # mean difference in a self comparison is 0.
  1387. assert_equal(np.diagonal(conf.high), conf.high[0, 0])
  1388. assert_equal(np.diagonal(conf.low), conf.low[0, 0])
  1389. # statistic array should be antisymmetric with zeros on the diagonal
  1390. assert_equal(res.statistic, -res.statistic.T)
  1391. assert_equal(np.diagonal(res.statistic), 0)
  1392. # p-values should be symmetric and 1 when compared to itself
  1393. assert_equal(res.pvalue, res.pvalue.T)
  1394. assert_equal(np.diagonal(res.pvalue), 1)
  1395. def test_no_inf(self):
  1396. with assert_raises(ValueError, match="...must be finite."):
  1397. stats.tukey_hsd([1, 2, 3], [2, np.inf], [6, 7, 3])
  1398. def test_is_1d(self):
  1399. with assert_raises(ValueError, match="...must be one-dimensional"):
  1400. stats.tukey_hsd([[1, 2], [2, 3]], [2, 5], [5, 23, 6])
  1401. def test_no_empty(self):
  1402. with assert_raises(ValueError, match="...must be greater than one"):
  1403. stats.tukey_hsd([], [2, 5], [4, 5, 6])
  1404. @pytest.mark.parametrize("nargs", (0, 1))
  1405. def test_not_enough_treatments(self, nargs):
  1406. with assert_raises(ValueError, match="...more than 1 treatment."):
  1407. stats.tukey_hsd(*([[23, 7, 3]] * nargs))
  1408. @pytest.mark.parametrize("cl", [-.5, 0, 1, 2])
  1409. def test_conf_level_invalid(self, cl):
  1410. with assert_raises(ValueError, match="must be between 0 and 1"):
  1411. r = stats.tukey_hsd([23, 7, 3], [3, 4], [9, 4])
  1412. r.confidence_interval(cl)
  1413. def test_2_args_ttest(self):
  1414. # that with 2 treatments the `pvalue` is equal to that of `ttest_ind`
  1415. res_tukey = stats.tukey_hsd(*self.data_diff_size[:2])
  1416. res_ttest = stats.ttest_ind(*self.data_diff_size[:2])
  1417. assert_allclose(res_ttest.pvalue, res_tukey.pvalue[0, 1])
  1418. assert_allclose(res_ttest.pvalue, res_tukey.pvalue[1, 0])
class TestPoissonMeansTest:
    """Tests for `stats.poisson_means_test` (the Poisson means E-test)."""

    @pytest.mark.parametrize("c1, n1, c2, n2, p_expect", (
        # example from [1], 6. Illustrative examples: Example 1
        [0, 100, 3, 100, 0.0884],
        [2, 100, 6, 100, 0.1749]
    ))
    def test_paper_examples(self, c1, n1, c2, n2, p_expect):
        """p-values should match the worked examples from the paper."""
        res = stats.poisson_means_test(c1, n1, c2, n2)
        assert_allclose(res.pvalue, p_expect, atol=1e-4)
  1428. @pytest.mark.parametrize("c1, n1, c2, n2, p_expect, alt, d", (
  1429. # These test cases are produced by the wrapped fortran code from the
  1430. # original authors. Using a slightly modified version of this fortran,
  1431. # found here, https://github.com/nolanbconaway/poisson-etest,
  1432. # additional tests were created.
  1433. [20, 10, 20, 10, 0.9999997568929630, 'two-sided', 0],
  1434. [10, 10, 10, 10, 0.9999998403241203, 'two-sided', 0],
  1435. [50, 15, 1, 1, 0.09920321053409643, 'two-sided', .05],
  1436. [3, 100, 20, 300, 0.12202725450896404, 'two-sided', 0],
  1437. [3, 12, 4, 20, 0.40416087318539173, 'greater', 0],
  1438. [4, 20, 3, 100, 0.008053640402974236, 'greater', 0],
  1439. # publishing paper does not include a `less` alternative,
  1440. # so it was calculated with switched argument order and
  1441. # alternative="greater"
  1442. [4, 20, 3, 10, 0.3083216325432898, 'less', 0],
  1443. [1, 1, 50, 15, 0.09322998607245102, 'less', 0]
  1444. ))
  1445. def test_fortran_authors(self, c1, n1, c2, n2, p_expect, alt, d):
  1446. res = stats.poisson_means_test(c1, n1, c2, n2, alternative=alt, diff=d)
  1447. assert_allclose(res.pvalue, p_expect, atol=2e-6, rtol=1e-16)
  1448. def test_different_results(self):
  1449. # The implementation in Fortran is known to break down at higher
  1450. # counts and observations, so we expect different results. By
  1451. # inspection we can infer the p-value to be near one.
  1452. count1, count2 = 10000, 10000
  1453. nobs1, nobs2 = 10000, 10000
  1454. res = stats.poisson_means_test(count1, nobs1, count2, nobs2)
  1455. assert_allclose(res.pvalue, 1)
  1456. def test_less_than_zero_lambda_hat2(self):
  1457. # demonstrates behavior that fixes a known fault from original Fortran.
  1458. # p-value should clearly be near one.
  1459. count1, count2 = 0, 0
  1460. nobs1, nobs2 = 1, 1
  1461. res = stats.poisson_means_test(count1, nobs1, count2, nobs2)
  1462. assert_allclose(res.pvalue, 1)
def test_input_validation(self):
    """Invalid arguments should raise with informative messages."""
    count1, count2 = 0, 0
    nobs1, nobs2 = 1, 1
    # test non-integral events
    message = '`k1` and `k2` must be integers.'
    with assert_raises(TypeError, match=message):
        stats.poisson_means_test(.7, nobs1, count2, nobs2)
    with assert_raises(TypeError, match=message):
        stats.poisson_means_test(count1, nobs1, .7, nobs2)
    # test negative events
    message = '`k1` and `k2` must be greater than or equal to 0.'
    with assert_raises(ValueError, match=message):
        stats.poisson_means_test(-1, nobs1, count2, nobs2)
    with assert_raises(ValueError, match=message):
        stats.poisson_means_test(count1, nobs1, -1, nobs2)
    # test negative sample size
    message = '`n1` and `n2` must be greater than 0.'
    with assert_raises(ValueError, match=message):
        stats.poisson_means_test(count1, -1, count2, nobs2)
    with assert_raises(ValueError, match=message):
        stats.poisson_means_test(count1, nobs1, count2, -1)
    # test negative difference
    message = 'diff must be greater than or equal to 0.'
    with assert_raises(ValueError, match=message):
        stats.poisson_means_test(count1, nobs1, count2, nobs2, diff=-1)
    # test invalid alternative
    message = 'Alternative must be one of ...'
    with assert_raises(ValueError, match=message):
        stats.poisson_means_test(1, 2, 1, 2, alternative='error')