# Author: Travis Oliphant, 2002
#
# Further enhancements and tests added by numerous SciPy developers.
#
import warnings
import sys
import numpy as np
from numpy.random import RandomState
from numpy.testing import (assert_array_equal,
                           assert_almost_equal, assert_array_less,
                           assert_array_almost_equal, assert_,
                           assert_allclose, assert_equal, suppress_warnings)
import pytest
from pytest import raises as assert_raises
import re
from scipy import optimize
from scipy import stats
from scipy.stats._morestats import _abw_state
from .common_tests import check_named_results
from .._hypotests import _get_wilcoxon_distr, _get_wilcoxon_distr2
from scipy.stats._binomtest import _binary_search_for_binom_tst
from scipy.stats._distr_params import distcont

distcont = dict(distcont)  # type: ignore

# Matplotlib is not a scipy dependency but is optionally used in probplot, so
# check if it's available
try:
    import matplotlib
    matplotlib.rcParams['backend'] = 'Agg'
    import matplotlib.pyplot as plt
    have_matplotlib = True
except Exception:
    have_matplotlib = False

# test data gear.dat from NIST for Levene and Bartlett test
# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3581.htm
g1 = [1.006, 0.996, 0.998, 1.000, 0.992, 0.993, 1.002, 0.999, 0.994, 1.000]
g2 = [0.998, 1.006, 1.000, 1.002, 0.997, 0.998, 0.996, 1.000, 1.006, 0.988]
g3 = [0.991, 0.987, 0.997, 0.999, 0.995, 0.994, 1.000, 0.999, 0.996, 0.996]
g4 = [1.005, 1.002, 0.994, 1.000, 0.995, 0.994, 0.998, 0.996, 1.002, 0.996]
g5 = [0.998, 0.998, 0.982, 0.990, 1.002, 0.984, 0.996, 0.993, 0.980, 0.996]
g6 = [1.009, 1.013, 1.009, 0.997, 0.988, 1.002, 0.995, 0.998, 0.981, 0.996]
g7 = [0.990, 1.004, 0.996, 1.001, 0.998, 1.000, 1.018, 1.010, 0.996, 1.002]
g8 = [0.998, 1.000, 1.006, 1.000, 1.002, 0.996, 0.998, 0.996, 1.002, 1.006]
g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991]
g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997]


# The loggamma RVS stream is changing due to gh-13349; this version
# preserves the old stream so that tests don't change.
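# (If X is gamma(c)-distributed, then log(X) follows the loggamma(c)
# distribution, so taking the log of gamma variates reproduces the old
# loggamma draws.)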
def _old_loggamma_rvs(*args, **kwargs):
    return np.log(stats.gamma.rvs(*args, **kwargs))


class TestBayes_mvs:
    def test_basic(self):
        # Expected values in this test simply taken from the function.  For
        # some checks regarding correctness of implementation, see review in
        # gh-674
        data = [6, 9, 12, 7, 8, 8, 13]
        mean, var, std = stats.bayes_mvs(data)
        assert_almost_equal(mean.statistic, 9.0)
        assert_allclose(mean.minmax, (7.1036502226125329, 10.896349777387467),
                        rtol=1e-14)

        assert_almost_equal(var.statistic, 10.0)
        assert_allclose(var.minmax, (3.1767242068607087, 24.45910381334018),
                        rtol=1e-09)

        assert_almost_equal(std.statistic, 2.9724954732045084, decimal=14)
        assert_allclose(std.minmax, (1.7823367265645145, 4.9456146050146312),
                        rtol=1e-14)

    def test_empty_input(self):
        assert_raises(ValueError, stats.bayes_mvs, [])

    def test_result_attributes(self):
        x = np.arange(15)
        attributes = ('statistic', 'minmax')
        res = stats.bayes_mvs(x)
        for i in res:
            check_named_results(i, attributes)


class TestMvsdist:
    def test_basic(self):
        data = [6, 9, 12, 7, 8, 8, 13]
        mean, var, std = stats.mvsdist(data)
        assert_almost_equal(mean.mean(), 9.0)
        assert_allclose(mean.interval(0.9), (7.1036502226125329,
                                             10.896349777387467), rtol=1e-14)

        assert_almost_equal(var.mean(), 10.0)
        assert_allclose(var.interval(0.9), (3.1767242068607087,
                                            24.45910381334018), rtol=1e-09)

        assert_almost_equal(std.mean(), 2.9724954732045084, decimal=14)
        assert_allclose(std.interval(0.9), (1.7823367265645145,
                                            4.9456146050146312), rtol=1e-14)

    def test_empty_input(self):
        assert_raises(ValueError, stats.mvsdist, [])

    def test_bad_arg(self):
        # Raise ValueError if fewer than two data points are given.
        data = [1]
        assert_raises(ValueError, stats.mvsdist, data)

    def test_warns(self):
        # regression test for gh-5270
        # make sure there are no spurious divide-by-zero warnings
        with warnings.catch_warnings():
            warnings.simplefilter('error', RuntimeWarning)
            [x.mean() for x in stats.mvsdist([1, 2, 3])]
            [x.mean() for x in stats.mvsdist([1, 2, 3, 4, 5])]


class TestShapiro:
    def test_basic(self):
        x1 = [0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
              4.43, 0.21, 4.75, 0.71, 1.52, 3.24,
              0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]
        w, pw = stats.shapiro(x1)
        shapiro_test = stats.shapiro(x1)
        assert_almost_equal(w, 0.90047299861907959, decimal=6)
        assert_almost_equal(shapiro_test.statistic, 0.90047299861907959, decimal=6)
        assert_almost_equal(pw, 0.042089745402336121, decimal=6)
        assert_almost_equal(shapiro_test.pvalue, 0.042089745402336121, decimal=6)

        x2 = [1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
              3.48, 1.10, 0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
              0.08, 3.67, 2.81, 3.49]
        w, pw = stats.shapiro(x2)
        shapiro_test = stats.shapiro(x2)
        assert_almost_equal(w, 0.9590270, decimal=6)
        assert_almost_equal(shapiro_test.statistic, 0.9590270, decimal=6)
        assert_almost_equal(pw, 0.52460, decimal=3)
        assert_almost_equal(shapiro_test.pvalue, 0.52460, decimal=3)

        # Verified against R
        x3 = stats.norm.rvs(loc=5, scale=3, size=100, random_state=12345678)
        w, pw = stats.shapiro(x3)
        shapiro_test = stats.shapiro(x3)
        assert_almost_equal(w, 0.9772805571556091, decimal=6)
        assert_almost_equal(shapiro_test.statistic, 0.9772805571556091, decimal=6)
        assert_almost_equal(pw, 0.08144091814756393, decimal=3)
        assert_almost_equal(shapiro_test.pvalue, 0.08144091814756393, decimal=3)

        # Extracted from original paper
        x4 = [0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614,
              0.655, 0.954, 1.392, 1.557, 1.648, 1.690, 1.994, 2.174, 2.206,
              3.245, 3.510, 3.571, 4.354, 4.980, 6.084, 8.351]
        W_expected = 0.83467
        p_expected = 0.000914
        w, pw = stats.shapiro(x4)
        shapiro_test = stats.shapiro(x4)
        assert_almost_equal(w, W_expected, decimal=4)
        assert_almost_equal(shapiro_test.statistic, W_expected, decimal=4)
        assert_almost_equal(pw, p_expected, decimal=5)
        assert_almost_equal(shapiro_test.pvalue, p_expected, decimal=5)

    def test_2d(self):
        x1 = [[0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46,
               4.43, 0.21, 4.75], [0.71, 1.52, 3.24,
               0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]]
        w, pw = stats.shapiro(x1)
        shapiro_test = stats.shapiro(x1)
        assert_almost_equal(w, 0.90047299861907959, decimal=6)
        assert_almost_equal(shapiro_test.statistic, 0.90047299861907959, decimal=6)
        assert_almost_equal(pw, 0.042089745402336121, decimal=6)
        assert_almost_equal(shapiro_test.pvalue, 0.042089745402336121, decimal=6)

        x2 = [[1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11,
               3.48, 1.10], [0.88, -0.51, 1.46, 0.52, 6.20, 1.69,
               0.08, 3.67, 2.81, 3.49]]
        w, pw = stats.shapiro(x2)
        shapiro_test = stats.shapiro(x2)
        assert_almost_equal(w, 0.9590270, decimal=6)
        assert_almost_equal(shapiro_test.statistic, 0.9590270, decimal=6)
        assert_almost_equal(pw, 0.52460, decimal=3)
        assert_almost_equal(shapiro_test.pvalue, 0.52460, decimal=3)

    def test_empty_input(self):
        assert_raises(ValueError, stats.shapiro, [])
        assert_raises(ValueError, stats.shapiro, [[], [], []])

    def test_not_enough_values(self):
        assert_raises(ValueError, stats.shapiro, [1, 2])
        assert_raises(ValueError, stats.shapiro, np.array([[], [2]], dtype=object))

    def test_bad_arg(self):
        # Length of x is less than 3.
        x = [1]
        assert_raises(ValueError, stats.shapiro, x)

    def test_nan_input(self):
        x = np.arange(10.)
        x[9] = np.nan

        w, pw = stats.shapiro(x)
        shapiro_test = stats.shapiro(x)
        assert_equal(w, np.nan)
        assert_equal(shapiro_test.statistic, np.nan)
        assert_almost_equal(pw, 1.0)
        assert_almost_equal(shapiro_test.pvalue, 1.0)

    def test_gh14462(self):
        # shapiro is theoretically location-invariant, but when the magnitude
        # of the values is much greater than the variance, there can be
        # numerical issues. Fixed by subtracting median from the data.
        # See gh-14462.
        trans_val, maxlog = stats.boxcox([122500, 474400, 110400])
        res = stats.shapiro(trans_val)

        # Reference from R:
        # options(digits=16)
        # x = c(0.00000000e+00, 3.39996924e-08, -6.35166875e-09)
        # shapiro.test(x)
        ref = (0.86468431705371, 0.2805581751566)
        assert_allclose(res, ref, rtol=1e-5)


class TestAnderson:
    def test_normal(self):
        rs = RandomState(1234567890)
        x1 = rs.standard_exponential(size=50)
        x2 = rs.standard_normal(size=50)
        A, crit, sig = stats.anderson(x1)
        assert_array_less(crit[:-1], A)
        A, crit, sig = stats.anderson(x2)
        assert_array_less(A, crit[-2:])

        v = np.ones(10)
        v[0] = 0
        A, crit, sig = stats.anderson(v)
        # The expected statistic 3.208057 was computed independently of scipy.
        # For example, in R:
        #   > library(nortest)
        #   > v <- rep(1, 10)
        #   > v[1] <- 0
        #   > result <- ad.test(v)
        #   > result$statistic
        #          A
        #   3.208057
        assert_allclose(A, 3.208057)

    def test_expon(self):
        rs = RandomState(1234567890)
        x1 = rs.standard_exponential(size=50)
        x2 = rs.standard_normal(size=50)
        A, crit, sig = stats.anderson(x1, 'expon')
        assert_array_less(A, crit[-2:])
        with np.errstate(all='ignore'):
            A, crit, sig = stats.anderson(x2, 'expon')
        assert_(A > crit[-1])

    def test_gumbel(self):
        # Regression test for gh-6306.  Before that issue was fixed,
        # this case would return a2=inf.
        v = np.ones(100)
        v[0] = 0.0
        a2, crit, sig = stats.anderson(v, 'gumbel')
        # A brief reimplementation of the calculation of the statistic.
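        # (This follows the standard Anderson-Darling definition
        #  A^2 = -n - (1/n) * sum_{i=1..n} (2i-1) * [log F(x_(i)) + log(1 - F(x_(n+1-i)))],
        #  evaluated at the fitted gumbel_l parameters; v is already sorted here.)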
        n = len(v)
        xbar, s = stats.gumbel_l.fit(v)
        logcdf = stats.gumbel_l.logcdf(v, xbar, s)
        logsf = stats.gumbel_l.logsf(v, xbar, s)
        i = np.arange(1, n+1)
        expected_a2 = -n - np.mean((2*i - 1) * (logcdf + logsf[::-1]))

        assert_allclose(a2, expected_a2)

    def test_bad_arg(self):
        assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp')

    def test_result_attributes(self):
        rs = RandomState(1234567890)
        x = rs.standard_exponential(size=50)
        res = stats.anderson(x)
        attributes = ('statistic', 'critical_values', 'significance_level')
        check_named_results(res, attributes)

    def test_gumbel_l(self):
        # gh-2592, gh-6337
        # Adds support for 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
        rs = RandomState(1234567890)
        x = rs.gumbel(size=100)
        A1, crit1, sig1 = stats.anderson(x, 'gumbel')
        A2, crit2, sig2 = stats.anderson(x, 'gumbel_l')

        assert_allclose(A2, A1)

    def test_gumbel_r(self):
        # gh-2592, gh-6337
        # Adds support for 'gumbel_r' and 'gumbel_l' as valid inputs for dist.
        rs = RandomState(1234567890)
        x1 = rs.gumbel(size=100)
        x2 = np.ones(100)
        # A constant array is a degenerate case and breaks gumbel_r.fit, so
        # change one value in x2.
        x2[0] = 0.996
        A1, crit1, sig1 = stats.anderson(x1, 'gumbel_r')
        A2, crit2, sig2 = stats.anderson(x2, 'gumbel_r')

        assert_array_less(A1, crit1[-2:])
        assert_(A2 > crit2[-1])

    @pytest.mark.parametrize('distname',
                             ['norm', 'expon', 'gumbel_l', 'extreme1',
                              'gumbel', 'gumbel_r', 'logistic'])
    def test_anderson_fit_params(self, distname):
        # check that anderson now returns a FitResult
        rng = np.random.default_rng(330691555377792039)
        real_distname = ('gumbel_l' if distname in {'extreme1', 'gumbel'}
                         else distname)
        dist = getattr(stats, real_distname)
        params = distcont[real_distname]
        x = dist.rvs(*params, size=1000, random_state=rng)
        res = stats.anderson(x, distname)
        assert res.fit_result.success


class TestAndersonKSamp:
    def test_example1a(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass a mixture of lists and arrays
        t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])

        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False)

        assert_almost_equal(Tk, 4.449, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0021, atol=0.00025)

    def test_example1b(self):
        # Example data from Scholz & Stephens (1987), originally
        # published in Lehmann (1995, Nonparametrics, Statistical
        # Methods Based on Ranks, p. 309)
        # Pass arrays
        t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0])
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0])
        t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8])
        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True)

        assert_almost_equal(Tk, 4.480, 3)
        assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0020, atol=0.00025)

    def test_example2a(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        # Pass lists instead of arrays
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]

        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                          t9, t10, t11, t12, t13, t14),
                                         midrank=False)

        assert_almost_equal(Tk, 3.288, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0041, atol=0.00025)

    def test_example2b(self):
        # Example data taken from an earlier technical report of
        # Scholz and Stephens
        t1 = [194, 15, 41, 29, 33, 181]
        t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118]
        t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34]
        t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29,
              118, 25, 156, 310, 76, 26, 44, 23, 62]
        t5 = [130, 208, 70, 101, 208]
        t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27]
        t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33]
        t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5,
              12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95]
        t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82,
              54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24]
        t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36,
               22, 139, 210, 97, 30, 23, 13, 14]
        t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438]
        t12 = [50, 254, 5, 283, 35, 12]
        t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130]
        t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66,
               61, 34]

        Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8,
                                          t9, t10, t11, t12, t13, t14),
                                         midrank=True)

        assert_almost_equal(Tk, 3.294, 3)
        assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009],
                                  tm[0:5], 4)
        assert_allclose(p, 0.0041, atol=0.00025)

    def test_R_kSamples(self):
        # test values generated with R package kSamples
        # package version 1.2-6 (2017-06-14)
        # r1 = 1:100
        # continuous case (no ties) --> version 1
        # res <- kSamples::ad.test(r1, r1 + 40.5)
        # res$ad[1, "T.AD"] #  41.105
        # res$ad[1, " asympt. P-value"] #  5.8399e-18
        #
        # discrete case (ties allowed) --> version 2 (here: midrank=True)
        # res$ad[2, "T.AD"] #  41.235
        #
        # res <- kSamples::ad.test(r1, r1 + .5)
        # res$ad[1, "T.AD"] #  -1.2824
        # res$ad[1, " asympt. P-value"] #  1
        # res$ad[2, "T.AD"] #  -1.2944
        #
        # res <- kSamples::ad.test(r1, r1 + 7.5)
        # res$ad[1, "T.AD"] # 1.4923
        # res$ad[1, " asympt. P-value"] # 0.077501
        #
        # res <- kSamples::ad.test(r1, r1 + 6)
        # res$ad[2, "T.AD"] # 0.63892
        # res$ad[2, " asympt. P-value"] # 0.17981
        #
        # res <- kSamples::ad.test(r1, r1 + 11.5)
        # res$ad[1, "T.AD"] # 4.5042
        # res$ad[1, " asympt. P-value"] # 0.00545
        #
        # res <- kSamples::ad.test(r1, r1 + 13.5)
        # res$ad[1, "T.AD"] # 6.2982
        # res$ad[1, " asympt. P-value"] # 0.00118

        x1 = np.linspace(1, 100, 100)
        # test case: different distributions; p-value floored at 0.001
        # test case for issue #5493 / #8536
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value floored')
            s, _, p = stats.anderson_ksamp([x1, x1 + 40.5], midrank=False)
        assert_almost_equal(s, 41.105, 3)
        assert_equal(p, 0.001)

        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value floored')
            s, _, p = stats.anderson_ksamp([x1, x1 + 40.5])
        assert_almost_equal(s, 41.235, 3)
        assert_equal(p, 0.001)

        # test case: similar distributions --> p-value capped at 0.25
        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value capped')
            s, _, p = stats.anderson_ksamp([x1, x1 + .5], midrank=False)
        assert_almost_equal(s, -1.2824, 4)
        assert_equal(p, 0.25)

        with suppress_warnings() as sup:
            sup.filter(UserWarning, message='p-value capped')
            s, _, p = stats.anderson_ksamp([x1, x1 + .5])
        assert_almost_equal(s, -1.2944, 4)
        assert_equal(p, 0.25)

        # test case: check interpolated p-value in [0.01, 0.25] (no ties)
        s, _, p = stats.anderson_ksamp([x1, x1 + 7.5], midrank=False)
        assert_almost_equal(s, 1.4923, 4)
        assert_allclose(p, 0.0775, atol=0.005, rtol=0)

        # test case: check interpolated p-value in [0.01, 0.25] (w/ ties)
        s, _, p = stats.anderson_ksamp([x1, x1 + 6])
        assert_almost_equal(s, 0.6389, 4)
        assert_allclose(p, 0.1798, atol=0.005, rtol=0)

        # test extended critical values for p=0.001 and p=0.005
        s, _, p = stats.anderson_ksamp([x1, x1 + 11.5], midrank=False)
        assert_almost_equal(s, 4.5042, 4)
        assert_allclose(p, 0.00545, atol=0.0005, rtol=0)

        s, _, p = stats.anderson_ksamp([x1, x1 + 13.5], midrank=False)
        assert_almost_equal(s, 6.2982, 4)
        assert_allclose(p, 0.00118, atol=0.0001, rtol=0)

    def test_not_enough_samples(self):
        assert_raises(ValueError, stats.anderson_ksamp, np.ones(5))

    def test_no_distinct_observations(self):
        assert_raises(ValueError, stats.anderson_ksamp,
                      (np.ones(5), np.ones(5)))

    def test_empty_sample(self):
        assert_raises(ValueError, stats.anderson_ksamp, (np.ones(5), []))

    def test_result_attributes(self):
        # Pass a mixture of lists and arrays
        t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]
        t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8])
        res = stats.anderson_ksamp((t1, t2), midrank=False)

        attributes = ('statistic', 'critical_values', 'significance_level')
        check_named_results(res, attributes)

        assert_equal(res.significance_level, res.pvalue)


class TestAnsari:

    def test_small(self):
        x = [1, 2, 3, 3, 4]
        y = [3, 2, 6, 1, 6, 1, 4, 1]
        with suppress_warnings() as sup:
            sup.filter(UserWarning, "Ties preclude use of exact statistic.")
            W, pval = stats.ansari(x, y)
        assert_almost_equal(W, 23.5, 11)
        assert_almost_equal(pval, 0.13499256881897437, 11)

    def test_approx(self):
        ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                           101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
        parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104,
                           100, 96, 108, 103, 104, 114, 114, 113, 108,
                           106, 99))

        with suppress_warnings() as sup:
            sup.filter(UserWarning, "Ties preclude use of exact statistic.")
            W, pval = stats.ansari(ramsay, parekh)

        assert_almost_equal(W, 185.5, 11)
        assert_almost_equal(pval, 0.18145819972867083, 11)

    def test_exact(self):
        W, pval = stats.ansari([1, 2, 3, 4], [15, 5, 20, 8, 10, 12])
        assert_almost_equal(W, 10.0, 11)
        assert_almost_equal(pval, 0.533333333333333333, 7)

    def test_bad_arg(self):
        assert_raises(ValueError, stats.ansari, [], [1])
        assert_raises(ValueError, stats.ansari, [1], [])

    def test_result_attributes(self):
        x = [1, 2, 3, 3, 4]
        y = [3, 2, 6, 1, 6, 1, 4, 1]
        with suppress_warnings() as sup:
            sup.filter(UserWarning, "Ties preclude use of exact statistic.")
            res = stats.ansari(x, y)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

    def test_bad_alternative(self):
        # invalid value for alternative must raise a ValueError
        x1 = [1, 2, 3, 4]
        x2 = [5, 6, 7, 8]
        match = "'alternative' must be 'two-sided'"
        with assert_raises(ValueError, match=match):
            stats.ansari(x1, x2, alternative='foo')

    def test_alternative_exact(self):
        x1 = [-5, 1, 5, 10, 15, 20, 25]  # high scale, loc=10
        x2 = [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]  # low scale, loc=10
        # ratio of scales is greater than 1. So, the
        # p-value must be high when `alternative='less'`
        # and low when `alternative='greater'`.
        statistic, pval = stats.ansari(x1, x2)
        pval_l = stats.ansari(x1, x2, alternative='less').pvalue
        pval_g = stats.ansari(x1, x2, alternative='greater').pvalue
        assert pval_l > 0.95
        assert pval_g < 0.05  # level of significance.
        # also check if the p-values sum up to 1 plus the probability
        # mass under the calculated statistic.
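        # (The two exact one-sided p-values are presumably P(W <= w) and
        # P(W >= w), which together count the mass at the observed statistic
        # twice, hence the `1 + prob` below.)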
        prob = _abw_state.pmf(statistic, len(x1), len(x2))
        assert_allclose(pval_g + pval_l, 1 + prob, atol=1e-12)
        # also check if one of the one-sided p-values equals half the
        # two-sided p-value and the other one-sided p-value is its
        # complement.
        assert_allclose(pval_g, pval/2, atol=1e-12)
        assert_allclose(pval_l, 1+prob-pval/2, atol=1e-12)
        # sanity check. The result should flip if
        # we exchange x and y.
        pval_l_reverse = stats.ansari(x2, x1, alternative='less').pvalue
        pval_g_reverse = stats.ansari(x2, x1, alternative='greater').pvalue
        assert pval_l_reverse < 0.05
        assert pval_g_reverse > 0.95

    @pytest.mark.parametrize(
        'x, y, alternative, expected',
        # the cases are chosen so that all branches of the if/else in the
        # exact mode of the ansari test are covered.
        [([1, 2, 3, 4], [5, 6, 7, 8], 'less', 0.6285714285714),
         ([1, 2, 3, 4], [5, 6, 7, 8], 'greater', 0.6285714285714),
         ([1, 2, 3], [4, 5, 6, 7, 8], 'less', 0.8928571428571),
         ([1, 2, 3], [4, 5, 6, 7, 8], 'greater', 0.2857142857143),
         ([1, 2, 3, 4, 5], [6, 7, 8], 'less', 0.2857142857143),
         ([1, 2, 3, 4, 5], [6, 7, 8], 'greater', 0.8928571428571)]
    )
    def test_alternative_exact_with_R(self, x, y, alternative, expected):
        # testing with R on arbitrary data
        # Sample R code used for the third test case above:
        # ```R
        # > options(digits=16)
        # > x <- c(1,2,3)
        # > y <- c(4,5,6,7,8)
        # > ansari.test(x, y, alternative='less', exact=TRUE)
        #
        #     Ansari-Bradley test
        #
        # data:  x and y
        # AB = 6, p-value = 0.8928571428571
        # alternative hypothesis: true ratio of scales is less than 1
        #
        # ```
        pval = stats.ansari(x, y, alternative=alternative).pvalue
        assert_allclose(pval, expected, atol=1e-12)

    def test_alternative_approx(self):
        # intuitive tests for approximation
        x1 = stats.norm.rvs(0, 5, size=100, random_state=123)
        x2 = stats.norm.rvs(0, 2, size=100, random_state=123)
        # for m > 55 or n > 55, the test should automatically
        # switch to approximation.
        pval_l = stats.ansari(x1, x2, alternative='less').pvalue
        pval_g = stats.ansari(x1, x2, alternative='greater').pvalue
        assert_allclose(pval_l, 1.0, atol=1e-12)
        assert_allclose(pval_g, 0.0, atol=1e-12)
        # also check if one of the one-sided p-values equals half the
        # two-sided p-value and the other one-sided p-value is its
        # complement.
        x1 = stats.norm.rvs(0, 2, size=60, random_state=123)
        x2 = stats.norm.rvs(0, 1.5, size=60, random_state=123)
        pval = stats.ansari(x1, x2).pvalue
        pval_l = stats.ansari(x1, x2, alternative='less').pvalue
        pval_g = stats.ansari(x1, x2, alternative='greater').pvalue
        assert_allclose(pval_g, pval/2, atol=1e-12)
        assert_allclose(pval_l, 1-pval/2, atol=1e-12)


class TestBartlett:

    def test_data(self):
        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda357.htm
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        T, pval = stats.bartlett(*args)
        assert_almost_equal(T, 20.78587342806484, 7)
        assert_almost_equal(pval, 0.0136358632781, 7)

    def test_bad_arg(self):
        # Too few args raises ValueError.
        assert_raises(ValueError, stats.bartlett, [1])

    def test_result_attributes(self):
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        res = stats.bartlett(*args)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

    def test_empty_arg(self):
        args = (g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, [])
        assert_equal((np.nan, np.nan), stats.bartlett(*args))

    # temporary fix for issue #9252: only accept 1d input
    def test_1d_input(self):
        x = np.array([[1, 2], [3, 4]])
        assert_raises(ValueError, stats.bartlett, g1, x)


class TestLevene:

    def test_data(self):
        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        W, pval = stats.levene(*args)
        assert_almost_equal(W, 1.7059176930008939, 7)
        assert_almost_equal(pval, 0.0990829755522, 7)

    def test_trimmed1(self):
        # Test that center='trimmed' gives the same result as center='mean'
        # when proportiontocut=0.
        W1, pval1 = stats.levene(g1, g2, g3, center='mean')
        W2, pval2 = stats.levene(g1, g2, g3, center='trimmed',
                                 proportiontocut=0.0)
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)

    def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        np.random.seed(1234)
        x2 = np.random.permutation(x)

        # Use center='trimmed'
        W0, pval0 = stats.levene(x, y, center='trimmed',
                                 proportiontocut=0.125)
        W1, pval1 = stats.levene(x2, y, center='trimmed',
                                 proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(W0, W2)
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)

    def test_equal_mean_median(self):
        x = np.linspace(-1, 1, 21)
        np.random.seed(1234)
        x2 = np.random.permutation(x)
        y = x**3
        W1, pval1 = stats.levene(x, y, center='mean')
        W2, pval2 = stats.levene(x2, y, center='median')
        assert_almost_equal(W1, W2)
        assert_almost_equal(pval1, pval2)

    def test_bad_keyword(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(TypeError, stats.levene, x, x, portiontocut=0.1)

    def test_bad_center_value(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(ValueError, stats.levene, x, x, center='trim')

    def test_too_few_args(self):
        assert_raises(ValueError, stats.levene, [1])

    def test_result_attributes(self):
        args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10]
        res = stats.levene(*args)
        attributes = ('statistic', 'pvalue')
        check_named_results(res, attributes)

    # temporary fix for issue #9252: only accept 1d input
    def test_1d_input(self):
        x = np.array([[1, 2], [3, 4]])
        assert_raises(ValueError, stats.levene, g1, x)


class TestBinomTestP:
    """
    Tests for stats.binomtest as a replacement for deprecated stats.binom_test.
    """
    @staticmethod
    def binom_test_func(x, n=None, p=0.5, alternative='two-sided'):
        # This processing of x and n is copied from binom_test.
        x = np.atleast_1d(x).astype(np.int_)
        if len(x) == 2:
            n = x[1] + x[0]
            x = x[0]
        elif len(x) == 1:
            x = x[0]
            if n is None or n < x:
                raise ValueError("n must be >= x")
            n = np.int_(n)
        else:
            raise ValueError("Incorrect length for x.")

        result = stats.binomtest(x, n, p=p, alternative=alternative)
        return result.pvalue

    def test_data(self):
        pval = self.binom_test_func(100, 250)
        assert_almost_equal(pval, 0.0018833009350757682, 11)
        pval = self.binom_test_func(201, 405)
        assert_almost_equal(pval, 0.92085205962670713, 11)
        pval = self.binom_test_func([682, 243], p=3/4)
        assert_almost_equal(pval, 0.38249155957481695, 11)

    def test_bad_len_x(self):
        # Length of x must be 1 or 2.
        assert_raises(ValueError, self.binom_test_func, [1, 2, 3])

    def test_bad_n(self):
        # len(x) is 1, but n is invalid.
        # Missing n
        assert_raises(ValueError, self.binom_test_func, [100])
        # n less than x[0]
        assert_raises(ValueError, self.binom_test_func, [100], n=50)

    def test_bad_p(self):
        assert_raises(ValueError,
                      self.binom_test_func, [50, 50], p=2.0)

    def test_alternatives(self):
        res = self.binom_test_func(51, 235, p=1/6, alternative='less')
        assert_almost_equal(res, 0.982022657605858)

        res = self.binom_test_func(51, 235, p=1/6, alternative='greater')
        assert_almost_equal(res, 0.02654424571169085)

        res = self.binom_test_func(51, 235, p=1/6, alternative='two-sided')
        assert_almost_equal(res, 0.0437479701823997)

    @pytest.mark.skipif(sys.maxsize <= 2**32, reason="32-bit does not overflow")
    def test_boost_overflow_raises(self):
        # Boost.Math error policy should raise exceptions in Python
        assert_raises(OverflowError, self.binom_test_func,
                      5.0, 6, p=sys.float_info.min)


class TestBinomTest:
    """Tests for stats.binomtest."""

    # Expected results here are from R binom.test, e.g.
    # options(digits=16)
    # binom.test(484, 967, p=0.48)
    #
    def test_two_sided_pvalues1(self):
        # `tol` could be stricter on most architectures, but the value
        # here is limited by accuracy of `binom.cdf` for large inputs on
        # Linux_Python_37_32bit_full and aarch64
        rtol = 1e-10  # aarch64 observed rtol: 1.5e-11
        res = stats.binomtest(10079999, 21000000, 0.48)
        assert_allclose(res.pvalue, 1.0, rtol=rtol)
        res = stats.binomtest(10079990, 21000000, 0.48)
        assert_allclose(res.pvalue, 0.9966892187965, rtol=rtol)
        res = stats.binomtest(10080009, 21000000, 0.48)
        assert_allclose(res.pvalue, 0.9970377203856, rtol=rtol)
        res = stats.binomtest(10080017, 21000000, 0.48)
        assert_allclose(res.pvalue, 0.9940754817328, rtol=1e-9)

    def test_two_sided_pvalues2(self):
        rtol = 1e-10  # no aarch64 failure with 1e-15, preemptive bump
        res = stats.binomtest(9, n=21, p=0.48)
        assert_allclose(res.pvalue, 0.6689672431939, rtol=rtol)
        res = stats.binomtest(4, 21, 0.48)
        assert_allclose(res.pvalue, 0.008139563452106, rtol=rtol)
        res = stats.binomtest(11, 21, 0.48)
        assert_allclose(res.pvalue, 0.8278629664608, rtol=rtol)
        res = stats.binomtest(7, 21, 0.48)
        assert_allclose(res.pvalue, 0.1966772901718, rtol=rtol)
        res = stats.binomtest(3, 10, .5)
        assert_allclose(res.pvalue, 0.34375, rtol=rtol)
        res = stats.binomtest(2, 2, .4)
        assert_allclose(res.pvalue, 0.16, rtol=rtol)
        res = stats.binomtest(2, 4, .3)
        assert_allclose(res.pvalue, 0.5884, rtol=rtol)

    def test_edge_cases(self):
        rtol = 1e-10  # aarch64 observed rtol: 1.33e-15
        res = stats.binomtest(484, 967, 0.5)
        assert_allclose(res.pvalue, 1, rtol=rtol)
        res = stats.binomtest(3, 47, 3/47)
        assert_allclose(res.pvalue, 1, rtol=rtol)
        res = stats.binomtest(13, 46, 13/46)
        assert_allclose(res.pvalue, 1, rtol=rtol)
        res = stats.binomtest(15, 44, 15/44)
        assert_allclose(res.pvalue, 1, rtol=rtol)
        res = stats.binomtest(7, 13, 0.5)
        assert_allclose(res.pvalue, 1, rtol=rtol)
        res = stats.binomtest(6, 11, 0.5)
        assert_allclose(res.pvalue, 1, rtol=rtol)

    def test_binary_srch_for_binom_tst(self):
        # Test that old behavior of binomtest is maintained
        # by the new binary search method in cases where d
        # exactly equals the input on one side.
        n = 10
        p = 0.5
        k = 3
        # First test for the case where k > mode of PMF
        i = np.arange(np.ceil(p * n), n+1)
        d = stats.binom.pmf(k, n, p)
        # Old way of calculating y, probably consistent with R.
        y1 = np.sum(stats.binom.pmf(i, n, p) <= d, axis=0)
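        # (y counts the upper-tail outcomes whose PMF does not exceed d;
        #  the binary search below should locate the same crossover index.)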
        # New way with binary search.
        ix = _binary_search_for_binom_tst(lambda x1: -stats.binom.pmf(x1, n, p),
                                          -d, np.ceil(p * n), n)
        y2 = n - ix + int(d == stats.binom.pmf(ix, n, p))
        assert_allclose(y1, y2, rtol=1e-9)
        # Now test for the other side.
        k = 7
        i = np.arange(np.floor(p * n) + 1)
        d = stats.binom.pmf(k, n, p)
        # Old way of calculating y.
        y1 = np.sum(stats.binom.pmf(i, n, p) <= d, axis=0)
        # New way with binary search.
        ix = _binary_search_for_binom_tst(lambda x1: stats.binom.pmf(x1, n, p),
                                          d, 0, np.floor(p * n))
        y2 = ix + 1
        assert_allclose(y1, y2, rtol=1e-9)

    # Expected results here are from R 3.6.2 binom.test
    @pytest.mark.parametrize('alternative, pval, ci_low, ci_high',
                             [('less', 0.148831050443,
                               0.0, 0.2772002496709138),
                              ('greater', 0.9004695898947,
                               0.1366613252458672, 1.0),
                              ('two-sided', 0.2983720970096,
                               0.1266555521019559, 0.2918426890886281)])
    def test_confidence_intervals1(self, alternative, pval, ci_low, ci_high):
        res = stats.binomtest(20, n=100, p=0.25, alternative=alternative)
        assert_allclose(res.pvalue, pval, rtol=1e-12)
        assert_equal(res.statistic, 0.2)
        ci = res.proportion_ci(confidence_level=0.95)
        assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-12)

    # Expected results here are from R 3.6.2 binom.test.
    @pytest.mark.parametrize('alternative, pval, ci_low, ci_high',
                             [('less',
                               0.005656361, 0.0, 0.1872093),
                              ('greater',
                               0.9987146, 0.008860761, 1.0),
                              ('two-sided',
                               0.01191714, 0.006872485, 0.202706269)])
    def test_confidence_intervals2(self, alternative, pval, ci_low, ci_high):
        res = stats.binomtest(3, n=50, p=0.2, alternative=alternative)
        assert_allclose(res.pvalue, pval, rtol=1e-6)
        assert_equal(res.statistic, 0.06)
        ci = res.proportion_ci(confidence_level=0.99)
        assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-6)

    # Expected results here are from R 3.6.2 binom.test.
    @pytest.mark.parametrize('alternative, pval, ci_high',
                             [('less', 0.05631351, 0.2588656),
                              ('greater', 1.0, 1.0),
                              ('two-sided', 0.07604122, 0.3084971)])
    def test_confidence_interval_exact_k0(self, alternative, pval, ci_high):
        # Test with k=0, n = 10.
        res = stats.binomtest(0, 10, p=0.25, alternative=alternative)
        assert_allclose(res.pvalue, pval, rtol=1e-6)
        ci = res.proportion_ci(confidence_level=0.95)
        assert_equal(ci.low, 0.0)
        assert_allclose(ci.high, ci_high, rtol=1e-6)

    # Expected results here are from R 3.6.2 binom.test.
    @pytest.mark.parametrize('alternative, pval, ci_low',
                             [('less', 1.0, 0.0),
                              ('greater', 9.536743e-07, 0.7411344),
                              ('two-sided', 9.536743e-07, 0.6915029)])
    def test_confidence_interval_exact_k_is_n(self, alternative, pval, ci_low):
        # Test with k = n = 10.
        res = stats.binomtest(10, 10, p=0.25, alternative=alternative)
        assert_allclose(res.pvalue, pval, rtol=1e-6)
        ci = res.proportion_ci(confidence_level=0.95)
        assert_equal(ci.high, 1.0)
        assert_allclose(ci.low, ci_low, rtol=1e-6)

    # Expected results are from the prop.test function in R 3.6.2.
    @pytest.mark.parametrize(
        'k, alternative, corr, conf, ci_low, ci_high',
        [[3, 'two-sided', True, 0.95, 0.08094782, 0.64632928],
         [3, 'two-sided', True, 0.99, 0.0586329, 0.7169416],
         [3, 'two-sided', False, 0.95, 0.1077913, 0.6032219],
         [3, 'two-sided', False, 0.99, 0.07956632, 0.6799753],
         [3, 'less', True, 0.95, 0.0, 0.6043476],
         [3, 'less', True, 0.99, 0.0, 0.6901811],
         [3, 'less', False, 0.95, 0.0, 0.5583002],
         [3, 'less', False, 0.99, 0.0, 0.6507187],
         [3, 'greater', True, 0.95, 0.09644904, 1.0],
         [3, 'greater', True, 0.99, 0.06659141, 1.0],
         [3, 'greater', False, 0.95, 0.1268766, 1.0],
         [3, 'greater', False, 0.99, 0.08974147, 1.0],
         [0, 'two-sided', True, 0.95, 0.0, 0.3445372],
         [0, 'two-sided', False, 0.95, 0.0, 0.2775328],
         [0, 'less', True, 0.95, 0.0, 0.2847374],
         [0, 'less', False, 0.95, 0.0, 0.212942],
         [0, 'greater', True, 0.95, 0.0, 1.0],
         [0, 'greater', False, 0.95, 0.0, 1.0],
         [10, 'two-sided', True, 0.95, 0.6554628, 1.0],
         [10, 'two-sided', False, 0.95, 0.7224672, 1.0],
         [10, 'less', True, 0.95, 0.0, 1.0],
         [10, 'less', False, 0.95, 0.0, 1.0],
         [10, 'greater', True, 0.95, 0.7152626, 1.0],
         [10, 'greater', False, 0.95, 0.787058, 1.0]]
    )
    def test_ci_wilson_method(self, k, alternative, corr, conf,
                              ci_low, ci_high):
        res = stats.binomtest(k, n=10, p=0.1, alternative=alternative)
        if corr:
            method = 'wilsoncc'
        else:
            method = 'wilson'
        ci = res.proportion_ci(confidence_level=conf, method=method)
        assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-6)

    def test_estimate_equals_hypothesized_prop(self):
        # Test the special case where the estimated proportion equals
        # the hypothesized proportion.  When alternative is 'two-sided',
        # the p-value is 1.
        res = stats.binomtest(4, 16, 0.25)
        assert_equal(res.statistic, 0.25)
        assert_equal(res.pvalue, 1.0)

    @pytest.mark.parametrize('k, n', [(0, 0), (-1, 2)])
    def test_invalid_k_n(self, k, n):
        with pytest.raises(ValueError,
                           match="must be an integer not less than"):
            stats.binomtest(k, n)

    def test_invalid_k_too_big(self):
        with pytest.raises(ValueError,
                           match="k must not be greater than n"):
            stats.binomtest(11, 10, 0.25)

    def test_invalid_confidence_level(self):
        res = stats.binomtest(3, n=10, p=0.1)
        with pytest.raises(ValueError, match="must be in the interval"):
            res.proportion_ci(confidence_level=-1)

    def test_invalid_ci_method(self):
        res = stats.binomtest(3, n=10, p=0.1)
        with pytest.raises(ValueError, match="method must be"):
            res.proportion_ci(method="plate of shrimp")

    def test_alias(self):
        res = stats.binomtest(3, n=10, p=0.1)
        assert_equal(res.proportion_estimate, res.statistic)


class TestFligner:

    def test_data(self):
        # numbers from R: fligner.test in package stats
        x1 = np.arange(5)
        assert_array_almost_equal(stats.fligner(x1, x1**2),
                                  (3.2282229927203536, 0.072379187848207877),
                                  11)

    def test_trimmed1(self):
        # Perturb input to break ties in the transformed data
        # See https://github.com/scipy/scipy/pull/8042 for more details
        rs = np.random.RandomState(123)
        _perturb = lambda g: (np.asarray(g) + 1e-10 * rs.randn(len(g))).tolist()
        g1_ = _perturb(g1)
        g2_ = _perturb(g2)
        g3_ = _perturb(g3)
        # Test that center='trimmed' gives the same result as center='mean'
        # when proportiontocut=0.
        Xsq1, pval1 = stats.fligner(g1_, g2_, g3_, center='mean')
        Xsq2, pval2 = stats.fligner(g1_, g2_, g3_, center='trimmed',
                                    proportiontocut=0.0)
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    def test_trimmed2(self):
        x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0]
        y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0]
        # Use center='trimmed'
        Xsq1, pval1 = stats.fligner(x, y, center='trimmed',
                                    proportiontocut=0.125)
        # Trim the data here, and use center='mean'
        Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean')
        # Result should be the same.
        assert_almost_equal(Xsq1, Xsq2)
        assert_almost_equal(pval1, pval2)

    # The following test looks reasonable at first, but fligner() uses the
    # function stats.rankdata(), and in one of the cases in this test,
    # there are ties, while in the other (because of normal rounding
    # errors) there are not.  This difference leads to differences in the
    # third significant digit of W.
    #
    # def test_equal_mean_median(self):
    #     x = np.linspace(-1, 1, 21)
    #     y = x**3
    #     W1, pval1 = stats.fligner(x, y, center='mean')
    #     W2, pval2 = stats.fligner(x, y, center='median')
    #     assert_almost_equal(W1, W2)
    #     assert_almost_equal(pval1, pval2)

    def test_bad_keyword(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(TypeError, stats.fligner, x, x, portiontocut=0.1)

    def test_bad_center_value(self):
        x = np.linspace(-1, 1, 21)
        assert_raises(ValueError, stats.fligner, x, x, center='trim')

    def test_bad_num_args(self):
        # Too few args raises ValueError.
        assert_raises(ValueError, stats.fligner, [1])

    def test_empty_arg(self):
        x = np.arange(5)
        assert_equal((np.nan, np.nan), stats.fligner(x, x**2, []))


def mood_cases_with_ties():
    # Generate random `x` and `y` arrays with ties both between and within the
    # samples. Expected results are (statistic, pvalue) from SAS.
    expected_results = [(-1.76658511464992, .0386488678399305),
                        (-.694031428192304, .2438312498647250),
                        (-1.15093525352151, .1248794365836150)]
    seeds = [23453254, 1298352315, 987234597]
    for si, seed in enumerate(seeds):
        rng = np.random.default_rng(seed)
        xy = rng.random(100)
        # Generate random indices to make ties
        tie_ind = rng.integers(low=0, high=99, size=5)
        # Generate a random number of ties for each index.
        num_ties_per_ind = rng.integers(low=1, high=5, size=5)
        # At each `tie_ind`, mark the next `n` indices equal to that value.
        for i, n in zip(tie_ind, num_ties_per_ind):
            for j in range(i + 1, i + n):
                xy[j] = xy[i]
        # scramble order of xy before splitting into `x, y`
        rng.shuffle(xy)
        x, y = np.split(xy, 2)
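        # Each case yields (x, y, alternative, statistic, pvalue), consumed by
        # TestMood.test_against_SAS below.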
        yield x, y, 'less', *expected_results[si]


class TestMood:
    @pytest.mark.parametrize("x,y,alternative,stat_expect,p_expect",
                             mood_cases_with_ties())
    def test_against_SAS(self, x, y, alternative, stat_expect, p_expect):
        """
        Example code used to generate SAS output:
        DATA myData;
        INPUT X Y;
        CARDS;
        1 0
        1 1
        1 2
        1 3
        1 4
        2 0
        2 1
        2 4
        2 9
        2 16
        ods graphics on;
        proc npar1way mood data=myData ;
            class X;
            ods output MoodTest=mt;
        proc contents data=mt;
        proc print data=mt;
            format Prob1 17.16 Prob2 17.16 Statistic 17.16 Z 17.16 ;
            title "Mood Two-Sample Test";
        proc print data=myData;
            title "Data for above results";
        run;
        """
        statistic, pvalue = stats.mood(x, y, alternative=alternative)
        assert_allclose(stat_expect, statistic, atol=1e-16)
        assert_allclose(p_expect, pvalue, atol=1e-16)

    @pytest.mark.parametrize("alternative, expected",
                             [('two-sided', (1.019938533549930,
                                             .3077576129778760)),
                              ('less', (1.019938533549930,
                                        1 - .1538788064889380)),
                              ('greater', (1.019938533549930,
                                           .1538788064889380))])
    def test_against_SAS_2(self, alternative, expected):
        # Code to run in SAS in above function
        x = [111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
             101, 96, 97, 102, 107, 113, 116, 113, 110, 98]
        y = [107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
             96, 108, 103, 104, 114, 114, 113, 108, 106, 99]
        res = stats.mood(x, y, alternative=alternative)
        assert_allclose(res, expected)

    def test_mood_order_of_args(self):
        # z should change sign when the order of arguments changes, pvalue
        # should not change
        np.random.seed(1234)
        x1 = np.random.randn(10, 1)
        x2 = np.random.randn(15, 1)
        z1, p1 = stats.mood(x1, x2)
        z2, p2 = stats.mood(x2, x1)
        assert_array_almost_equal([z1, p1], [-z2, p2])

    def test_mood_with_axis_none(self):
        # Test with axis = None, compare with results from R
        x1 = [-0.626453810742332, 0.183643324222082, -0.835628612410047,
              1.59528080213779, 0.329507771815361, -0.820468384118015,
              0.487429052428485, 0.738324705129217, 0.575781351653492,
              -0.305388387156356, 1.51178116845085, 0.389843236411431,
              -0.621240580541804, -2.2146998871775, 1.12493091814311,
              -0.0449336090152309, -0.0161902630989461, 0.943836210685299,
              0.821221195098089, 0.593901321217509]

        x2 = [-0.896914546624981, 0.184849184646742, 1.58784533120882,
              -1.13037567424629, -0.0802517565509893, 0.132420284381094,
              0.707954729271733, -0.23969802417184, 1.98447393665293,
              -0.138787012119665, 0.417650750792556, 0.981752777463662,
              -0.392695355503813, -1.03966897694891, 1.78222896030858,
              -2.31106908460517, 0.878604580921265, 0.035806718015226,
              1.01282869212708, 0.432265154539617, 2.09081920524915,
              -1.19992581964387, 1.58963820029007, 1.95465164222325,
              0.00493777682814261, -2.45170638784613, 0.477237302613617,
              -0.596558168631403, 0.792203270299649, 0.289636710177348]

        x1 = np.array(x1)
        x2 = np.array(x2)
        x1.shape = (10, 2)
        x2.shape = (15, 2)
        assert_array_almost_equal(stats.mood(x1, x2, axis=None),
                                  [-1.31716607555, 0.18778296257])

    def test_mood_2d(self):
        # Test if the results of mood test in 2-D case are consistent with the
        # R result for the same inputs.  Numbers from R mood.test().
        ny = 5
        np.random.seed(1234)
        x1 = np.random.randn(10, ny)
        x2 = np.random.randn(15, ny)
        z_vectest, pval_vectest = stats.mood(x1, x2)
        for j in range(ny):
            assert_array_almost_equal([z_vectest[j], pval_vectest[j]],
                                      stats.mood(x1[:, j], x2[:, j]))

        # inverse order of dimensions
        x1 = x1.transpose()
        x2 = x2.transpose()
        z_vectest, pval_vectest = stats.mood(x1, x2, axis=1)
        for i in range(ny):
            # check axis handling is self consistent
            assert_array_almost_equal([z_vectest[i], pval_vectest[i]],
                                      stats.mood(x1[i, :], x2[i, :]))

    def test_mood_3d(self):
        shape = (10, 5, 6)
        np.random.seed(1234)
        x1 = np.random.randn(*shape)
        x2 = np.random.randn(*shape)

        for axis in range(3):
            z_vectest, pval_vectest = stats.mood(x1, x2, axis=axis)
            # Tests that result for 3-D arrays is equal to that for the
            # same calculation on a set of 1-D arrays taken from the
            # 3-D array
            axes_idx = ([1, 2], [0, 2], [0, 1])  # the two axes != axis
            for i in range(shape[axes_idx[axis][0]]):
                for j in range(shape[axes_idx[axis][1]]):
                    if axis == 0:
                        slice1 = x1[:, i, j]
                        slice2 = x2[:, i, j]
                    elif axis == 1:
                        slice1 = x1[i, :, j]
                        slice2 = x2[i, :, j]
                    else:
                        slice1 = x1[i, j, :]
                        slice2 = x2[i, j, :]

                    assert_array_almost_equal([z_vectest[i, j],
                                               pval_vectest[i, j]],
                                              stats.mood(slice1, slice2))
  1099. def test_mood_bad_arg(self):
  1100. # Raise ValueError when the sum of the lengths of the args is
  1101. # less than 3
  1102. assert_raises(ValueError, stats.mood, [1], [])
  1103. def test_mood_alternative(self):
  1104. np.random.seed(0)
  1105. x = stats.norm.rvs(scale=0.75, size=100)
  1106. y = stats.norm.rvs(scale=1.25, size=100)
  1107. stat1, p1 = stats.mood(x, y, alternative='two-sided')
  1108. stat2, p2 = stats.mood(x, y, alternative='less')
  1109. stat3, p3 = stats.mood(x, y, alternative='greater')
  1110. assert stat1 == stat2 == stat3
  1111. assert_allclose(p1, 0, atol=1e-7)
  1112. assert_allclose(p2, p1/2)
  1113. assert_allclose(p3, 1 - p1/2)
  1114. with pytest.raises(ValueError, match="alternative must be..."):
  1115. stats.mood(x, y, alternative='ekki-ekki')
  1116. @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater'])
  1117. def test_result(self, alternative):
  1118. rng = np.random.default_rng(265827767938813079281100964083953437622)
  1119. x1 = rng.standard_normal((10, 1))
  1120. x2 = rng.standard_normal((15, 1))
  1121. res = stats.mood(x1, x2, alternative=alternative)
  1122. assert_equal((res.statistic, res.pvalue), res)
  1123. class TestProbplot:
  1124. def test_basic(self):
  1125. x = stats.norm.rvs(size=20, random_state=12345)
  1126. osm, osr = stats.probplot(x, fit=False)
  1127. osm_expected = [-1.8241636, -1.38768012, -1.11829229, -0.91222575,
  1128. -0.73908135, -0.5857176, -0.44506467, -0.31273668,
  1129. -0.18568928, -0.06158146, 0.06158146, 0.18568928,
  1130. 0.31273668, 0.44506467, 0.5857176, 0.73908135,
  1131. 0.91222575, 1.11829229, 1.38768012, 1.8241636]
  1132. assert_allclose(osr, np.sort(x))
  1133. assert_allclose(osm, osm_expected)
  1134. res, res_fit = stats.probplot(x, fit=True)
  1135. res_fit_expected = [1.05361841, 0.31297795, 0.98741609]
  1136. assert_allclose(res_fit, res_fit_expected)
  1137. def test_sparams_keyword(self):
  1138. x = stats.norm.rvs(size=100, random_state=123456)
  1139. # Check that None, () and 0 (loc=0, for normal distribution) all work
  1140. # and give the same results
  1141. osm1, osr1 = stats.probplot(x, sparams=None, fit=False)
  1142. osm2, osr2 = stats.probplot(x, sparams=0, fit=False)
  1143. osm3, osr3 = stats.probplot(x, sparams=(), fit=False)
  1144. assert_allclose(osm1, osm2)
  1145. assert_allclose(osm1, osm3)
  1146. assert_allclose(osr1, osr2)
  1147. assert_allclose(osr1, osr3)
  1148. # Check giving (loc, scale) params for normal distribution
  1149. osm, osr = stats.probplot(x, sparams=(0, 1), fit=False)
  1150. def test_dist_keyword(self):
  1151. x = stats.norm.rvs(size=20, random_state=12345)
  1152. osm1, osr1 = stats.probplot(x, fit=False, dist='t', sparams=(3,))
  1153. osm2, osr2 = stats.probplot(x, fit=False, dist=stats.t, sparams=(3,))
  1154. assert_allclose(osm1, osm2)
  1155. assert_allclose(osr1, osr2)
  1156. assert_raises(ValueError, stats.probplot, x, dist='wrong-dist-name')
  1157. assert_raises(AttributeError, stats.probplot, x, dist=[])
  1158. class custom_dist:
  1159. """Some class that looks just enough like a distribution."""
  1160. def ppf(self, q):
  1161. return stats.norm.ppf(q, loc=2)
  1162. osm1, osr1 = stats.probplot(x, sparams=(2,), fit=False)
  1163. osm2, osr2 = stats.probplot(x, dist=custom_dist(), fit=False)
  1164. assert_allclose(osm1, osm2)
  1165. assert_allclose(osr1, osr2)
  1166. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  1167. def test_plot_kwarg(self):
  1168. fig = plt.figure()
  1169. fig.add_subplot(111)
  1170. x = stats.t.rvs(3, size=100, random_state=7654321)
  1171. res1, fitres1 = stats.probplot(x, plot=plt)
  1172. plt.close()
  1173. res2, fitres2 = stats.probplot(x, plot=None)
  1174. res3 = stats.probplot(x, fit=False, plot=plt)
  1175. plt.close()
  1176. res4 = stats.probplot(x, fit=False, plot=None)
  1177. # Check that results are consistent between combinations of `fit` and
  1178. # `plot` keywords.
  1179. assert_(len(res1) == len(res2) == len(res3) == len(res4) == 2)
  1180. assert_allclose(res1, res2)
  1181. assert_allclose(res1, res3)
  1182. assert_allclose(res1, res4)
  1183. assert_allclose(fitres1, fitres2)
  1184. # Check that a Matplotlib Axes object is accepted
  1185. fig = plt.figure()
  1186. ax = fig.add_subplot(111)
  1187. stats.probplot(x, fit=False, plot=ax)
  1188. plt.close()
  1189. def test_probplot_bad_args(self):
  1190. # Raise ValueError when given an invalid distribution.
  1191. assert_raises(ValueError, stats.probplot, [1], dist="plate_of_shrimp")
  1192. def test_empty(self):
  1193. assert_equal(stats.probplot([], fit=False),
  1194. (np.array([]), np.array([])))
  1195. assert_equal(stats.probplot([], fit=True),
  1196. ((np.array([]), np.array([])),
  1197. (np.nan, np.nan, 0.0)))
  1198. def test_array_of_size_one(self):
  1199. with np.errstate(invalid='ignore'):
  1200. assert_equal(stats.probplot([1], fit=True),
  1201. ((np.array([0.]), np.array([1])),
  1202. (np.nan, np.nan, 0.0)))
  1203. class TestWilcoxon:
  1204. def test_wilcoxon_bad_arg(self):
  1205. # Raise ValueError when two args of different lengths are given or
  1206. # zero_method is unknown.
  1207. assert_raises(ValueError, stats.wilcoxon, [1], [1, 2])
  1208. assert_raises(ValueError, stats.wilcoxon, [1, 2], [1, 2], "dummy")
  1209. assert_raises(ValueError, stats.wilcoxon, [1, 2], [1, 2],
  1210. alternative="dummy")
  1211. assert_raises(ValueError, stats.wilcoxon, [1]*10, mode="xyz")
  1212. def test_zero_diff(self):
  1213. x = np.arange(20)
  1214. # the "pratt" and "wilcox" zero_methods do not work if x - y is zero for all elements
  1215. assert_raises(ValueError, stats.wilcoxon, x, x, "wilcox",
  1216. mode="approx")
  1217. assert_raises(ValueError, stats.wilcoxon, x, x, "pratt",
  1218. mode="approx")
  1219. # the total rank sum is n*(n+1)/2; "zsplit" splits it in half, giving n*(n+1)/4
  1220. assert_equal(stats.wilcoxon(x, x, "zsplit", mode="approx"),
  1221. (20*21/4, 1.0))
  1222. def test_pratt(self):
  1223. # regression test for gh-6805: p-value matches value from R package
  1224. # coin (wilcoxsign_test) reported in the issue
  1225. x = [1, 2, 3, 4]
  1226. y = [1, 2, 3, 5]
  1227. with suppress_warnings() as sup:
  1228. sup.filter(UserWarning, message="Sample size too small")
  1229. res = stats.wilcoxon(x, y, zero_method="pratt", mode="approx")
  1230. assert_allclose(res, (0.0, 0.31731050786291415))
  1231. def test_wilcoxon_arg_type(self):
  1232. # Should be able to accept list as arguments.
  1233. # Address issue 6070.
  1234. arr = [1, 2, 3, 0, -1, 3, 1, 2, 1, 1, 2]
  1235. _ = stats.wilcoxon(arr, zero_method="pratt", mode="approx")
  1236. _ = stats.wilcoxon(arr, zero_method="zsplit", mode="approx")
  1237. _ = stats.wilcoxon(arr, zero_method="wilcox", mode="approx")
  1238. def test_accuracy_wilcoxon(self):
  1239. freq = [1, 4, 16, 15, 8, 4, 5, 1, 2]
  1240. nums = range(-4, 5)
  1241. x = np.concatenate([[u] * v for u, v in zip(nums, freq)])
  1242. y = np.zeros(x.size)
  1243. T, p = stats.wilcoxon(x, y, "pratt", mode="approx")
  1244. assert_allclose(T, 423)
  1245. assert_allclose(p, 0.0031724568006762576)
  1246. T, p = stats.wilcoxon(x, y, "zsplit", mode="approx")
  1247. assert_allclose(T, 441)
  1248. assert_allclose(p, 0.0032145343172473055)
  1249. T, p = stats.wilcoxon(x, y, "wilcox", mode="approx")
  1250. assert_allclose(T, 327)
  1251. assert_allclose(p, 0.00641346115861)
  1252. # Test the 'correction' option, using values computed in R with:
  1253. # > wilcox.test(x, y, paired=TRUE, exact=FALSE, correct={FALSE,TRUE})
  1254. x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
  1255. y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
  1256. T, p = stats.wilcoxon(x, y, correction=False, mode="approx")
  1257. assert_equal(T, 34)
  1258. assert_allclose(p, 0.6948866, rtol=1e-6)
  1259. T, p = stats.wilcoxon(x, y, correction=True, mode="approx")
  1260. assert_equal(T, 34)
  1261. assert_allclose(p, 0.7240817, rtol=1e-6)
  1262. def test_wilcoxon_result_attributes(self):
  1263. x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112])
  1264. y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187])
  1265. res = stats.wilcoxon(x, y, correction=False, mode="approx")
  1266. attributes = ('statistic', 'pvalue')
  1267. check_named_results(res, attributes)
  1268. def test_wilcoxon_has_zstatistic(self):
  1269. rng = np.random.default_rng(89426135444)
  1270. x, y = rng.random(15), rng.random(15)
  1271. res = stats.wilcoxon(x, y, mode="approx")
  1272. ref = stats.norm.ppf(res.pvalue/2)
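# With the normal approximation and the default two-sided alternative the
# reported z is nonpositive (the statistic is the smaller signed-rank sum), so
# presumably p = 2*Phi(z) and z can be recovered as norm.ppf(p/2), which is
# what `ref` computes.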
  1273. assert_allclose(res.zstatistic, ref)
  1274. res = stats.wilcoxon(x, y, mode="exact")
  1275. assert not hasattr(res, 'zstatistic')
  1276. res = stats.wilcoxon(x, y)
  1277. assert not hasattr(res, 'zstatistic')
  1278. def test_wilcoxon_tie(self):
  1279. # Regression test for gh-2391.
  1280. # Corresponding R code is:
  1281. # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=FALSE)
  1282. # > result$p.value
  1283. # [1] 0.001565402
  1284. # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=TRUE)
  1285. # > result$p.value
  1286. # [1] 0.001904195
  1287. stat, p = stats.wilcoxon([0.1] * 10, mode="approx")
  1288. expected_p = 0.001565402
  1289. assert_equal(stat, 0)
  1290. assert_allclose(p, expected_p, rtol=1e-6)
  1291. stat, p = stats.wilcoxon([0.1] * 10, correction=True, mode="approx")
  1292. expected_p = 0.001904195
  1293. assert_equal(stat, 0)
  1294. assert_allclose(p, expected_p, rtol=1e-6)
  1295. def test_onesided(self):
  1296. # tested against "R version 3.4.1 (2017-06-30)"
  1297. # x <- c(125, 115, 130, 140, 140, 115, 140, 125, 140, 135)
  1298. # y <- c(110, 122, 125, 120, 140, 124, 123, 137, 135, 145)
  1299. # cfg <- list(x = x, y = y, paired = TRUE, exact = FALSE)
  1300. # do.call(wilcox.test, c(cfg, list(alternative = "less", correct = FALSE)))
  1301. # do.call(wilcox.test, c(cfg, list(alternative = "less", correct = TRUE)))
  1302. # do.call(wilcox.test, c(cfg, list(alternative = "greater", correct = FALSE)))
  1303. # do.call(wilcox.test, c(cfg, list(alternative = "greater", correct = TRUE)))
  1304. x = [125, 115, 130, 140, 140, 115, 140, 125, 140, 135]
  1305. y = [110, 122, 125, 120, 140, 124, 123, 137, 135, 145]
  1306. with suppress_warnings() as sup:
  1307. sup.filter(UserWarning, message="Sample size too small")
  1308. w, p = stats.wilcoxon(x, y, alternative="less", mode="approx")
  1309. assert_equal(w, 27)
  1310. assert_almost_equal(p, 0.7031847, decimal=6)
  1311. with suppress_warnings() as sup:
  1312. sup.filter(UserWarning, message="Sample size too small")
  1313. w, p = stats.wilcoxon(x, y, alternative="less", correction=True,
  1314. mode="approx")
  1315. assert_equal(w, 27)
  1316. assert_almost_equal(p, 0.7233656, decimal=6)
  1317. with suppress_warnings() as sup:
  1318. sup.filter(UserWarning, message="Sample size too small")
  1319. w, p = stats.wilcoxon(x, y, alternative="greater", mode="approx")
  1320. assert_equal(w, 27)
  1321. assert_almost_equal(p, 0.2968153, decimal=6)
  1322. with suppress_warnings() as sup:
  1323. sup.filter(UserWarning, message="Sample size too small")
  1324. w, p = stats.wilcoxon(x, y, alternative="greater", correction=True,
  1325. mode="approx")
  1326. assert_equal(w, 27)
  1327. assert_almost_equal(p, 0.3176447, decimal=6)
  1328. def test_exact_basic(self):
  1329. for n in range(1, 51):
  1330. pmf1 = _get_wilcoxon_distr(n)
  1331. pmf2 = _get_wilcoxon_distr2(n)
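# W takes the integer values 0 .. n*(n+1)/2, so the exact distribution should
# have n*(n+1)/2 + 1 support points whose probabilities sum to 1, which is
# what the assertions below check.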
  1332. assert_equal(n*(n+1)/2 + 1, len(pmf1))
  1333. assert_equal(sum(pmf1), 1)
  1334. assert_array_almost_equal(pmf1, pmf2)
  1335. def test_exact_pval(self):
  1336. # expected values computed with "R version 3.4.1 (2017-06-30)"
  1337. x = np.array([1.81, 0.82, 1.56, -0.48, 0.81, 1.28, -1.04, 0.23,
  1338. -0.75, 0.14])
  1339. y = np.array([0.71, 0.65, -0.2, 0.85, -1.1, -0.45, -0.84, -0.24,
  1340. -0.68, -0.76])
  1341. _, p = stats.wilcoxon(x, y, alternative="two-sided", mode="exact")
  1342. assert_almost_equal(p, 0.1054688, decimal=6)
  1343. _, p = stats.wilcoxon(x, y, alternative="less", mode="exact")
  1344. assert_almost_equal(p, 0.9580078, decimal=6)
  1345. _, p = stats.wilcoxon(x, y, alternative="greater", mode="exact")
  1346. assert_almost_equal(p, 0.05273438, decimal=6)
  1347. x = np.arange(0, 20) + 0.5
  1348. y = np.arange(20, 0, -1)
  1349. _, p = stats.wilcoxon(x, y, alternative="two-sided", mode="exact")
  1350. assert_almost_equal(p, 0.8694878, decimal=6)
  1351. _, p = stats.wilcoxon(x, y, alternative="less", mode="exact")
  1352. assert_almost_equal(p, 0.4347439, decimal=6)
  1353. _, p = stats.wilcoxon(x, y, alternative="greater", mode="exact")
  1354. assert_almost_equal(p, 0.5795889, decimal=6)
  1355. # These inputs were chosen to give a W statistic that is either the
  1356. # center of the distribution (when the length of the support is odd), or
  1357. # the value to the left of the center (when the length of the support is
  1358. # even). Also, the numbers are chosen so that the W statistic is the
  1359. # sum of the positive values.
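# For example, with x = [-1, -2, 3] the support of W is {0, ..., 6} (length 7,
# odd) and the single positive value carries the largest rank, so W = 3 sits at
# the center of the support and the doubled one-sided tail probability is
# clipped to a two-sided p-value of 1.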
  1360. @pytest.mark.parametrize('x', [[-1, -2, 3],
  1361. [-1, 2, -3, -4, 5],
  1362. [-1, -2, 3, -4, -5, -6, 7, 8]])
  1363. def test_exact_p_1(self, x):
  1364. w, p = stats.wilcoxon(x)
  1365. x = np.array(x)
  1366. wtrue = x[x > 0].sum()
  1367. assert_equal(w, wtrue)
  1368. assert_equal(p, 1)
  1369. def test_auto(self):
  1370. # mode='auto' defaults to 'exact' if there are no ties and n <= 25
  1371. x = np.arange(0, 25) + 0.5
  1372. y = np.arange(25, 0, -1)
  1373. assert_equal(stats.wilcoxon(x, y),
  1374. stats.wilcoxon(x, y, mode="exact"))
  1375. # if there are zeros in d = x - y, mode='auto' switches to 'approx'
  1376. d = np.arange(0, 13)
  1377. with suppress_warnings() as sup:
  1378. sup.filter(UserWarning, message="Exact p-value calculation")
  1379. w, p = stats.wilcoxon(d)
  1380. assert_equal(stats.wilcoxon(d, mode="approx"), (w, p))
  1381. # use approximation for samples > 25
  1382. d = np.arange(1, 52)
  1383. assert_equal(stats.wilcoxon(d), stats.wilcoxon(d, mode="approx"))
  1384. class TestKstat:
  1385. def test_moments_normal_distribution(self):
  1386. np.random.seed(32149)
  1387. data = np.random.randn(12345)
  1388. moments = [stats.kstat(data, n) for n in [1, 2, 3, 4]]
  1389. expected = [0.011315, 1.017931, 0.05811052, 0.0754134]
  1390. assert_allclose(moments, expected, rtol=1e-4)
  1391. # test equivalence with `stats.moment`
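# Roughly: k1 is the sample mean, k2 the unbiased sample variance and k3 an
# unbiased estimate of the third cumulant, so for this large sample they are
# close to the corresponding central moments. Note stats.moment(..., moment=1)
# is the first *central* moment (exactly 0); it only matches expected[0]
# because the sample mean happens to be close to 0.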
  1392. m1 = stats.moment(data, moment=1)
  1393. m2 = stats.moment(data, moment=2)
  1394. m3 = stats.moment(data, moment=3)
  1395. assert_allclose((m1, m2, m3), expected[:-1], atol=0.02, rtol=1e-2)
  1396. def test_empty_input(self):
  1397. assert_raises(ValueError, stats.kstat, [])
  1398. def test_nan_input(self):
  1399. data = np.arange(10.)
  1400. data[6] = np.nan
  1401. assert_equal(stats.kstat(data), np.nan)
  1402. def test_kstat_bad_arg(self):
  1403. # Raise ValueError if n > 4 or n < 1.
  1404. data = np.arange(10)
  1405. for n in [0, 4.001]:
  1406. assert_raises(ValueError, stats.kstat, data, n=n)
  1407. class TestKstatVar:
  1408. def test_empty_input(self):
  1409. assert_raises(ValueError, stats.kstatvar, [])
  1410. def test_nan_input(self):
  1411. data = np.arange(10.)
  1412. data[6] = np.nan
  1413. assert_equal(stats.kstat(data), np.nan)
  1414. def test_bad_arg(self):
  1415. # Raise ValueError if n is not 1 or 2.
  1416. data = [1]
  1417. n = 10
  1418. assert_raises(ValueError, stats.kstatvar, data, n=n)
  1419. class TestPpccPlot:
  1420. def setup_method(self):
  1421. self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5
  1422. def test_basic(self):
  1423. N = 5
  1424. svals, ppcc = stats.ppcc_plot(self.x, -10, 10, N=N)
  1425. ppcc_expected = [0.21139644, 0.21384059, 0.98766719, 0.97980182,
  1426. 0.93519298]
  1427. assert_allclose(svals, np.linspace(-10, 10, num=N))
  1428. assert_allclose(ppcc, ppcc_expected)
  1429. def test_dist(self):
  1430. # Test that we can specify distributions both by name and as objects.
  1431. svals1, ppcc1 = stats.ppcc_plot(self.x, -10, 10, dist='tukeylambda')
  1432. svals2, ppcc2 = stats.ppcc_plot(self.x, -10, 10,
  1433. dist=stats.tukeylambda)
  1434. assert_allclose(svals1, svals2, rtol=1e-20)
  1435. assert_allclose(ppcc1, ppcc2, rtol=1e-20)
  1436. # Test that 'tukeylambda' is the default dist
  1437. svals3, ppcc3 = stats.ppcc_plot(self.x, -10, 10)
  1438. assert_allclose(svals1, svals3, rtol=1e-20)
  1439. assert_allclose(ppcc1, ppcc3, rtol=1e-20)
  1440. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  1441. def test_plot_kwarg(self):
  1442. # Check with the matplotlib.pyplot module
  1443. fig = plt.figure()
  1444. ax = fig.add_subplot(111)
  1445. stats.ppcc_plot(self.x, -20, 20, plot=plt)
  1446. fig.delaxes(ax)
  1447. # Check that a Matplotlib Axes object is accepted
  1448. ax = fig.add_subplot(111)
  1449. stats.ppcc_plot(self.x, -20, 20, plot=ax)
  1450. plt.close()
  1451. def test_invalid_inputs(self):
  1452. # `b` has to be larger than `a`
  1453. assert_raises(ValueError, stats.ppcc_plot, self.x, 1, 0)
  1454. # Raise ValueError when given an invalid distribution.
  1455. assert_raises(ValueError, stats.ppcc_plot, [1, 2, 3], 0, 1,
  1456. dist="plate_of_shrimp")
  1457. def test_empty(self):
  1458. # For consistency with probplot's return value for an empty array,
  1459. # ppcc contains all zeros and svals is the same as for a non-empty
  1460. # input.
  1461. svals, ppcc = stats.ppcc_plot([], 0, 1)
  1462. assert_allclose(svals, np.linspace(0, 1, num=80))
  1463. assert_allclose(ppcc, np.zeros(80, dtype=float))
  1464. class TestPpccMax:
  1465. def test_ppcc_max_bad_arg(self):
  1466. # Raise ValueError when given an invalid distribution.
  1467. data = [1]
  1468. assert_raises(ValueError, stats.ppcc_max, data, dist="plate_of_shrimp")
  1469. def test_ppcc_max_basic(self):
  1470. x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000,
  1471. random_state=1234567) + 1e4
  1472. assert_almost_equal(stats.ppcc_max(x), -0.71215366521264145, decimal=7)
  1473. def test_dist(self):
  1474. x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000,
  1475. random_state=1234567) + 1e4
  1476. # Test that we can specify distributions both by name and as objects.
  1477. max1 = stats.ppcc_max(x, dist='tukeylambda')
  1478. max2 = stats.ppcc_max(x, dist=stats.tukeylambda)
  1479. assert_almost_equal(max1, -0.71215366521264145, decimal=5)
  1480. assert_almost_equal(max2, -0.71215366521264145, decimal=5)
  1481. # Test that 'tukeylambda' is the default dist
  1482. max3 = stats.ppcc_max(x)
  1483. assert_almost_equal(max3, -0.71215366521264145, decimal=5)
  1484. def test_brack(self):
  1485. x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000,
  1486. random_state=1234567) + 1e4
  1487. assert_raises(ValueError, stats.ppcc_max, x, brack=(0.0, 1.0, 0.5))
  1488. assert_almost_equal(stats.ppcc_max(x, brack=(0, 1)),
  1489. -0.71215366521264145, decimal=7)
  1490. assert_almost_equal(stats.ppcc_max(x, brack=(-2, 2)),
  1491. -0.71215366521264145, decimal=7)
  1492. class TestBoxcox_llf:
  1493. def test_basic(self):
  1494. x = stats.norm.rvs(size=10000, loc=10, random_state=54321)
  1495. lmbda = 1
  1496. llf = stats.boxcox_llf(lmbda, x)
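# At lmbda=1 the (lmbda - 1)*sum(log(x)) term of the Box-Cox log-likelihood
# vanishes and the transformed data x - 1 have the same variance as x, so the
# log-likelihood should reduce to -N/2 * log(var(x)), the expression below.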
  1497. llf_expected = -x.size / 2. * np.log(np.sum(x.std()**2))
  1498. assert_allclose(llf, llf_expected)
  1499. def test_array_like(self):
  1500. x = stats.norm.rvs(size=100, loc=10, random_state=54321)
  1501. lmbda = 1
  1502. llf = stats.boxcox_llf(lmbda, x)
  1503. llf2 = stats.boxcox_llf(lmbda, list(x))
  1504. assert_allclose(llf, llf2, rtol=1e-12)
  1505. def test_2d_input(self):
  1506. # Note: boxcox_llf() was already working with 2-D input (sort of), so
  1507. # keep it like that. boxcox() doesn't work with 2-D input though, due
  1508. # to brent() returning a scalar.
  1509. x = stats.norm.rvs(size=100, loc=10, random_state=54321)
  1510. lmbda = 1
  1511. llf = stats.boxcox_llf(lmbda, x)
  1512. llf2 = stats.boxcox_llf(lmbda, np.vstack([x, x]).T)
  1513. assert_allclose([llf, llf], llf2, rtol=1e-12)
  1514. def test_empty(self):
  1515. assert_(np.isnan(stats.boxcox_llf(1, [])))
  1516. def test_gh_6873(self):
  1517. # Regression test for gh-6873.
  1518. # This example was taken from gh-7534, a duplicate of gh-6873.
  1519. data = [198.0, 233.0, 233.0, 392.0]
  1520. llf = stats.boxcox_llf(-8, data)
  1521. # The expected value was computed with mpmath.
  1522. assert_allclose(llf, -17.93934208579061)
  1523. # This is the data from github user Qukaiyi, given as an example
  1524. # of a data set that caused boxcox to fail.
  1525. _boxcox_data = [
  1526. 15957, 112079, 1039553, 711775, 173111, 307382, 183155, 53366, 760875,
  1527. 207500, 160045, 473714, 40194, 440319, 133261, 265444, 155590, 36660,
  1528. 904939, 55108, 138391, 339146, 458053, 63324, 1377727, 1342632, 41575,
  1529. 68685, 172755, 63323, 368161, 199695, 538214, 167760, 388610, 398855,
  1530. 1001873, 364591, 1320518, 194060, 194324, 2318551, 196114, 64225, 272000,
  1531. 198668, 123585, 86420, 1925556, 695798, 88664, 46199, 759135, 28051,
  1532. 345094, 1977752, 51778, 82746, 638126, 2560910, 45830, 140576, 1603787,
  1533. 57371, 548730, 5343629, 2298913, 998813, 2156812, 423966, 68350, 145237,
  1534. 131935, 1600305, 342359, 111398, 1409144, 281007, 60314, 242004, 113418,
  1535. 246211, 61940, 95858, 957805, 40909, 307955, 174159, 124278, 241193,
  1536. 872614, 304180, 146719, 64361, 87478, 509360, 167169, 933479, 620561,
  1537. 483333, 97416, 143518, 286905, 597837, 2556043, 89065, 69944, 196858,
  1538. 88883, 49379, 916265, 1527392, 626954, 54415, 89013, 2883386, 106096,
  1539. 402697, 45578, 349852, 140379, 34648, 757343, 1305442, 2054757, 121232,
  1540. 606048, 101492, 51426, 1820833, 83412, 136349, 1379924, 505977, 1303486,
  1541. 95853, 146451, 285422, 2205423, 259020, 45864, 684547, 182014, 784334,
  1542. 174793, 563068, 170745, 1195531, 63337, 71833, 199978, 2330904, 227335,
  1543. 898280, 75294, 2011361, 116771, 157489, 807147, 1321443, 1148635, 2456524,
  1544. 81839, 1228251, 97488, 1051892, 75397, 3009923, 2732230, 90923, 39735,
  1545. 132433, 225033, 337555, 1204092, 686588, 1062402, 40362, 1361829, 1497217,
  1546. 150074, 551459, 2019128, 39581, 45349, 1117187, 87845, 1877288, 164448,
  1547. 10338362, 24942, 64737, 769946, 2469124, 2366997, 259124, 2667585, 29175,
  1548. 56250, 74450, 96697, 5920978, 838375, 225914, 119494, 206004, 430907,
  1549. 244083, 219495, 322239, 407426, 618748, 2087536, 2242124, 4736149, 124624,
  1550. 406305, 240921, 2675273, 4425340, 821457, 578467, 28040, 348943, 48795,
  1551. 145531, 52110, 1645730, 1768364, 348363, 85042, 2673847, 81935, 169075,
  1552. 367733, 135474, 383327, 1207018, 93481, 5934183, 352190, 636533, 145870,
  1553. 55659, 146215, 73191, 248681, 376907, 1606620, 169381, 81164, 246390,
  1554. 236093, 885778, 335969, 49266, 381430, 307437, 350077, 34346, 49340,
  1555. 84715, 527120, 40163, 46898, 4609439, 617038, 2239574, 159905, 118337,
  1556. 120357, 430778, 3799158, 3516745, 54198, 2970796, 729239, 97848, 6317375,
  1557. 887345, 58198, 88111, 867595, 210136, 1572103, 1420760, 574046, 845988,
  1558. 509743, 397927, 1119016, 189955, 3883644, 291051, 126467, 1239907, 2556229,
  1559. 411058, 657444, 2025234, 1211368, 93151, 577594, 4842264, 1531713, 305084,
  1560. 479251, 20591, 1466166, 137417, 897756, 594767, 3606337, 32844, 82426,
  1561. 1294831, 57174, 290167, 322066, 813146, 5671804, 4425684, 895607, 450598,
  1562. 1048958, 232844, 56871, 46113, 70366, 701618, 97739, 157113, 865047,
  1563. 194810, 1501615, 1765727, 38125, 2733376, 40642, 437590, 127337, 106310,
  1564. 4167579, 665303, 809250, 1210317, 45750, 1853687, 348954, 156786, 90793,
  1565. 1885504, 281501, 3902273, 359546, 797540, 623508, 3672775, 55330, 648221,
  1566. 266831, 90030, 7118372, 735521, 1009925, 283901, 806005, 2434897, 94321,
  1567. 309571, 4213597, 2213280, 120339, 64403, 8155209, 1686948, 4327743,
  1568. 1868312, 135670, 3189615, 1569446, 706058, 58056, 2438625, 520619, 105201,
  1569. 141961, 179990, 1351440, 3148662, 2804457, 2760144, 70775, 33807, 1926518,
  1570. 2362142, 186761, 240941, 97860, 1040429, 1431035, 78892, 484039, 57845,
  1571. 724126, 3166209, 175913, 159211, 1182095, 86734, 1921472, 513546, 326016,
  1572. 1891609
  1573. ]
  1574. class TestBoxcox:
  1575. def test_fixed_lmbda(self):
  1576. x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
  1577. xt = stats.boxcox(x, lmbda=1)
  1578. assert_allclose(xt, x - 1)
  1579. xt = stats.boxcox(x, lmbda=-1)
  1580. assert_allclose(xt, 1 - 1/x)
  1581. xt = stats.boxcox(x, lmbda=0)
  1582. assert_allclose(xt, np.log(x))
  1583. # Also test that array_like input works
  1584. xt = stats.boxcox(list(x), lmbda=0)
  1585. assert_allclose(xt, np.log(x))
  1586. # test that constant input is accepted; see gh-12225
  1587. xt = stats.boxcox(np.ones(10), 2)
  1588. assert_equal(xt, np.zeros(10))
  1589. def test_lmbda_None(self):
  1590. # Start from normal rv's, do inverse transform to check that
  1591. # optimization function gets close to the right answer.
  1592. lmbda = 2.5
  1593. x = stats.norm.rvs(loc=10, size=50000, random_state=1245)
  1594. x_inv = (x * lmbda + 1)**(-lmbda)
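# Sanity check of the construction: Box-Cox with parameter -1/lmbda maps x_inv
# to ((lmbda*x + 1) - 1)/(-1/lmbda) = -lmbda**2 * x, which is again normal, so
# the fitted lambda should come out near -1/lmbda.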
  1595. xt, maxlog = stats.boxcox(x_inv)
  1596. assert_almost_equal(maxlog, -1 / lmbda, decimal=2)
  1597. def test_alpha(self):
  1598. rng = np.random.RandomState(1234)
  1599. x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5
  1600. # Some regular values for alpha, on a small sample size
  1601. _, _, interval = stats.boxcox(x, alpha=0.75)
  1602. assert_allclose(interval, [4.004485780226041, 5.138756355035744])
  1603. _, _, interval = stats.boxcox(x, alpha=0.05)
  1604. assert_allclose(interval, [1.2138178554857557, 8.209033272375663])
  1605. # Try some extreme values to check that we don't hit the N=500 limit
  1606. x = _old_loggamma_rvs(7, size=500, random_state=rng) + 15
  1607. _, _, interval = stats.boxcox(x, alpha=0.001)
  1608. assert_allclose(interval, [0.3988867, 11.40553131])
  1609. _, _, interval = stats.boxcox(x, alpha=0.999)
  1610. assert_allclose(interval, [5.83316246, 5.83735292])
  1611. def test_boxcox_bad_arg(self):
  1612. # Raise ValueError if any data value is negative.
  1613. x = np.array([-1, 2])
  1614. assert_raises(ValueError, stats.boxcox, x)
  1615. # Raise ValueError if data is constant.
  1616. assert_raises(ValueError, stats.boxcox, np.array([1]))
  1617. # Raise ValueError if data is not 1-dimensional.
  1618. assert_raises(ValueError, stats.boxcox, np.array([[1], [2]]))
  1619. def test_empty(self):
  1620. assert_(stats.boxcox([]).shape == (0,))
  1621. def test_gh_6873(self):
  1622. # Regression test for gh-6873.
  1623. y, lam = stats.boxcox(_boxcox_data)
  1624. # The expected value of lam was computed with the function
  1625. # powerTransform in the R library 'car'. I trust that value
  1626. # to only about five significant digits.
  1627. assert_allclose(lam, -0.051654, rtol=1e-5)
  1628. @pytest.mark.parametrize("bounds", [(-1, 1), (1.1, 2), (-2, -1.1)])
  1629. def test_bounded_optimizer_within_bounds(self, bounds):
  1630. # Define custom optimizer with bounds.
  1631. def optimizer(fun):
  1632. return optimize.minimize_scalar(fun, bounds=bounds,
  1633. method="bounded")
  1634. _, lmbda = stats.boxcox(_boxcox_data, lmbda=None, optimizer=optimizer)
  1635. assert bounds[0] < lmbda < bounds[1]
  1636. def test_bounded_optimizer_against_unbounded_optimizer(self):
  1637. # Test whether setting bounds on optimizer excludes solution from
  1638. # unbounded optimizer.
  1639. # Get unbounded solution.
  1640. _, lmbda = stats.boxcox(_boxcox_data, lmbda=None)
  1641. # Set tolerance and bounds around solution.
  1642. bounds = (lmbda + 0.1, lmbda + 1)
  1643. options = {'xatol': 1e-12}
  1644. def optimizer(fun):
  1645. return optimize.minimize_scalar(fun, bounds=bounds,
  1646. method="bounded", options=options)
  1647. # Check bounded solution. Lower bound should be active.
  1648. _, lmbda_bounded = stats.boxcox(_boxcox_data, lmbda=None,
  1649. optimizer=optimizer)
  1650. assert lmbda_bounded != lmbda
  1651. assert_allclose(lmbda_bounded, bounds[0])
  1652. @pytest.mark.parametrize("optimizer", ["str", (1, 2), 0.1])
  1653. def test_bad_optimizer_type_raises_error(self, optimizer):
  1654. # Check if error is raised if string, tuple or float is passed
  1655. with pytest.raises(ValueError, match="`optimizer` must be a callable"):
  1656. stats.boxcox(_boxcox_data, lmbda=None, optimizer=optimizer)
  1657. def test_bad_optimizer_value_raises_error(self):
  1658. # Check if error is raised if `optimizer` function does not return
  1659. # `OptimizeResult` object
  1660. # Define test function that always returns 1
  1661. def optimizer(fun):
  1662. return 1
  1663. message = "`optimizer` must return an object containing the optimal..."
  1664. with pytest.raises(ValueError, match=message):
  1665. stats.boxcox(_boxcox_data, lmbda=None, optimizer=optimizer)
  1666. class TestBoxcoxNormmax:
  1667. def setup_method(self):
  1668. self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
  1669. def test_pearsonr(self):
  1670. maxlog = stats.boxcox_normmax(self.x)
  1671. assert_allclose(maxlog, 1.804465, rtol=1e-6)
  1672. def test_mle(self):
  1673. maxlog = stats.boxcox_normmax(self.x, method='mle')
  1674. assert_allclose(maxlog, 1.758101, rtol=1e-6)
  1675. # Check that boxcox() uses 'mle'
  1676. _, maxlog_boxcox = stats.boxcox(self.x)
  1677. assert_allclose(maxlog_boxcox, maxlog)
  1678. def test_all(self):
  1679. maxlog_all = stats.boxcox_normmax(self.x, method='all')
  1680. assert_allclose(maxlog_all, [1.804465, 1.758101], rtol=1e-6)
  1681. @pytest.mark.parametrize("method", ["mle", "pearsonr", "all"])
  1682. @pytest.mark.parametrize("bounds", [(-1, 1), (1.1, 2), (-2, -1.1)])
  1683. def test_bounded_optimizer_within_bounds(self, method, bounds):
  1684. def optimizer(fun):
  1685. return optimize.minimize_scalar(fun, bounds=bounds,
  1686. method="bounded")
  1687. maxlog = stats.boxcox_normmax(self.x, method=method,
  1688. optimizer=optimizer)
  1689. assert np.all(bounds[0] < maxlog)
  1690. assert np.all(maxlog < bounds[1])
  1691. def test_user_defined_optimizer(self):
  1692. # tests an optimizer that is not based on scipy.optimize.minimize
  1693. lmbda = stats.boxcox_normmax(self.x)
  1694. lmbda_rounded = np.round(lmbda, 5)
  1695. lmbda_range = np.linspace(lmbda_rounded-0.01, lmbda_rounded+0.01, 1001)
  1696. class MyResult:
  1697. pass
  1698. def optimizer(fun):
  1699. # brute force minimum over the range
  1700. objs = []
  1701. for lmbda in lmbda_range:
  1702. objs.append(fun(lmbda))
  1703. res = MyResult()
  1704. res.x = lmbda_range[np.argmin(objs)]
  1705. return res
  1706. lmbda2 = stats.boxcox_normmax(self.x, optimizer=optimizer)
  1707. assert lmbda2 != lmbda # not identical
  1708. assert_allclose(lmbda2, lmbda, 1e-5) # but as close as it should be
  1709. def test_user_defined_optimizer_and_brack_raises_error(self):
  1710. optimizer = optimize.minimize_scalar
  1711. # Using default `brack=None` with user-defined `optimizer` works as
  1712. # expected.
  1713. stats.boxcox_normmax(self.x, brack=None, optimizer=optimizer)
  1714. # Using user-defined `brack` with user-defined `optimizer` is expected
  1715. # to throw an error. Instead, users should specify
  1716. # optimizer-specific parameters in the optimizer function itself.
  1717. with pytest.raises(ValueError, match="`brack` must be None if "
  1718. "`optimizer` is given"):
  1719. stats.boxcox_normmax(self.x, brack=(-2.0, 2.0),
  1720. optimizer=optimizer)
  1721. class TestBoxcoxNormplot:
  1722. def setup_method(self):
  1723. self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5
  1724. def test_basic(self):
  1725. N = 5
  1726. lmbdas, ppcc = stats.boxcox_normplot(self.x, -10, 10, N=N)
  1727. ppcc_expected = [0.57783375, 0.83610988, 0.97524311, 0.99756057,
  1728. 0.95843297]
  1729. assert_allclose(lmbdas, np.linspace(-10, 10, num=N))
  1730. assert_allclose(ppcc, ppcc_expected)
  1731. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  1732. def test_plot_kwarg(self):
  1733. # Check with the matplotlib.pyplot module
  1734. fig = plt.figure()
  1735. ax = fig.add_subplot(111)
  1736. stats.boxcox_normplot(self.x, -20, 20, plot=plt)
  1737. fig.delaxes(ax)
  1738. # Check that a Matplotlib Axes object is accepted
  1739. ax = fig.add_subplot(111)
  1740. stats.boxcox_normplot(self.x, -20, 20, plot=ax)
  1741. plt.close()
  1742. def test_invalid_inputs(self):
  1743. # `lb` has to be larger than `la`
  1744. assert_raises(ValueError, stats.boxcox_normplot, self.x, 1, 0)
  1745. # `x` can not contain negative values
  1746. assert_raises(ValueError, stats.boxcox_normplot, [-1, 1], 0, 1)
  1747. def test_empty(self):
  1748. assert_(stats.boxcox_normplot([], 0, 1).size == 0)
  1749. class TestYeojohnson_llf:
  1750. def test_array_like(self):
  1751. x = stats.norm.rvs(size=100, loc=0, random_state=54321)
  1752. lmbda = 1
  1753. llf = stats.yeojohnson_llf(lmbda, x)
  1754. llf2 = stats.yeojohnson_llf(lmbda, list(x))
  1755. assert_allclose(llf, llf2, rtol=1e-12)
  1756. def test_2d_input(self):
  1757. x = stats.norm.rvs(size=100, loc=10, random_state=54321)
  1758. lmbda = 1
  1759. llf = stats.yeojohnson_llf(lmbda, x)
  1760. llf2 = stats.yeojohnson_llf(lmbda, np.vstack([x, x]).T)
  1761. assert_allclose([llf, llf], llf2, rtol=1e-12)
  1762. def test_empty(self):
  1763. assert_(np.isnan(stats.yeojohnson_llf(1, [])))
  1764. class TestYeojohnson:
  1765. def test_fixed_lmbda(self):
  1766. rng = np.random.RandomState(12345)
  1767. # Test positive input
  1768. x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5
  1769. assert np.all(x > 0)
  1770. xt = stats.yeojohnson(x, lmbda=1)
  1771. assert_allclose(xt, x)
  1772. xt = stats.yeojohnson(x, lmbda=-1)
  1773. assert_allclose(xt, 1 - 1 / (x + 1))
  1774. xt = stats.yeojohnson(x, lmbda=0)
  1775. assert_allclose(xt, np.log(x + 1))
  1776. xt = stats.yeojohnson(x, lmbda=1)
  1777. assert_allclose(xt, x)
  1778. # Test negative input
  1779. x = _old_loggamma_rvs(5, size=50, random_state=rng) - 5
  1780. assert np.all(x < 0)
  1781. xt = stats.yeojohnson(x, lmbda=2)
  1782. assert_allclose(xt, -np.log(-x + 1))
  1783. xt = stats.yeojohnson(x, lmbda=1)
  1784. assert_allclose(xt, x)
  1785. xt = stats.yeojohnson(x, lmbda=3)
  1786. assert_allclose(xt, 1 / (-x + 1) - 1)
  1787. # test both positive and negative input
  1788. x = _old_loggamma_rvs(5, size=50, random_state=rng) - 2
  1789. assert not np.all(x < 0)
  1790. assert not np.all(x >= 0)
  1791. pos = x >= 0
  1792. xt = stats.yeojohnson(x, lmbda=1)
  1793. assert_allclose(xt[pos], x[pos])
  1794. xt = stats.yeojohnson(x, lmbda=-1)
  1795. assert_allclose(xt[pos], 1 - 1 / (x[pos] + 1))
  1796. xt = stats.yeojohnson(x, lmbda=0)
  1797. assert_allclose(xt[pos], np.log(x[pos] + 1))
  1798. xt = stats.yeojohnson(x, lmbda=1)
  1799. assert_allclose(xt[pos], x[pos])
  1800. neg = ~pos
  1801. xt = stats.yeojohnson(x, lmbda=2)
  1802. assert_allclose(xt[neg], -np.log(-x[neg] + 1))
  1803. xt = stats.yeojohnson(x, lmbda=1)
  1804. assert_allclose(xt[neg], x[neg])
  1805. xt = stats.yeojohnson(x, lmbda=3)
  1806. assert_allclose(xt[neg], 1 / (-x[neg] + 1) - 1)
  1807. @pytest.mark.parametrize('lmbda', [0, .1, .5, 2])
  1808. def test_lmbda_None(self, lmbda):
  1809. # Start from normal rv's, do inverse transform to check that
  1810. # optimization function gets close to the right answer.
  1811. def _inverse_transform(x, lmbda):
  1812. x_inv = np.zeros(x.shape, dtype=x.dtype)
  1813. pos = x >= 0
  1814. # when x >= 0
  1815. if abs(lmbda) < np.spacing(1.):
  1816. x_inv[pos] = np.exp(x[pos]) - 1
  1817. else: # lmbda != 0
  1818. x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1
  1819. # when x < 0
  1820. if abs(lmbda - 2) > np.spacing(1.):
  1821. x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1,
  1822. 1 / (2 - lmbda))
  1823. else: # lmbda == 2
  1824. x_inv[~pos] = 1 - np.exp(-x[~pos])
  1825. return x_inv
  1826. n_samples = 20000
  1827. np.random.seed(1234567)
  1828. x = np.random.normal(loc=0, scale=1, size=(n_samples))
  1829. x_inv = _inverse_transform(x, lmbda)
  1830. xt, maxlog = stats.yeojohnson(x_inv)
  1831. assert_allclose(maxlog, lmbda, atol=1e-2)
  1832. assert_almost_equal(0, np.linalg.norm(x - xt) / n_samples, decimal=2)
  1833. assert_almost_equal(0, xt.mean(), decimal=1)
  1834. assert_almost_equal(1, xt.std(), decimal=1)
  1835. def test_empty(self):
  1836. assert_(stats.yeojohnson([]).shape == (0,))
  1837. def test_array_like(self):
  1838. x = stats.norm.rvs(size=100, loc=0, random_state=54321)
  1839. xt1, _ = stats.yeojohnson(x)
  1840. xt2, _ = stats.yeojohnson(list(x))
  1841. assert_allclose(xt1, xt2, rtol=1e-12)
  1842. @pytest.mark.parametrize('dtype', [np.complex64, np.complex128])
  1843. def test_input_dtype_complex(self, dtype):
  1844. x = np.arange(6, dtype=dtype)
  1845. err_msg = ('Yeo-Johnson transformation is not defined for complex '
  1846. 'numbers.')
  1847. with pytest.raises(ValueError, match=err_msg):
  1848. stats.yeojohnson(x)
  1849. @pytest.mark.parametrize('dtype', [np.int8, np.uint8, np.int16, np.int32])
  1850. def test_input_dtype_integer(self, dtype):
  1851. x_int = np.arange(8, dtype=dtype)
  1852. x_float = np.arange(8, dtype=np.float64)
  1853. xt_int, lmbda_int = stats.yeojohnson(x_int)
  1854. xt_float, lmbda_float = stats.yeojohnson(x_float)
  1855. assert_allclose(xt_int, xt_float, rtol=1e-7)
  1856. assert_allclose(lmbda_int, lmbda_float, rtol=1e-7)
  1857. def test_input_high_variance(self):
  1858. # non-regression test for gh-10821
  1859. x = np.array([3251637.22, 620695.44, 11642969.00, 2223468.22,
  1860. 85307500.00, 16494389.89, 917215.88, 11642969.00,
  1861. 2145773.87, 4962000.00, 620695.44, 651234.50,
  1862. 1907876.71, 4053297.88, 3251637.22, 3259103.08,
  1863. 9547969.00, 20631286.23, 12807072.08, 2383819.84,
  1864. 90114500.00, 17209575.46, 12852969.00, 2414609.99,
  1865. 2170368.23])
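# For nonnegative data the Yeo-Johnson transform of x coincides with the
# Box-Cox transform of x + 1, so both the transformed values and the fitted
# lambdas should agree here.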
  1866. xt_yeo, lam_yeo = stats.yeojohnson(x)
  1867. xt_box, lam_box = stats.boxcox(x + 1)
  1868. assert_allclose(xt_yeo, xt_box, rtol=1e-6)
  1869. assert_allclose(lam_yeo, lam_box, rtol=1e-6)
  1870. class TestYeojohnsonNormmax:
  1871. def setup_method(self):
  1872. self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
  1873. def test_mle(self):
  1874. maxlog = stats.yeojohnson_normmax(self.x)
  1875. assert_allclose(maxlog, 1.876393, rtol=1e-6)
  1876. def test_darwin_example(self):
  1877. # test from original paper "A new family of power transformations to
  1878. # improve normality or symmetry" by Yeo and Johnson.
  1879. x = [6.1, -8.4, 1.0, 2.0, 0.7, 2.9, 3.5, 5.1, 1.8, 3.6, 7.0, 3.0, 9.3,
  1880. 7.5, -6.0]
  1881. lmbda = stats.yeojohnson_normmax(x)
  1882. assert np.allclose(lmbda, 1.305, atol=1e-3)
  1883. class TestCircFuncs:
  1884. # In gh-5747, the R package `circular` was used to calculate reference
  1885. # values for the circular variance, e.g.:
  1886. # library(circular)
  1887. # options(digits=16)
  1888. # x = c(0, 2*pi/3, 5*pi/3)
  1889. # var.circular(x)
  1890. @pytest.mark.parametrize("test_func,expected",
  1891. [(stats.circmean, 0.167690146),
  1892. (stats.circvar, 0.006455174270186603),
  1893. (stats.circstd, 6.520702116)])
  1894. def test_circfuncs(self, test_func, expected):
  1895. x = np.array([355, 5, 2, 359, 10, 350])
  1896. assert_allclose(test_func(x, high=360), expected, rtol=1e-7)
  1897. def test_circfuncs_small(self):
  1898. x = np.array([20, 21, 22, 18, 19, 20.5, 19.2])
  1899. M1 = x.mean()
  1900. M2 = stats.circmean(x, high=360)
  1901. assert_allclose(M2, M1, rtol=1e-5)
  1902. V1 = (x*np.pi/180).var()
  1903. # for small variations, circvar is approximately half the
  1904. # linear variance
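# (heuristically: for tightly clustered angles the mean resultant length is
# R ~= 1 - s**2/2, with s**2 the angular variance in radians, so the circular
# variance 1 - R ~= s**2/2)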
  1905. V1 = V1 / 2.
  1906. V2 = stats.circvar(x, high=360)
  1907. assert_allclose(V2, V1, rtol=1e-4)
  1908. S1 = x.std()
  1909. S2 = stats.circstd(x, high=360)
  1910. assert_allclose(S2, S1, rtol=1e-4)
  1911. @pytest.mark.parametrize("test_func, numpy_func",
  1912. [(stats.circmean, np.mean),
  1913. (stats.circvar, np.var),
  1914. (stats.circstd, np.std)])
  1915. def test_circfuncs_close(self, test_func, numpy_func):
  1916. # circfuncs should handle very similar inputs (gh-12740)
  1917. x = np.array([0.12675364631578953] * 10 + [0.12675365920187928] * 100)
  1918. circstat = test_func(x)
  1919. normal = numpy_func(x)
  1920. assert_allclose(circstat, normal, atol=2e-8)
  1921. def test_circmean_axis(self):
  1922. x = np.array([[355, 5, 2, 359, 10, 350],
  1923. [351, 7, 4, 352, 9, 349],
  1924. [357, 9, 8, 358, 4, 356]])
  1925. M1 = stats.circmean(x, high=360)
  1926. M2 = stats.circmean(x.ravel(), high=360)
  1927. assert_allclose(M1, M2, rtol=1e-14)
  1928. M1 = stats.circmean(x, high=360, axis=1)
  1929. M2 = [stats.circmean(x[i], high=360) for i in range(x.shape[0])]
  1930. assert_allclose(M1, M2, rtol=1e-14)
  1931. M1 = stats.circmean(x, high=360, axis=0)
  1932. M2 = [stats.circmean(x[:, i], high=360) for i in range(x.shape[1])]
  1933. assert_allclose(M1, M2, rtol=1e-14)
  1934. def test_circvar_axis(self):
  1935. x = np.array([[355, 5, 2, 359, 10, 350],
  1936. [351, 7, 4, 352, 9, 349],
  1937. [357, 9, 8, 358, 4, 356]])
  1938. V1 = stats.circvar(x, high=360)
  1939. V2 = stats.circvar(x.ravel(), high=360)
  1940. assert_allclose(V1, V2, rtol=1e-11)
  1941. V1 = stats.circvar(x, high=360, axis=1)
  1942. V2 = [stats.circvar(x[i], high=360) for i in range(x.shape[0])]
  1943. assert_allclose(V1, V2, rtol=1e-11)
  1944. V1 = stats.circvar(x, high=360, axis=0)
  1945. V2 = [stats.circvar(x[:, i], high=360) for i in range(x.shape[1])]
  1946. assert_allclose(V1, V2, rtol=1e-11)
  1947. def test_circstd_axis(self):
  1948. x = np.array([[355, 5, 2, 359, 10, 350],
  1949. [351, 7, 4, 352, 9, 349],
  1950. [357, 9, 8, 358, 4, 356]])
  1951. S1 = stats.circstd(x, high=360)
  1952. S2 = stats.circstd(x.ravel(), high=360)
  1953. assert_allclose(S1, S2, rtol=1e-11)
  1954. S1 = stats.circstd(x, high=360, axis=1)
  1955. S2 = [stats.circstd(x[i], high=360) for i in range(x.shape[0])]
  1956. assert_allclose(S1, S2, rtol=1e-11)
  1957. S1 = stats.circstd(x, high=360, axis=0)
  1958. S2 = [stats.circstd(x[:, i], high=360) for i in range(x.shape[1])]
  1959. assert_allclose(S1, S2, rtol=1e-11)
  1960. @pytest.mark.parametrize("test_func,expected",
  1961. [(stats.circmean, 0.167690146),
  1962. (stats.circvar, 0.006455174270186603),
  1963. (stats.circstd, 6.520702116)])
  1964. def test_circfuncs_array_like(self, test_func, expected):
  1965. x = [355, 5, 2, 359, 10, 350]
  1966. assert_allclose(test_func(x, high=360), expected, rtol=1e-7)
  1967. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  1968. stats.circstd])
  1969. def test_empty(self, test_func):
  1970. assert_(np.isnan(test_func([])))
  1971. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  1972. stats.circstd])
  1973. def test_nan_propagate(self, test_func):
  1974. x = [355, 5, 2, 359, 10, 350, np.nan]
  1975. assert_(np.isnan(test_func(x, high=360)))
  1976. @pytest.mark.parametrize("test_func,expected",
  1977. [(stats.circmean,
  1978. {None: np.nan, 0: 355.66582264, 1: 0.28725053}),
  1979. (stats.circvar,
  1980. {None: np.nan,
  1981. 0: 0.002570671054089924,
  1982. 1: 0.005545914017677123}),
  1983. (stats.circstd,
  1984. {None: np.nan, 0: 4.11093193, 1: 6.04265394})])
  1985. def test_nan_propagate_array(self, test_func, expected):
  1986. x = np.array([[355, 5, 2, 359, 10, 350, 1],
  1987. [351, 7, 4, 352, 9, 349, np.nan],
  1988. [1, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]])
  1989. for axis in expected.keys():
  1990. out = test_func(x, high=360, axis=axis)
  1991. if axis is None:
  1992. assert_(np.isnan(out))
  1993. else:
  1994. assert_allclose(out[0], expected[axis], rtol=1e-7)
  1995. assert_(np.isnan(out[1:]).all())
  1996. @pytest.mark.parametrize("test_func,expected",
  1997. [(stats.circmean,
  1998. {None: 359.4178026893944,
  1999. 0: np.array([353.0, 6.0, 3.0, 355.5, 9.5,
  2000. 349.5]),
  2001. 1: np.array([0.16769015, 358.66510252])}),
  2002. (stats.circvar,
  2003. {None: 0.008396678483192477,
  2004. 0: np.array([1.9997969, 0.4999873, 0.4999873,
  2005. 6.1230956, 0.1249992, 0.1249992]
  2006. )*(np.pi/180)**2,
  2007. 1: np.array([0.006455174270186603,
  2008. 0.01016767581393285])}),
  2009. (stats.circstd,
  2010. {None: 7.440570778057074,
  2011. 0: np.array([2.00020313, 1.00002539, 1.00002539,
  2012. 3.50108929, 0.50000317,
  2013. 0.50000317]),
  2014. 1: np.array([6.52070212, 8.19138093])})])
  2015. def test_nan_omit_array(self, test_func, expected):
  2016. x = np.array([[355, 5, 2, 359, 10, 350, np.nan],
  2017. [351, 7, 4, 352, 9, 349, np.nan],
  2018. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]])
  2019. for axis in expected.keys():
  2020. out = test_func(x, high=360, nan_policy='omit', axis=axis)
  2021. if axis is None:
  2022. assert_allclose(out, expected[axis], rtol=1e-7)
  2023. else:
  2024. assert_allclose(out[:-1], expected[axis], rtol=1e-7)
  2025. assert_(np.isnan(out[-1]))
  2026. @pytest.mark.parametrize("test_func,expected",
  2027. [(stats.circmean, 0.167690146),
  2028. (stats.circvar, 0.006455174270186603),
  2029. (stats.circstd, 6.520702116)])
  2030. def test_nan_omit(self, test_func, expected):
  2031. x = [355, 5, 2, 359, 10, 350, np.nan]
  2032. assert_allclose(test_func(x, high=360, nan_policy='omit'),
  2033. expected, rtol=1e-7)
  2034. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2035. stats.circstd])
  2036. def test_nan_omit_all(self, test_func):
  2037. x = [np.nan, np.nan, np.nan, np.nan, np.nan]
  2038. assert_(np.isnan(test_func(x, nan_policy='omit')))
  2039. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2040. stats.circstd])
  2041. def test_nan_omit_all_axis(self, test_func):
  2042. x = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan],
  2043. [np.nan, np.nan, np.nan, np.nan, np.nan]])
  2044. out = test_func(x, nan_policy='omit', axis=1)
  2045. assert_(np.isnan(out).all())
  2046. assert_(len(out) == 2)
  2047. @pytest.mark.parametrize("x",
  2048. [[355, 5, 2, 359, 10, 350, np.nan],
  2049. np.array([[355, 5, 2, 359, 10, 350, np.nan],
  2050. [351, 7, 4, 352, np.nan, 9, 349]])])
  2051. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2052. stats.circstd])
  2053. def test_nan_raise(self, test_func, x):
  2054. assert_raises(ValueError, test_func, x, high=360, nan_policy='raise')
  2055. @pytest.mark.parametrize("x",
  2056. [[355, 5, 2, 359, 10, 350, np.nan],
  2057. np.array([[355, 5, 2, 359, 10, 350, np.nan],
  2058. [351, 7, 4, 352, np.nan, 9, 349]])])
  2059. @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar,
  2060. stats.circstd])
  2061. def test_bad_nan_policy(self, test_func, x):
  2062. assert_raises(ValueError, test_func, x, high=360, nan_policy='foobar')
  2063. def test_circmean_scalar(self):
  2064. x = 1.
  2065. M1 = x
  2066. M2 = stats.circmean(x)
  2067. assert_allclose(M2, M1, rtol=1e-5)
  2068. def test_circmean_range(self):
  2069. # regression test for gh-6420: circmean(..., high, low) must lie
  2070. # between `low` and `high`
  2071. m = stats.circmean(np.arange(0, 2, 0.1), np.pi, -np.pi)
  2072. assert_(m < np.pi)
  2073. assert_(m > -np.pi)
  2074. def test_circfuncs_uint8(self):
  2075. # regression test for gh-7255: overflow when working with
  2076. # numpy uint8 data type
  2077. x = np.array([150, 10], dtype='uint8')
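# On a 0-180 circle, 150 and 10 are only 40 apart across the wrap-around, so
# the circular mean is 170 rather than the arithmetic mean 80; with uint8 input
# the intermediate scaling could overflow before the gh-7255 fix.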
  2078. assert_equal(stats.circmean(x, high=180), 170.0)
  2079. assert_allclose(stats.circvar(x, high=180), 0.2339555554617, rtol=1e-7)
  2080. assert_allclose(stats.circstd(x, high=180), 20.91551378, rtol=1e-7)
  2081. class TestMedianTest:
  2082. def test_bad_n_samples(self):
  2083. # median_test requires at least two samples.
  2084. assert_raises(ValueError, stats.median_test, [1, 2, 3])
  2085. def test_empty_sample(self):
  2086. # Each sample must contain at least one value.
  2087. assert_raises(ValueError, stats.median_test, [], [1, 2, 3])
  2088. def test_empty_when_ties_ignored(self):
  2089. # The grand median is 1, and all values in the first argument are
  2090. # equal to the grand median. With ties="ignore", those values are
  2091. # ignored, which results in the first sample being (in effect) empty.
  2092. # This should raise a ValueError.
  2093. assert_raises(ValueError, stats.median_test,
  2094. [1, 1, 1, 1], [2, 0, 1], [2, 0], ties="ignore")
  2095. def test_empty_contingency_row(self):
  2096. # The grand median is 1, and with the default ties="below", all the
  2097. # values in the samples are counted as being below the grand median.
  2098. # This would result in a row of zeros in the contingency table, which is
  2099. # an error.
  2100. assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1])
  2101. # With ties="above", all the values are counted as above the
  2102. # grand median.
  2103. assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1],
  2104. ties="above")
  2105. def test_bad_ties(self):
  2106. assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5],
  2107. ties="foo")
  2108. def test_bad_nan_policy(self):
  2109. assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5], nan_policy='foobar')
  2110. def test_bad_keyword(self):
  2111. assert_raises(TypeError, stats.median_test, [1, 2, 3], [4, 5],
  2112. foo="foo")
  2113. def test_simple(self):
  2114. x = [1, 2, 3]
  2115. y = [1, 2, 3]
  2116. stat, p, med, tbl = stats.median_test(x, y)
  2117. # The median is floating point, but this equality test should be safe.
  2118. assert_equal(med, 2.0)
  2119. assert_array_equal(tbl, [[1, 1], [2, 2]])
  2120. # The expected values of the contingency table equal the contingency
  2121. # table, so the statistic should be 0 and the p-value should be 1.
  2122. assert_equal(stat, 0)
  2123. assert_equal(p, 1)
  2124. def test_ties_options(self):
  2125. # Test the contingency table calculation.
  2126. x = [1, 2, 3, 4]
  2127. y = [5, 6]
  2128. z = [7, 8, 9]
  2129. # grand median is 5.
  2130. # Default 'ties' option is "below".
  2131. stat, p, m, tbl = stats.median_test(x, y, z)
  2132. assert_equal(m, 5)
  2133. assert_equal(tbl, [[0, 1, 3], [4, 1, 0]])
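# With the default ties="below", the first row counts values above the grand
# median (5) and the second row values at or below it: x=[1,2,3,4] -> (0, 4),
# y=[5,6] -> (1, 1) because the tie at 5 is counted below, z=[7,8,9] -> (3, 0).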
  2134. stat, p, m, tbl = stats.median_test(x, y, z, ties="ignore")
  2135. assert_equal(m, 5)
  2136. assert_equal(tbl, [[0, 1, 3], [4, 0, 0]])
  2137. stat, p, m, tbl = stats.median_test(x, y, z, ties="above")
  2138. assert_equal(m, 5)
  2139. assert_equal(tbl, [[0, 2, 3], [4, 0, 0]])
  2140. def test_nan_policy_options(self):
  2141. x = [1, 2, np.nan]
  2142. y = [4, 5, 6]
  2143. mt1 = stats.median_test(x, y, nan_policy='propagate')
  2144. s, p, m, t = stats.median_test(x, y, nan_policy='omit')
  2145. assert_equal(mt1, (np.nan, np.nan, np.nan, None))
  2146. assert_allclose(s, 0.31250000000000006)
  2147. assert_allclose(p, 0.57615012203057869)
  2148. assert_equal(m, 4.0)
  2149. assert_equal(t, np.array([[0, 2],[2, 1]]))
  2150. assert_raises(ValueError, stats.median_test, x, y, nan_policy='raise')
  2151. def test_basic(self):
  2152. # median_test calls chi2_contingency to compute the test statistic
  2153. # and p-value. Make sure it hasn't screwed up the call...
  2154. x = [1, 2, 3, 4, 5]
  2155. y = [2, 4, 6, 8]
  2156. stat, p, m, tbl = stats.median_test(x, y)
  2157. assert_equal(m, 4)
  2158. assert_equal(tbl, [[1, 2], [4, 2]])
  2159. exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl)
  2160. assert_allclose(stat, exp_stat)
  2161. assert_allclose(p, exp_p)
  2162. stat, p, m, tbl = stats.median_test(x, y, lambda_=0)
  2163. assert_equal(m, 4)
  2164. assert_equal(tbl, [[1, 2], [4, 2]])
  2165. exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, lambda_=0)
  2166. assert_allclose(stat, exp_stat)
  2167. assert_allclose(p, exp_p)
  2168. stat, p, m, tbl = stats.median_test(x, y, correction=False)
  2169. assert_equal(m, 4)
  2170. assert_equal(tbl, [[1, 2], [4, 2]])
  2171. exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, correction=False)
  2172. assert_allclose(stat, exp_stat)
  2173. assert_allclose(p, exp_p)
  2174. @pytest.mark.parametrize("correction", [False, True])
  2175. def test_result(self, correction):
  2176. x = [1, 2, 3]
  2177. y = [1, 2, 3]
  2178. res = stats.median_test(x, y, correction=correction)
  2179. assert_equal((res.statistic, res.pvalue, res.median, res.table), res)


class TestDirectionalStats:
    # Reference implementations are not available
    def test_directional_stats_correctness(self):
        # Data from Fisher: Dispersion on a sphere, 1953 and
        # Mardia and Jupp, Directional Statistics.
        decl = -np.deg2rad(np.array([343.2, 62., 36.9, 27., 359.,
                                     5.7, 50.4, 357.6, 44.]))
        incl = -np.deg2rad(np.array([66.1, 68.7, 70.1, 82.1, 79.5,
                                     73., 69.3, 58.8, 51.4]))
        data = np.stack((np.cos(incl) * np.cos(decl),
                         np.cos(incl) * np.sin(decl),
                         np.sin(incl)),
                        axis=1)

        dirstats = stats.directional_stats(data)
        directional_mean = dirstats.mean_direction
        mean_rounded = np.round(directional_mean, 4)

        reference_mean = np.array([0.2984, -0.1346, -0.9449])
        assert_allclose(mean_rounded, reference_mean)
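
        # Note: decl/incl are declination and inclination angles; the stack
        # above maps them to Cartesian unit vectors via
        # (cos(incl)cos(decl), cos(incl)sin(decl), sin(incl)), so each row of
        # `data` already has unit length.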

    @pytest.mark.parametrize('angles, ref', [
        ([-np.pi/2, np.pi/2], 1.),
        ([0, 2*np.pi], 0.)
    ])
    def test_directional_stats_2d_special_cases(self, angles, ref):
        if callable(ref):
            ref = ref(angles)
        data = np.stack([np.cos(angles), np.sin(angles)], axis=1)
        res = 1 - stats.directional_stats(data).mean_resultant_length
        assert_allclose(res, ref)

    def test_directional_stats_2d(self):
        # Test that for circular data directional_stats
        # yields the same result as circmean/circvar
        rng = np.random.default_rng(0xec9a6899d5a2830e0d1af479dbe1fd0c)
        testdata = 2 * np.pi * rng.random((1000, ))
        testdata_vector = np.stack((np.cos(testdata),
                                    np.sin(testdata)),
                                   axis=1)
        dirstats = stats.directional_stats(testdata_vector)
        directional_mean = dirstats.mean_direction
        directional_mean_angle = np.arctan2(directional_mean[1],
                                            directional_mean[0])
        directional_mean_angle = directional_mean_angle % (2*np.pi)
        circmean = stats.circmean(testdata)
        assert_allclose(circmean, directional_mean_angle)

        directional_var = 1 - dirstats.mean_resultant_length
        circular_var = stats.circvar(testdata)
        assert_allclose(directional_var, circular_var)
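
        # Note: for 2-D unit vectors directional_stats reduces to circular
        # statistics: the angle of mean_direction (recovered with arctan2)
        # matches circmean, and 1 - mean_resultant_length matches circvar,
        # which is what the assertions above check.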

    def test_directional_mean_higher_dim(self):
        # test that directional_stats works for higher dimensions
        # here a 4D array is reduced over axis = 2
        data = np.array([[0.8660254, 0.5, 0.],
                         [0.8660254, -0.5, 0.]])
        full_array = np.tile(data, (2, 2, 2, 1))
        expected = np.array([[[1., 0., 0.],
                              [1., 0., 0.]],
                             [[1., 0., 0.],
                              [1., 0., 0.]]])
        dirstats = stats.directional_stats(full_array, axis=2)
        assert_allclose(expected, dirstats.mean_direction)
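
        # Note: the last axis holds the vector components and `axis` selects
        # the observations to average over, so the (2, 2, 4, 3) input built
        # with np.tile reduces to a (2, 2, 3) mean_direction, matching
        # `expected`.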

    def test_directional_stats_list_ndarray_input(self):
        # test that list and numpy array inputs yield same results
        data = [[0.8660254, 0.5, 0.], [0.8660254, -0.5, 0]]
        data_array = np.asarray(data)
        res = stats.directional_stats(data)
        ref = stats.directional_stats(data_array)
        assert_allclose(res.mean_direction, ref.mean_direction)
        assert_allclose(res.mean_resultant_length,
                        ref.mean_resultant_length)

    def test_directional_stats_1d_error(self):
        # test that one-dimensional data raises ValueError
        data = np.ones((5, ))
        message = (r"samples must at least be two-dimensional. "
                   r"Instead samples has shape: (5,)")
        with pytest.raises(ValueError, match=re.escape(message)):
            stats.directional_stats(data)

    def test_directional_stats_normalize(self):
        # test that directional stats calculations yield same results
        # for unnormalized input with normalize=True and normalized
        # input with normalize=False
        data = np.array([[0.8660254, 0.5, 0.],
                         [1.7320508, -1., 0.]])
        res = stats.directional_stats(data, normalize=True)
        normalized_data = data / np.linalg.norm(data, axis=-1,
                                                keepdims=True)
        ref = stats.directional_stats(normalized_data,
                                      normalize=False)
        assert_allclose(res.mean_direction, ref.mean_direction)
        assert_allclose(res.mean_resultant_length,
                        ref.mean_resultant_length)
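
        # Note: normalize=True rescales each observation to unit length before
        # averaging, so unnormalized rows with normalize=True are expected to
        # give the same statistics as pre-normalized rows with
        # normalize=False, which is what this test checks.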