# test_odds_ratio.py — tests for scipy.stats.odds_ratio
import pytest
import numpy as np
from numpy.testing import assert_equal, assert_allclose

from scipy.stats._odds_ratio import odds_ratio

from .._discrete_distns import nchypergeom_fisher, hypergeom
from .data.fisher_exact_results_from_r import data
class TestOddsRatio:
    """Tests for ``odds_ratio`` (from ``scipy.stats._odds_ratio``).

    Reference values come from R (see the ``data`` module imported at the
    top of this file) and, for the conditional odds ratio, from internal
    self-consistency checks against the noncentral hypergeometric
    distribution.
    """

    @pytest.mark.parametrize('parameters, rresult', data)
    def test_results_from_r(self, parameters, rresult):
        # Compare the conditional odds ratio and its confidence interval
        # against reference values computed in R.
        # R spells the alternative 'two.sided'; scipy expects 'two-sided'.
        alternative = parameters.alternative.replace('.', '-')
        result = odds_ratio(parameters.table)
        # The results computed by R are not very accurate.
        if result.statistic < 400:
            or_rtol = 5e-4
            ci_rtol = 2e-2
        else:
            # Large odds ratios are even less accurate in the R reference,
            # so the tolerances are loosened further.
            or_rtol = 5e-2
            ci_rtol = 1e-1
        assert_allclose(result.statistic,
                        rresult.conditional_odds_ratio, rtol=or_rtol)
        ci = result.confidence_interval(parameters.confidence_level,
                                        alternative)
        assert_allclose((ci.low, ci.high), rresult.conditional_odds_ratio_ci,
                        rtol=ci_rtol)

        # Also do a self-check for the conditional odds ratio.
        # With the computed conditional odds ratio as the noncentrality
        # parameter of the noncentral hypergeometric distribution with
        # parameters table.sum(), table[0].sum(), and table[:,0].sum() as
        # total, ngood and nsample, respectively, the mean of the distribution
        # should equal table[0, 0].
        cor = result.statistic
        table = np.array(parameters.table)
        total = table.sum()
        ngood = table[0].sum()
        nsample = table[:, 0].sum()
        # nchypergeom_fisher does not allow the edge cases where the
        # noncentrality parameter is 0 or inf, so handle those values
        # separately here.
        if cor == 0:
            # Noncentrality 0 concentrates the mass at the lower end of
            # the support.
            nchg_mean = hypergeom.support(total, ngood, nsample)[0]
        elif cor == np.inf:
            # Noncentrality inf concentrates the mass at the upper end.
            nchg_mean = hypergeom.support(total, ngood, nsample)[1]
        else:
            nchg_mean = nchypergeom_fisher.mean(total, ngood, nsample, cor)
        assert_allclose(nchg_mean, table[0, 0], rtol=1e-13)

        # Check that the confidence interval is correct.
        # Each finite, nonzero CI endpoint, used as the noncentrality
        # parameter, must reproduce the tail probability alpha (or alpha/2
        # for the two-sided interval) at the observed count table[0, 0].
        alpha = 1 - parameters.confidence_level
        if alternative == 'two-sided':
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha/2, rtol=1e-11)
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha/2, rtol=1e-11)
        elif alternative == 'less':
            if np.isfinite(ci.high):
                cdf = nchypergeom_fisher.cdf(table[0, 0],
                                             total, ngood, nsample, ci.high)
                assert_allclose(cdf, alpha, rtol=1e-11)
        else:
            # alternative == 'greater'
            if ci.low > 0:
                sf = nchypergeom_fisher.sf(table[0, 0] - 1,
                                           total, ngood, nsample, ci.low)
                assert_allclose(sf, alpha, rtol=1e-11)

    @pytest.mark.parametrize('table', [
        [[0, 0], [5, 10]],
        [[5, 10], [0, 0]],
        [[0, 5], [0, 10]],
        [[5, 0], [10, 0]],
    ])
    def test_row_or_col_zero(self, table):
        # A table with an all-zero row or column has an undefined odds
        # ratio (nan) and the widest possible confidence interval.
        result = odds_ratio(table)
        assert_equal(result.statistic, np.nan)
        ci = result.confidence_interval()
        assert_equal((ci.low, ci.high), (0, np.inf))

    @pytest.mark.parametrize("case",
                             [[0.95, 'two-sided', 0.4879913, 2.635883],
                              [0.90, 'two-sided', 0.5588516, 2.301663]])
    def test_sample_odds_ratio_ci(self, case):
        # Compare the sample odds ratio confidence interval to the R function
        # oddsratio.wald from the epitools package, e.g.
        # > library(epitools)
        # > table = matrix(c(10, 20, 41, 93), nrow=2, ncol=2, byrow=TRUE)
        # > result = oddsratio.wald(table)
        # > result$measure
        #           odds ratio with 95% C.I.
        # Predictor  estimate     lower    upper
        #  Exposed1 1.000000        NA       NA
        #  Exposed2 1.134146 0.4879913 2.635883
        confidence_level, alternative, ref_low, ref_high = case
        table = [[10, 20], [41, 93]]
        result = odds_ratio(table, kind='sample')
        assert_allclose(result.statistic, 1.134146, rtol=1e-6)
        ci = result.confidence_interval(confidence_level, alternative)
        assert_allclose([ci.low, ci.high], [ref_low, ref_high], rtol=1e-6)

    @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided'])
    def test_sample_odds_ratio_one_sided_ci(self, alternative):
        # can't find a good reference for one-sided CI, so bump up the sample
        # size and compare against the conditional odds ratio CI
        # (the two kinds converge for large samples).
        table = [[1000, 2000], [4100, 9300]]
        res = odds_ratio(table, kind='sample')
        ref = odds_ratio(table, kind='conditional')
        assert_allclose(res.statistic, ref.statistic, atol=1e-5)
        assert_allclose(res.confidence_interval(alternative=alternative),
                        ref.confidence_interval(alternative=alternative),
                        atol=2e-3)

    @pytest.mark.parametrize('kind', ['sample', 'conditional'])
    @pytest.mark.parametrize('bad_table', [123, "foo", [10, 11, 12]])
    def test_invalid_table_shape(self, kind, bad_table):
        # Anything that is not 2x2 array-like must be rejected.
        with pytest.raises(ValueError, match="Invalid shape"):
            odds_ratio(bad_table, kind=kind)

    def test_invalid_table_type(self):
        # Non-integer entries must be rejected.
        with pytest.raises(ValueError, match='must be an array of integers'):
            odds_ratio([[1.0, 3.4], [5.0, 9.9]])

    def test_negative_table_values(self):
        # Negative counts must be rejected.
        with pytest.raises(ValueError, match='must be nonnegative'):
            odds_ratio([[1, 2], [3, -4]])

    def test_invalid_kind(self):
        # `kind` must be 'sample' or 'conditional'.
        with pytest.raises(ValueError, match='`kind` must be'):
            odds_ratio([[10, 20], [30, 14]], kind='magnetoreluctance')

    def test_invalid_alternative(self):
        # `alternative` must be 'two-sided', 'less' or 'greater'.
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='`alternative` must be'):
            result.confidence_interval(alternative='depleneration')

    @pytest.mark.parametrize('level', [-0.5, 1.5])
    def test_invalid_confidence_level(self, level):
        # Confidence level outside [0, 1] must be rejected.
        result = odds_ratio([[5, 10], [2, 32]])
        with pytest.raises(ValueError, match='must be between 0 and 1'):
            result.confidence_interval(confidence_level=level)