# Compute the two-sided one-sample Kolmogorov-Smirnov Prob(Dn <= d) where:
#    D_n = sup_x{|F_n(x) - F(x)|},
#    F_n(x) is the empirical CDF for a sample of size n {x_i: i=1,...,n},
#    F(x) is the CDF of a probability distribution.
#
# Exact methods:
# Prob(D_n >= d) can be computed via a matrix algorithm of Durbin[1]
# or a recursion algorithm due to Pomeranz[2].
# Marsaglia, Tsang & Wang[3] gave a computationally efficient way to perform
# the Durbin algorithm.
# D_n >= d <==> D_n+ >= d or D_n- >= d (the one-sided K-S statistics), hence
#    Prob(D_n >= d) = 2*Prob(D_n+ >= d) - Prob(D_n+ >= d and D_n- >= d).
# For d > 0.5, the latter intersection probability is 0.
#
# Approximate methods:
# For d close to 0.5, ignoring that intersection term may still give a
# reasonable approximation.
# Li-Chien[4] and Korolyuk[5] gave an asymptotic formula extending
# Kolmogorov's initial asymptotic, suitable for large d. (See
# scipy.special.kolmogorov for that asymptotic.)
# Pelz-Good[6] used the functional equation for Jacobi theta functions to
# transform the Li-Chien/Korolyuk formula into a computational formula
# suitable for small d.
#
# Simard and L'Ecuyer[7] provided an algorithm to decide when to use each of
# the above approaches, and that is what is used here.
#
# Other approaches:
# Carvalho[8] optimizes Durbin's matrix algorithm for large values of d.
# Moscovich and Nadler[9] use FFTs to compute the convolutions.
#
# References:
# [1] Durbin J (1968).
#     "The Probability that the Sample Distribution Function Lies Between Two
#     Parallel Straight Lines."
#     Annals of Mathematical Statistics, 39, 398-411.
# [2] Pomeranz J (1974).
#     "Exact Cumulative Distribution of the Kolmogorov-Smirnov Statistic for
#     Small Samples (Algorithm 487)."
#     Communications of the ACM, 17(12), 703-704.
# [3] Marsaglia G, Tsang WW, Wang J (2003).
#     "Evaluating Kolmogorov's Distribution."
#     Journal of Statistical Software, 8(18), 1-4.
# [4] Li-Chien, C. (1956).
#     "On the exact distribution of the statistics of A. N. Kolmogorov and
#     their asymptotic expansion."
#     Acta Matematica Sinica, 6, 55-81.
# [5] Korolyuk, V. S. (1960).
#     "Asymptotic analysis of the distribution of the maximum deviation in
#     the Bernoulli scheme."
#     Theor. Probability Appl., 4, 339-366.
# [6] Pelz W, Good IJ (1976).
#     "Approximating the Lower Tail-areas of the Kolmogorov-Smirnov One-sample
#     Statistic."
#     Journal of the Royal Statistical Society, Series B, 38(2), 152-156.
# [7] Simard R, L'Ecuyer P (2011).
#     "Computing the Two-Sided Kolmogorov-Smirnov Distribution."
#     Journal of Statistical Software, 39(11), 1-18.
# [8] Carvalho, Luis (2015).
#     "An Improved Evaluation of Kolmogorov's Distribution."
#     Journal of Statistical Software, Code Snippets, 65(3), 1-8.
# [9] Moscovich A, Nadler B (2017).
#     "Fast calculation of boundary crossing probabilities for Poisson
#     processes."
#     Statistics & Probability Letters, 123, 177-182.
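#
# Orientation (added note, based only on the functions defined below):
# kolmogn(n, x) returns Pr(D_n <= x); kolmogn(n, x, cdf=False) returns the
# survival probability 1 - Pr(D_n <= x); kolmognp(n, x) is the PDF; and
# kolmogni(n, q) is the PPF (or the ISF when cdf=False).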

import numpy as np
import scipy.special
import scipy.special._ufuncs as scu
import scipy.optimize  # for brentq, used by _kolmogni below
from scipy._lib._finite_differences import _derivative

_E128 = 128
_EP128 = np.ldexp(np.longdouble(1), _E128)
_EM128 = np.ldexp(np.longdouble(1), -_E128)

_SQRT2PI = np.sqrt(2 * np.pi)
_LOG_2PI = np.log(2 * np.pi)
_MIN_LOG = -708
_SQRT3 = np.sqrt(3)
_PI_SQUARED = np.pi**2
_PI_FOUR = np.pi**4
_PI_SIX = np.pi**6

# [Lifted from _loggamma.pxd.] If B_m are the Bernoulli numbers,
# then Stirling coeffs are B_{2j}/(2j)/(2j-1) for j=8,...,1.
_STIRLING_COEFFS = [-2.955065359477124183e-2, 6.4102564102564102564e-3,
                    -1.9175269175269175269e-3, 8.4175084175084175084e-4,
                    -5.952380952380952381e-4, 7.9365079365079365079e-4,
                    -2.7777777777777777778e-3, 8.3333333333333333333e-2]


def _log_nfactorial_div_n_pow_n(n):
    # Computes log(n! / n**n)
    #    = log((n-1)! / n**(n-1))
    # Uses Stirling's approximation, but removes n*log(n) up-front to
    # avoid subtractive cancellation.
    #    = log(n)/2 - n + log(sqrt(2pi)) + sum B_{2j}/(2j)/(2j-1)/n**(2j-1)
    rn = 1.0/n
    return np.log(n)/2 - n + _LOG_2PI/2 + rn * np.polyval(_STIRLING_COEFFS, rn/n)
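
# Quick sanity check (illustrative): for n=5, n!/n**n = 120/3125 = 0.0384,
# so _log_nfactorial_div_n_pow_n(5) should be close to log(0.0384) ~= -3.2597.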


def _clip_prob(p):
    """clips a probability to range 0<=p<=1."""
    return np.clip(p, 0.0, 1.0)


def _select_and_clip_prob(cdfprob, sfprob, cdf=True):
    """Selects either the CDF or SF, and then clips to range 0<=p<=1."""
    p = np.where(cdf, cdfprob, sfprob)
    return _clip_prob(p)


def _kolmogn_DMTW(n, d, cdf=True):
    r"""Computes the Kolmogorov CDF: Pr(D_n <= d) using the MTW approach to
    the Durbin matrix algorithm.

    Durbin (1968); Marsaglia, Tsang, Wang (2003). [1], [3].
    """
    # Write d = (k-h)/n, where k is a positive integer and 0 <= h < 1
    # Generate initial matrix H of size m*m where m=(2k-1)
    # Compute k-th row of (n!/n^n) * H^n, scaling intermediate results.
    # Requires memory O(m^2) and computation O(m^2 log(n)).
    # Most suitable for small m.
    if d >= 1.0:
        return _select_and_clip_prob(1.0, 0.0, cdf)
    nd = n * d
    if nd <= 0.5:
        return _select_and_clip_prob(0.0, 1.0, cdf)
    k = int(np.ceil(nd))
    h = k - nd
    m = 2 * k - 1

    H = np.zeros([m, m])

    # Initialize: v is first column (and last row) of H
    #  v[j] = (1 - h**(j+1)) / (j+1)!  (except for v[-1])
    #  w[j] = 1/(j)!
    # q = k-th row of H (actually i!/n^i * H^i)
    intm = np.arange(1, m + 1)
    v = 1.0 - h ** intm
    w = np.empty(m)
    fac = 1.0
    for j in intm:
        w[j - 1] = fac
        fac /= j  # This might underflow. Isn't a problem.
        v[j - 1] *= fac
    tt = max(2 * h - 1.0, 0)**m - 2*h**m
    v[-1] = (1.0 + tt) * fac

    for i in range(1, m):
        H[i - 1:, i] = w[:m - i + 1]
    H[:, 0] = v
    H[-1, :] = np.flip(v, axis=0)

    Hpwr = np.eye(np.shape(H)[0])  # Holds intermediate powers of H
    nn = n
    expnt = 0  # Scaling of Hpwr
    Hexpnt = 0  # Scaling of H
    while nn > 0:
        if nn % 2:
            Hpwr = np.matmul(Hpwr, H)
            expnt += Hexpnt
        H = np.matmul(H, H)
        Hexpnt *= 2
        # Scale as needed.
        if np.abs(H[k - 1, k - 1]) > _EP128:
            H /= _EP128
            Hexpnt += _E128
        nn = nn // 2

    p = Hpwr[k - 1, k - 1]

    # Multiply by n!/n^n
    for i in range(1, n + 1):
        p = i * p / n
        if np.abs(p) < _EM128:
            p *= _EP128
            expnt -= _E128

    # Undo any intermediate scaling.
    if expnt != 0:
        p = np.ldexp(p, expnt)

    return _select_and_clip_prob(p, 1.0-p, cdf)
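
# Worked example of the d = (k-h)/n split used above (illustrative): for
# n=10, d=0.274, nd=2.74 gives k=3 and h=0.26, so H is m x m with
# m = 2k - 1 = 5.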


def _pomeranz_compute_j1j2(i, n, ll, ceilf, roundf):
    """Compute the endpoints of the interval for row i."""
    if i == 0:
        j1, j2 = -ll - ceilf - 1, ll + ceilf - 1
    else:
        # i + 1 = 2*ip1div2 + ip1mod2
        ip1div2, ip1mod2 = divmod(i + 1, 2)
        if ip1mod2 == 0:  # i is odd
            if ip1div2 == n + 1:
                j1, j2 = n - ll - ceilf - 1, n + ll + ceilf - 1
            else:
                j1, j2 = ip1div2 - 1 - ll - roundf - 1, ip1div2 + ll - 1 + ceilf - 1
        else:
            j1, j2 = ip1div2 - 1 - ll - 1, ip1div2 + ll + roundf - 1

    return max(j1 + 2, 0), min(j2, n)


def _kolmogn_Pomeranz(n, x, cdf=True):
    r"""Computes Pr(D_n <= x) using the Pomeranz recursion algorithm.

    Pomeranz (1974) [2]
    """
    # V is an n*(2n+2) matrix.
    # Each row is a convolution of the previous row and probabilities from a
    #  Poisson distribution.
    # Desired CDF probability is n! V[n-1, 2n+1] (final entry in final row).
    # Only two rows are needed at any given stage:
    #  - Call them V0 and V1.
    #  - Swap each iteration.
    # Only a few (contiguous) entries in each row can be non-zero.
    #  - Keep track of start and end (j1 and j2 below).
    #  - V0s and V1s track the start in the two rows.
    # Scale intermediate results as needed.
    # Only a few different Poisson distributions can occur.
    t = n * x
    ll = int(np.floor(t))
    f = 1.0 * (t - ll)  # fractional part of t
    g = min(f, 1.0 - f)
    ceilf = (1 if f > 0 else 0)
    roundf = (1 if f > 0.5 else 0)
    npwrs = 2 * (ll + 1)  # Maximum number of powers needed in convolutions
    gpower = np.empty(npwrs)  # gpower = (g/n)^m/m!
    twogpower = np.empty(npwrs)  # twogpower = (2g/n)^m/m!
    onem2gpower = np.empty(npwrs)  # onem2gpower = ((1-2g)/n)^m/m!
    # gpower etc. are *almost* Poisson probs, just missing normalizing factor.

    gpower[0] = 1.0
    twogpower[0] = 1.0
    onem2gpower[0] = 1.0
    expnt = 0
    g_over_n, two_g_over_n, one_minus_two_g_over_n = g/n, 2*g/n, (1 - 2*g)/n
    for m in range(1, npwrs):
        gpower[m] = gpower[m - 1] * g_over_n / m
        twogpower[m] = twogpower[m - 1] * two_g_over_n / m
        onem2gpower[m] = onem2gpower[m - 1] * one_minus_two_g_over_n / m

    V0 = np.zeros([npwrs])
    V1 = np.zeros([npwrs])
    V1[0] = 1  # first row
    V0s, V1s = 0, 0  # start indices of the two rows

    j1, j2 = _pomeranz_compute_j1j2(0, n, ll, ceilf, roundf)
    for i in range(1, 2 * n + 2):
        # Preserve j1, V1, V1s, V0s from the last iteration.
        k1 = j1
        V0, V1 = V1, V0
        V0s, V1s = V1s, V0s
        V1.fill(0.0)
        j1, j2 = _pomeranz_compute_j1j2(i, n, ll, ceilf, roundf)
        if i == 1 or i == 2 * n + 1:
            pwrs = gpower
        else:
            pwrs = (twogpower if i % 2 else onem2gpower)
        ln2 = j2 - k1 + 1
        if ln2 > 0:
            conv = np.convolve(V0[k1 - V0s:k1 - V0s + ln2], pwrs[:ln2])
            conv_start = j1 - k1  # First index to use from conv
            conv_len = j2 - j1 + 1  # Number of entries to use from conv
            V1[:conv_len] = conv[conv_start:conv_start + conv_len]
            # Scale to avoid underflow.
            if 0 < np.max(V1) < _EM128:
                V1 *= _EP128
                expnt -= _E128
            V1s = V0s + j1 - k1

    # Multiply by n!
    ans = V1[n - V1s]
    for m in range(1, n + 1):
        if np.abs(ans) > _EP128:
            ans *= _EM128
            expnt += _E128
        ans *= m

    # Undo any intermediate scaling.
    if expnt != 0:
        ans = np.ldexp(ans, expnt)
    ans = _select_and_clip_prob(ans, 1.0 - ans, cdf)
    return ans
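
# Worked example of the Pomeranz setup (illustrative): for n=10, x=0.174,
# t = n*x = 1.74, so ll=1, f=0.74, g = min(f, 1-f) = 0.26, ceilf=1, roundf=1,
# and npwrs = 2*(ll+1) = 4 power terms per convolution kernel.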


def _kolmogn_PelzGood(n, x, cdf=True):
    """Computes the Pelz-Good approximation to Prob(Dn <= x), with 0<=x<=1.

    Start with the Li-Chien/Korolyuk approximation:
        Prob(Dn <= x) ~ K0(z) + K1(z)/sqrt(n) + K2(z)/n + K3(z)/n**1.5
    where z = x*sqrt(n).
    Transform each K_i(z) using Jacobi theta functions into a form suitable
    for small z.

    Pelz-Good (1976). [6]
    """
    if x <= 0.0:
        return _select_and_clip_prob(0.0, 1.0, cdf=cdf)
    if x >= 1.0:
        return _select_and_clip_prob(1.0, 0.0, cdf=cdf)

    z = np.sqrt(n) * x
    zsquared, zthree, zfour, zsix = z**2, z**3, z**4, z**6

    qlog = -_PI_SQUARED / 8 / zsquared
    if qlog < _MIN_LOG:  # z ~ 0.041743441416853426
        return _select_and_clip_prob(0.0, 1.0, cdf=cdf)
    q = np.exp(qlog)

    # Coefficients of terms in the sums for K1, K2 and K3.
    k1a = -zsquared
    k1b = _PI_SQUARED / 4

    k2a = 6 * zsix + 2 * zfour
    k2b = (2 * zfour - 5 * zsquared) * _PI_SQUARED / 4
    k2c = _PI_FOUR * (1 - 2 * zsquared) / 16

    k3d = _PI_SIX * (5 - 30 * zsquared) / 64
    k3c = _PI_FOUR * (-60 * zsquared + 212 * zfour) / 16
    k3b = _PI_SQUARED * (135 * zfour - 96 * zsix) / 4
    k3a = -30 * zsix - 90 * z**8

    K0to3 = np.zeros(4)
    # Use a Horner scheme to evaluate sum c_i q^(i^2);
    # this reduces to a sum over odd integers.
    maxk = int(np.ceil(16 * z / np.pi))
    for k in range(maxk, 0, -1):
        m = 2 * k - 1
        msquared, mfour, msix = m**2, m**4, m**6
        qpower = np.power(q, 8 * k)
        coeffs = np.array([1.0,
                           k1a + k1b*msquared,
                           k2a + k2b*msquared + k2c*mfour,
                           k3a + k3b*msquared + k3c*mfour + k3d*msix])
        K0to3 *= qpower
        K0to3 += coeffs
    K0to3 *= q
    K0to3 *= _SQRT2PI
    # z**10 > 0 as z > 0.04
    K0to3 /= np.array([z, 6 * zfour, 72 * z**7, 6480 * z**10])

    # Now do the sum over the other terms, all integers k:
    #  K_2: (pi^2 k^2) q^(k^2),
    #  K_3: (3pi^2 k^2 z^2 - pi^4 k^4) q^(k^2)
    # Don't expect much subtractive cancellation, so use direct calculation.
    q = np.exp(-_PI_SQUARED / 2 / zsquared)
    ks = np.arange(maxk, 0, -1)
    ksquared = ks ** 2
    sqrt3z = _SQRT3 * z
    kspi = np.pi * ks
    qpwers = q ** ksquared

    k2extra = np.sum(ksquared * qpwers)
    k2extra *= _PI_SQUARED * _SQRT2PI/(-36 * zthree)
    K0to3[2] += k2extra
    k3extra = np.sum((sqrt3z + kspi) * (sqrt3z - kspi) * ksquared * qpwers)
    k3extra *= _PI_SQUARED * _SQRT2PI/(216 * zsix)
    K0to3[3] += k3extra
    powers_of_n = np.power(n * 1.0, np.arange(len(K0to3)) / 2.0)
    K0to3 /= powers_of_n

    if not cdf:
        K0to3 *= -1
        K0to3[0] += 1

    Ksum = sum(K0to3)
    return Ksum
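
# Why the Horner loop above steps by q**(8*k) (illustrative): the theta sums
# run over odd m = 2k-1 with terms c(m) * q^(m^2), and consecutive odd
# squares differ by (2k+1)^2 - (2k-1)^2 = 8k, so each Horner step multiplies
# the accumulator by qpower = q**(8*k).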


def _kolmogn(n, x, cdf=True):
    """Computes the CDF (or SF) for the two-sided Kolmogorov-Smirnov statistic.

    x must be of type float, n of type integer.

    Simard & L'Ecuyer (2011) [7].
    """
    if np.isnan(n):
        return n  # Keep the same type of nan
    if int(n) != n or n <= 0:
        return np.nan
    if x >= 1.0:
        return _select_and_clip_prob(1.0, 0.0, cdf=cdf)
    if x <= 0.0:
        return _select_and_clip_prob(0.0, 1.0, cdf=cdf)
    t = n * x
    if t <= 1.0:  # Ruben-Gambino: 1/2n <= x <= 1/n
        if t <= 0.5:
            return _select_and_clip_prob(0.0, 1.0, cdf=cdf)
        if n <= 140:
            prob = np.prod(np.arange(1, n+1) * (1.0/n) * (2*t - 1))
        else:
            prob = np.exp(_log_nfactorial_div_n_pow_n(n) + n * np.log(2*t-1))
        return _select_and_clip_prob(prob, 1.0 - prob, cdf=cdf)
    if t >= n - 1:  # Ruben-Gambino
        prob = 2 * (1.0 - x)**n
        return _select_and_clip_prob(1 - prob, prob, cdf=cdf)
    if x >= 0.5:  # Exact: 2 * smirnov
        prob = 2 * scipy.special.smirnov(n, x)
        return _select_and_clip_prob(1.0 - prob, prob, cdf=cdf)

    nxsquared = t * x
    if n <= 140:
        if nxsquared <= 0.754693:
            prob = _kolmogn_DMTW(n, x, cdf=True)
            return _select_and_clip_prob(prob, 1.0 - prob, cdf=cdf)
        if nxsquared <= 4:
            prob = _kolmogn_Pomeranz(n, x, cdf=True)
            return _select_and_clip_prob(prob, 1.0 - prob, cdf=cdf)
        # Now use the Miller approximation of 2 * smirnov.
        prob = 2 * scipy.special.smirnov(n, x)
        return _select_and_clip_prob(1.0 - prob, prob, cdf=cdf)

    # Split CDF and SF as they have different cutoffs on nxsquared.
    if not cdf:
        if nxsquared >= 370.0:
            return 0.0
        if nxsquared >= 2.2:
            prob = 2 * scipy.special.smirnov(n, x)
            return _clip_prob(prob)
        # Fall through and compute the SF as 1.0 - CDF.
    if nxsquared >= 18.0:
        cdfprob = 1.0
    elif n <= 100000 and n * x**1.5 <= 1.4:
        cdfprob = _kolmogn_DMTW(n, x, cdf=True)
    else:
        cdfprob = _kolmogn_PelzGood(n, x, cdf=True)
    return _select_and_clip_prob(cdfprob, 1.0 - cdfprob, cdf=cdf)
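
# Dispatch example (illustrative): for n=100, x=0.06 we have t = 6 > 1,
# t < n-1, x < 0.5, and n*x**2 = 0.36 <= 0.754693, so _kolmogn uses the
# Durbin/MTW matrix algorithm; at x=0.15 (n*x**2 = 2.25 <= 4) it would use
# the Pomeranz recursion instead.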


def _kolmogn_p(n, x):
    """Computes the PDF for the two-sided Kolmogorov-Smirnov statistic.

    x must be of type float, n of type integer.
    """
    if np.isnan(n):
        return n  # Keep the same type of nan
    if int(n) != n or n <= 0:
        return np.nan
    if x >= 1.0 or x <= 0:
        return 0
    t = n * x
    if t <= 1.0:
        # Ruben-Gambino: n!/n^n * (2t-1)^n -> 2 n!/n^n * n^2 * (2t-1)^(n-1)
        if t <= 0.5:
            return 0.0
        if n <= 140:
            prd = np.prod(np.arange(1, n) * (1.0 / n) * (2 * t - 1))
        else:
            prd = np.exp(_log_nfactorial_div_n_pow_n(n) + (n-1) * np.log(2 * t - 1))
        return prd * 2 * n**2
    if t >= n - 1:
        # Ruben-Gambino: 1 - 2(1-x)**n -> 2n*(1-x)**(n-1)
        return 2 * (1.0 - x) ** (n-1) * n
    if x >= 0.5:
        # Local import: importing scipy.stats at the top level would be
        # circular, as scipy.stats imports this module.
        import scipy.stats
        return 2 * scipy.stats.ksone.pdf(x, n)

    # Just take a small delta.
    # Ideally x +/- delta would stay within [i/n, (i+1)/n] for some integer i,
    # as the CDF is a piecewise degree-n polynomial.
    # It has knots at 1/n, 2/n, ..., (n-1)/n
    # and is not a C-infinity function at the knots.
    delta = x / 2.0**16
    delta = min(delta, x - 1.0/n)
    delta = min(delta, 0.5 - x)

    def _kk(_x):
        return kolmogn(n, _x)

    return _derivative(_kk, x, dx=delta, order=5)
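
# Step-size example for the numerical derivative (illustrative): for n=10,
# x=0.3, delta starts at 0.3/2**16 ~ 4.6e-6; the min() guards shrink delta
# further when x is close to the knot at 1/n or to the x = 0.5 branch
# boundary.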


def _kolmogni(n, p, q):
    """Computes the PPF/ISF of kolmogn.

    n of type integer, n >= 1
    p is the CDF, q the SF, p + q = 1
    """
    if np.isnan(n):
        return n  # Keep the same type of nan
    if int(n) != n or n <= 0:
        return np.nan
    if p <= 0:
        return 1.0/n
    if q <= 0:
        return 1.0
    delta = np.exp((np.log(p) - scipy.special.loggamma(n+1))/n)
    if delta <= 1.0/n:
        # Lower Ruben-Gambino range: invert p = n!*(2x - 1/n)**n,
        # i.e. delta = (p/n!)**(1/n) = 2x - 1/n.
        return (delta + 1.0 / n) / 2
    x = -np.expm1(np.log(q/2.0)/n)
    if x >= 1 - 1.0/n:
        # Upper Ruben-Gambino range: invert q = 2*(1-x)**n.
        return x
    # Start from the asymptotic inverse, then polish with a root-finder.
    x1 = scu._kolmogci(p)/np.sqrt(n)
    x1 = min(x1, 1.0 - 1.0/n)
    _f = lambda x: _kolmogn(n, x) - p
    return scipy.optimize.brentq(_f, 1.0/n, x1, xtol=1e-14)
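
# Inversion example (illustrative): for n=1 the distribution is
# Pr(D_1 <= x) = 2x - 1 on [1/2, 1], so _kolmogni(1, 0.5, 0.5) should return
# 0.75, and kolmogn(1, 0.75) should return 0.5.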


def kolmogn(n, x, cdf=True):
    """Computes the CDF for the two-sided Kolmogorov-Smirnov distribution.

    The two-sided Kolmogorov-Smirnov distribution has as its CDF Pr(D_n <= x),
    for a sample of size n drawn from a distribution with CDF F(t), where
    D_n = sup_t |F_n(t) - F(t)|, and
    F_n(t) is the Empirical Cumulative Distribution Function of the sample.

    Parameters
    ----------
    n : integer, array_like
        the number of samples
    x : float, array_like
        The K-S statistic, float between 0 and 1
    cdf : bool, optional
        whether to compute the CDF (default=True) or the SF.

    Returns
    -------
    cdf : ndarray
        CDF (or SF if cdf is False) at the specified locations.

    The return value has the shape of the result of numpy broadcasting
    n and x.
    """
    it = np.nditer([n, x, cdf, None],
                   op_dtypes=[None, np.float64, np.bool_, np.float64])
    for _n, _x, _cdf, z in it:
        if np.isnan(_n):
            z[...] = _n
            continue
        if int(_n) != _n:
            raise ValueError(f'n is not integral: {_n}')
        z[...] = _kolmogn(int(_n), _x, cdf=_cdf)
    result = it.operands[-1]
    return result


def kolmognp(n, x):
    """Computes the PDF for the two-sided Kolmogorov-Smirnov distribution.

    Parameters
    ----------
    n : integer, array_like
        the number of samples
    x : float, array_like
        The K-S statistic, float between 0 and 1

    Returns
    -------
    pdf : ndarray
        The PDF at the specified locations.

    The return value has the shape of the result of numpy broadcasting
    n and x.
    """
    it = np.nditer([n, x, None])
    for _n, _x, z in it:
        if np.isnan(_n):
            z[...] = _n
            continue
        if int(_n) != _n:
            raise ValueError(f'n is not integral: {_n}')
        z[...] = _kolmogn_p(int(_n), _x)
    result = it.operands[-1]
    return result


def kolmogni(n, q, cdf=True):
    """Computes the PPF (or ISF) for the two-sided Kolmogorov-Smirnov
    distribution.

    Parameters
    ----------
    n : integer, array_like
        the number of samples
    q : float, array_like
        Probabilities, float between 0 and 1
    cdf : bool, optional
        whether to compute the PPF (default=True) or the ISF.

    Returns
    -------
    ppf : ndarray
        PPF (or ISF if cdf is False) at the specified locations.

    The return value has the shape of the result of numpy broadcasting
    n and q.
    """
    it = np.nditer([n, q, cdf, None])
    for _n, _q, _cdf, z in it:
        if np.isnan(_n):
            z[...] = _n
            continue
        if int(_n) != _n:
            raise ValueError(f'n is not integral: {_n}')
        _pcdf, _psf = (_q, 1-_q) if _cdf else (1-_q, _q)
        z[...] = _kolmogni(int(_n), _pcdf, _psf)
    result = it.operands[-1]
    return result
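

if __name__ == "__main__":
    # Minimal smoke test: a hedged usage sketch, not part of the library API.
    # For n=1, Pr(D_1 <= x) = 2x - 1 on [1/2, 1], so these have closed forms.
    print(kolmogn(1, 0.75))  # expect 0.5
    print(kolmogni(1, 0.5))  # expect 0.75
    # CDF and SF should sum to 1.
    n, x = 100, 0.06
    print(kolmogn(n, x) + kolmogn(n, x, cdf=False))  # expect 1.0
    # The PDF should roughly match a central difference of the CDF.
    eps = 1e-6
    approx = (kolmogn(n, x + eps) - kolmogn(n, x - eps)) / (2 * eps)
    print(kolmognp(n, x), approx)  # expect close agreement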