__init__.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. """
  2. .. _statsrefmanual:
  3. ==========================================
  4. Statistical functions (:mod:`scipy.stats`)
  5. ==========================================
  6. .. currentmodule:: scipy.stats
  7. This module contains a large number of probability distributions,
  8. summary and frequency statistics, correlation functions and statistical
  9. tests, masked statistics, kernel density estimation, quasi-Monte Carlo
  10. functionality, and more.
  11. Statistics is a very large area, and there are topics that are out of scope
  12. for SciPy and are covered by other packages. Some of the most important ones
  13. are:
  14. - `statsmodels <https://www.statsmodels.org/stable/index.html>`__:
  15. regression, linear models, time series analysis, extensions to topics
  16. also covered by ``scipy.stats``.
  17. - `Pandas <https://pandas.pydata.org/>`__: tabular data, time series
  18. functionality, interfaces to other statistical languages.
  19. - `PyMC <https://docs.pymc.io/>`__: Bayesian statistical
  20. modeling, probabilistic machine learning.
  21. - `scikit-learn <https://scikit-learn.org/>`__: classification, regression,
  22. model selection.
  23. - `Seaborn <https://seaborn.pydata.org/>`__: statistical data visualization.
  24. - `rpy2 <https://rpy2.github.io/>`__: Python to R bridge.
  25. Probability distributions
  26. =========================
  27. Each univariate distribution is an instance of a subclass of `rv_continuous`
  28. (`rv_discrete` for discrete distributions):
  29. .. autosummary::
  30. :toctree: generated/
  31. rv_continuous
  32. rv_discrete
  33. rv_histogram
  34. Continuous distributions
  35. ------------------------
  36. .. autosummary::
  37. :toctree: generated/
  38. alpha -- Alpha
  39. anglit -- Anglit
  40. arcsine -- Arcsine
  41. argus -- Argus
  42. beta -- Beta
  43. betaprime -- Beta Prime
  44. bradford -- Bradford
  45. burr -- Burr (Type III)
  46. burr12 -- Burr (Type XII)
  47. cauchy -- Cauchy
  48. chi -- Chi
  49. chi2 -- Chi-squared
  50. cosine -- Cosine
  51. crystalball -- Crystalball
  52. dgamma -- Double Gamma
  53. dweibull -- Double Weibull
  54. erlang -- Erlang
  55. expon -- Exponential
  56. exponnorm -- Exponentially Modified Normal
  57. exponweib -- Exponentiated Weibull
  58. exponpow -- Exponential Power
  59. f -- F (Snedecor F)
  60. fatiguelife -- Fatigue Life (Birnbaum-Saunders)
  61. fisk -- Fisk
  62. foldcauchy -- Folded Cauchy
  63. foldnorm -- Folded Normal
  64. genlogistic -- Generalized Logistic
  65. gennorm -- Generalized normal
  66. genpareto -- Generalized Pareto
  67. genexpon -- Generalized Exponential
  68. genextreme -- Generalized Extreme Value
  69. gausshyper -- Gauss Hypergeometric
  70. gamma -- Gamma
  71. gengamma -- Generalized gamma
  72. genhalflogistic -- Generalized Half Logistic
  73. genhyperbolic -- Generalized Hyperbolic
  74. geninvgauss -- Generalized Inverse Gaussian
  75. gibrat -- Gibrat
  76. gilbrat -- Gilbrat
  77. gompertz -- Gompertz (Truncated Gumbel)
  78. gumbel_r -- Right Sided Gumbel, Log-Weibull, Fisher-Tippett, Extreme Value Type I
  79. gumbel_l -- Left Sided Gumbel, etc.
  80. halfcauchy -- Half Cauchy
  81. halflogistic -- Half Logistic
  82. halfnorm -- Half Normal
  83. halfgennorm -- Generalized Half Normal
  84. hypsecant -- Hyperbolic Secant
  85. invgamma -- Inverse Gamma
  86. invgauss -- Inverse Gaussian
  87. invweibull -- Inverse Weibull
  88. johnsonsb -- Johnson SB
  89. johnsonsu -- Johnson SU
  90. kappa4 -- Kappa 4 parameter
  91. kappa3 -- Kappa 3 parameter
  92. ksone -- Distribution of Kolmogorov-Smirnov one-sided test statistic
  93. kstwo -- Distribution of Kolmogorov-Smirnov two-sided test statistic
  94. kstwobign -- Limiting Distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
  95. laplace -- Laplace
  96. laplace_asymmetric -- Asymmetric Laplace
  97. levy -- Levy
  98. levy_l
  99. levy_stable
  100. logistic -- Logistic
  101. loggamma -- Log-Gamma
  102. loglaplace -- Log-Laplace (Log Double Exponential)
  103. lognorm -- Log-Normal
  104. loguniform -- Log-Uniform
  105. lomax -- Lomax (Pareto of the second kind)
  106. maxwell -- Maxwell
  107. mielke -- Mielke's Beta-Kappa
  108. moyal -- Moyal
  109. nakagami -- Nakagami
  110. ncx2 -- Non-central chi-squared
  111. ncf -- Non-central F
  112. nct -- Non-central Student's T
  113. norm -- Normal (Gaussian)
  114. norminvgauss -- Normal Inverse Gaussian
  115. pareto -- Pareto
  116. pearson3 -- Pearson type III
  117. powerlaw -- Power-function
  118. powerlognorm -- Power log normal
  119. powernorm -- Power normal
  120. rdist -- R-distribution
  121. rayleigh -- Rayleigh
  122. rice -- Rice
  123. recipinvgauss -- Reciprocal Inverse Gaussian
  124. semicircular -- Semicircular
  125. skewcauchy -- Skew Cauchy
  126. skewnorm -- Skew normal
  127. studentized_range -- Studentized Range
  128. t -- Student's T
  129. trapezoid -- Trapezoidal
  130. triang -- Triangular
  131. truncexpon -- Truncated Exponential
  132. truncnorm -- Truncated Normal
  133. truncpareto -- Truncated Pareto
  134. truncweibull_min -- Truncated minimum Weibull distribution
  135. tukeylambda -- Tukey-Lambda
  136. uniform -- Uniform
  137. vonmises -- Von-Mises (Circular)
  138. vonmises_line -- Von-Mises (Line)
  139. wald -- Wald
  140. weibull_min -- Minimum Weibull (see Frechet)
  141. weibull_max -- Maximum Weibull (see Frechet)
  142. wrapcauchy -- Wrapped Cauchy
  143. Multivariate distributions
  144. --------------------------
  145. .. autosummary::
  146. :toctree: generated/
  147. multivariate_normal -- Multivariate normal distribution
  148. matrix_normal -- Matrix normal distribution
  149. dirichlet -- Dirichlet
  150. wishart -- Wishart
  151. invwishart -- Inverse Wishart
  152. multinomial -- Multinomial distribution
  153. special_ortho_group -- SO(N) group
  154. ortho_group -- O(N) group
  155. unitary_group -- U(N) group
  156. random_correlation -- random correlation matrices
  157. multivariate_t -- Multivariate t-distribution
  158. multivariate_hypergeom -- Multivariate hypergeometric distribution
  159. random_table -- Distribution of random tables with given marginals
  160. uniform_direction -- Uniform distribution on S(N-1)
  161. `scipy.stats.multivariate_normal` methods accept instances
  162. of the following class to represent the covariance.
  163. .. autosummary::
  164. :toctree: generated/
  165. Covariance -- Representation of a covariance matrix
  166. Discrete distributions
  167. ----------------------
  168. .. autosummary::
  169. :toctree: generated/
  170. bernoulli -- Bernoulli
  171. betabinom -- Beta-Binomial
  172. binom -- Binomial
  173. boltzmann -- Boltzmann (Truncated Discrete Exponential)
  174. dlaplace -- Discrete Laplacian
  175. geom -- Geometric
  176. hypergeom -- Hypergeometric
  177. logser -- Logarithmic (Log-Series, Series)
  178. nbinom -- Negative Binomial
  179. nchypergeom_fisher -- Fisher's Noncentral Hypergeometric
  180. nchypergeom_wallenius -- Wallenius's Noncentral Hypergeometric
  181. nhypergeom -- Negative Hypergeometric
  182. planck -- Planck (Discrete Exponential)
  183. poisson -- Poisson
  184. randint -- Discrete Uniform
  185. skellam -- Skellam
  186. yulesimon -- Yule-Simon
  187. zipf -- Zipf (Zeta)
  188. zipfian -- Zipfian
  189. An overview of statistical functions is given below. Many of these functions
  190. have a similar version in `scipy.stats.mstats` which works for masked arrays.
  191. Summary statistics
  192. ==================
  193. .. autosummary::
  194. :toctree: generated/
  195. describe -- Descriptive statistics
  196. gmean -- Geometric mean
  197. hmean -- Harmonic mean
  198. pmean -- Power mean
  199. kurtosis -- Fisher or Pearson kurtosis
  200. mode -- Modal value
  201. moment -- Central moment
  202. expectile -- Expectile
  203. skew -- Skewness
  204. kstat --
  205. kstatvar --
  206. tmean -- Truncated arithmetic mean
  207. tvar -- Truncated variance
  208. tmin --
  209. tmax --
  210. tstd --
  211. tsem --
  212. variation -- Coefficient of variation
  213. find_repeats
  214. trim_mean
  215. gstd -- Geometric Standard Deviation
  216. iqr
  217. sem
  218. bayes_mvs
  219. mvsdist
  220. entropy
  221. differential_entropy
  222. median_abs_deviation
  223. Frequency statistics
  224. ====================
  225. .. autosummary::
  226. :toctree: generated/
  227. cumfreq
  228. percentileofscore
  229. scoreatpercentile
  230. relfreq
  231. .. autosummary::
  232. :toctree: generated/
  233. binned_statistic -- Compute a binned statistic for a set of data.
  234. binned_statistic_2d -- Compute a 2-D binned statistic for a set of data.
  235. binned_statistic_dd -- Compute a d-D binned statistic for a set of data.
  236. Correlation functions
  237. =====================
  238. .. autosummary::
  239. :toctree: generated/
  240. f_oneway
  241. alexandergovern
  242. pearsonr
  243. spearmanr
  244. pointbiserialr
  245. kendalltau
  246. weightedtau
  247. somersd
  248. linregress
  249. siegelslopes
  250. theilslopes
  251. multiscale_graphcorr
  252. Statistical tests
  253. =================
  254. .. autosummary::
  255. :toctree: generated/
  256. ttest_1samp
  257. ttest_ind
  258. ttest_ind_from_stats
  259. ttest_rel
  260. chisquare
  261. cramervonmises
  262. cramervonmises_2samp
  263. power_divergence
  264. kstest
  265. ks_1samp
  266. ks_2samp
  267. epps_singleton_2samp
  268. mannwhitneyu
  269. tiecorrect
  270. rankdata
  271. ranksums
  272. wilcoxon
  273. kruskal
  274. friedmanchisquare
  275. brunnermunzel
  276. combine_pvalues
  277. jarque_bera
  278. page_trend_test
  279. tukey_hsd
  280. poisson_means_test
  281. .. autosummary::
  282. :toctree: generated/
  283. ansari
  284. bartlett
  285. levene
  286. shapiro
  287. anderson
  288. anderson_ksamp
  289. binom_test
  290. binomtest
  291. fligner
  292. median_test
  293. mood
  294. skewtest
  295. kurtosistest
  296. normaltest
  297. goodness_of_fit
  298. Quasi-Monte Carlo
  299. =================
  300. .. toctree::
  301. :maxdepth: 4
  302. stats.qmc
  303. Resampling Methods
  304. ==================
  305. .. autosummary::
  306. :toctree: generated/
  307. bootstrap
  308. permutation_test
  309. monte_carlo_test
  310. Masked statistics functions
  311. ===========================
  312. .. toctree::
  313. stats.mstats
  314. Other statistical functionality
  315. ===============================
  316. Transformations
  317. ---------------
  318. .. autosummary::
  319. :toctree: generated/
  320. boxcox
  321. boxcox_normmax
  322. boxcox_llf
  323. yeojohnson
  324. yeojohnson_normmax
  325. yeojohnson_llf
  326. obrientransform
  327. sigmaclip
  328. trimboth
  329. trim1
  330. zmap
  331. zscore
  332. gzscore
  333. Statistical distances
  334. ---------------------
  335. .. autosummary::
  336. :toctree: generated/
  337. wasserstein_distance
  338. energy_distance
  339. Sampling
  340. --------
  341. .. toctree::
  342. :maxdepth: 4
  343. stats.sampling
  344. Random variate generation / CDF Inversion
  345. -----------------------------------------
  346. .. autosummary::
  347. :toctree: generated/
  348. rvs_ratio_uniforms
  349. Distribution Fitting
  350. --------------------
  351. .. autosummary::
  352. :toctree: generated/
  353. fit
  354. Directional statistical functions
  355. ---------------------------------
  356. .. autosummary::
  357. :toctree: generated/
  358. directional_stats
  359. circmean
  360. circvar
  361. circstd
  362. Contingency table functions
  363. ---------------------------
  364. .. autosummary::
  365. :toctree: generated/
  366. chi2_contingency
  367. contingency.crosstab
  368. contingency.expected_freq
  369. contingency.margins
  370. contingency.relative_risk
  371. contingency.association
  372. contingency.odds_ratio
  373. fisher_exact
  374. barnard_exact
  375. boschloo_exact
  376. Plot-tests
  377. ----------
  378. .. autosummary::
  379. :toctree: generated/
  380. ppcc_max
  381. ppcc_plot
  382. probplot
  383. boxcox_normplot
  384. yeojohnson_normplot
  385. Univariate and multivariate kernel density estimation
  386. -----------------------------------------------------
  387. .. autosummary::
  388. :toctree: generated/
  389. gaussian_kde
  390. Warnings / Errors used in :mod:`scipy.stats`
  391. --------------------------------------------
  392. .. autosummary::
  393. :toctree: generated/
  394. DegenerateDataWarning
  395. ConstantInputWarning
  396. NearConstantInputWarning
  397. FitError
  398. """
  399. from ._warnings_errors import (ConstantInputWarning, NearConstantInputWarning,
  400. DegenerateDataWarning, FitError)
  401. from ._stats_py import *
  402. from ._variation import variation
  403. from .distributions import *
  404. from ._morestats import *
  405. from ._binomtest import binomtest
  406. from ._binned_statistic import *
  407. from ._kde import gaussian_kde
  408. from . import mstats
  409. from . import qmc
  410. from ._multivariate import *
  411. from . import contingency
  412. from .contingency import chi2_contingency
  413. from ._resampling import bootstrap, monte_carlo_test, permutation_test
  414. from ._entropy import *
  415. from ._hypotests import *
  416. from ._rvs_sampling import rvs_ratio_uniforms
  417. from ._page_trend_test import page_trend_test
  418. from ._mannwhitneyu import mannwhitneyu
  419. from ._fit import fit, goodness_of_fit
  420. from ._covariance import Covariance
  421. # Deprecated namespaces, to be removed in v2.0.0
  422. from . import (
  423. biasedurn, kde, morestats, mstats_basic, mstats_extras, mvn, statlib, stats
  424. )
  425. __all__ = [s for s in dir() if not s.startswith("_")] # Every name in scope here that does not start with an underscore (not only dunders).
  426. from scipy._lib._testutils import PytestTester
  427. test = PytestTester(__name__) # Built from this module's name; bound after __all__ is computed, so "test" is not listed in it.
  428. del PytestTester # Drop the helper class name from the module namespace.