123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- /*
- * Copyright Nick Thompson, 2019
- * Use, modification and distribution are subject to the
- * Boost Software License, Version 1.0. (See accompanying file
- * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- */
- #ifndef BOOST_MATH_STATISTICS_RUNS_TEST_HPP
- #define BOOST_MATH_STATISTICS_RUNS_TEST_HPP
- #include <cmath>
- #include <algorithm>
- #include <utility>
- #include <boost/math/statistics/univariate_statistics.hpp>
- #include <boost/math/distributions/normal.hpp>
- namespace boost::math::statistics {
- template<class RandomAccessContainer>
- auto runs_above_and_below_threshold(RandomAccessContainer const & v,
- typename RandomAccessContainer::value_type threshold)
- {
- using Real = typename RandomAccessContainer::value_type;
- using std::sqrt;
- using std::abs;
- if (v.size() <= 1)
- {
- throw std::domain_error("At least 2 samples are required to get number of runs.");
- }
- typedef boost::math::policies::policy<
- boost::math::policies::promote_float<false>,
- boost::math::policies::promote_double<false> >
- no_promote_policy;
- decltype(v.size()) nabove = 0;
- decltype(v.size()) nbelow = 0;
- decltype(v.size()) imin = 0;
- // Take care of the case that v[0] == threshold:
- while (imin < v.size() && v[imin] == threshold) {
- ++imin;
- }
- // Take care of the constant vector case:
- if (imin == v.size()) {
- return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
- }
- bool run_up = (v[imin] > threshold);
- if (run_up) {
- ++nabove;
- } else {
- ++nbelow;
- }
- decltype(v.size()) runs = 1;
- for (decltype(v.size()) i = imin + 1; i < v.size(); ++i) {
- if (v[i] == threshold) {
- // skip values precisely equal to threshold (following R's randtests package)
- continue;
- }
- bool above = (v[i] > threshold);
- if (above) {
- ++nabove;
- } else {
- ++nbelow;
- }
- if (run_up == above) {
- continue;
- }
- else {
- run_up = above;
- runs++;
- }
- }
- // If you make n an int, the subtraction is gonna be bad in the variance:
- Real n = nabove + nbelow;
- Real expected_runs = Real(1) + Real(2*nabove*nbelow)/Real(n);
- Real variance = 2*nabove*nbelow*(2*nabove*nbelow-n)/Real(n*n*(n-1));
- // Bizarre, pathological limits:
- if (variance == 0)
- {
- if (runs == expected_runs)
- {
- Real statistic = 0;
- Real pvalue = 1;
- return std::make_pair(statistic, pvalue);
- }
- else
- {
- return std::make_pair(std::numeric_limits<Real>::quiet_NaN(), Real(0));
- }
- }
- Real sd = sqrt(variance);
- Real statistic = (runs - expected_runs)/sd;
- auto normal = boost::math::normal_distribution<Real, no_promote_policy>(0,1);
- Real pvalue = 2*boost::math::cdf(normal, -abs(statistic));
- return std::make_pair(statistic, pvalue);
- }
- template<class RandomAccessContainer>
- auto runs_above_and_below_median(RandomAccessContainer const & v)
- {
- using Real = typename RandomAccessContainer::value_type;
- using std::log;
- using std::sqrt;
- // We have to memcpy v because the median does a partial sort,
- // and that would be catastrophic for the runs test.
- auto w = v;
- Real median = boost::math::statistics::median(w);
- return runs_above_and_below_threshold(v, median);
- }
- }
- #endif
|