linear_regression.hpp 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. /*
  2. * Copyright Nick Thompson, 2019
  3. * Copyright Matt Borland, 2021
  4. * Use, modification and distribution are subject to the
  5. * Boost Software License, Version 1.0. (See accompanying file
  6. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. */
  8. #ifndef BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
  9. #define BOOST_MATH_STATISTICS_LINEAR_REGRESSION_HPP
  10. #include <cmath>
  11. #include <algorithm>
  12. #include <utility>
  13. #include <tuple>
  14. #include <stdexcept>
  15. #include <type_traits>
  16. #include <boost/math/statistics/univariate_statistics.hpp>
  17. #include <boost/math/statistics/bivariate_statistics.hpp>
  18. namespace boost { namespace math { namespace statistics { namespace detail {
  19. template<class ReturnType, class RandomAccessContainer>
  20. ReturnType simple_ordinary_least_squares_impl(RandomAccessContainer const & x,
  21. RandomAccessContainer const & y)
  22. {
  23. using Real = typename std::tuple_element<0, ReturnType>::type;
  24. if (x.size() <= 1)
  25. {
  26. throw std::domain_error("At least 2 samples are required to perform a linear regression.");
  27. }
  28. if (x.size() != y.size())
  29. {
  30. throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
  31. }
  32. std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
  33. Real mu_x = std::get<0>(temp);
  34. Real mu_y = std::get<1>(temp);
  35. Real cov_xy = std::get<2>(temp);
  36. Real var_x = boost::math::statistics::variance(x);
  37. if (var_x <= 0) {
  38. throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
  39. }
  40. Real c1 = cov_xy/var_x;
  41. Real c0 = mu_y - c1*mu_x;
  42. return std::make_pair(c0, c1);
  43. }
  44. template<class ReturnType, class RandomAccessContainer>
  45. ReturnType simple_ordinary_least_squares_with_R_squared_impl(RandomAccessContainer const & x,
  46. RandomAccessContainer const & y)
  47. {
  48. using Real = typename std::tuple_element<0, ReturnType>::type;
  49. if (x.size() <= 1)
  50. {
  51. throw std::domain_error("At least 2 samples are required to perform a linear regression.");
  52. }
  53. if (x.size() != y.size())
  54. {
  55. throw std::domain_error("The same number of samples must be in the independent and dependent variable.");
  56. }
  57. std::tuple<Real, Real, Real> temp = boost::math::statistics::means_and_covariance(x, y);
  58. Real mu_x = std::get<0>(temp);
  59. Real mu_y = std::get<1>(temp);
  60. Real cov_xy = std::get<2>(temp);
  61. Real var_x = boost::math::statistics::variance(x);
  62. if (var_x <= 0) {
  63. throw std::domain_error("Independent variable has no variance; this breaks linear regression.");
  64. }
  65. Real c1 = cov_xy/var_x;
  66. Real c0 = mu_y - c1*mu_x;
  67. Real squared_residuals = 0;
  68. Real squared_mean_deviation = 0;
  69. for(decltype(y.size()) i = 0; i < y.size(); ++i) {
  70. squared_mean_deviation += (y[i] - mu_y)*(y[i]-mu_y);
  71. Real ei = (c0 + c1*x[i]) - y[i];
  72. squared_residuals += ei*ei;
  73. }
  74. Real Rsquared;
  75. if (squared_mean_deviation == 0) {
  76. // Then y = constant, so the linear regression is perfect.
  77. Rsquared = 1;
  78. } else {
  79. Rsquared = 1 - squared_residuals/squared_mean_deviation;
  80. }
  81. return std::make_tuple(c0, c1, Rsquared);
  82. }
  83. } // namespace detail
  84. template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
  85. typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
  86. inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<double, double>
  87. {
  88. return detail::simple_ordinary_least_squares_impl<std::pair<double, double>>(x, y);
  89. }
  90. template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
  91. typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
  92. inline auto simple_ordinary_least_squares(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::pair<Real, Real>
  93. {
  94. return detail::simple_ordinary_least_squares_impl<std::pair<Real, Real>>(x, y);
  95. }
  96. template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
  97. typename std::enable_if<std::is_integral<Real>::value, bool>::type = true>
  98. inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<double, double, double>
  99. {
  100. return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<double, double, double>>(x, y);
  101. }
  102. template<typename RandomAccessContainer, typename Real = typename RandomAccessContainer::value_type,
  103. typename std::enable_if<!std::is_integral<Real>::value, bool>::type = true>
  104. inline auto simple_ordinary_least_squares_with_R_squared(RandomAccessContainer const & x, RandomAccessContainer const & y) -> std::tuple<Real, Real, Real>
  105. {
  106. return detail::simple_ordinary_least_squares_with_R_squared_impl<std::tuple<Real, Real, Real>>(x, y);
  107. }
  108. }}} // namespace boost::math::statistics
  109. #endif