running_statistics.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. /*
  2. * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef RTC_BASE_NUMERICS_RUNNING_STATISTICS_H_
  11. #define RTC_BASE_NUMERICS_RUNNING_STATISTICS_H_
  12. #include <algorithm>
  13. #include <cmath>
  14. #include <limits>
  15. #include "absl/types/optional.h"
  16. #include "rtc_base/checks.h"
  17. #include "rtc_base/numerics/math_utils.h"
  18. namespace webrtc {
  19. // tl;dr: Robust and efficient online computation of statistics,
  20. // using Welford's method for variance. [1]
  21. //
  22. // This should be your go-to class if you ever need to compute
  23. // min, max, mean, variance and standard deviation.
  24. // If you need to get percentiles, please use webrtc::SamplesStatsCounter.
  25. //
  26. // Please note RemoveSample() won't affect min and max.
  27. // If you want a full-fledged moving window over N last samples,
  28. // please use webrtc::RollingAccumulator.
  29. //
  30. // The measures return absl::nullopt if no samples were fed (Size() == 0),
  31. // otherwise the returned optional is guaranteed to contain a value.
  32. //
  33. // [1]
  34. // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
  35. // The type T is a scalar which must be convertible to double.
  36. // Rationale: we often need greater precision for measures
  37. // than for the samples themselves.
  38. template <typename T>
  39. class RunningStatistics {
  40. public:
  41. // Update stats ////////////////////////////////////////////
  42. // Add a value participating in the statistics in O(1) time.
  43. void AddSample(T sample) {
  44. max_ = std::max(max_, sample);
  45. min_ = std::min(min_, sample);
  46. ++size_;
  47. // Welford's incremental update.
  48. const double delta = sample - mean_;
  49. mean_ += delta / size_;
  50. const double delta2 = sample - mean_;
  51. cumul_ += delta * delta2;
  52. }
  53. // Remove a previously added value in O(1) time.
  54. // Nb: This doesn't affect min or max.
  55. // Calling RemoveSample when Size()==0 is incorrect.
  56. void RemoveSample(T sample) {
  57. RTC_DCHECK_GT(Size(), 0);
  58. // In production, just saturate at 0.
  59. if (Size() == 0) {
  60. return;
  61. }
  62. // Since samples order doesn't matter, this is the
  63. // exact reciprocal of Welford's incremental update.
  64. --size_;
  65. const double delta = sample - mean_;
  66. mean_ -= delta / size_;
  67. const double delta2 = sample - mean_;
  68. cumul_ -= delta * delta2;
  69. }
  70. // Merge other stats, as if samples were added one by one, but in O(1).
  71. void MergeStatistics(const RunningStatistics<T>& other) {
  72. if (other.size_ == 0) {
  73. return;
  74. }
  75. max_ = std::max(max_, other.max_);
  76. min_ = std::min(min_, other.min_);
  77. const int64_t new_size = size_ + other.size_;
  78. const double new_mean =
  79. (mean_ * size_ + other.mean_ * other.size_) / new_size;
  80. // Each cumulant must be corrected.
  81. // * from: sum((x_i - mean_)²)
  82. // * to: sum((x_i - new_mean)²)
  83. auto delta = [new_mean](const RunningStatistics<T>& stats) {
  84. return stats.size_ * (new_mean * (new_mean - 2 * stats.mean_) +
  85. stats.mean_ * stats.mean_);
  86. };
  87. cumul_ = cumul_ + delta(*this) + other.cumul_ + delta(other);
  88. mean_ = new_mean;
  89. size_ = new_size;
  90. }
  91. // Get Measures ////////////////////////////////////////////
  92. // Returns number of samples involved via AddSample() or MergeStatistics(),
  93. // minus number of times RemoveSample() was called.
  94. int64_t Size() const { return size_; }
  95. // Returns minimum among all seen samples, in O(1) time.
  96. // This isn't affected by RemoveSample().
  97. absl::optional<T> GetMin() const {
  98. if (size_ == 0) {
  99. return absl::nullopt;
  100. }
  101. return min_;
  102. }
  103. // Returns maximum among all seen samples, in O(1) time.
  104. // This isn't affected by RemoveSample().
  105. absl::optional<T> GetMax() const {
  106. if (size_ == 0) {
  107. return absl::nullopt;
  108. }
  109. return max_;
  110. }
  111. // Returns mean in O(1) time.
  112. absl::optional<double> GetMean() const {
  113. if (size_ == 0) {
  114. return absl::nullopt;
  115. }
  116. return mean_;
  117. }
  118. // Returns unbiased sample variance in O(1) time.
  119. absl::optional<double> GetVariance() const {
  120. if (size_ == 0) {
  121. return absl::nullopt;
  122. }
  123. return cumul_ / size_;
  124. }
  125. // Returns unbiased standard deviation in O(1) time.
  126. absl::optional<double> GetStandardDeviation() const {
  127. if (size_ == 0) {
  128. return absl::nullopt;
  129. }
  130. return std::sqrt(*GetVariance());
  131. }
  132. private:
  133. int64_t size_ = 0; // Samples seen.
  134. T min_ = infinity_or_max<T>();
  135. T max_ = minus_infinity_or_min<T>();
  136. double mean_ = 0;
  137. double cumul_ = 0; // Variance * size_, sometimes noted m2.
  138. };
  139. } // namespace webrtc
  140. #endif // RTC_BASE_NUMERICS_RUNNING_STATISTICS_H_