running_statistics.h 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. /*
  2. * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef API_NUMERICS_RUNNING_STATISTICS_H_
  11. #define API_NUMERICS_RUNNING_STATISTICS_H_
  12. #include <algorithm>
  13. #include <cmath>
  14. #include <limits>
  15. #include "absl/types/optional.h"
  16. #include "rtc_base/checks.h"
  17. #include "rtc_base/numerics/math_utils.h"
  18. namespace webrtc {
  19. namespace webrtc_impl {
  20. // tl;dr: Robust and efficient online computation of statistics,
  21. // using Welford's method for variance. [1]
  22. //
  23. // This should be your go-to class if you ever need to compute
  24. // min, max, mean, variance and standard deviation.
  25. // If you need to get percentiles, please use webrtc::SamplesStatsCounter.
  26. //
  27. // Please note RemoveSample() won't affect min and max.
  28. // If you want a full-fledged moving window over N last samples,
  29. // please use webrtc::RollingAccumulator.
  30. //
  31. // The measures return absl::nullopt if no samples were fed (Size() == 0),
  32. // otherwise the returned optional is guaranteed to contain a value.
  33. //
  34. // [1]
  35. // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
  36. // The type T is a scalar which must be convertible to double.
  37. // Rationale: we often need greater precision for measures
  38. // than for the samples themselves.
  39. template <typename T>
  40. class RunningStatistics {
  41. public:
  42. // Update stats ////////////////////////////////////////////
  43. // Add a value participating in the statistics in O(1) time.
  44. void AddSample(T sample) {
  45. max_ = std::max(max_, sample);
  46. min_ = std::min(min_, sample);
  47. ++size_;
  48. // Welford's incremental update.
  49. const double delta = sample - mean_;
  50. mean_ += delta / size_;
  51. const double delta2 = sample - mean_;
  52. cumul_ += delta * delta2;
  53. }
  54. // Remove a previously added value in O(1) time.
  55. // Nb: This doesn't affect min or max.
  56. // Calling RemoveSample when Size()==0 is incorrect.
  57. void RemoveSample(T sample) {
  58. RTC_DCHECK_GT(Size(), 0);
  59. // In production, just saturate at 0.
  60. if (Size() == 0) {
  61. return;
  62. }
  63. // Since samples order doesn't matter, this is the
  64. // exact reciprocal of Welford's incremental update.
  65. --size_;
  66. const double delta = sample - mean_;
  67. mean_ -= delta / size_;
  68. const double delta2 = sample - mean_;
  69. cumul_ -= delta * delta2;
  70. }
  71. // Merge other stats, as if samples were added one by one, but in O(1).
  72. void MergeStatistics(const RunningStatistics<T>& other) {
  73. if (other.size_ == 0) {
  74. return;
  75. }
  76. max_ = std::max(max_, other.max_);
  77. min_ = std::min(min_, other.min_);
  78. const int64_t new_size = size_ + other.size_;
  79. const double new_mean =
  80. (mean_ * size_ + other.mean_ * other.size_) / new_size;
  81. // Each cumulant must be corrected.
  82. // * from: sum((x_i - mean_)²)
  83. // * to: sum((x_i - new_mean)²)
  84. auto delta = [new_mean](const RunningStatistics<T>& stats) {
  85. return stats.size_ * (new_mean * (new_mean - 2 * stats.mean_) +
  86. stats.mean_ * stats.mean_);
  87. };
  88. cumul_ = cumul_ + delta(*this) + other.cumul_ + delta(other);
  89. mean_ = new_mean;
  90. size_ = new_size;
  91. }
  92. // Get Measures ////////////////////////////////////////////
  93. // Returns number of samples involved via AddSample() or MergeStatistics(),
  94. // minus number of times RemoveSample() was called.
  95. int64_t Size() const { return size_; }
  96. // Returns minimum among all seen samples, in O(1) time.
  97. // This isn't affected by RemoveSample().
  98. absl::optional<T> GetMin() const {
  99. if (size_ == 0) {
  100. return absl::nullopt;
  101. }
  102. return min_;
  103. }
  104. // Returns maximum among all seen samples, in O(1) time.
  105. // This isn't affected by RemoveSample().
  106. absl::optional<T> GetMax() const {
  107. if (size_ == 0) {
  108. return absl::nullopt;
  109. }
  110. return max_;
  111. }
  112. // Returns mean in O(1) time.
  113. absl::optional<double> GetMean() const {
  114. if (size_ == 0) {
  115. return absl::nullopt;
  116. }
  117. return mean_;
  118. }
  119. // Returns unbiased sample variance in O(1) time.
  120. absl::optional<double> GetVariance() const {
  121. if (size_ == 0) {
  122. return absl::nullopt;
  123. }
  124. return cumul_ / size_;
  125. }
  126. // Returns unbiased standard deviation in O(1) time.
  127. absl::optional<double> GetStandardDeviation() const {
  128. if (size_ == 0) {
  129. return absl::nullopt;
  130. }
  131. return std::sqrt(*GetVariance());
  132. }
  133. private:
  134. int64_t size_ = 0; // Samples seen.
  135. T min_ = infinity_or_max<T>();
  136. T max_ = minus_infinity_or_min<T>();
  137. double mean_ = 0;
  138. double cumul_ = 0; // Variance * size_, sometimes noted m2.
  139. };
  140. } // namespace webrtc_impl
  141. } // namespace webrtc
  142. #endif // API_NUMERICS_RUNNING_STATISTICS_H_