123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- /*
- * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
- #ifndef MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
- #define MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
- #include <assert.h>
- #include <string.h> // memset, size_t
- #include "modules/audio_coding/neteq/audio_multi_vector.h"
- #include "rtc_base/constructor_magic.h"
- namespace webrtc {
- // Forward declarations.
- class BackgroundNoise;
- // This is the base class for Accelerate and PreemptiveExpand. This class
- // cannot be instantiated, but must be used through either of the derived
- // classes.
- class TimeStretch {
- public:
- enum ReturnCodes {
- kSuccess = 0,
- kSuccessLowEnergy = 1,
- kNoStretch = 2,
- kError = -1
- };
- TimeStretch(int sample_rate_hz,
- size_t num_channels,
- const BackgroundNoise& background_noise)
- : sample_rate_hz_(sample_rate_hz),
- fs_mult_(sample_rate_hz / 8000),
- num_channels_(num_channels),
- background_noise_(background_noise),
- max_input_value_(0) {
- assert(sample_rate_hz_ == 8000 || sample_rate_hz_ == 16000 ||
- sample_rate_hz_ == 32000 || sample_rate_hz_ == 48000);
- assert(num_channels_ > 0);
- memset(auto_correlation_, 0, sizeof(auto_correlation_));
- }
- virtual ~TimeStretch() {}
- // This method performs the processing common to both Accelerate and
- // PreemptiveExpand.
- ReturnCodes Process(const int16_t* input,
- size_t input_len,
- bool fast_mode,
- AudioMultiVector* output,
- size_t* length_change_samples);
- protected:
- // Sets the parameters |best_correlation| and |peak_index| to suitable
- // values when the signal contains no active speech. This method must be
- // implemented by the sub-classes.
- virtual void SetParametersForPassiveSpeech(size_t input_length,
- int16_t* best_correlation,
- size_t* peak_index) const = 0;
- // Checks the criteria for performing the time-stretching operation and,
- // if possible, performs the time-stretching. This method must be implemented
- // by the sub-classes.
- virtual ReturnCodes CheckCriteriaAndStretch(
- const int16_t* input,
- size_t input_length,
- size_t peak_index,
- int16_t best_correlation,
- bool active_speech,
- bool fast_mode,
- AudioMultiVector* output) const = 0;
- static const size_t kCorrelationLen = 50;
- static const size_t kLogCorrelationLen = 6; // >= log2(kCorrelationLen).
- static const size_t kMinLag = 10;
- static const size_t kMaxLag = 60;
- static const size_t kDownsampledLen = kCorrelationLen + kMaxLag;
- static const int kCorrelationThreshold = 14746; // 0.9 in Q14.
- static constexpr size_t kRefChannel = 0; // First channel is reference.
- const int sample_rate_hz_;
- const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000.
- const size_t num_channels_;
- const BackgroundNoise& background_noise_;
- int16_t max_input_value_;
- int16_t downsampled_input_[kDownsampledLen];
- // Adding 1 to the size of |auto_correlation_| because of how it is used
- // by the peak-detection algorithm.
- int16_t auto_correlation_[kCorrelationLen + 1];
- private:
- // Calculates the auto-correlation of |downsampled_input_| and writes the
- // result to |auto_correlation_|.
- void AutoCorrelation();
- // Performs a simple voice-activity detection based on the input parameters.
- bool SpeechDetection(int32_t vec1_energy,
- int32_t vec2_energy,
- size_t peak_index,
- int scaling) const;
- RTC_DISALLOW_COPY_AND_ASSIGN(TimeStretch);
- };
- } // namespace webrtc
- #endif // MODULES_AUDIO_CODING_NETEQ_TIME_STRETCH_H_
|