vad_audio_proc.h 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. /*
  2. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
  11. #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
  12. #include <stddef.h>
  13. #include <stdint.h>
  14. #include <memory>
  15. #include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
  16. namespace webrtc {
  17. class PoleZeroFilter;
  18. class VadAudioProc {
  19. public:
  20. // Forward declare iSAC structs.
  21. struct PitchAnalysisStruct;
  22. struct PreFiltBankstr;
  23. VadAudioProc();
  24. ~VadAudioProc();
  25. int ExtractFeatures(const int16_t* audio_frame,
  26. size_t length,
  27. AudioFeatures* audio_features);
  28. static const size_t kDftSize = 512;
  29. private:
  30. void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
  31. void SubframeCorrelation(double* corr,
  32. size_t length_corr,
  33. size_t subframe_index);
  34. void GetLpcPolynomials(double* lpc, size_t length_lpc);
  35. void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
  36. void Rms(double* rms, size_t length_rms);
  37. void ResetBuffer();
  38. // To compute spectral peak we perform LPC analysis to get spectral envelope.
  39. // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
  40. // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
  41. // we need 5 ms of past signal to create the input of LPC analysis.
  42. enum : size_t {
  43. kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
  44. };
  45. // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
  46. // all the code recognize it as "no-error."
  47. enum : int { kNoError = 0 };
  48. enum : size_t { kNum10msSubframes = 3 };
  49. enum : size_t {
  50. kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
  51. };
  52. enum : size_t {
  53. // Samples in 30 ms @ given sampling rate.
  54. kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
  55. };
  56. enum : size_t {
  57. kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
  58. };
  59. enum : size_t { kIpLength = kDftSize >> 1 };
  60. enum : size_t { kWLength = kDftSize >> 1 };
  61. enum : size_t { kLpcOrder = 16 };
  62. size_t ip_[kIpLength];
  63. float w_fft_[kWLength];
  64. // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
  65. float audio_buffer_[kBufferLength];
  66. size_t num_buffer_samples_;
  67. double log_old_gain_;
  68. double old_lag_;
  69. std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
  70. std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
  71. std::unique_ptr<PoleZeroFilter> high_pass_filter_;
  72. };
  73. } // namespace webrtc
  74. #endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_