voice_detection.h 1.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. /*
  2. * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
  11. #define MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
  12. #include <stddef.h>
  13. #include <memory>
  14. #include "modules/audio_processing/include/audio_processing.h"
  15. namespace webrtc {
  16. class AudioBuffer;
  17. // The voice activity detection (VAD) component analyzes the stream to
  18. // determine if voice is present.
  19. class VoiceDetection {
  20. public:
  21. // Specifies the likelihood that a frame will be declared to contain voice.
  22. // A higher value makes it more likely that speech will not be clipped, at
  23. // the expense of more noise being detected as voice.
  24. enum Likelihood {
  25. kVeryLowLikelihood,
  26. kLowLikelihood,
  27. kModerateLikelihood,
  28. kHighLikelihood
  29. };
  30. VoiceDetection(int sample_rate_hz, Likelihood likelihood);
  31. ~VoiceDetection();
  32. VoiceDetection(VoiceDetection&) = delete;
  33. VoiceDetection& operator=(VoiceDetection&) = delete;
  34. // Returns true if voice is detected in the current frame.
  35. bool ProcessCaptureAudio(AudioBuffer* audio);
  36. Likelihood likelihood() const { return likelihood_; }
  37. private:
  38. class Vad;
  39. int sample_rate_hz_;
  40. size_t frame_size_samples_;
  41. Likelihood likelihood_;
  42. std::unique_ptr<Vad> vad_;
  43. };
  44. } // namespace webrtc
  45. #endif // MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_