neteq_impl.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /*
  2. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
  11. #define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
  12. #include <map>
  13. #include <memory>
  14. #include <string>
  15. #include <utility>
  16. #include <vector>
  17. #include "absl/types/optional.h"
  18. #include "api/audio/audio_frame.h"
  19. #include "api/neteq/neteq.h"
  20. #include "api/neteq/neteq_controller.h"
  21. #include "api/neteq/neteq_controller_factory.h"
  22. #include "api/neteq/tick_timer.h"
  23. #include "api/rtp_packet_info.h"
  24. #include "modules/audio_coding/neteq/audio_multi_vector.h"
  25. #include "modules/audio_coding/neteq/expand_uma_logger.h"
  26. #include "modules/audio_coding/neteq/packet.h"
  27. #include "modules/audio_coding/neteq/random_vector.h"
  28. #include "modules/audio_coding/neteq/statistics_calculator.h"
  29. #include "rtc_base/constructor_magic.h"
  30. #include "rtc_base/synchronization/mutex.h"
  31. #include "rtc_base/thread_annotations.h"
  32. namespace webrtc {
  33. // Forward declarations.
  // These names are only declared here, not defined. NetEqImpl below refers
  // to them through raw pointers, references and std::unique_ptr members,
  // none of which need the full definition at this point.
  // RandomVector is the exception: NetEqImpl holds it by value
  // (random_vector_), which needs the complete type.
  // NOTE(review): that definition presumably comes from
  // modules/audio_coding/neteq/random_vector.h, included above, which would
  // make the forward declaration here redundant but harmless -- confirm.
  34. class Accelerate;
  35. class BackgroundNoise;
  36. class Clock;
  37. class ComfortNoise;
  38. class DecoderDatabase;
  39. class DtmfBuffer;
  40. class DtmfToneGenerator;
  41. class Expand;
  42. class Merge;
  43. class NackTracker;
  44. class Normal;
  45. class PacketBuffer;
  46. class RedPayloadSplitter;
  47. class PostDecodeVad;
  48. class PreemptiveExpand;
  49. class RandomVector;
  50. class SyncBuffer;
  51. class TimestampScaler;
  // The following are forward-declared with the `struct` class-key.
  52. struct AccelerateFactory;
  53. struct DtmfEvent;
  54. struct ExpandFactory;
  55. struct PreemptiveExpandFactory;
  // NetEqImpl is the concrete implementation of the webrtc::NetEq interface
  // (its public base class). The public methods marked `override` implement
  // that interface. The protected helpers carry
  // RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_), which declares that |mutex_| must
  // be held when they are called, and every data member except |clock_| and
  // |mutex_| itself is annotated RTC_GUARDED_BY(mutex_).
  56. class NetEqImpl : public webrtc::NetEq {
  57. public:
  // Classifies the audio produced by the latest GetAudio() call; this is the
  // return type of LastOutputType() below.
  58. enum class OutputType {
  59. kNormalSpeech,
  60. kPLC,
  61. kCNG,
  62. kPLCCNG,
  63. kVadPassive,
  64. kCodecPLC
  65. };
  // Unscoped error-code enum. kNoError is pinned to 0 and the remaining
  // enumerators take consecutive values in declaration order, so inserting
  // or reordering entries changes their numeric values.
  // NOTE(review): presumably these are the "error code" values returned by
  // the *Internal() helpers below -- confirm in the .cc file.
  66. enum ErrorCodes {
  67. kNoError = 0,
  68. kOtherError,
  69. kUnknownRtpPayloadType,
  70. kDecoderNotFound,
  71. kInvalidPointer,
  72. kAccelerateError,
  73. kPreemptiveExpandError,
  74. kComfortNoiseErrorCode,
  75. kDecoderErrorCode,
  76. kOtherDecoderError,
  77. kInvalidOperation,
  78. kDtmfParsingError,
  79. kDtmfInsertError,
  80. kSampleUnderrun,
  81. kDecodedTooMuch,
  82. kRedundancySplitError,
  83. kPacketBufferCorruption
  84. };
  // Bundle of collaborator objects that is handed to the NetEqImpl
  // constructor by rvalue reference.
  85. struct Dependencies {
  86. // The constructor populates the Dependencies struct with the default
  87. // implementations of the objects. They can all be replaced by the user
  88. // before sending the struct to the NetEqImpl constructor. However, there
  89. // are dependencies between some of the classes inside the struct, so
  90. // swapping out one may make it necessary to re-create another one.
  91. Dependencies(const NetEq::Config& config,
  92. Clock* clock,
  93. const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
  94. const NetEqControllerFactory& controller_factory);
  95. ~Dependencies();
  // Raw const pointer, unlike the unique_ptr members that follow.
  // NOTE(review): presumably not owned by this struct -- confirm.
  96. Clock* const clock;
  97. std::unique_ptr<TickTimer> tick_timer;
  98. std::unique_ptr<StatisticsCalculator> stats;
  99. std::unique_ptr<DecoderDatabase> decoder_database;
  100. std::unique_ptr<DtmfBuffer> dtmf_buffer;
  101. std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
  102. std::unique_ptr<PacketBuffer> packet_buffer;
  103. std::unique_ptr<NetEqController> neteq_controller;
  104. std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
  105. std::unique_ptr<TimestampScaler> timestamp_scaler;
  106. std::unique_ptr<AccelerateFactory> accelerate_factory;
  107. std::unique_ptr<ExpandFactory> expand_factory;
  108. std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
  109. };
  110. // Creates a new NetEqImpl object.
  // |deps| is taken by rvalue reference, so callers hand over a Dependencies
  // object (e.g. via std::move). |create_components| defaults to true.
  // NOTE(review): its exact effect is not visible in this header; presumably
  // tests pass false to skip building some components -- confirm.
  111. NetEqImpl(const NetEq::Config& config,
  112. Dependencies&& deps,
  113. bool create_components = true);
  114. ~NetEqImpl() override;
  115. // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure.
  116. int InsertPacket(const RTPHeader& rtp_header,
  117. rtc::ArrayView<const uint8_t> payload) override;
  118. void InsertEmptyPacket(const RTPHeader& rtp_header) override;
  // Public GetAudio entry point. |action_override| is optional and defaults
  // to absl::nullopt.
  // NOTE(review): presumably acquires |mutex_| and forwards to
  // GetAudioInternal() below -- confirm in the .cc file.
  119. int GetAudio(
  120. AudioFrame* audio_frame,
  121. bool* muted,
  122. absl::optional<Operation> action_override = absl::nullopt) override;
  123. void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
  124. bool RegisterPayloadType(int rtp_payload_type,
  125. const SdpAudioFormat& audio_format) override;
  126. // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
  127. // -1 on failure.
  128. int RemovePayloadType(uint8_t rtp_payload_type) override;
  129. void RemoveAllPayloadTypes() override;
  130. bool SetMinimumDelay(int delay_ms) override;
  131. bool SetMaximumDelay(int delay_ms) override;
  132. bool SetBaseMinimumDelayMs(int delay_ms) override;
  133. int GetBaseMinimumDelayMs() const override;
  134. int TargetDelayMs() const override;
  135. int FilteredCurrentDelayMs() const override;
  136. // Writes the current network statistics to |stats|. The statistics are reset
  137. // after the call.
  138. int NetworkStatistics(NetEqNetworkStatistics* stats) override;
  139. NetEqNetworkStatistics CurrentNetworkStatistics() const override;
  140. NetEqLifetimeStatistics GetLifetimeStatistics() const override;
  141. NetEqOperationsAndState GetOperationsAndState() const override;
  142. // Enables post-decode VAD. When enabled, GetAudio() will return
  143. // kOutputVADPassive when the signal contains no speech.
  // NOTE(review): no kOutputVADPassive is declared in this header; the
  // nearest names are OutputType::kVadPassive and AudioFrame::kVadPassive
  // (the initial value of |last_vad_activity_|). This comment looks stale.
  144. void EnableVad() override;
  145. // Disables post-decode VAD.
  146. void DisableVad() override;
  147. absl::optional<uint32_t> GetPlayoutTimestamp() const override;
  148. int last_output_sample_rate_hz() const override;
  149. absl::optional<DecoderFormat> GetDecoderFormat(
  150. int payload_type) const override;
  151. // Flushes both the packet buffer and the sync buffer.
  152. void FlushBuffers() override;
  153. void EnableNack(size_t max_nack_list_size) override;
  154. void DisableNack() override;
  155. std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
  156. std::vector<uint32_t> LastDecodedTimestamps() const override;
  157. int SyncBufferSizeMs() const override;
  158. // These accessor methods are only intended for testing purposes.
  // Neither is marked `override`, so they are additions of this class rather
  // than part of the NetEq interface.
  159. const SyncBuffer* sync_buffer_for_test() const;
  160. Operation last_operation_for_test() const;
  161. protected:
  // Everything below is protected rather than private.
  // NOTE(review): presumably so that test subclasses can reach it -- confirm.
  // NOTE(review): kOutputSizeMs is presumably the duration of one output
  // block; GetAudioInternal() below is documented as delivering 10 ms.
  162. static const int kOutputSizeMs = 10;
  163. static const size_t kMaxFrameSize = 5760; // 120 ms @ 48 kHz.
  164. // TODO(hlundin): Provide a better value for kSyncBufferSize.
  165. // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
  166. // calculating correlations of current frame against history.
  // That is 5760 + 2880 = 8640 samples.
  167. static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
  168. // Inserts a new packet into NetEq. This is used by the InsertPacket method
  169. // above. Returns 0 on success, otherwise an error code.
  170. // TODO(hlundin): Merge this with InsertPacket above?
  171. int InsertPacketInternal(const RTPHeader& rtp_header,
  172. rtc::ArrayView<const uint8_t> payload)
  173. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  174. // Delivers 10 ms of audio data. The data is written to |audio_frame|.
  175. // Returns 0 on success, otherwise an error code.
  176. int GetAudioInternal(AudioFrame* audio_frame,
  177. bool* muted,
  178. absl::optional<Operation> action_override)
  179. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  180. // Provides a decision to the GetAudioInternal method. The decision what to
  181. // do is written to |operation|. Packets to decode are written to
  182. // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
  183. // DTMF should be played, |play_dtmf| is set to true by the method.
  184. // Returns 0 on success, otherwise an error code.
  185. int GetDecision(Operation* operation,
  186. PacketList* packet_list,
  187. DtmfEvent* dtmf_event,
  188. bool* play_dtmf,
  189. absl::optional<Operation> action_override)
  190. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  191. // Decodes the speech packets in |packet_list|, and writes the results to
  192. // |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
  193. // elements. The length of the decoded data is written to |decoded_length|.
  194. // The speech type -- speech or (codec-internal) comfort noise -- is written
  195. // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
  196. // comfort noise, those are not decoded.
  // NOTE(review): this signature has no |decoded_buffer| or
  // |decoded_buffer_length| parameter; the text above presumably refers to
  // the members |decoded_buffer_| and |decoded_buffer_length_| -- confirm.
  197. int Decode(PacketList* packet_list,
  198. Operation* operation,
  199. int* decoded_length,
  200. AudioDecoder::SpeechType* speech_type)
  201. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  202. // Sub-method to Decode(). Performs codec internal CNG.
  203. int DecodeCng(AudioDecoder* decoder,
  204. int* decoded_length,
  205. AudioDecoder::SpeechType* speech_type)
  206. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  207. // Sub-method to Decode(). Performs the actual decoding.
  208. int DecodeLoop(PacketList* packet_list,
  209. const Operation& operation,
  210. AudioDecoder* decoder,
  211. int* decoded_length,
  212. AudioDecoder::SpeechType* speech_type)
  213. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  214. // Sub-method which calls the Normal class to perform the normal operation.
  215. void DoNormal(const int16_t* decoded_buffer,
  216. size_t decoded_length,
  217. AudioDecoder::SpeechType speech_type,
  218. bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  219. // Sub-method which calls the Merge class to perform the merge operation.
  220. void DoMerge(int16_t* decoded_buffer,
  221. size_t decoded_length,
  222. AudioDecoder::SpeechType speech_type,
  223. bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  // NOTE(review): undocumented in this header. By name this is the
  // codec-provided packet-loss-concealment step; the meaning of the bool
  // result has to be checked in the .cc file.
  224. bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  225. // Sub-method which calls the Expand class to perform the expand operation.
  226. int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  227. // Sub-method which calls the Accelerate class to perform the accelerate
  228. // operation.
  229. int DoAccelerate(int16_t* decoded_buffer,
  230. size_t decoded_length,
  231. AudioDecoder::SpeechType speech_type,
  232. bool play_dtmf,
  233. bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  234. // Sub-method which calls the PreemptiveExpand class to perform the
  235. // preemptive expand operation.
  236. int DoPreemptiveExpand(int16_t* decoded_buffer,
  237. size_t decoded_length,
  238. AudioDecoder::SpeechType speech_type,
  239. bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  240. // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
  241. // noise. |packet_list| can either contain one SID frame to update the
  242. // noise parameters, or no payload at all, in which case the previously
  243. // received parameters are used.
  244. int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
  245. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  246. // Calls the audio decoder to generate codec-internal comfort noise when
  247. // no packet was received.
  248. void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
  249. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  250. // Calls the DtmfToneGenerator class to generate DTMF tones.
  251. int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
  252. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  253. // Overdub DTMF on top of |output|.
  // Declared const, yet still annotated as requiring |mutex_| exclusively.
  254. int DtmfOverdub(const DtmfEvent& dtmf_event,
  255. size_t num_channels,
  256. int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  257. // Extracts packets from |packet_buffer_| to produce at least
  258. // |required_samples| samples. The packets are inserted into |packet_list|.
  259. // Returns the number of samples that the packets in the list will produce, or
  260. // -1 in case of an error.
  261. int ExtractPackets(size_t required_samples, PacketList* packet_list)
  262. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  263. // Resets various variables and objects to new values based on the sample rate
  264. // |fs_hz| and |channels| number audio channels.
  265. void SetSampleRateAndChannels(int fs_hz, size_t channels)
  266. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  267. // Returns the output type for the audio produced by the latest call to
  268. // GetAudio().
  269. OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  270. // Updates Expand and Merge.
  // Apart from the overrides and the destructor, this is the only method
  // declared `virtual` in the class, so a subclass can replace it.
  271. virtual void UpdatePlcComponents(int fs_hz, size_t channels)
  272. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  // Variant of CurrentNetworkStatistics() that requires |mutex_| to be held.
  // NOTE(review): presumably the public method locks and forwards here.
  273. NetEqNetworkStatistics CurrentNetworkStatisticsInternal() const
  274. RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
  // Raw const pointer. NOTE(review): presumably not owned -- confirm.
  275. Clock* const clock_;
  // Declared `mutable`, which lets const member functions lock it.
  276. mutable Mutex mutex_;
  // The members from |tick_timer_| through |stats_| are
  // `const std::unique_ptr`: the pointer itself cannot be reseated after
  // construction.
  277. const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_);
  278. const std::unique_ptr<DecoderDatabase> decoder_database_
  279. RTC_GUARDED_BY(mutex_);
  280. const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_);
  281. const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
  282. RTC_GUARDED_BY(mutex_);
  283. const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_);
  284. const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
  285. RTC_GUARDED_BY(mutex_);
  286. const std::unique_ptr<TimestampScaler> timestamp_scaler_
  287. RTC_GUARDED_BY(mutex_);
  288. const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
  289. const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
  290. const std::unique_ptr<AccelerateFactory> accelerate_factory_
  291. RTC_GUARDED_BY(mutex_);
  292. const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
  293. RTC_GUARDED_BY(mutex_);
  294. const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);
  // The unique_ptr members below are non-const, so they can be replaced
  // after construction. NOTE(review): presumably some are re-created by
  // SetSampleRateAndChannels() / UpdatePlcComponents() -- confirm.
  295. std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
  296. std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
  297. std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_);
  298. std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_);
  299. std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_);
  300. std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_);
  301. std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_);
  302. std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_);
  303. std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_);
  // Held by value, so RandomVector has to be a complete type at this point.
  304. RandomVector random_vector_ RTC_GUARDED_BY(mutex_);
  305. std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_);
  // Plain state variables. Most have no in-class initializer here.
  // NOTE(review): by name, |fs_hz_| is the sample rate passed to
  // SetSampleRateAndChannels() -- confirm in the .cc file.
  306. int fs_hz_ RTC_GUARDED_BY(mutex_);
  307. int fs_mult_ RTC_GUARDED_BY(mutex_);
  308. int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_);
  309. size_t output_size_samples_ RTC_GUARDED_BY(mutex_);
  310. size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_);
  311. Mode last_mode_ RTC_GUARDED_BY(mutex_);
  312. Operation last_operation_ RTC_GUARDED_BY(mutex_);
  313. size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_);
  314. std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_);
  315. uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_);
  316. bool new_codec_ RTC_GUARDED_BY(mutex_);
  317. uint32_t timestamp_ RTC_GUARDED_BY(mutex_);
  318. bool reset_decoder_ RTC_GUARDED_BY(mutex_);
  319. absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
  320. absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
  321. bool first_packet_ RTC_GUARDED_BY(mutex_);
  322. bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_);
  323. std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
  324. bool nack_enabled_ RTC_GUARDED_BY(mutex_);
  // const: fixed at construction.
  325. const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
  // Starts out as AudioFrame::kVadPassive through the in-class initializer.
  326. AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
  327. AudioFrame::kVadPassive;
  328. std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
  329. RTC_GUARDED_BY(mutex_);
  330. std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(mutex_);
  331. std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);
  332. ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_);
  333. ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_);
  334. bool no_time_stretching_ RTC_GUARDED_BY(mutex_); // Only used for test.
  335. rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_);
  // const: fixed at construction.
  336. const bool enable_rtx_handling_ RTC_GUARDED_BY(mutex_);
  337. // Data members used for adding extra delay to the output of NetEq.
  338. // The delay in ms (which is 10 times the number of elements in
  339. // output_delay_chain_).
  340. const int output_delay_chain_ms_ RTC_GUARDED_BY(mutex_);
  341. // Vector of AudioFrames which contains the delayed audio. Accessed as a
  342. // circular buffer.
  343. std::vector<AudioFrame> output_delay_chain_ RTC_GUARDED_BY(mutex_);
  344. // Index into output_delay_chain_.
  345. size_t output_delay_chain_ix_ RTC_GUARDED_BY(mutex_) = 0;
  346. // Did output_delay_chain_ get populated yet?
  347. bool output_delay_chain_empty_ RTC_GUARDED_BY(mutex_) = true;
  348. // Contains the sample rate of the AudioFrame last emitted from the delay
  349. // chain. If the extra output delay chain is not used, or if no audio has been
  350. // emitted yet, the variable is empty.
  351. absl::optional<int> delayed_last_output_sample_rate_hz_
  352. RTC_GUARDED_BY(mutex_);
  353. private:
  // NOTE(review): RTC_DISALLOW_COPY_AND_ASSIGN presumably comes from
  // rtc_base/constructor_magic.h (included above) and, by name, makes
  // NetEqImpl non-copyable and non-assignable -- confirm.
  354. RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
  355. };
  356. } // namespace webrtc
  357. #endif // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_