video_encoder.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /*
  2. * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_
  11. #define API_VIDEO_CODECS_VIDEO_ENCODER_H_
  12. #include <limits>
  13. #include <memory>
  14. #include <string>
  15. #include <vector>
  16. #include "absl/container/inlined_vector.h"
  17. #include "absl/types/optional.h"
  18. #include "api/fec_controller_override.h"
  19. #include "api/units/data_rate.h"
  20. #include "api/video/encoded_image.h"
  21. #include "api/video/video_bitrate_allocation.h"
  22. #include "api/video/video_codec_constants.h"
  23. #include "api/video/video_frame.h"
  24. #include "api/video_codecs/video_codec.h"
  25. #include "rtc_base/checks.h"
  26. #include "rtc_base/system/rtc_export.h"
  27. namespace webrtc {
  // TODO(pbos): Expose these through a public (root) header or change these
  // APIs.
  // Forward declaration only: this header refers to CodecSpecificInfo solely
  // through a pointer parameter, so the full definition is not needed here.
  struct CodecSpecificInfo;
  // Default for VideoEncoder::ScalingSettings::min_pixels_per_frame; equals the
  // pixel count of a 320x180 frame.
  constexpr int kDefaultMinPixelsPerFrame = 320 * 180;
  31. class RTC_EXPORT EncodedImageCallback {
  32. public:
  33. virtual ~EncodedImageCallback() {}
  34. struct Result {
  35. enum Error {
  36. OK,
  37. // Failed to send the packet.
  38. ERROR_SEND_FAILED,
  39. };
  40. explicit Result(Error error) : error(error) {}
  41. Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {}
  42. Error error;
  43. // Frame ID assigned to the frame. The frame ID should be the same as the ID
  44. // seen by the receiver for this frame. RTP timestamp of the frame is used
  45. // as frame ID when RTP is used to send video. Must be used only when
  46. // error=OK.
  47. uint32_t frame_id = 0;
  48. // Tells the encoder that the next frame is should be dropped.
  49. bool drop_next_frame = false;
  50. };
  51. // Used to signal the encoder about reason a frame is dropped.
  52. // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate
  53. // limiting purposes).
  54. // kDroppedByEncoder - dropped by encoder's internal rate limiter.
  55. enum class DropReason : uint8_t {
  56. kDroppedByMediaOptimizations,
  57. kDroppedByEncoder
  58. };
  59. // Callback function which is called when an image has been encoded.
  60. virtual Result OnEncodedImage(
  61. const EncodedImage& encoded_image,
  62. const CodecSpecificInfo* codec_specific_info) = 0;
  63. virtual void OnDroppedFrame(DropReason reason) {}
  64. };
  65. class RTC_EXPORT VideoEncoder {
  66. public:
  // Low/high threshold pair, stored in ScalingSettings::thresholds.
  // A default-constructed instance holds -1 in both fields.
  struct QpThresholds {
    QpThresholds() : QpThresholds(-1, -1) {}
    QpThresholds(int l, int h) : low(l), high(h) {}

    int low;
    int high;
  };
  73. // Quality scaling is enabled if thresholds are provided.
  74. struct RTC_EXPORT ScalingSettings {
  75. private:
  76. // Private magic type for kOff, implicitly convertible to
  77. // ScalingSettings.
  78. struct KOff {};
  79. public:
  80. // TODO(nisse): Would be nicer if kOff were a constant ScalingSettings
  81. // rather than a magic value. However, absl::optional is not trivially copy
  82. // constructible, and hence a constant ScalingSettings needs a static
  83. // initializer, which is strongly discouraged in Chrome. We can hopefully
  84. // fix this when we switch to absl::optional or std::optional.
  85. static constexpr KOff kOff = {};
  86. ScalingSettings(int low, int high);
  87. ScalingSettings(int low, int high, int min_pixels);
  88. ScalingSettings(const ScalingSettings&);
  89. ScalingSettings(KOff); // NOLINT(runtime/explicit)
  90. ~ScalingSettings();
  91. absl::optional<QpThresholds> thresholds;
  92. // We will never ask for a resolution lower than this.
  93. // TODO(kthelgason): Lower this limit when better testing
  94. // on MediaCodec and fallback implementations are in place.
  95. // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206
  96. int min_pixels_per_frame = kDefaultMinPixelsPerFrame;
  97. private:
  98. // Private constructor; to get an object without thresholds, use
  99. // the magic constant ScalingSettings::kOff.
  100. ScalingSettings();
  101. };
  // Bitrate limits for resolution.
  struct ResolutionBitrateLimits {
    ResolutionBitrateLimits(int frame_size_pixels,
                            int min_start_bitrate_bps,
                            int min_bitrate_bps,
                            int max_bitrate_bps)
        : frame_size_pixels{frame_size_pixels},
          min_start_bitrate_bps{min_start_bitrate_bps},
          min_bitrate_bps{min_bitrate_bps},
          max_bitrate_bps{max_bitrate_bps} {}

    // Frame size, in pixels, that the bitrate thresholds below are meant for.
    int frame_size_pixels = 0;
    // Recommended minimum bitrate to start encoding.
    int min_start_bitrate_bps = 0;
    // Recommended minimum bitrate.
    int min_bitrate_bps = 0;
    // Recommended maximum bitrate.
    int max_bitrate_bps = 0;

    // Equality is defined out of line; inequality is its negation.
    bool operator==(const ResolutionBitrateLimits& rhs) const;
    bool operator!=(const ResolutionBitrateLimits& rhs) const {
      return !operator==(rhs);
    }
  };
  125. // Struct containing metadata about the encoder implementing this interface.
  126. struct RTC_EXPORT EncoderInfo {
  127. static constexpr uint8_t kMaxFramerateFraction =
  128. std::numeric_limits<uint8_t>::max();
  129. EncoderInfo();
  130. EncoderInfo(const EncoderInfo&);
  131. ~EncoderInfo();
  132. std::string ToString() const;
  133. bool operator==(const EncoderInfo& rhs) const;
  134. bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); }
  135. // Any encoder implementation wishing to use the WebRTC provided
  136. // quality scaler must populate this field.
  137. ScalingSettings scaling_settings;
  138. // The width and height of the incoming video frames should be divisible
  139. // by |requested_resolution_alignment|. If they are not, the encoder may
  140. // drop the incoming frame.
  141. // For example: With I420, this value would be a multiple of 2.
  142. // Note that this field is unrelated to any horizontal or vertical stride
  143. // requirements the encoder has on the incoming video frame buffers.
  144. int requested_resolution_alignment;
  145. // Same as above but if true, each simulcast layer should also be divisible
  146. // by |requested_resolution_alignment|.
  147. // Note that scale factors |scale_resolution_down_by| may be adjusted so a
  148. // common multiple is not too large to avoid largely cropped frames and
  149. // possibly with an aspect ratio far from the original.
  150. // Warning: large values of scale_resolution_down_by could be changed
  151. // considerably, especially if |requested_resolution_alignment| is large.
  152. bool apply_alignment_to_all_simulcast_layers;
  153. // If true, encoder supports working with a native handle (e.g. texture
  154. // handle for hw codecs) rather than requiring a raw I420 buffer.
  155. bool supports_native_handle;
  156. // The name of this particular encoder implementation, e.g. "libvpx".
  157. std::string implementation_name;
  158. // If this field is true, the encoder rate controller must perform
  159. // well even in difficult situations, and produce close to the specified
  160. // target bitrate seen over a reasonable time window, drop frames if
  161. // necessary in order to keep the rate correct, and react quickly to
  162. // changing bitrate targets. If this method returns true, we disable the
  163. // frame dropper in the media optimization module and rely entirely on the
  164. // encoder to produce media at a bitrate that closely matches the target.
  165. // Any overshooting may result in delay buildup. If this method returns
  166. // false (default behavior), the media opt frame dropper will drop input
  167. // frames if it suspect encoder misbehavior. Misbehavior is common,
  168. // especially in hardware codecs. Disable media opt at your own risk.
  169. bool has_trusted_rate_controller;
  170. // If this field is true, the encoder uses hardware support and different
  171. // thresholds will be used in CPU adaptation.
  172. bool is_hardware_accelerated;
  173. // If this field is true, the encoder uses internal camera sources, meaning
  174. // that it does not require/expect frames to be delivered via
  175. // webrtc::VideoEncoder::Encode.
  176. // Internal source encoders are deprecated and support for them will be
  177. // phased out.
  178. bool has_internal_source;
  179. // For each spatial layer (simulcast stream or SVC layer), represented as an
  180. // element in |fps_allocation| a vector indicates how many temporal layers
  181. // the encoder is using for that spatial layer.
  182. // For each spatial/temporal layer pair, the frame rate fraction is given as
  183. // an 8bit unsigned integer where 0 = 0% and 255 = 100%.
  184. //
  185. // If the vector is empty for a given spatial layer, it indicates that frame
  186. // rates are not defined and we can't count on any specific frame rate to be
  187. // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic.
  188. //
  189. // The encoder may update this on a per-frame basis in response to both
  190. // internal and external signals.
  191. //
  192. // Spatial layers are treated independently, but temporal layers are
  193. // cumulative. For instance, if:
  194. // fps_allocation[0][0] = kFullFramerate / 2;
  195. // fps_allocation[0][1] = kFullFramerate;
  196. // Then half of the frames are in the base layer and half is in TL1, but
  197. // since TL1 is assumed to depend on the base layer, the frame rate is
  198. // indicated as the full 100% for the top layer.
  199. //
  200. // Defaults to a single spatial layer containing a single temporal layer
  201. // with a 100% frame rate fraction.
  202. absl::InlinedVector<uint8_t, kMaxTemporalStreams>
  203. fps_allocation[kMaxSpatialLayers];
  204. // Recommended bitrate limits for different resolutions.
  205. std::vector<ResolutionBitrateLimits> resolution_bitrate_limits;
  206. // Obtains the limits from |resolution_bitrate_limits| that best matches the
  207. // |frame_size_pixels|.
  208. absl::optional<ResolutionBitrateLimits>
  209. GetEncoderBitrateLimitsForResolution(int frame_size_pixels) const;
  210. // If true, this encoder has internal support for generating simulcast
  211. // streams. Otherwise, an adapter class will be needed.
  212. // Even if true, the config provided to InitEncode() might not be supported,
  213. // in such case the encoder should return
  214. // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
  215. bool supports_simulcast;
  216. };
  // Rate control targets handed to the encoder through SetRates(): bitrate
  // allocation, framerate and available bandwidth.
  struct RTC_EXPORT RateControlParameters {
    // All constructors are defined out of line.
    RateControlParameters();
    RateControlParameters(const VideoBitrateAllocation& bitrate,
                          double framerate_fps);
    RateControlParameters(const VideoBitrateAllocation& bitrate,
                          double framerate_fps,
                          DataRate bandwidth_allocation);
    // Virtual, so the struct may be destroyed through a base pointer.
    virtual ~RateControlParameters();

    // Target bitrate, per spatial/temporal layer.
    // A target bitrate of 0bps indicates a layer should not be encoded at all.
    VideoBitrateAllocation bitrate;
    // Target framerate, in fps. A value <= 0.0 is invalid and should be
    // interpreted as framerate target not available. In this case the encoder
    // should fall back to the max framerate specified in |codec_settings| of
    // the last InitEncode() call.
    double framerate_fps;
    // The network bandwidth available for video. This is at least
    // |bitrate.get_sum_bps()|, but may be higher if the application is not
    // network constrained.
    DataRate bandwidth_allocation;

    // Comparison operators; both are defined out of line.
    bool operator==(const RateControlParameters& rhs) const;
    bool operator!=(const RateControlParameters& rhs) const;
  };
  240. struct LossNotification {
  241. // The timestamp of the last decodable frame *prior* to the last received.
  242. // (The last received - described below - might itself be decodable or not.)
  243. uint32_t timestamp_of_last_decodable;
  244. // The timestamp of the last received frame.
  245. uint32_t timestamp_of_last_received;
  246. // Describes whether the dependencies of the last received frame were
  247. // all decodable.
  248. // |false| if some dependencies were undecodable, |true| if all dependencies
  249. // were decodable, and |nullopt| if the dependencies are unknown.
  250. absl::optional<bool> dependencies_of_last_received_decodable;
  251. // Describes whether the received frame was decodable.
  252. // |false| if some dependency was undecodable or if some packet belonging
  253. // to the last received frame was missed.
  254. // |true| if all dependencies were decodable and all packets belonging
  255. // to the last received frame were received.
  256. // |nullopt| if no packet belonging to the last frame was missed, but the
  257. // last packet in the frame was not yet received.
  258. absl::optional<bool> last_received_decodable;
  259. };
  // Negotiated capabilities which the VideoEncoder may expect the other
  // side to use.
  struct Capabilities {
    // Explicit, so a bare bool never converts silently into a Capabilities.
    explicit Capabilities(bool loss_notification)
        : loss_notification{loss_notification} {}

    bool loss_notification;
  };
  267. struct Settings {
  268. Settings(const Capabilities& capabilities,
  269. int number_of_cores,
  270. size_t max_payload_size)
  271. : capabilities(capabilities),
  272. number_of_cores(number_of_cores),
  273. max_payload_size(max_payload_size) {}
  274. Capabilities capabilities;
  275. int number_of_cores;
  276. size_t max_payload_size;
  277. };
  // Static helpers returning a codec-specific settings struct by value for
  // VP8, VP9 and H264 respectively. They are defined out of line.
  // NOTE(review): the names suggest these are the default settings for each
  // codec - confirm the actual values against the implementation.
  static VideoCodecVP8 GetDefaultVp8Settings();
  static VideoCodecVP9 GetDefaultVp9Settings();
  static VideoCodecH264 GetDefaultH264Settings();
  281. virtual ~VideoEncoder() {}
  // Set a FecControllerOverride, through which the encoder may override
  // decisions made by FecController.
  // Declared non-pure so that existing subclasses keep compiling until the
  // TODO below is resolved; the base definition lives out of line.
  // NOTE(review): this header does not state who owns the pointer or how long
  // it must stay valid - confirm against callers.
  // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual.
  virtual void SetFecControllerOverride(
      FecControllerOverride* fec_controller_override);
  // Initialize the encoder with the information from |codec_settings|.
  //
  // Input:
  //          - codec_settings    : Codec settings
  //          - settings          : Settings affecting the encoding itself.
  // Input for deprecated version:
  //          - number_of_cores   : Number of cores available for the encoder
  //          - max_payload_size  : The maximum size each payload is allowed
  //                                to have. Usually MTU - overhead.
  //
  // Return value                 : Set bit rate if OK
  //                                <0 - Errors:
  //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
  //                                  WEBRTC_VIDEO_CODEC_ERR_SIZE
  //                                  WEBRTC_VIDEO_CODEC_MEMORY
  //                                  WEBRTC_VIDEO_CODEC_ERROR
  //
  // Neither overload is pure-virtual yet; both have out-of-line base
  // definitions while the migration described below is in progress.
  // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting
  // an announcement to discuss-webrtc, remove the three-parameters variant
  // and make the two-parameters variant pure-virtual.
  /* RTC_DEPRECATED */ virtual int32_t InitEncode(
      const VideoCodec* codec_settings,
      int32_t number_of_cores,
      size_t max_payload_size);
  virtual int InitEncode(const VideoCodec* codec_settings,
                         const VideoEncoder::Settings& settings);
  // Register an encode complete callback object. Encoded images are handed to
  // this object (see Encode()).
  //
  // Input:
  //          - callback         : Callback object which handles encoded images.
  //
  // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
  virtual int32_t RegisterEncodeCompleteCallback(
      EncodedImageCallback* callback) = 0;
  // Free encoder memory.
  //
  // Return value                : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
  virtual int32_t Release() = 0;
  // Encode an I420 image (as a part of a video stream). The encoded image
  // will be returned to the user through the encode complete callback, i.e.
  // the object passed to RegisterEncodeCompleteCallback().
  //
  // Input:
  //          - frame             : Image to be encoded
  //          - frame_types       : Frame type to be generated by the encoder.
  //                                Passed as a pointer to a vector.
  //                                NOTE(review): whether null is accepted and
  //                                what each element maps to is not stated in
  //                                this header - confirm against callers.
  //
  // Return value                 : WEBRTC_VIDEO_CODEC_OK if OK
  //                                <0 - Errors:
  //                                  WEBRTC_VIDEO_CODEC_ERR_PARAMETER
  //                                  WEBRTC_VIDEO_CODEC_MEMORY
  //                                  WEBRTC_VIDEO_CODEC_ERROR
  virtual int32_t Encode(const VideoFrame& frame,
                         const std::vector<VideoFrameType>* frame_types) = 0;
  // Sets rate control parameters: bitrate, framerate, etc. These settings are
  // instantaneous (i.e. not moving averages) and should apply from now until
  // the next call to SetRates().
  //
  // Input:
  //          - parameters        : The new targets; see RateControlParameters
  //                                for the meaning of each field.
  virtual void SetRates(const RateControlParameters& parameters) = 0;
  // Inform the encoder when the packet loss rate changes.
  // Not pure-virtual; the base definition lives out of line.
  //
  // Input:   - packet_loss_rate  : The packet loss rate (0.0 to 1.0).
  virtual void OnPacketLossRateUpdate(float packet_loss_rate);
  // Inform the encoder when the round trip time changes.
  // Not pure-virtual; the base definition lives out of line.
  //
  // Input:   - rtt_ms            : The new RTT, in milliseconds.
  virtual void OnRttUpdate(int64_t rtt_ms);
  // Called when a loss notification is received.
  // Not pure-virtual; the base definition lives out of line.
  //
  // Input:   - loss_notification : Describes the last received frame and the
  //                                last decodable frame before it; see
  //                                LossNotification.
  virtual void OnLossNotification(const LossNotification& loss_notification);
  // Returns meta-data about the encoder, such as implementation name.
  // The output of this method may change during runtime. For instance if a
  // hardware encoder fails, it may fall back to doing software encoding using
  // an implementation with different characteristics.
  // The result is returned by value, so callers that need current information
  // have to call this again instead of keeping an earlier copy.
  virtual EncoderInfo GetEncoderInfo() const;
  356. };
  357. } // namespace webrtc
  358. #endif // API_VIDEO_CODECS_VIDEO_ENCODER_H_