// video_encoder.h
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
  10. #ifndef API_VIDEO_CODECS_VIDEO_ENCODER_H_
  11. #define API_VIDEO_CODECS_VIDEO_ENCODER_H_
#include <stddef.h>
#include <stdint.h>

#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "api/fec_controller_override.h"
#include "api/units/data_rate.h"
#include "api/video/encoded_image.h"
#include "api/video/video_bitrate_allocation.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_frame.h"
#include "api/video_codecs/video_codec.h"
#include "rtc_base/checks.h"
#include "rtc_base/system/rtc_export.h"
  27. namespace webrtc {
  28. class RTPFragmentationHeader;
  29. // TODO(pbos): Expose these through a public (root) header or change these APIs.
  30. struct CodecSpecificInfo;
  31. constexpr int kDefaultMinPixelsPerFrame = 320 * 180;
  32. class EncodedImageCallback {
  33. public:
  34. virtual ~EncodedImageCallback() {}
  35. struct Result {
  36. enum Error {
  37. OK,
  38. // Failed to send the packet.
  39. ERROR_SEND_FAILED,
  40. };
  41. explicit Result(Error error) : error(error) {}
  42. Result(Error error, uint32_t frame_id) : error(error), frame_id(frame_id) {}
  43. Error error;
  44. // Frame ID assigned to the frame. The frame ID should be the same as the ID
  45. // seen by the receiver for this frame. RTP timestamp of the frame is used
  46. // as frame ID when RTP is used to send video. Must be used only when
  47. // error=OK.
  48. uint32_t frame_id = 0;
  49. // Tells the encoder that the next frame is should be dropped.
  50. bool drop_next_frame = false;
  51. };
  52. // Used to signal the encoder about reason a frame is dropped.
  53. // kDroppedByMediaOptimizations - dropped by MediaOptimizations (for rate
  54. // limiting purposes).
  55. // kDroppedByEncoder - dropped by encoder's internal rate limiter.
  56. enum class DropReason : uint8_t {
  57. kDroppedByMediaOptimizations,
  58. kDroppedByEncoder
  59. };
  60. // Callback function which is called when an image has been encoded.
  61. virtual Result OnEncodedImage(
  62. const EncodedImage& encoded_image,
  63. const CodecSpecificInfo* codec_specific_info,
  64. const RTPFragmentationHeader* fragmentation) = 0;
  65. virtual void OnDroppedFrame(DropReason reason) {}
  66. };
  67. class RTC_EXPORT VideoEncoder {
  68. public:
  69. struct QpThresholds {
  70. QpThresholds(int l, int h) : low(l), high(h) {}
  71. QpThresholds() : low(-1), high(-1) {}
  72. int low;
  73. int high;
  74. };
  75. // Quality scaling is enabled if thresholds are provided.
  76. struct RTC_EXPORT ScalingSettings {
  77. private:
  78. // Private magic type for kOff, implicitly convertible to
  79. // ScalingSettings.
  80. struct KOff {};
  81. public:
  82. // TODO(nisse): Would be nicer if kOff were a constant ScalingSettings
  83. // rather than a magic value. However, absl::optional is not trivially copy
  84. // constructible, and hence a constant ScalingSettings needs a static
  85. // initializer, which is strongly discouraged in Chrome. We can hopefully
  86. // fix this when we switch to absl::optional or std::optional.
  87. static constexpr KOff kOff = {};
  88. ScalingSettings(int low, int high);
  89. ScalingSettings(int low, int high, int min_pixels);
  90. ScalingSettings(const ScalingSettings&);
  91. ScalingSettings(KOff); // NOLINT(runtime/explicit)
  92. ~ScalingSettings();
  93. absl::optional<QpThresholds> thresholds;
  94. // We will never ask for a resolution lower than this.
  95. // TODO(kthelgason): Lower this limit when better testing
  96. // on MediaCodec and fallback implementations are in place.
  97. // See https://bugs.chromium.org/p/webrtc/issues/detail?id=7206
  98. int min_pixels_per_frame = kDefaultMinPixelsPerFrame;
  99. private:
  100. // Private constructor; to get an object without thresholds, use
  101. // the magic constant ScalingSettings::kOff.
  102. ScalingSettings();
  103. };
  104. // Bitrate limits for resolution.
  105. struct ResolutionBitrateLimits {
  106. ResolutionBitrateLimits(int frame_size_pixels,
  107. int min_start_bitrate_bps,
  108. int min_bitrate_bps,
  109. int max_bitrate_bps)
  110. : frame_size_pixels(frame_size_pixels),
  111. min_start_bitrate_bps(min_start_bitrate_bps),
  112. min_bitrate_bps(min_bitrate_bps),
  113. max_bitrate_bps(max_bitrate_bps) {}
  114. // Size of video frame, in pixels, the bitrate thresholds are intended for.
  115. int frame_size_pixels = 0;
  116. // Recommended minimum bitrate to start encoding.
  117. int min_start_bitrate_bps = 0;
  118. // Recommended minimum bitrate.
  119. int min_bitrate_bps = 0;
  120. // Recommended maximum bitrate.
  121. int max_bitrate_bps = 0;
  122. bool operator==(const ResolutionBitrateLimits& rhs) const;
  123. bool operator!=(const ResolutionBitrateLimits& rhs) const {
  124. return !(*this == rhs);
  125. }
  126. };
  127. // Struct containing metadata about the encoder implementing this interface.
  128. struct RTC_EXPORT EncoderInfo {
  129. static constexpr uint8_t kMaxFramerateFraction =
  130. std::numeric_limits<uint8_t>::max();
  131. EncoderInfo();
  132. EncoderInfo(const EncoderInfo&);
  133. ~EncoderInfo();
  134. std::string ToString() const;
  135. bool operator==(const EncoderInfo& rhs) const;
  136. bool operator!=(const EncoderInfo& rhs) const { return !(*this == rhs); }
  137. // Any encoder implementation wishing to use the WebRTC provided
  138. // quality scaler must populate this field.
  139. ScalingSettings scaling_settings;
  140. // The width and height of the incoming video frames should be divisible
  141. // by |requested_resolution_alignment|. If they are not, the encoder may
  142. // drop the incoming frame.
  143. // For example: With I420, this value would be a multiple of 2.
  144. // Note that this field is unrelated to any horizontal or vertical stride
  145. // requirements the encoder has on the incoming video frame buffers.
  146. int requested_resolution_alignment;
  147. // If true, encoder supports working with a native handle (e.g. texture
  148. // handle for hw codecs) rather than requiring a raw I420 buffer.
  149. bool supports_native_handle;
  150. // The name of this particular encoder implementation, e.g. "libvpx".
  151. std::string implementation_name;
  152. // If this field is true, the encoder rate controller must perform
  153. // well even in difficult situations, and produce close to the specified
  154. // target bitrate seen over a reasonable time window, drop frames if
  155. // necessary in order to keep the rate correct, and react quickly to
  156. // changing bitrate targets. If this method returns true, we disable the
  157. // frame dropper in the media optimization module and rely entirely on the
  158. // encoder to produce media at a bitrate that closely matches the target.
  159. // Any overshooting may result in delay buildup. If this method returns
  160. // false (default behavior), the media opt frame dropper will drop input
  161. // frames if it suspect encoder misbehavior. Misbehavior is common,
  162. // especially in hardware codecs. Disable media opt at your own risk.
  163. bool has_trusted_rate_controller;
  164. // If this field is true, the encoder uses hardware support and different
  165. // thresholds will be used in CPU adaptation.
  166. bool is_hardware_accelerated;
  167. // If this field is true, the encoder uses internal camera sources, meaning
  168. // that it does not require/expect frames to be delivered via
  169. // webrtc::VideoEncoder::Encode.
  170. // Internal source encoders are deprecated and support for them will be
  171. // phased out.
  172. bool has_internal_source;
  173. // For each spatial layer (simulcast stream or SVC layer), represented as an
  174. // element in |fps_allocation| a vector indicates how many temporal layers
  175. // the encoder is using for that spatial layer.
  176. // For each spatial/temporal layer pair, the frame rate fraction is given as
  177. // an 8bit unsigned integer where 0 = 0% and 255 = 100%.
  178. //
  179. // If the vector is empty for a given spatial layer, it indicates that frame
  180. // rates are not defined and we can't count on any specific frame rate to be
  181. // generated. Likely this indicates Vp8TemporalLayersType::kBitrateDynamic.
  182. //
  183. // The encoder may update this on a per-frame basis in response to both
  184. // internal and external signals.
  185. //
  186. // Spatial layers are treated independently, but temporal layers are
  187. // cumulative. For instance, if:
  188. // fps_allocation[0][0] = kFullFramerate / 2;
  189. // fps_allocation[0][1] = kFullFramerate;
  190. // Then half of the frames are in the base layer and half is in TL1, but
  191. // since TL1 is assumed to depend on the base layer, the frame rate is
  192. // indicated as the full 100% for the top layer.
  193. //
  194. // Defaults to a single spatial layer containing a single temporal layer
  195. // with a 100% frame rate fraction.
  196. absl::InlinedVector<uint8_t, kMaxTemporalStreams>
  197. fps_allocation[kMaxSpatialLayers];
  198. // Recommended bitrate limits for different resolutions.
  199. std::vector<ResolutionBitrateLimits> resolution_bitrate_limits;
  200. // Obtains the limits from |resolution_bitrate_limits| that best matches the
  201. // |frame_size_pixels|.
  202. absl::optional<ResolutionBitrateLimits>
  203. GetEncoderBitrateLimitsForResolution(int frame_size_pixels) const;
  204. // If true, this encoder has internal support for generating simulcast
  205. // streams. Otherwise, an adapter class will be needed.
  206. // Even if true, the config provided to InitEncode() might not be supported,
  207. // in such case the encoder should return
  208. // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
  209. bool supports_simulcast;
  210. };
  211. struct RTC_EXPORT RateControlParameters {
  212. RateControlParameters();
  213. RateControlParameters(const VideoBitrateAllocation& bitrate,
  214. double framerate_fps);
  215. RateControlParameters(const VideoBitrateAllocation& bitrate,
  216. double framerate_fps,
  217. DataRate bandwidth_allocation);
  218. virtual ~RateControlParameters();
  219. // Target bitrate, per spatial/temporal layer.
  220. // A target bitrate of 0bps indicates a layer should not be encoded at all.
  221. VideoBitrateAllocation bitrate;
  222. // Target framerate, in fps. A value <= 0.0 is invalid and should be
  223. // interpreted as framerate target not available. In this case the encoder
  224. // should fall back to the max framerate specified in |codec_settings| of
  225. // the last InitEncode() call.
  226. double framerate_fps;
  227. // The network bandwidth available for video. This is at least
  228. // |bitrate.get_sum_bps()|, but may be higher if the application is not
  229. // network constrained.
  230. DataRate bandwidth_allocation;
  231. bool operator==(const RateControlParameters& rhs) const;
  232. bool operator!=(const RateControlParameters& rhs) const;
  233. };
  234. struct LossNotification {
  235. // The timestamp of the last decodable frame *prior* to the last received.
  236. // (The last received - described below - might itself be decodable or not.)
  237. uint32_t timestamp_of_last_decodable;
  238. // The timestamp of the last received frame.
  239. uint32_t timestamp_of_last_received;
  240. // Describes whether the dependencies of the last received frame were
  241. // all decodable.
  242. // |false| if some dependencies were undecodable, |true| if all dependencies
  243. // were decodable, and |nullopt| if the dependencies are unknown.
  244. absl::optional<bool> dependencies_of_last_received_decodable;
  245. // Describes whether the received frame was decodable.
  246. // |false| if some dependency was undecodable or if some packet belonging
  247. // to the last received frame was missed.
  248. // |true| if all dependencies were decodable and all packets belonging
  249. // to the last received frame were received.
  250. // |nullopt| if no packet belonging to the last frame was missed, but the
  251. // last packet in the frame was not yet received.
  252. absl::optional<bool> last_received_decodable;
  253. };
  254. // Negotiated capabilities which the VideoEncoder may expect the other
  255. // side to use.
  256. struct Capabilities {
  257. explicit Capabilities(bool loss_notification)
  258. : loss_notification(loss_notification) {}
  259. bool loss_notification;
  260. };
  261. struct Settings {
  262. Settings(const Capabilities& capabilities,
  263. int number_of_cores,
  264. size_t max_payload_size)
  265. : capabilities(capabilities),
  266. number_of_cores(number_of_cores),
  267. max_payload_size(max_payload_size) {}
  268. Capabilities capabilities;
  269. int number_of_cores;
  270. size_t max_payload_size;
  271. };
  272. static VideoCodecVP8 GetDefaultVp8Settings();
  273. static VideoCodecVP9 GetDefaultVp9Settings();
  274. static VideoCodecH264 GetDefaultH264Settings();
  275. virtual ~VideoEncoder() {}
  276. // Set a FecControllerOverride, through which the encoder may override
  277. // decisions made by FecController.
  278. // TODO(bugs.webrtc.org/10769): Update downstream, then make pure-virtual.
  279. virtual void SetFecControllerOverride(
  280. FecControllerOverride* fec_controller_override);
  281. // Initialize the encoder with the information from the codecSettings
  282. //
  283. // Input:
  284. // - codec_settings : Codec settings
  285. // - settings : Settings affecting the encoding itself.
  286. // Input for deprecated version:
  287. // - number_of_cores : Number of cores available for the encoder
  288. // - max_payload_size : The maximum size each payload is allowed
  289. // to have. Usually MTU - overhead.
  290. //
  291. // Return value : Set bit rate if OK
  292. // <0 - Errors:
  293. // WEBRTC_VIDEO_CODEC_ERR_PARAMETER
  294. // WEBRTC_VIDEO_CODEC_ERR_SIZE
  295. // WEBRTC_VIDEO_CODEC_MEMORY
  296. // WEBRTC_VIDEO_CODEC_ERROR
  297. // TODO(bugs.webrtc.org/10720): After updating downstream projects and posting
  298. // an announcement to discuss-webrtc, remove the three-parameters variant
  299. // and make the two-parameters variant pure-virtual.
  300. /* RTC_DEPRECATED */ virtual int32_t InitEncode(
  301. const VideoCodec* codec_settings,
  302. int32_t number_of_cores,
  303. size_t max_payload_size);
  304. virtual int InitEncode(const VideoCodec* codec_settings,
  305. const VideoEncoder::Settings& settings);
  306. // Register an encode complete callback object.
  307. //
  308. // Input:
  309. // - callback : Callback object which handles encoded images.
  310. //
  311. // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
  312. virtual int32_t RegisterEncodeCompleteCallback(
  313. EncodedImageCallback* callback) = 0;
  314. // Free encoder memory.
  315. // Return value : WEBRTC_VIDEO_CODEC_OK if OK, < 0 otherwise.
  316. virtual int32_t Release() = 0;
  317. // Encode an I420 image (as a part of a video stream). The encoded image
  318. // will be returned to the user through the encode complete callback.
  319. //
  320. // Input:
  321. // - frame : Image to be encoded
  322. // - frame_types : Frame type to be generated by the encoder.
  323. //
  324. // Return value : WEBRTC_VIDEO_CODEC_OK if OK
  325. // <0 - Errors:
  326. // WEBRTC_VIDEO_CODEC_ERR_PARAMETER
  327. // WEBRTC_VIDEO_CODEC_MEMORY
  328. // WEBRTC_VIDEO_CODEC_ERROR
  329. virtual int32_t Encode(const VideoFrame& frame,
  330. const std::vector<VideoFrameType>* frame_types) = 0;
  331. // Sets rate control parameters: bitrate, framerate, etc. These settings are
  332. // instantaneous (i.e. not moving averages) and should apply from now until
  333. // the next call to SetRates().
  334. virtual void SetRates(const RateControlParameters& parameters) = 0;
  335. // Inform the encoder when the packet loss rate changes.
  336. //
  337. // Input: - packet_loss_rate : The packet loss rate (0.0 to 1.0).
  338. virtual void OnPacketLossRateUpdate(float packet_loss_rate);
  339. // Inform the encoder when the round trip time changes.
  340. //
  341. // Input: - rtt_ms : The new RTT, in milliseconds.
  342. virtual void OnRttUpdate(int64_t rtt_ms);
  343. // Called when a loss notification is received.
  344. virtual void OnLossNotification(const LossNotification& loss_notification);
  345. // Returns meta-data about the encoder, such as implementation name.
  346. // The output of this method may change during runtime. For instance if a
  347. // hardware encoder fails, it may fall back to doing software encoding using
  348. // an implementation with different characteristics.
  349. virtual EncoderInfo GetEncoderInfo() const;
  350. };
  351. } // namespace webrtc
  352. #endif // API_VIDEO_CODECS_VIDEO_ENCODER_H_