defs.h

#pragma once

#include <array>
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libavutil/imgutils.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
}
namespace ffmpeg {

// Bit mask of media formats; keep the values as powers of two (2^n).
enum MediaType : size_t {
  TYPE_AUDIO = 1,
  TYPE_VIDEO = 2,
  TYPE_SUBTITLE = 4,
  TYPE_CC = 8, // closed captions from transport streams
};
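
// Example (sketch, not part of this header): because the values are powers of
// two, several media types can be combined into a single mask and tested with
// bitwise operations.
//
//   size_t requested = TYPE_AUDIO | TYPE_VIDEO;
//   bool wantsVideo = (requested & TYPE_VIDEO) != 0;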

// audio
struct AudioFormat {
  // Fields are initialized for auto detection.
  // The caller can set some or all fields if a specific output is desired.
  bool operator==(const AudioFormat& x) const {
    return x.format == format && x.samples == samples &&
        x.channels == channels;
  }

  size_t samples{0}; // number of samples per second (sample rate)
  size_t channels{0}; // number of channels
  long format{-1}; // AVSampleFormat, auto AV_SAMPLE_FMT_NONE
  size_t padding[2];
  // -- alignment 40 bytes
};
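
// Example (sketch, not part of this header): requesting decoded audio as
// 16 kHz mono 32-bit float. AV_SAMPLE_FMT_FLT comes from libavutil.
//
//   ffmpeg::AudioFormat audioFormat;
//   audioFormat.samples = 16000;            // target sample rate
//   audioFormat.channels = 1;               // mono
//   audioFormat.format = AV_SAMPLE_FMT_FLT; // interleaved float samples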

// video
struct VideoFormat {
  // Fields are initialized for auto detection.
  // The caller can set some or all fields if a specific output is desired.
  bool operator==(const VideoFormat& x) const {
    return x.format == format && x.width == width && x.height == height;
  }
  /*
    When width = 0, height = 0, minDimension = 0, and maxDimension = 0,
      keep the original frame resolution.
    When width = 0, height = 0, minDimension != 0, and maxDimension = 0,
      keep the aspect ratio and resize the frame so that the shorter edge
      equals minDimension.
    When width = 0, height = 0, minDimension = 0, and maxDimension != 0,
      keep the aspect ratio and resize the frame so that the longer edge
      equals maxDimension.
    When width = 0, height = 0, minDimension != 0, and maxDimension != 0,
      resize the frame so that the shorter edge equals minDimension and the
      longer edge equals maxDimension. The aspect ratio may not be preserved.
    When width = 0, height != 0, minDimension = 0, and maxDimension = 0,
      keep the aspect ratio and resize the frame so that the frame height
      equals $height.
    When width != 0, height = 0, minDimension = 0, and maxDimension = 0,
      keep the aspect ratio and resize the frame so that the frame width
      equals $width.
    When width != 0, height != 0, minDimension = 0, and maxDimension = 0,
      resize the frame so that the frame width and height are set to $width
      and $height, respectively.
  */
  size_t width{0}; // width in pixels
  size_t height{0}; // height in pixels
  long format{-1}; // AVPixelFormat, auto AV_PIX_FMT_NONE
  size_t minDimension{0}; // choose min dimension and rescale accordingly
  size_t maxDimension{0}; // choose max dimension and rescale accordingly
  size_t cropImage{0}; // request image crop
  // -- alignment 40 bytes
};
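
// Example (sketch, not part of this header): requesting frames rescaled so
// that the shorter edge is 256 pixels (aspect ratio preserved, per the table
// above) and converted to RGB24. AV_PIX_FMT_RGB24 comes from libavutil.
//
//   ffmpeg::VideoFormat videoFormat;
//   videoFormat.minDimension = 256;        // shorter edge target
//   videoFormat.format = AV_PIX_FMT_RGB24; // packed 8-bit RGB output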

// subtitle/cc
struct SubtitleFormat {
  long type{0}; // AVSubtitleType, auto SUBTITLE_NONE
  size_t padding[4];
  // -- alignment 40 bytes
};

union FormatUnion {
  FormatUnion() : audio() {}
  explicit FormatUnion(int) : video() {}
  explicit FormatUnion(char) : subtitle() {}
  explicit FormatUnion(double) : subtitle() {}
  AudioFormat audio;
  VideoFormat video;
  SubtitleFormat subtitle;
  // -- alignment 40 bytes
};

/*
  The MediaFormat data structure serves as an input/output parameter.
  For input formats the caller either assigns values or leaves the defaults
  for auto detection.
  For output formats all fields are set to specific values.
*/
struct MediaFormat {
  // for using map/set data structures
  bool operator<(const MediaFormat& x) const {
    return type < x.type;
  }
  bool operator==(const MediaFormat& x) const {
    if (type != x.type) {
      return false;
    }
    switch (type) {
      case TYPE_AUDIO:
        return format.audio == x.format.audio;
      case TYPE_VIDEO:
        return format.video == x.format.video;
      case TYPE_SUBTITLE:
      case TYPE_CC:
        return true;
      default:
        return false;
    }
  }

  explicit MediaFormat(long s = -1) : type(TYPE_AUDIO), stream(s), format() {}
  explicit MediaFormat(int x, long s = -1)
      : type(TYPE_VIDEO), stream(s), format(x) {}
  explicit MediaFormat(char x, long s = -1)
      : type(TYPE_SUBTITLE), stream(s), format(x) {}
  explicit MediaFormat(double x, long s = -1)
      : type(TYPE_CC), stream(s), format(x) {}

  static MediaFormat makeMediaFormat(AudioFormat format, long stream) {
    MediaFormat result(stream);
    result.format.audio = format;
    return result;
  }

  static MediaFormat makeMediaFormat(VideoFormat format, long stream) {
    MediaFormat result(0, stream);
    result.format.video = format;
    return result;
  }

  static MediaFormat makeMediaFormat(SubtitleFormat format, long stream) {
    MediaFormat result('0', stream);
    result.format.subtitle = format;
    return result;
  }

  // format type
  MediaType type;
  // stream index:
  // set -1 for single-stream auto detection, -2 for all-streams auto
  // detection, or >= 0 for a specific stream if the caller knows the stream
  // index (unlikely)
  long stream;
  // union keeps one of the possible formats, selected by MediaType
  FormatUnion format;
};
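
// Example (sketch): building MediaFormat values via the factory helpers. A
// stream index of -1 asks for single-stream auto detection. The
// `audioFormat`/`videoFormat` variables refer to the sketches above.
//
//   auto audio = ffmpeg::MediaFormat::makeMediaFormat(audioFormat, -1);
//   auto video = ffmpeg::MediaFormat::makeMediaFormat(videoFormat, -1);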

struct DecoderParameters {
  // local file, remote file, http url, rtmp stream uri, etc. -- anything that
  // ffmpeg can recognize
  std::string uri{std::string()};
  // timeout on getting bytes for decoding
  size_t timeoutMs{1000};
  // logging level, default AV_LOG_PANIC
  long logLevel{0};
  // number of packet errors before the decoder gives up, 0 means never
  size_t maxPackageErrors{0};
  // max allowed consecutive times no bytes are processed, 0 means infinite
  size_t maxProcessNoBytes{0};
  // start offset (us)
  long startOffset{0};
  // end offset (us)
  long endOffset{-1};
  // logging id
  int64_t loggingUuid{0};
  // internal max seekable buffer size
  size_t maxSeekableBytes{0};
  // adjust header pts to the epoch time
  bool convertPtsToWallTime{false};
  // indicate whether the input stream is an encoded image
  bool isImage{false};
  // listen and wait for a new rtmp stream
  bool listen{false};
  // don't copy the frame body, only the header
  bool headerOnly{false};
  // enable fast seek (seek only to keyframes)
  bool fastSeek{false};
  // interrupt the init method on timeout
  bool preventStaleness{true};
  // tolerated seek accuracy (us)
  double seekAccuracy{1000000.0};
  // Allow multithreaded decoding for numThreads > 1;
  // numThreads = 0 sets up sensible defaults
  int numThreads{1};
  // which media types should be processed, default none
  std::set<MediaFormat> formats;
  // can be used for asynchronous decoders
  size_t cacheSize{8192}; // how many bytes to cache before pausing reads
  size_t cacheTimeoutMs{1000}; // timeout on writing bytes
  bool enforceCacheSize{false}; // drop output frames if the cache is full
  bool mergeAudioMessages{false}; // combine collocated audio messages together
  std::string tlsCertFile;
  std::string tlsKeyFile;
  // Skip packets that fail with EPERM errors and continue decoding.
  bool skipOperationNotPermittedPackets{false};
  // probing size in bytes, i.e. the size of the data to analyze to get stream
  // information. A higher value enables detecting more information when it is
  // dispersed in the stream, but increases latency. Must be an integer not
  // less than 32. Default is 5000000.
  int64_t probeSize{5000000};
};
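
// Example (sketch): typical parameters for decoding the video streams of a
// local file with wall-clock timestamps. The file path is hypothetical.
//
//   ffmpeg::DecoderParameters params;
//   params.uri = "/tmp/video.mp4";
//   params.timeoutMs = 10000;
//   params.convertPtsToWallTime = true;
//   params.formats.insert(ffmpeg::MediaFormat::makeMediaFormat(
//       ffmpeg::VideoFormat(), /*stream=*/-2)); // all streams, auto format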

struct DecoderHeader {
  // message id, from 0 onward
  size_t seqno{0};
  // decoded timestamp in microseconds, measured either from the beginning of
  // the stream or from epoch time, see DecoderParameters::convertPtsToWallTime
  long pts{0};
  // decoded key frame indicator
  size_t keyFrame{0};
  // frames per second, valid only for video streams
  double fps{0};
  // format specifies what kind of frame is in the payload
  MediaFormat format;
};

// Abstract ByteStorage interface class
class ByteStorage {
 public:
  virtual ~ByteStorage() = default;
  // makes sure the buffer has at least n bytes available for writing; if not,
  // the storage must reallocate memory
  virtual void ensure(size_t n) = 0;
  // the caller must not write more than the available bytes
  virtual uint8_t* writableTail() = 0;
  // the caller confirms that n bytes were written to the writable tail
  virtual void append(size_t n) = 0;
  // the caller confirms that n bytes were read from the read buffer
  virtual void trim(size_t n) = 0;
  // gives access to the beginning of the read buffer
  virtual const uint8_t* data() const = 0;
  // returns the stored size in bytes
  virtual size_t length() const = 0;
  // returns the available capacity of the writable tail
  virtual size_t tail() const = 0;
  // clears the content, keeps the capacity
  virtual void clear() = 0;
};
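
// Example (sketch): the intended write/read protocol for a ByteStorage
// implementation. The `storage`, `src`, and `n` names are placeholders.
//
//   // writing n bytes
//   storage->ensure(n);                      // guarantee capacity
//   memcpy(storage->writableTail(), src, n); // fill the writable tail
//   storage->append(n);                      // commit the written bytes
//
//   // reading what was stored
//   const uint8_t* p = storage->data();
//   size_t available = storage->length();
//   storage->trim(available);                // mark the bytes as consumed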

struct DecoderOutputMessage {
  DecoderHeader header;
  std::unique_ptr<ByteStorage> payload;
};

/*
 * External provider of the encoded bytes. The specific implementation is left
 * to the use case, e.g. file, memory, external network end-points, etc.
 * Normally the input/output parameter @out is set to a valid, non-null buffer
 * pointer, which indicates a "read" call; however, there are "seek" modes as
 * well.
 * @out != nullptr => read @size bytes from the current offset, @whence is
 * ignored; returns the number of bytes read, 0 if no more bytes are
 * available, < 0 on error.
 * @out == nullptr, @timeoutMs == 0 => does the provider support the "seek"
 * capability in the first place? @size and @whence are ignored; returns 0 on
 * success, < 0 if "seek" mode is not supported.
 * @out == nullptr, @timeoutMs != 0 => normal seek call, with offset == @size
 * and @whence in {SEEK_SET, SEEK_CUR, SEEK_END, AVSEEK_SIZE}; returns < 0 on
 * error, the new position if @whence is in {SEEK_SET, SEEK_CUR, SEEK_END}, or
 * the length of the buffer if @whence == AVSEEK_SIZE.
 */
using DecoderInCallback =
    std::function<int(uint8_t* out, int size, int whence, uint64_t timeoutMs)>;

using DecoderOutCallback = std::function<void(DecoderOutputMessage&&)>;
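
// Example (illustrative sketch only, not part of this header): a minimal
// memory-backed DecoderInCallback following the read/seek protocol described
// above. The `makeMemoryCallback` name is hypothetical; the sketch assumes
// <cstring> and <algorithm> are available.
//
//   auto makeMemoryCallback =
//       [](std::shared_ptr<std::vector<uint8_t>> data) {
//         auto pos = std::make_shared<size_t>(0);
//         return ffmpeg::DecoderInCallback(
//             [data, pos](
//                 uint8_t* out, int size, int whence, uint64_t timeoutMs)
//                 -> int {
//               if (out) { // read mode
//                 size_t available = data->size() - *pos;
//                 size_t toCopy = std::min<size_t>(size, available);
//                 memcpy(out, data->data() + *pos, toCopy);
//                 *pos += toCopy;
//                 return static_cast<int>(toCopy); // 0 signals EOF
//               }
//               if (timeoutMs == 0) { // seek capability probe
//                 return 0; // seek is supported
//               }
//               // seek mode: the offset is passed in @size
//               switch (whence) {
//                 case SEEK_SET: *pos = size; break;
//                 case SEEK_CUR: *pos += size; break;
//                 case SEEK_END: *pos = data->size() + size; break;
//                 case AVSEEK_SIZE: return static_cast<int>(data->size());
//                 default: return -1;
//               }
//               return static_cast<int>(*pos);
//             });
//       };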

struct DecoderMetadata {
  // time base numerator
  long num{0};
  // time base denominator
  long den{1};
  // duration of the stream, in microseconds, if available
  long duration{-1};
  // frames per second, valid only for video streams
  double fps{0};
  // format specifies what kind of frame is in the payload
  MediaFormat format;
};
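
// Example (sketch): inspecting the metadata returned by MediaDecoder::init()
// to read the video stream's frame rate and duration. The `metadata` vector
// name is an assumption.
//
//   for (const auto& md : metadata) {
//     if (md.format.type == ffmpeg::TYPE_VIDEO) {
//       double seconds = md.duration >= 0 ? md.duration / 1e6 : -1.0;
//       double frameRate = md.fps;
//     }
//   }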

/**
 * Abstract class for decoding media bytes.
 * It has two modes: internal media bytes retrieval for a given uri, and an
 * external media bytes provider in the case of memory streams.
 */
class MediaDecoder {
 public:
  virtual ~MediaDecoder() = default;

  /**
   * Initializes the media decoder with parameters and calls the callback when
   * media bytes are available.
   * Media bytes are either fetched internally from the provided URI, or the
   * provided input callback is invoked to get them.
   * The input callback must be empty for the internal media provider.
   * The caller can provide a non-null metadata pointer to obtain the streams'
   * metadata (optional).
   */
  virtual bool init(
      const DecoderParameters& params,
      DecoderInCallback&& in,
      std::vector<DecoderMetadata>* metadata) = 0;

  /**
   * Polls one decoded frame from the decoder.
   * Returns an error code, 0 for success.
   */
  virtual int decode(DecoderOutputMessage* out, uint64_t timeoutMs) = 0;

  /**
   * Polls available decoded bytes from the decoder until EOF or error.
   */
  virtual int decode_all(const DecoderOutCallback& callback) = 0;

  /**
   * Stops calling the callback, releases resources.
   */
  virtual void shutdown() = 0;

  /**
   * Interrupts whatever the decoder is doing, at any time.
   */
  virtual void interrupt() = 0;

  /**
   * Factory to create ByteStorage instances; the particular implementation is
   * left to the derived class. The caller provides the initially allocated
   * size.
   */
  virtual std::unique_ptr<ByteStorage> createByteStorage(size_t n) = 0;
};
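
// Example (sketch): driving a concrete MediaDecoder implementation. The
// `MyDecoder` type is hypothetical; any subclass of MediaDecoder would do,
// and `params` refers to the DecoderParameters sketch above.
//
//   MyDecoder decoder;
//   std::vector<ffmpeg::DecoderMetadata> metadata;
//   if (decoder.init(params, nullptr, &metadata)) { // internal URI provider
//     ffmpeg::DecoderOutputMessage msg;
//     while (decoder.decode(&msg, params.timeoutMs) == 0) {
//       // msg.header describes the frame, msg.payload holds the bytes
//     }
//     decoder.shutdown();
//   }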

struct SamplerParameters {
  MediaType type{TYPE_AUDIO};
  FormatUnion in;
  FormatUnion out;
  int64_t loggingUuid{0};
};

/**
 * Abstract class for sampling media bytes
 */
class MediaSampler {
 public:
  virtual ~MediaSampler() = default;

  /**
   * Initializes the media sampler with parameters
   */
  virtual bool init(const SamplerParameters& params) = 0;

  /**
   * Samples media bytes.
   * Returns an error code < 0, or >= 0 for success, indicating the number of
   * bytes processed.
   * Set @in to null to flush the remaining data.
   */
  virtual int sample(const ByteStorage* in, ByteStorage* out) = 0;

  /**
   * Releases resources
   */
  virtual void shutdown() = 0;

  /*
   * Returns the media type
   */
  MediaType getMediaType() const {
    return params_.type;
  }

  /*
   * Returns the input/output formats
   */
  FormatUnion getInputFormat() const {
    return params_.in;
  }
  FormatUnion getOutFormat() const {
    return params_.out;
  }

 protected:
  SamplerParameters params_;
};
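
// Example (sketch): using a concrete MediaSampler implementation to convert
// audio to 16 kHz mono float. The `MySampler` type and the
// `decodedAudioFormat`/`inputStorage`/`outputStorage` names are hypothetical.
//
//   ffmpeg::SamplerParameters samplerParams;
//   samplerParams.type = ffmpeg::TYPE_AUDIO;
//   samplerParams.in.audio = decodedAudioFormat; // e.g. from DecoderMetadata
//   samplerParams.out.audio.samples = 16000;
//   samplerParams.out.audio.channels = 1;
//   samplerParams.out.audio.format = AV_SAMPLE_FMT_FLT;
//
//   MySampler sampler;
//   if (sampler.init(samplerParams)) {
//     sampler.sample(inputStorage.get(), outputStorage.get());
//     sampler.sample(nullptr, outputStorage.get()); // flush remaining data
//     sampler.shutdown();
//   }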

} // namespace ffmpeg