// sync_decoder_test.cpp

#include <c10/util/Logging.h>
#include <dirent.h>
#include <gtest/gtest.h>
#include "memory_buffer.h"
#include "sync_decoder.h"
#include "util.h"

using namespace ffmpeg;

namespace {

struct VideoFileStats {
  std::string name;
  size_t durationPts{0};
  int num{0};
  int den{0};
  int fps{0};
};
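
// Collects the paths of all regular files in `folder` (skipping
// subdirectories and the README) into `stats`; only the file name is
// filled in at this point.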
void gotAllTestFiles(
    const std::string& folder,
    std::vector<VideoFileStats>* stats) {
  DIR* d = opendir(folder.c_str());
  CHECK(d);
  struct dirent* dir;
  while ((dir = readdir(d))) {
    if (dir->d_type != DT_DIR && 0 != strcmp(dir->d_name, "README")) {
      VideoFileStats item;
      item.name = folder + '/' + dir->d_name;
      LOG(INFO) << "Found video file: " << item.name;
      stats->push_back(std::move(item));
    }
  }
  closedir(d);
}
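
// Probes every collected file header-only from an in-memory copy, repeating
// the probe `rounds` times per file to average the probing latency, and
// records num/den/fps plus the duration rescaled from AV_TIME_BASE into
// 1/fps units.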
void gotFilesStats(std::vector<VideoFileStats>& stats) {
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.seekAccuracy = 100000;
  params.formats = {MediaFormat(0)};
  params.headerOnly = true;
  params.preventStaleness = false;
  size_t avgProvUs = 0;
  const size_t rounds = 100;
  for (auto& item : stats) {
    LOG(INFO) << "Decoding video file in memory: " << item.name;
    FILE* f = fopen(item.name.c_str(), "rb");
    CHECK(f != nullptr);
    fseek(f, 0, SEEK_END);
    std::vector<uint8_t> buffer(ftell(f));
    rewind(f);
    size_t s = fread(buffer.data(), 1, buffer.size(), f);
    TORCH_CHECK_EQ(buffer.size(), s);
    fclose(f);
    for (size_t i = 0; i < rounds; ++i) {
      SyncDecoder decoder;
      std::vector<DecoderMetadata> metadata;
      const auto now = std::chrono::steady_clock::now();
      CHECK(decoder.init(
          params,
          MemoryBuffer::getCallback(buffer.data(), buffer.size()),
          &metadata));
      const auto then = std::chrono::steady_clock::now();
      decoder.shutdown();
      avgProvUs +=
          std::chrono::duration_cast<std::chrono::microseconds>(then - now)
              .count();
      TORCH_CHECK_EQ(metadata.size(), 1);
      item.num = metadata[0].num;
      item.den = metadata[0].den;
      item.fps = metadata[0].fps;
      item.durationPts =
          av_rescale_q(metadata[0].duration, AV_TIME_BASE_Q, {1, item.fps});
    }
  }
  LOG(INFO) << "Probing (us) " << avgProvUs / stats.size() / rounds;
}
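
// Decodes `rounds` randomly positioned clips of `num` frames (spaced `stride`
// frames apart) from each file and returns the average decoding time per
// frame, in microseconds.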
size_t measurePerformanceUs(
    const std::vector<VideoFileStats>& stats,
    size_t rounds,
    size_t num,
    size_t stride) {
  size_t avgClipDecodingUs = 0;
  std::srand(time(nullptr));
  for (const auto& item : stats) {
    FILE* f = fopen(item.name.c_str(), "rb");
    CHECK(f != nullptr);
    fseek(f, 0, SEEK_END);
    std::vector<uint8_t> buffer(ftell(f));
    rewind(f);
    size_t s = fread(buffer.data(), 1, buffer.size(), f);
    TORCH_CHECK_EQ(buffer.size(), s);
    fclose(f);
    for (size_t i = 0; i < rounds; ++i) {
      // randomly select a clip offset
      size_t rOffset = std::rand();
      size_t fOffset = rOffset % item.durationPts;
      size_t clipFrames = num + (num - 1) * stride;
      if (fOffset + clipFrames > item.durationPts) {
        fOffset = item.durationPts - clipFrames;
      }
      DecoderParameters params;
      params.timeoutMs = 10000;
      params.startOffset = 1000000;
      params.seekAccuracy = 100000;
      params.preventStaleness = false;
      for (size_t n = 0; n < num; ++n) {
        std::list<DecoderOutputMessage> msgs;
        params.startOffset =
            av_rescale_q(fOffset, {1, item.fps}, AV_TIME_BASE_Q);
        params.endOffset = params.startOffset + 100;
        auto now = std::chrono::steady_clock::now();
        SyncDecoder decoder;
        CHECK(decoder.init(
            params,
            MemoryBuffer::getCallback(buffer.data(), buffer.size()),
            nullptr));
        DecoderOutputMessage out;
        while (0 == decoder.decode(&out, params.timeoutMs)) {
          msgs.push_back(std::move(out));
        }
        decoder.shutdown();
        const auto then = std::chrono::steady_clock::now();
        fOffset += 1 + stride;
        avgClipDecodingUs +=
            std::chrono::duration_cast<std::chrono::microseconds>(then - now)
                .count();
      }
    }
  }
  return avgClipDecodingUs / rounds / num / stats.size();
}
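
// Drains `decoder` until it stops producing output, counting audio and video
// frames and total payload bytes, and logging any subtitle rectangles that
// can be deserialized from a subtitle payload.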
void runDecoder(SyncDecoder& decoder) {
  DecoderOutputMessage out;
  size_t audioFrames = 0, videoFrames = 0, totalBytes = 0;
  while (0 == decoder.decode(&out, 10000)) {
    if (out.header.format.type == TYPE_AUDIO) {
      ++audioFrames;
    } else if (out.header.format.type == TYPE_VIDEO) {
      ++videoFrames;
    } else if (out.header.format.type == TYPE_SUBTITLE && out.payload) {
      // deserialize
      LOG(INFO) << "Deserializing subtitle";
      AVSubtitle sub;
      memset(&sub, 0, sizeof(sub));
      EXPECT_TRUE(Util::deserialize(*out.payload, &sub));
      LOG(INFO) << "Found subtitles"
                << ", num rects: " << sub.num_rects;
      for (int i = 0; i < sub.num_rects; ++i) {
        std::string text = "picture";
        if (sub.rects[i]->type == SUBTITLE_TEXT) {
          text = sub.rects[i]->text;
        } else if (sub.rects[i]->type == SUBTITLE_ASS) {
          text = sub.rects[i]->ass;
        }
        LOG(INFO) << "Rect num: " << i << ", type:" << sub.rects[i]->type
                  << ", text: " << text;
      }
      avsubtitle_free(&sub);
    }
    if (out.payload) {
      totalBytes += out.payload->length();
    }
  }
  LOG(INFO) << "Decoded audio frames: " << audioFrames
            << ", video frames: " << videoFrames
            << ", total bytes: " << totalBytes;
}

} // namespace
TEST(SyncDecoder, TestSyncDecoderPerformance) {
  // Measure the average decoding time per clip:
  // 1. list the videos in the testing directory
  // 2. for each video, get the number of frames and timestamps
  // 3. randomly select a frame offset
  // 4. adjust the offset for the number of frames and the stride,
  //    if it goes beyond the upper boundary
  // 5. repeat multiple times, measuring and accumulating the decoding time
  //    per clip.
  /*
    Measured clip configurations (num frames x stride):
    1)  4 x 2
    2)  8 x 8
    3) 16 x 8
    4) 32 x 4
  */
  const std::string kFolder = "pytorch/vision/test/assets/videos";
  std::vector<VideoFileStats> stats;
  gotAllTestFiles(kFolder, &stats);
  gotFilesStats(stats);
  const size_t kRounds = 10;
  auto new4x2 = measurePerformanceUs(stats, kRounds, 4, 2);
  auto new8x8 = measurePerformanceUs(stats, kRounds, 8, 8);
  auto new16x8 = measurePerformanceUs(stats, kRounds, 16, 8);
  auto new32x4 = measurePerformanceUs(stats, kRounds, 32, 4);
  LOG(INFO) << "Clip decoding (us)"
            << ", new(4x2): " << new4x2 << ", new(8x8): " << new8x8
            << ", new(16x8): " << new16x8 << ", new(32x4): " << new32x4;
}
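
// Basic end-to-end decode of a local mp4 file read directly from its URI.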
TEST(SyncDecoder, Test) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.seekAccuracy = 100000;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};
  params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();
}
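
// Decodes a file that carries a subtitle stream; runDecoder() logs the
// deserialized subtitle rectangles.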
TEST(SyncDecoder, TestSubtitles) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};
  params.uri = "vue/synergy/data/robotsub.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();
}
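
// Header-only probing (params.headerOnly = true) of three test videos.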
TEST(SyncDecoder, TestHeadersOnly) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.seekAccuracy = 100000;
  params.headerOnly = true;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};

  params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();

  params.uri = "pytorch/vision/test/assets/videos/SOX5yA1l24A.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();

  params.uri = "pytorch/vision/test/assets/videos/WUzgd7C1pWA.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();
}
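
// Header-only probing with downsampled output formats requested: audio at
// 8000 samples per second and video at 224x224.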
TEST(SyncDecoder, TestHeadersOnlyDownSampling) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.seekAccuracy = 100000;
  params.headerOnly = true;
  MediaFormat format;
  format.type = TYPE_AUDIO;
  format.format.audio.samples = 8000;
  params.formats.insert(format);
  format.type = TYPE_VIDEO;
  format.format.video.width = 224;
  format.format.video.height = 224;
  params.formats.insert(format);

  params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();

  params.uri = "pytorch/vision/test/assets/videos/SOX5yA1l24A.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();

  params.uri = "pytorch/vision/test/assets/videos/WUzgd7C1pWA.mp4";
  CHECK(decoder.init(params, nullptr, nullptr));
  runDecoder(decoder);
  decoder.shutdown();
}
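
// Initializes a decoder and never calls shutdown(); the destructor is
// expected to clean up on its own.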
TEST(SyncDecoder, TestInitOnlyNoShutdown) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.seekAccuracy = 100000;
  params.headerOnly = false;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};
  params.uri = "pytorch/vision/test/assets/videos/R6llTwEh07w.mp4";
  std::vector<DecoderMetadata> metadata;
  CHECK(decoder.init(params, nullptr, &metadata));
}
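
// Decodes an AVI entirely from an in-memory buffer through
// MemoryBuffer::getCallback().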
TEST(SyncDecoder, TestMemoryBuffer) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.endOffset = 9000000;
  params.seekAccuracy = 10000;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};

  FILE* f = fopen(
      "pytorch/vision/test/assets/videos/RATRACE_wave_f_nm_np1_fr_goo_37.avi",
      "rb");
  CHECK(f != nullptr);
  fseek(f, 0, SEEK_END);
  std::vector<uint8_t> buffer(ftell(f));
  rewind(f);
  size_t s = fread(buffer.data(), 1, buffer.size(), f);
  TORCH_CHECK_EQ(buffer.size(), s);
  fclose(f);

  CHECK(decoder.init(
      params,
      MemoryBuffer::getCallback(buffer.data(), buffer.size()),
      nullptr));
  LOG(INFO) << "Decoding from memory bytes: " << buffer.size();
  runDecoder(decoder);
  decoder.shutdown();
}
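
// Uses a custom in-memory read/seek callback that reports no seek capability
// (returns -1 on the capability query) while maxSeekableBytes covers the
// whole buffer, so initialization is expected to succeed.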
TEST(SyncDecoder, TestMemoryBufferNoSeekableWithFullRead) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.endOffset = 9000000;
  params.seekAccuracy = 10000;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};

  FILE* f = fopen("pytorch/vision/test/assets/videos/R6llTwEh07w.mp4", "rb");
  CHECK(f != nullptr);
  fseek(f, 0, SEEK_END);
  std::vector<uint8_t> buffer(ftell(f));
  rewind(f);
  size_t s = fread(buffer.data(), 1, buffer.size(), f);
  TORCH_CHECK_EQ(buffer.size(), s);
  fclose(f);

  params.maxSeekableBytes = buffer.size() + 1;
  MemoryBuffer object(buffer.data(), buffer.size());
  CHECK(decoder.init(
      params,
      [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable
          -> int {
        if (out) { // see defs.h file
          // read mode
          return object.read(out, size);
        }
        // seek mode
        if (!timeoutMs) {
          // seek capability, yes - no
          return -1;
        }
        return object.seek(size, whence);
      },
      nullptr));
  runDecoder(decoder);
  decoder.shutdown();
}
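
// Same non-seekable callback, but maxSeekableBytes covers only half of the
// buffer, so initialization is expected to fail.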
TEST(SyncDecoder, TestMemoryBufferNoSeekableWithPartialRead) {
  SyncDecoder decoder;
  DecoderParameters params;
  params.timeoutMs = 10000;
  params.startOffset = 1000000;
  params.endOffset = 9000000;
  params.seekAccuracy = 10000;
  params.formats = {MediaFormat(), MediaFormat(0), MediaFormat('0')};

  FILE* f = fopen("pytorch/vision/test/assets/videos/R6llTwEh07w.mp4", "rb");
  CHECK(f != nullptr);
  fseek(f, 0, SEEK_END);
  std::vector<uint8_t> buffer(ftell(f));
  rewind(f);
  size_t s = fread(buffer.data(), 1, buffer.size(), f);
  TORCH_CHECK_EQ(buffer.size(), s);
  fclose(f);

  params.maxSeekableBytes = buffer.size() / 2;
  MemoryBuffer object(buffer.data(), buffer.size());
  CHECK(!decoder.init(
      params,
      [object](uint8_t* out, int size, int whence, uint64_t timeoutMs) mutable
          -> int {
        if (out) { // see defs.h file
          // read mode
          return object.read(out, size);
        }
        // seek mode
        if (!timeoutMs) {
          // seek capability, yes - no
          return -1;
        }
        return object.seek(size, whence);
      },
      nullptr));
}