audio_sampler.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. #include "audio_sampler.h"
  2. #include <c10/util/Logging.h>
  3. #include "util.h"
  4. #define AVRESAMPLE_MAX_CHANNELS 32
  5. // www.ffmpeg.org/doxygen/1.1/doc_2examples_2resampling_audio_8c-example.html#a24
  6. namespace ffmpeg {
  7. namespace {
  8. int preparePlanes(
  9. const AudioFormat& fmt,
  10. const uint8_t* buffer,
  11. int numSamples,
  12. uint8_t** planes) {
  13. int result;
  14. if ((result = av_samples_fill_arrays(
  15. planes,
  16. nullptr, // linesize is not needed
  17. buffer,
  18. fmt.channels,
  19. numSamples,
  20. (AVSampleFormat)fmt.format,
  21. 1)) < 0) {
  22. LOG(ERROR) << "av_samples_fill_arrays failed, err: "
  23. << Util::generateErrorDesc(result)
  24. << ", numSamples: " << numSamples << ", fmt: " << fmt.format;
  25. }
  26. return result;
  27. }
  28. } // namespace
  29. AudioSampler::AudioSampler(void* logCtx) : logCtx_(logCtx) {}
  30. AudioSampler::~AudioSampler() {
  31. cleanUp();
  32. }
  33. void AudioSampler::shutdown() {
  34. cleanUp();
  35. }
  36. bool AudioSampler::init(const SamplerParameters& params) {
  37. cleanUp();
  38. if (params.type != MediaType::TYPE_AUDIO) {
  39. LOG(ERROR) << "Invalid media type, expected MediaType::TYPE_AUDIO";
  40. return false;
  41. }
  42. swrContext_ = swr_alloc_set_opts(
  43. nullptr,
  44. av_get_default_channel_layout(params.out.audio.channels),
  45. (AVSampleFormat)params.out.audio.format,
  46. params.out.audio.samples,
  47. av_get_default_channel_layout(params.in.audio.channels),
  48. (AVSampleFormat)params.in.audio.format,
  49. params.in.audio.samples,
  50. 0,
  51. logCtx_);
  52. if (swrContext_ == nullptr) {
  53. LOG(ERROR) << "Cannot allocate SwrContext";
  54. return false;
  55. }
  56. int result;
  57. if ((result = swr_init(swrContext_)) < 0) {
  58. LOG(ERROR) << "swr_init failed, err: " << Util::generateErrorDesc(result)
  59. << ", in -> format: " << params.in.audio.format
  60. << ", channels: " << params.in.audio.channels
  61. << ", samples: " << params.in.audio.samples
  62. << ", out -> format: " << params.out.audio.format
  63. << ", channels: " << params.out.audio.channels
  64. << ", samples: " << params.out.audio.samples;
  65. return false;
  66. }
  67. // set formats
  68. params_ = params;
  69. return true;
  70. }
  71. int AudioSampler::numOutputSamples(int inSamples) const {
  72. return swr_get_out_samples(swrContext_, inSamples);
  73. }
  74. int AudioSampler::sample(
  75. const uint8_t* inPlanes[],
  76. int inNumSamples,
  77. ByteStorage* out,
  78. int outNumSamples) {
  79. int result;
  80. int outBufferBytes = av_samples_get_buffer_size(
  81. nullptr,
  82. params_.out.audio.channels,
  83. outNumSamples,
  84. (AVSampleFormat)params_.out.audio.format,
  85. 1);
  86. if (out) {
  87. out->ensure(outBufferBytes);
  88. uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr};
  89. if ((result = preparePlanes(
  90. params_.out.audio,
  91. out->writableTail(),
  92. outNumSamples,
  93. outPlanes)) < 0) {
  94. return result;
  95. }
  96. if ((result = swr_convert(
  97. swrContext_,
  98. &outPlanes[0],
  99. outNumSamples,
  100. inPlanes,
  101. inNumSamples)) < 0) {
  102. LOG(ERROR) << "swr_convert failed, err: "
  103. << Util::generateErrorDesc(result);
  104. return result;
  105. }
  106. TORCH_CHECK_LE(result, outNumSamples);
  107. if (result) {
  108. if ((result = av_samples_get_buffer_size(
  109. nullptr,
  110. params_.out.audio.channels,
  111. result,
  112. (AVSampleFormat)params_.out.audio.format,
  113. 1)) >= 0) {
  114. out->append(result);
  115. } else {
  116. LOG(ERROR) << "av_samples_get_buffer_size failed, err: "
  117. << Util::generateErrorDesc(result);
  118. }
  119. }
  120. } else {
  121. // allocate a temporary buffer
  122. auto* tmpBuffer = static_cast<uint8_t*>(av_malloc(outBufferBytes));
  123. if (!tmpBuffer) {
  124. LOG(ERROR) << "av_alloc failed, for size: " << outBufferBytes;
  125. return -1;
  126. }
  127. uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr};
  128. if ((result = preparePlanes(
  129. params_.out.audio, tmpBuffer, outNumSamples, outPlanes)) < 0) {
  130. av_free(tmpBuffer);
  131. return result;
  132. }
  133. if ((result = swr_convert(
  134. swrContext_,
  135. &outPlanes[0],
  136. outNumSamples,
  137. inPlanes,
  138. inNumSamples)) < 0) {
  139. LOG(ERROR) << "swr_convert failed, err: "
  140. << Util::generateErrorDesc(result);
  141. av_free(tmpBuffer);
  142. return result;
  143. }
  144. av_free(tmpBuffer);
  145. TORCH_CHECK_LE(result, outNumSamples);
  146. if (result) {
  147. result = av_samples_get_buffer_size(
  148. nullptr,
  149. params_.out.audio.channels,
  150. result,
  151. (AVSampleFormat)params_.out.audio.format,
  152. 1);
  153. }
  154. }
  155. return result;
  156. }
  157. int AudioSampler::sample(AVFrame* frame, ByteStorage* out) {
  158. const auto outNumSamples = numOutputSamples(frame ? frame->nb_samples : 0);
  159. if (!outNumSamples) {
  160. return 0;
  161. }
  162. return sample(
  163. frame ? (const uint8_t**)&frame->data[0] : nullptr,
  164. frame ? frame->nb_samples : 0,
  165. out,
  166. outNumSamples);
  167. }
  168. int AudioSampler::sample(const ByteStorage* in, ByteStorage* out) {
  169. const auto inSampleSize =
  170. av_get_bytes_per_sample((AVSampleFormat)params_.in.audio.format);
  171. const auto inNumSamples =
  172. !in ? 0 : in->length() / inSampleSize / params_.in.audio.channels;
  173. const auto outNumSamples = numOutputSamples(inNumSamples);
  174. if (!outNumSamples) {
  175. return 0;
  176. }
  177. uint8_t* inPlanes[AVRESAMPLE_MAX_CHANNELS] = {nullptr};
  178. int result;
  179. if (in &&
  180. (result = preparePlanes(
  181. params_.in.audio, in->data(), inNumSamples, inPlanes)) < 0) {
  182. return result;
  183. }
  184. return sample(
  185. in ? (const uint8_t**)inPlanes : nullptr,
  186. inNumSamples,
  187. out,
  188. outNumSamples);
  189. }
  190. void AudioSampler::cleanUp() {
  191. if (swrContext_) {
  192. swr_free(&swrContext_);
  193. swrContext_ = nullptr;
  194. }
  195. }
  196. } // namespace ffmpeg