123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684 |
- #include "video_reader.h"
- #ifdef USE_PYTHON
- #include <Python.h>
- #endif
- #include "../decoder/memory_buffer.h"
- #include "../decoder/sync_decoder.h"
- #ifdef USE_PYTHON
- // If we are in a Windows environment, we need to define
- // initialization functions for the video_reader extension
- #ifdef _WIN32
- PyMODINIT_FUNC PyInit_video_reader(void) {
- // No need to do anything.
- return NULL;
- }
- #endif
- #endif // USE_PYTHON
- using namespace ffmpeg;
- namespace vision {
- namespace video_reader {
- namespace {
- const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24;
- const AVSampleFormat defaultAudioSampleFormat = AV_SAMPLE_FMT_FLT;
- const AVRational timeBaseQ = AVRational{1, AV_TIME_BASE};
- const size_t decoderTimeoutMs = 600000;
- // A jitter can be added to the end of the range to avoid conversion/rounding
- // error, small value 100us won't be enough to select the next frame, but enough
- // to compensate rounding error due to the multiple conversions.
- const size_t timeBaseJitterUs = 100;
- DecoderParameters getDecoderParams(
- int64_t videoStartUs,
- int64_t videoEndUs,
- double seekFrameMarginUs,
- int64_t getPtsOnly,
- int64_t readVideoStream,
- int videoWidth,
- int videoHeight,
- int videoMinDimension,
- int videoMaxDimension,
- int64_t readAudioStream,
- int audioSamples,
- int audioChannels) {
- DecoderParameters params;
- params.headerOnly = getPtsOnly != 0;
- params.seekAccuracy = seekFrameMarginUs;
- params.startOffset = videoStartUs;
- params.endOffset = videoEndUs;
- params.timeoutMs = decoderTimeoutMs;
- params.preventStaleness = false;
- if (readVideoStream == 1) {
- MediaFormat videoFormat(0);
- videoFormat.type = TYPE_VIDEO;
- videoFormat.format.video.format = defaultVideoPixelFormat;
- videoFormat.format.video.width = videoWidth;
- videoFormat.format.video.height = videoHeight;
- videoFormat.format.video.minDimension = videoMinDimension;
- videoFormat.format.video.maxDimension = videoMaxDimension;
- params.formats.insert(videoFormat);
- }
- if (readAudioStream == 1) {
- MediaFormat audioFormat;
- audioFormat.type = TYPE_AUDIO;
- audioFormat.format.audio.format = defaultAudioSampleFormat;
- audioFormat.format.audio.samples = audioSamples;
- audioFormat.format.audio.channels = audioChannels;
- params.formats.insert(audioFormat);
- }
- return params;
- }
- // returns number of written bytes
- template <typename T>
- size_t fillTensor(
- std::vector<DecoderOutputMessage>& msgs,
- torch::Tensor& frame,
- torch::Tensor& framePts,
- int64_t num,
- int64_t den) {
- if (msgs.empty()) {
- return 0;
- }
- T* frameData = frame.numel() > 0 ? frame.data_ptr<T>() : nullptr;
- int64_t* framePtsData = framePts.data_ptr<int64_t>();
- TORCH_CHECK_EQ(framePts.size(0), (int64_t)msgs.size());
- size_t avgElementsInFrame = frame.numel() / msgs.size();
- size_t offset = 0;
- for (size_t i = 0; i < msgs.size(); ++i) {
- const auto& msg = msgs[i];
- // convert pts into original time_base
- AVRational avr = AVRational{(int)num, (int)den};
- framePtsData[i] = av_rescale_q(msg.header.pts, timeBaseQ, avr);
- VLOG(2) << "PTS type: " << sizeof(T) << ", us: " << msg.header.pts
- << ", original: " << framePtsData[i];
- if (frameData) {
- auto sizeInBytes = msg.payload->length();
- memcpy(frameData + offset, msg.payload->data(), sizeInBytes);
- if (sizeof(T) == sizeof(uint8_t)) {
- // Video - move by allocated frame size
- offset += avgElementsInFrame / sizeof(T);
- } else {
- // Audio - move by number of samples
- offset += sizeInBytes / sizeof(T);
- }
- }
- }
- return offset * sizeof(T);
- }
- size_t fillVideoTensor(
- std::vector<DecoderOutputMessage>& msgs,
- torch::Tensor& videoFrame,
- torch::Tensor& videoFramePts,
- int64_t num,
- int64_t den) {
- return fillTensor<uint8_t>(msgs, videoFrame, videoFramePts, num, den);
- }
- size_t fillAudioTensor(
- std::vector<DecoderOutputMessage>& msgs,
- torch::Tensor& audioFrame,
- torch::Tensor& audioFramePts,
- int64_t num,
- int64_t den) {
- return fillTensor<float>(msgs, audioFrame, audioFramePts, num, den);
- }
- void offsetsToUs(
- double& seekFrameMargin,
- int64_t readVideoStream,
- int64_t videoStartPts,
- int64_t videoEndPts,
- int64_t videoTimeBaseNum,
- int64_t videoTimeBaseDen,
- int64_t readAudioStream,
- int64_t audioStartPts,
- int64_t audioEndPts,
- int64_t audioTimeBaseNum,
- int64_t audioTimeBaseDen,
- int64_t& videoStartUs,
- int64_t& videoEndUs) {
- seekFrameMargin *= AV_TIME_BASE;
- videoStartUs = 0;
- videoEndUs = -1;
- if (readVideoStream) {
- AVRational vr = AVRational{(int)videoTimeBaseNum, (int)videoTimeBaseDen};
- if (videoStartPts > 0) {
- videoStartUs = av_rescale_q(videoStartPts, vr, timeBaseQ);
- }
- if (videoEndPts > 0) {
- // Add jitter to the end of the range to avoid conversion/rounding error.
- // Small value 100us won't be enough to select the next frame, but enough
- // to compensate rounding error due to the multiple conversions.
- videoEndUs = timeBaseJitterUs + av_rescale_q(videoEndPts, vr, timeBaseQ);
- }
- } else if (readAudioStream) {
- AVRational ar = AVRational{(int)audioTimeBaseNum, (int)audioTimeBaseDen};
- if (audioStartPts > 0) {
- videoStartUs = av_rescale_q(audioStartPts, ar, timeBaseQ);
- }
- if (audioEndPts > 0) {
- // Add jitter to the end of the range to avoid conversion/rounding error.
- // Small value 100us won't be enough to select the next frame, but enough
- // to compensate rounding error due to the multiple conversions.
- videoEndUs = timeBaseJitterUs + av_rescale_q(audioEndPts, ar, timeBaseQ);
- }
- }
- }
- torch::List<torch::Tensor> readVideo(
- bool isReadFile,
- const torch::Tensor& input_video,
- std::string videoPath,
- double seekFrameMargin,
- int64_t getPtsOnly,
- int64_t readVideoStream,
- int64_t width,
- int64_t height,
- int64_t minDimension,
- int64_t maxDimension,
- int64_t videoStartPts,
- int64_t videoEndPts,
- int64_t videoTimeBaseNum,
- int64_t videoTimeBaseDen,
- int64_t readAudioStream,
- int64_t audioSamples,
- int64_t audioChannels,
- int64_t audioStartPts,
- int64_t audioEndPts,
- int64_t audioTimeBaseNum,
- int64_t audioTimeBaseDen) {
- int64_t videoStartUs, videoEndUs;
- offsetsToUs(
- seekFrameMargin,
- readVideoStream,
- videoStartPts,
- videoEndPts,
- videoTimeBaseNum,
- videoTimeBaseDen,
- readAudioStream,
- audioStartPts,
- audioEndPts,
- audioTimeBaseNum,
- audioTimeBaseDen,
- videoStartUs,
- videoEndUs);
- DecoderParameters params = getDecoderParams(
- videoStartUs, // videoStartPts
- videoEndUs, // videoEndPts
- seekFrameMargin, // seekFrameMargin
- getPtsOnly, // getPtsOnly
- readVideoStream, // readVideoStream
- width, // width
- height, // height
- minDimension, // minDimension
- maxDimension, // maxDimension
- readAudioStream, // readAudioStream
- audioSamples, // audioSamples
- audioChannels // audioChannels
- );
- SyncDecoder decoder;
- std::vector<DecoderOutputMessage> audioMessages, videoMessages;
- DecoderInCallback callback = nullptr;
- std::string logMessage, logType;
- if (isReadFile) {
- params.uri = videoPath;
- logType = "file";
- logMessage = videoPath;
- } else {
- callback = MemoryBuffer::getCallback(
- input_video.data_ptr<uint8_t>(), input_video.size(0));
- logType = "memory";
- logMessage = std::to_string(input_video.size(0));
- }
- VLOG(1) << "Video decoding from " << logType << " [" << logMessage
- << "] has started";
- const auto now = std::chrono::system_clock::now();
- bool succeeded;
- DecoderMetadata audioMetadata, videoMetadata;
- std::vector<DecoderMetadata> metadata;
- if ((succeeded = decoder.init(params, std::move(callback), &metadata))) {
- for (const auto& header : metadata) {
- if (header.format.type == TYPE_VIDEO) {
- videoMetadata = header;
- } else if (header.format.type == TYPE_AUDIO) {
- audioMetadata = header;
- }
- }
- int res;
- DecoderOutputMessage msg;
- while (0 == (res = decoder.decode(&msg, decoderTimeoutMs))) {
- if (msg.header.format.type == TYPE_VIDEO) {
- videoMessages.push_back(std::move(msg));
- }
- if (msg.header.format.type == TYPE_AUDIO) {
- audioMessages.push_back(std::move(msg));
- }
- msg.payload.reset();
- }
- } else {
- LOG(ERROR) << "Decoder initialization has failed";
- }
- const auto then = std::chrono::system_clock::now();
- VLOG(1) << "Video decoding from " << logType << " [" << logMessage
- << "] has finished, "
- << std::chrono::duration_cast<std::chrono::microseconds>(then - now)
- .count()
- << " us";
- decoder.shutdown();
- // video section
- torch::Tensor videoFrame = torch::zeros({0}, torch::kByte);
- torch::Tensor videoFramePts = torch::zeros({0}, torch::kLong);
- torch::Tensor videoTimeBase = torch::zeros({0}, torch::kInt);
- torch::Tensor videoFps = torch::zeros({0}, torch::kFloat);
- torch::Tensor videoDuration = torch::zeros({0}, torch::kLong);
- if (succeeded && readVideoStream == 1) {
- if (!videoMessages.empty()) {
- const auto& header = videoMetadata;
- const auto& format = header.format.format.video;
- int numVideoFrames = videoMessages.size();
- int outHeight = format.height;
- int outWidth = format.width;
- int numChannels = 3; // decoder guarantees the default AV_PIX_FMT_RGB24
- size_t expectedWrittenBytes = 0;
- if (getPtsOnly == 0) {
- videoFrame = torch::zeros(
- {numVideoFrames, outHeight, outWidth, numChannels}, torch::kByte);
- expectedWrittenBytes =
- (size_t)numVideoFrames * outHeight * outWidth * numChannels;
- }
- videoFramePts = torch::zeros({numVideoFrames}, torch::kLong);
- VLOG(2) << "video duration: " << header.duration
- << ", fps: " << header.fps << ", num: " << header.num
- << ", den: " << header.den << ", num frames: " << numVideoFrames;
- auto numberWrittenBytes = fillVideoTensor(
- videoMessages, videoFrame, videoFramePts, header.num, header.den);
- TORCH_CHECK_EQ(numberWrittenBytes, expectedWrittenBytes);
- videoTimeBase = torch::zeros({2}, torch::kInt);
- int* videoTimeBaseData = videoTimeBase.data_ptr<int>();
- videoTimeBaseData[0] = header.num;
- videoTimeBaseData[1] = header.den;
- videoFps = torch::zeros({1}, torch::kFloat);
- float* videoFpsData = videoFps.data_ptr<float>();
- videoFpsData[0] = header.fps;
- videoDuration = torch::zeros({1}, torch::kLong);
- int64_t* videoDurationData = videoDuration.data_ptr<int64_t>();
- AVRational vr = AVRational{(int)header.num, (int)header.den};
- videoDurationData[0] = av_rescale_q(header.duration, timeBaseQ, vr);
- VLOG(1) << "Video decoding from " << logType << " [" << logMessage
- << "] filled video tensors";
- } else {
- VLOG(1) << "Miss video stream";
- }
- }
- // audio section
- torch::Tensor audioFrame = torch::zeros({0}, torch::kFloat);
- torch::Tensor audioFramePts = torch::zeros({0}, torch::kLong);
- torch::Tensor audioTimeBase = torch::zeros({0}, torch::kInt);
- torch::Tensor audioSampleRate = torch::zeros({0}, torch::kInt);
- torch::Tensor audioDuration = torch::zeros({0}, torch::kLong);
- if (succeeded && readAudioStream == 1) {
- if (!audioMessages.empty()) {
- const auto& header = audioMetadata;
- const auto& format = header.format.format.audio;
- int64_t outAudioChannels = format.channels;
- int bytesPerSample =
- av_get_bytes_per_sample(static_cast<AVSampleFormat>(format.format));
- int numAudioFrames = audioMessages.size();
- int64_t numAudioSamples = 0;
- if (getPtsOnly == 0) {
- int64_t frameSizeTotal = 0;
- for (auto const& audioMessage : audioMessages) {
- frameSizeTotal += audioMessage.payload->length();
- }
- TORCH_CHECK_EQ(frameSizeTotal % (outAudioChannels * bytesPerSample), 0);
- numAudioSamples = frameSizeTotal / (outAudioChannels * bytesPerSample);
- audioFrame =
- torch::zeros({numAudioSamples, outAudioChannels}, torch::kFloat);
- }
- audioFramePts = torch::zeros({numAudioFrames}, torch::kLong);
- VLOG(2) << "audio duration: " << header.duration
- << ", channels: " << format.channels
- << ", sample rate: " << format.samples << ", num: " << header.num
- << ", den: " << header.den;
- auto numberWrittenBytes = fillAudioTensor(
- audioMessages, audioFrame, audioFramePts, header.num, header.den);
- TORCH_CHECK_EQ(
- numberWrittenBytes,
- numAudioSamples * outAudioChannels * sizeof(float));
- audioTimeBase = torch::zeros({2}, torch::kInt);
- int* audioTimeBaseData = audioTimeBase.data_ptr<int>();
- audioTimeBaseData[0] = header.num;
- audioTimeBaseData[1] = header.den;
- audioSampleRate = torch::zeros({1}, torch::kInt);
- int* audioSampleRateData = audioSampleRate.data_ptr<int>();
- audioSampleRateData[0] = format.samples;
- audioDuration = torch::zeros({1}, torch::kLong);
- int64_t* audioDurationData = audioDuration.data_ptr<int64_t>();
- AVRational ar = AVRational{(int)header.num, (int)header.den};
- audioDurationData[0] = av_rescale_q(header.duration, timeBaseQ, ar);
- VLOG(1) << "Video decoding from " << logType << " [" << logMessage
- << "] filled audio tensors";
- } else {
- VLOG(1) << "Miss audio stream";
- }
- }
- torch::List<torch::Tensor> result;
- result.push_back(std::move(videoFrame));
- result.push_back(std::move(videoFramePts));
- result.push_back(std::move(videoTimeBase));
- result.push_back(std::move(videoFps));
- result.push_back(std::move(videoDuration));
- result.push_back(std::move(audioFrame));
- result.push_back(std::move(audioFramePts));
- result.push_back(std::move(audioTimeBase));
- result.push_back(std::move(audioSampleRate));
- result.push_back(std::move(audioDuration));
- VLOG(1) << "Video decoding from " << logType << " [" << logMessage
- << "] about to return";
- return result;
- }
- torch::List<torch::Tensor> probeVideo(
- bool isReadFile,
- const torch::Tensor& input_video,
- std::string videoPath) {
- DecoderParameters params = getDecoderParams(
- 0, // videoStartUs
- -1, // videoEndUs
- 0, // seekFrameMargin
- 1, // getPtsOnly
- 1, // readVideoStream
- 0, // width
- 0, // height
- 0, // minDimension
- 0, // maxDimension
- 1, // readAudioStream
- 0, // audioSamples
- 0 // audioChannels
- );
- SyncDecoder decoder;
- DecoderInCallback callback = nullptr;
- std::string logMessage, logType;
- if (isReadFile) {
- params.uri = videoPath;
- logType = "file";
- logMessage = videoPath;
- } else {
- callback = MemoryBuffer::getCallback(
- input_video.data_ptr<uint8_t>(), input_video.size(0));
- logType = "memory";
- logMessage = std::to_string(input_video.size(0));
- }
- VLOG(1) << "Video probing from " << logType << " [" << logMessage
- << "] has started";
- const auto now = std::chrono::system_clock::now();
- bool succeeded;
- bool gotAudio = false, gotVideo = false;
- DecoderMetadata audioMetadata, videoMetadata;
- std::vector<DecoderMetadata> metadata;
- if ((succeeded = decoder.init(params, std::move(callback), &metadata))) {
- for (const auto& header : metadata) {
- if (header.format.type == TYPE_VIDEO) {
- gotVideo = true;
- videoMetadata = header;
- } else if (header.format.type == TYPE_AUDIO) {
- gotAudio = true;
- audioMetadata = header;
- }
- }
- const auto then = std::chrono::system_clock::now();
- VLOG(1) << "Video probing from " << logType << " [" << logMessage
- << "] has finished, "
- << std::chrono::duration_cast<std::chrono::microseconds>(then - now)
- .count()
- << " us";
- } else {
- LOG(ERROR) << "Decoder initialization has failed";
- }
- decoder.shutdown();
- // video section
- torch::Tensor videoTimeBase = torch::zeros({0}, torch::kInt);
- torch::Tensor videoFps = torch::zeros({0}, torch::kFloat);
- torch::Tensor videoDuration = torch::zeros({0}, torch::kLong);
- if (succeeded && gotVideo) {
- videoTimeBase = torch::zeros({2}, torch::kInt);
- int* videoTimeBaseData = videoTimeBase.data_ptr<int>();
- const auto& header = videoMetadata;
- videoTimeBaseData[0] = header.num;
- videoTimeBaseData[1] = header.den;
- videoFps = torch::zeros({1}, torch::kFloat);
- float* videoFpsData = videoFps.data_ptr<float>();
- videoFpsData[0] = header.fps;
- videoDuration = torch::zeros({1}, torch::kLong);
- int64_t* videoDurationData = videoDuration.data_ptr<int64_t>();
- AVRational avr = AVRational{(int)header.num, (int)header.den};
- videoDurationData[0] = av_rescale_q(header.duration, timeBaseQ, avr);
- VLOG(2) << "Prob fps: " << header.fps << ", duration: " << header.duration
- << ", num: " << header.num << ", den: " << header.den;
- VLOG(1) << "Video probing from " << logType << " [" << logMessage
- << "] filled video tensors";
- } else {
- LOG(ERROR) << "Miss video stream";
- }
- // audio section
- torch::Tensor audioTimeBase = torch::zeros({0}, torch::kInt);
- torch::Tensor audioSampleRate = torch::zeros({0}, torch::kInt);
- torch::Tensor audioDuration = torch::zeros({0}, torch::kLong);
- if (succeeded && gotAudio) {
- audioTimeBase = torch::zeros({2}, torch::kInt);
- int* audioTimeBaseData = audioTimeBase.data_ptr<int>();
- const auto& header = audioMetadata;
- const auto& media = header.format;
- const auto& format = media.format.audio;
- audioTimeBaseData[0] = header.num;
- audioTimeBaseData[1] = header.den;
- audioSampleRate = torch::zeros({1}, torch::kInt);
- int* audioSampleRateData = audioSampleRate.data_ptr<int>();
- audioSampleRateData[0] = format.samples;
- audioDuration = torch::zeros({1}, torch::kLong);
- int64_t* audioDurationData = audioDuration.data_ptr<int64_t>();
- AVRational avr = AVRational{(int)header.num, (int)header.den};
- audioDurationData[0] = av_rescale_q(header.duration, timeBaseQ, avr);
- VLOG(2) << "Prob sample rate: " << format.samples
- << ", duration: " << header.duration << ", num: " << header.num
- << ", den: " << header.den;
- VLOG(1) << "Video probing from " << logType << " [" << logMessage
- << "] filled audio tensors";
- } else {
- VLOG(1) << "Miss audio stream";
- }
- torch::List<torch::Tensor> result;
- result.push_back(std::move(videoTimeBase));
- result.push_back(std::move(videoFps));
- result.push_back(std::move(videoDuration));
- result.push_back(std::move(audioTimeBase));
- result.push_back(std::move(audioSampleRate));
- result.push_back(std::move(audioDuration));
- VLOG(1) << "Video probing from " << logType << " [" << logMessage
- << "] is about to return";
- return result;
- }
- } // namespace
- torch::List<torch::Tensor> read_video_from_memory(
- torch::Tensor input_video,
- double seekFrameMargin,
- int64_t getPtsOnly,
- int64_t readVideoStream,
- int64_t width,
- int64_t height,
- int64_t minDimension,
- int64_t maxDimension,
- int64_t videoStartPts,
- int64_t videoEndPts,
- int64_t videoTimeBaseNum,
- int64_t videoTimeBaseDen,
- int64_t readAudioStream,
- int64_t audioSamples,
- int64_t audioChannels,
- int64_t audioStartPts,
- int64_t audioEndPts,
- int64_t audioTimeBaseNum,
- int64_t audioTimeBaseDen) {
- C10_LOG_API_USAGE_ONCE(
- "torchvision.csrc.io.video_reader.video_reader.read_video_from_memory");
- return readVideo(
- false,
- input_video,
- "", // videoPath
- seekFrameMargin,
- getPtsOnly,
- readVideoStream,
- width,
- height,
- minDimension,
- maxDimension,
- videoStartPts,
- videoEndPts,
- videoTimeBaseNum,
- videoTimeBaseDen,
- readAudioStream,
- audioSamples,
- audioChannels,
- audioStartPts,
- audioEndPts,
- audioTimeBaseNum,
- audioTimeBaseDen);
- }
- torch::List<torch::Tensor> read_video_from_file(
- std::string videoPath,
- double seekFrameMargin,
- int64_t getPtsOnly,
- int64_t readVideoStream,
- int64_t width,
- int64_t height,
- int64_t minDimension,
- int64_t maxDimension,
- int64_t videoStartPts,
- int64_t videoEndPts,
- int64_t videoTimeBaseNum,
- int64_t videoTimeBaseDen,
- int64_t readAudioStream,
- int64_t audioSamples,
- int64_t audioChannels,
- int64_t audioStartPts,
- int64_t audioEndPts,
- int64_t audioTimeBaseNum,
- int64_t audioTimeBaseDen) {
- C10_LOG_API_USAGE_ONCE(
- "torchvision.csrc.io.video_reader.video_reader.read_video_from_file");
- torch::Tensor dummy_input_video = torch::ones({0});
- return readVideo(
- true,
- dummy_input_video,
- videoPath,
- seekFrameMargin,
- getPtsOnly,
- readVideoStream,
- width,
- height,
- minDimension,
- maxDimension,
- videoStartPts,
- videoEndPts,
- videoTimeBaseNum,
- videoTimeBaseDen,
- readAudioStream,
- audioSamples,
- audioChannels,
- audioStartPts,
- audioEndPts,
- audioTimeBaseNum,
- audioTimeBaseDen);
- }
- torch::List<torch::Tensor> probe_video_from_memory(torch::Tensor input_video) {
- C10_LOG_API_USAGE_ONCE(
- "torchvision.csrc.io.video_reader.video_reader.probe_video_from_memory");
- return probeVideo(false, input_video, "");
- }
- torch::List<torch::Tensor> probe_video_from_file(std::string videoPath) {
- C10_LOG_API_USAGE_ONCE(
- "torchvision.csrc.io.video_reader.video_reader.probe_video_from_file");
- torch::Tensor dummy_input_video = torch::ones({0});
- return probeVideo(true, dummy_input_video, videoPath);
- }
- TORCH_LIBRARY_FRAGMENT(video_reader, m) {
- m.def("read_video_from_memory", read_video_from_memory);
- m.def("read_video_from_file", read_video_from_file);
- m.def("probe_video_from_memory", probe_video_from_memory);
- m.def("probe_video_from_file", probe_video_from_file);
- }
- } // namespace video_reader
- } // namespace vision
|