123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404 |
- #include "decoder.h"
- #include <c10/util/Logging.h>
- #include <nppi_color_conversion.h>
- #include <cmath>
- #include <cstring>
- #include <unordered_map>
- static float chroma_height_factor(cudaVideoSurfaceFormat surface_format) {
- return (surface_format == cudaVideoSurfaceFormat_YUV444 ||
- surface_format == cudaVideoSurfaceFormat_YUV444_16Bit)
- ? 1.0
- : 0.5;
- }
- static int chroma_plane_count(cudaVideoSurfaceFormat surface_format) {
- return (surface_format == cudaVideoSurfaceFormat_YUV444 ||
- surface_format == cudaVideoSurfaceFormat_YUV444_16Bit)
- ? 2
- : 1;
- }
- /* Initialise cu_context and video_codec, create context lock and create parser
- * object.
- */
- void Decoder::init(CUcontext context, cudaVideoCodec codec) {
- cu_context = context;
- video_codec = codec;
- check_for_cuda_errors(
- cuvidCtxLockCreate(&ctx_lock, cu_context), __LINE__, __FILE__);
- CUVIDPARSERPARAMS parser_params = {};
- parser_params.CodecType = codec;
- parser_params.ulMaxNumDecodeSurfaces = 1;
- parser_params.ulClockRate = 1000;
- parser_params.ulMaxDisplayDelay = 0u;
- parser_params.pUserData = this;
- parser_params.pfnSequenceCallback = video_sequence_handler;
- parser_params.pfnDecodePicture = picture_decode_handler;
- parser_params.pfnDisplayPicture = picture_display_handler;
- parser_params.pfnGetOperatingPoint = operating_point_handler;
- check_for_cuda_errors(
- cuvidCreateVideoParser(&parser, &parser_params), __LINE__, __FILE__);
- }
- /* Destroy parser object and context lock.
- */
- Decoder::~Decoder() {
- if (parser) {
- cuvidDestroyVideoParser(parser);
- }
- cuvidCtxLockDestroy(ctx_lock);
- }
- /* Destroy CUvideodecoder object and free up all the unreturned decoded frames.
- */
- void Decoder::release() {
- cuCtxPushCurrent(cu_context);
- if (decoder) {
- cuvidDestroyDecoder(decoder);
- }
- cuCtxPopCurrent(NULL);
- }
- /* Trigger video decoding.
- */
- void Decoder::decode(const uint8_t* data, unsigned long size) {
- CUVIDSOURCEDATAPACKET pkt = {};
- pkt.flags = CUVID_PKT_TIMESTAMP;
- pkt.payload_size = size;
- pkt.payload = data;
- pkt.timestamp = 0;
- if (!data || size == 0) {
- pkt.flags |= CUVID_PKT_ENDOFSTREAM;
- }
- check_for_cuda_errors(cuvidParseVideoData(parser, &pkt), __LINE__, __FILE__);
- cuvidStream = 0;
- }
- /* Fetch a decoded frame and remove it from the queue.
- */
- torch::Tensor Decoder::fetch_frame() {
- if (decoded_frames.empty()) {
- auto options =
- torch::TensorOptions().dtype(torch::kU8).device(torch::kCUDA);
- return torch::zeros({0}, options);
- }
- torch::Tensor frame = decoded_frames.front();
- decoded_frames.pop();
- return frame;
- }
- /* Called when a picture is ready to be decoded.
- */
- int Decoder::handle_picture_decode(CUVIDPICPARAMS* pic_params) {
- if (!decoder) {
- TORCH_CHECK(false, "Uninitialised decoder");
- }
- pic_num_in_decode_order[pic_params->CurrPicIdx] = decode_pic_count++;
- check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__);
- check_for_cuda_errors(
- cuvidDecodePicture(decoder, pic_params), __LINE__, __FILE__);
- check_for_cuda_errors(cuCtxPopCurrent(NULL), __LINE__, __FILE__);
- return 1;
- }
- /* Process the decoded data and copy it to a cuda memory location.
- */
- int Decoder::handle_picture_display(CUVIDPARSERDISPINFO* disp_info) {
- CUVIDPROCPARAMS proc_params = {};
- proc_params.progressive_frame = disp_info->progressive_frame;
- proc_params.second_field = disp_info->repeat_first_field + 1;
- proc_params.top_field_first = disp_info->top_field_first;
- proc_params.unpaired_field = disp_info->repeat_first_field < 0;
- proc_params.output_stream = cuvidStream;
- CUdeviceptr source_frame = 0;
- unsigned int source_pitch = 0;
- check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__);
- check_for_cuda_errors(
- cuvidMapVideoFrame(
- decoder,
- disp_info->picture_index,
- &source_frame,
- &source_pitch,
- &proc_params),
- __LINE__,
- __FILE__);
- CUVIDGETDECODESTATUS decode_status;
- memset(&decode_status, 0, sizeof(decode_status));
- CUresult result =
- cuvidGetDecodeStatus(decoder, disp_info->picture_index, &decode_status);
- if (result == CUDA_SUCCESS &&
- (decode_status.decodeStatus == cuvidDecodeStatus_Error ||
- decode_status.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
- VLOG(1) << "Decode Error occurred for picture "
- << pic_num_in_decode_order[disp_info->picture_index];
- }
- auto options = torch::TensorOptions().dtype(torch::kU8).device(torch::kCUDA);
- torch::Tensor decoded_frame = torch::empty({get_height(), width, 3}, options);
- uint8_t* frame_ptr = decoded_frame.data_ptr<uint8_t>();
- const uint8_t* const source_arr[] = {
- (const uint8_t* const)source_frame,
- (const uint8_t* const)(source_frame + source_pitch * ((surface_height + 1) & ~1))};
- auto err = nppiNV12ToRGB_709CSC_8u_P2C3R(
- source_arr,
- source_pitch,
- frame_ptr,
- width * 3,
- {(int)decoded_frame.size(1), (int)decoded_frame.size(0)});
- TORCH_CHECK(
- err == NPP_NO_ERROR,
- "Failed to convert from NV12 to RGB. Error code:",
- err);
- check_for_cuda_errors(cuStreamSynchronize(cuvidStream), __LINE__, __FILE__);
- decoded_frames.push(decoded_frame);
- check_for_cuda_errors(cuCtxPopCurrent(NULL), __LINE__, __FILE__);
- check_for_cuda_errors(
- cuvidUnmapVideoFrame(decoder, source_frame), __LINE__, __FILE__);
- return 1;
- }
- /* Query the capabilities of the underlying hardware video decoder and
- * verify if the hardware supports decoding the passed video.
- */
- void Decoder::query_hardware(CUVIDEOFORMAT* video_format) {
- CUVIDDECODECAPS decode_caps = {};
- decode_caps.eCodecType = video_format->codec;
- decode_caps.eChromaFormat = video_format->chroma_format;
- decode_caps.nBitDepthMinus8 = video_format->bit_depth_luma_minus8;
- check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__);
- check_for_cuda_errors(cuvidGetDecoderCaps(&decode_caps), __LINE__, __FILE__);
- check_for_cuda_errors(cuCtxPopCurrent(NULL), __LINE__, __FILE__);
- if (!decode_caps.bIsSupported) {
- TORCH_CHECK(false, "Codec not supported on this GPU");
- }
- if ((video_format->coded_width > decode_caps.nMaxWidth) ||
- (video_format->coded_height > decode_caps.nMaxHeight)) {
- TORCH_CHECK(
- false,
- "Resolution : ",
- video_format->coded_width,
- "x",
- video_format->coded_height,
- "\nMax Supported (wxh) : ",
- decode_caps.nMaxWidth,
- "x",
- decode_caps.nMaxHeight,
- "\nResolution not supported on this GPU");
- }
- if ((video_format->coded_width >> 4) * (video_format->coded_height >> 4) >
- decode_caps.nMaxMBCount) {
- TORCH_CHECK(
- false,
- "MBCount : ",
- (video_format->coded_width >> 4) * (video_format->coded_height >> 4),
- "\nMax Supported mbcnt : ",
- decode_caps.nMaxMBCount,
- "\nMBCount not supported on this GPU");
- }
- // Check if output format supported. If not, check fallback options
- if (!(decode_caps.nOutputFormatMask & (1 << video_output_format))) {
- if (decode_caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12)) {
- video_output_format = cudaVideoSurfaceFormat_NV12;
- } else if (
- decode_caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_P016)) {
- video_output_format = cudaVideoSurfaceFormat_P016;
- } else if (
- decode_caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_YUV444)) {
- video_output_format = cudaVideoSurfaceFormat_YUV444;
- } else if (
- decode_caps.nOutputFormatMask &
- (1 << cudaVideoSurfaceFormat_YUV444_16Bit)) {
- video_output_format = cudaVideoSurfaceFormat_YUV444_16Bit;
- } else {
- TORCH_CHECK(false, "No supported output format found");
- }
- }
- }
- /* Called before decoding frames and/or whenever there is a configuration
- * change.
- */
- int Decoder::handle_video_sequence(CUVIDEOFORMAT* video_format) {
- // video_codec has been set in init(). Here it's set
- // again for potential correction.
- video_codec = video_format->codec;
- video_chroma_format = video_format->chroma_format;
- bit_depth_minus8 = video_format->bit_depth_luma_minus8;
- bytes_per_pixel = bit_depth_minus8 > 0 ? 2 : 1;
- // Set the output surface format same as chroma format
- switch (video_chroma_format) {
- case cudaVideoChromaFormat_Monochrome:
- case cudaVideoChromaFormat_420:
- video_output_format = video_format->bit_depth_luma_minus8
- ? cudaVideoSurfaceFormat_P016
- : cudaVideoSurfaceFormat_NV12;
- break;
- case cudaVideoChromaFormat_444:
- video_output_format = video_format->bit_depth_luma_minus8
- ? cudaVideoSurfaceFormat_YUV444_16Bit
- : cudaVideoSurfaceFormat_YUV444;
- break;
- case cudaVideoChromaFormat_422:
- video_output_format = cudaVideoSurfaceFormat_NV12;
- }
- query_hardware(video_format);
- if (width && luma_height && chroma_height) {
- // cuvidCreateDecoder() has been called before and now there's possible
- // config change.
- return reconfigure_decoder(video_format);
- }
- cu_video_format = *video_format;
- unsigned long decode_surface = video_format->min_num_decode_surfaces;
- cudaVideoDeinterlaceMode deinterlace_mode = cudaVideoDeinterlaceMode_Adaptive;
- if (video_format->progressive_sequence) {
- deinterlace_mode = cudaVideoDeinterlaceMode_Weave;
- }
- CUVIDDECODECREATEINFO video_decode_create_info = {};
- video_decode_create_info.ulWidth = video_format->coded_width;
- video_decode_create_info.ulHeight = video_format->coded_height;
- video_decode_create_info.ulNumDecodeSurfaces = decode_surface;
- video_decode_create_info.CodecType = video_format->codec;
- video_decode_create_info.ChromaFormat = video_format->chroma_format;
- // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded
- // by NVDEC hardware
- video_decode_create_info.ulCreationFlags = cudaVideoCreate_PreferCUVID;
- video_decode_create_info.bitDepthMinus8 = video_format->bit_depth_luma_minus8;
- video_decode_create_info.OutputFormat = video_output_format;
- video_decode_create_info.DeinterlaceMode = deinterlace_mode;
- video_decode_create_info.ulNumOutputSurfaces = 2;
- video_decode_create_info.vidLock = ctx_lock;
- // AV1 has max width/height of sequence in sequence header
- if (video_format->codec == cudaVideoCodec_AV1 &&
- video_format->seqhdr_data_length > 0) {
- CUVIDEOFORMATEX* video_format_ex = (CUVIDEOFORMATEX*)video_format;
- max_width = video_format_ex->av1.max_width;
- max_height = video_format_ex->av1.max_height;
- }
- if (max_width < video_format->coded_width) {
- max_width = video_format->coded_width;
- }
- if (max_height < video_format->coded_height) {
- max_height = video_format->coded_height;
- }
- video_decode_create_info.ulMaxWidth = max_width;
- video_decode_create_info.ulMaxHeight = max_height;
- width = video_format->display_area.right - video_format->display_area.left;
- luma_height =
- video_format->display_area.bottom - video_format->display_area.top;
- video_decode_create_info.ulTargetWidth = video_format->coded_width;
- video_decode_create_info.ulTargetHeight = video_format->coded_height;
- chroma_height =
- (int)(ceil(luma_height * chroma_height_factor(video_output_format)));
- num_chroma_planes = chroma_plane_count(video_output_format);
- surface_height = video_decode_create_info.ulTargetHeight;
- surface_width = video_decode_create_info.ulTargetWidth;
- display_rect.bottom = video_decode_create_info.display_area.bottom;
- display_rect.top = video_decode_create_info.display_area.top;
- display_rect.left = video_decode_create_info.display_area.left;
- display_rect.right = video_decode_create_info.display_area.right;
- check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__);
- check_for_cuda_errors(
- cuvidCreateDecoder(&decoder, &video_decode_create_info),
- __LINE__,
- __FILE__);
- check_for_cuda_errors(cuCtxPopCurrent(NULL), __LINE__, __FILE__);
- return decode_surface;
- }
- int Decoder::reconfigure_decoder(CUVIDEOFORMAT* video_format) {
- if (video_format->bit_depth_luma_minus8 !=
- cu_video_format.bit_depth_luma_minus8 ||
- video_format->bit_depth_chroma_minus8 !=
- cu_video_format.bit_depth_chroma_minus8) {
- TORCH_CHECK(false, "Reconfigure not supported for bit depth change");
- }
- if (video_format->chroma_format != cu_video_format.chroma_format) {
- TORCH_CHECK(false, "Reconfigure not supported for chroma format change");
- }
- bool decode_res_change =
- !(video_format->coded_width == cu_video_format.coded_width &&
- video_format->coded_height == cu_video_format.coded_height);
- bool display_rect_change =
- !(video_format->display_area.bottom ==
- cu_video_format.display_area.bottom &&
- video_format->display_area.top == cu_video_format.display_area.top &&
- video_format->display_area.left == cu_video_format.display_area.left &&
- video_format->display_area.right == cu_video_format.display_area.right);
- unsigned int decode_surface = video_format->min_num_decode_surfaces;
- if ((video_format->coded_width > max_width) ||
- (video_format->coded_height > max_height)) {
- // For VP9, let driver handle the change if new width/height >
- // maxwidth/maxheight
- if (video_codec != cudaVideoCodec_VP9) {
- TORCH_CHECK(
- false,
- "Reconfigure not supported when width/height > maxwidth/maxheight");
- }
- return 1;
- }
- if (!decode_res_change) {
- // If the coded_width/coded_height hasn't changed but display resolution has
- // changed, then need to update width/height for correct output without
- // cropping. Example : 1920x1080 vs 1920x1088.
- if (display_rect_change) {
- width =
- video_format->display_area.right - video_format->display_area.left;
- luma_height =
- video_format->display_area.bottom - video_format->display_area.top;
- chroma_height =
- (int)ceil(luma_height * chroma_height_factor(video_output_format));
- num_chroma_planes = chroma_plane_count(video_output_format);
- }
- return 1;
- }
- cu_video_format.coded_width = video_format->coded_width;
- cu_video_format.coded_height = video_format->coded_height;
- CUVIDRECONFIGUREDECODERINFO reconfig_params = {};
- reconfig_params.ulWidth = video_format->coded_width;
- reconfig_params.ulHeight = video_format->coded_height;
- reconfig_params.ulTargetWidth = surface_width;
- reconfig_params.ulTargetHeight = surface_height;
- reconfig_params.ulNumDecodeSurfaces = decode_surface;
- reconfig_params.display_area.bottom = display_rect.bottom;
- reconfig_params.display_area.top = display_rect.top;
- reconfig_params.display_area.left = display_rect.left;
- reconfig_params.display_area.right = display_rect.right;
- check_for_cuda_errors(cuCtxPushCurrent(cu_context), __LINE__, __FILE__);
- check_for_cuda_errors(
- cuvidReconfigureDecoder(decoder, &reconfig_params), __LINE__, __FILE__);
- check_for_cuda_errors(cuCtxPopCurrent(NULL), __LINE__, __FILE__);
- return decode_surface;
- }
- /* Called from AV1 sequence header to get operating point of an AV1 bitstream.
- */
- int Decoder::get_operating_point(CUVIDOPERATINGPOINTINFO* oper_point_info) {
- return oper_point_info->codec == cudaVideoCodec_AV1 &&
- oper_point_info->av1.operating_points_cnt > 1
- ? 0
- : -1;
- }
|