diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..3c2dd3d --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,34 @@ +{ + "configurations": [ + { + "name": "Win32", + "includePath": [ + "${workspaceFolder}/src", + "${workspaceFolder}/_install/boost/include", + "${workspaceFolder}/_install/json/include", + "${workspaceFolder}/NvCodec/include", + "${workspaceFolder}/NvCodec/NvCodec", + "${workspaceFolder}/_install/webrtc/include", + "${workspaceFolder}/_install/webrtc/include/third_party/abseil-cpp", + "${workspaceFolder}/_install/webrtc/include/third_party/boringssl/src/include", + "${workspaceFolder}/_install/webrtc/include/third_party/libyuv/include" + ], + "defines": [ + "_DEBUG", + "UNICODE", + "_UNICODE", + "_CONSOLE", + "_WIN32_WINNT=0x0A00", + "WEBRTC_WIN", + "NOMINMAX", + "WIN32_LEAN_AND_MEAN" + ], + "windowsSdkVersion": "10.0.18362.0", + "compilerPath": "C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.24.28314/bin/Hostx64/x64/cl.exe", + "cStandard": "c11", + "cppStandard": "c++17", + "intelliSenseMode": "msvc-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/CHANGES.md b/CHANGES.md index ea83537..ca7c597 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -11,6 +11,9 @@ ## develop +- [UPDATE] Use the NVIDIA VIDEO CODEC SDK for H.264 encoding on Windows (only when it is available) +- [UPDATE] Use VideoToolbox for H.264 encoding/decoding on macOS + ## 1.0.4 - [UPDATE] Bumped webrtc-build to 80.3987.2.2 diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d3b160..2c1c40e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,9 @@ if (MSVC) iphlpapi.lib msdmo.lib Secur32.lib - wmcodecdspuuid.lib) + wmcodecdspuuid.lib + dxgi.lib + D3D11.lib) set(_COMPILE_DEFINITIONS _CONSOLE @@ -75,8 +77,14 @@ endif () string(SUBSTRING ${SORA_UNITY_SDK_COMMIT} 0 8 SORA_UNITY_SDK_COMMIT_SHORT) string(SUBSTRING ${WEBRTC_SRC_COMMIT} 0 8 WEBRTC_SRC_COMMIT_SHORT) +set(_INCLUDE_DIRECTORIES) + configure_file(src/sora_version.h.template ${CMAKE_CURRENT_BINARY_DIR}/sora_version.h) -set(_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_BINARY_DIR}) +list(APPEND _INCLUDE_DIRECTORIES ${CMAKE_CURRENT_BINARY_DIR}) + +list(APPEND _INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/src) +list(APPEND _INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/include) +list(APPEND _INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/NvCodec/NvCodec) set(Boost_USE_STATIC_LIBS ON) @@ -99,11 +107,16 @@ set(_SOURCES src/rtc/rtc_connection.cpp src/rtc/rtc_manager.cpp src/rtc/scalable_track_source.cpp + src/rtc/hw_video_encoder_factory.cpp + src/rtc/h264_format.cpp ) if (WIN32) list(APPEND _SOURCES - src/unity_camera_capturer_d3d11.cpp) + src/unity_camera_capturer_d3d11.cpp + src/hwenc_nvcodec/nvcodec_h264_encoder.cpp + NvCodec/NvCodec/NvEncoder/NvEncoder.cpp + NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.cpp) add_library(SoraUnitySdk SHARED ${_SOURCES}) elseif (APPLE) @@ -121,6 +134,7 @@ elseif (APPLE) COMPILE_OPTIONS "-x;objective-c++;-fmodules;-fcxx-modules") add_library(SoraUnitySdk MODULE ${_SOURCES}) + target_link_options(SoraUnitySdk PRIVATE -ObjC) set_target_properties(SoraUnitySdk PROPERTIES BUNDLE TRUE) set_target_properties(SoraUnitySdk PROPERTIES CXX_VISIBILITY_PRESET hidden) endif () diff --git a/NvCodec/NvCodec/NvDecoder/NvDecoder.cpp b/NvCodec/NvCodec/NvDecoder/NvDecoder.cpp new file mode 100644 index 0000000..b062602 --- /dev/null +++ b/NvCodec/NvCodec/NvDecoder/NvDecoder.cpp @@ -0,0 +1,671 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation.
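The changelog entry above says NVENC is used for H.264 encoding on Windows only when it is available. A minimal sketch of such an availability probe, assuming the vendored NvEncoder helper exposes TryLoadNvEncApi() as a public static member (as the patched SDK sample does); the factory added in src/rtc/hw_video_encoder_factory.cpp presumably performs a check along these lines rather than exactly this code:

#include "NvEncoder/NvEncoder.h"

// Probe for a usable NVENC runtime (nvEncodeAPI64.dll / libnvidia-encode.so.1).
// TryLoadNvEncApi() throws NVENCException when the library is missing or the
// installed driver is too old, so availability reduces to a try/catch.
static bool IsNvEncAvailable() {
  try {
    NvEncoder::TryLoadNvEncApi();
    return true;
  } catch (const NVENCException&) {
    return false;
  }
}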
All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#include +#include +#include + +#include "nvcuvid.h" +#include "NvDecoder/NvDecoder.h" + +#define START_TIMER auto start = std::chrono::high_resolution_clock::now(); +#define STOP_TIMER(print_message) std::cout << print_message << \ + std::chrono::duration_cast( \ + std::chrono::high_resolution_clock::now() - start).count() \ + << " ms " << std::endl; + +#define CUDA_DRVAPI_CALL( call ) \ + do \ + { \ + CUresult err__ = call; \ + if (err__ != CUDA_SUCCESS) \ + { \ + const char *szErrName = NULL; \ + cuGetErrorName(err__, &szErrName); \ + std::ostringstream errorLog; \ + errorLog << "CUDA driver API error " << szErrName ; \ + throw NVDECException::makeNVDECException(errorLog.str(), err__, __FUNCTION__, __FILE__, __LINE__); \ + } \ + } \ + while (0) + +static const char * GetVideoCodecString(cudaVideoCodec eCodec) { + static struct { + cudaVideoCodec eCodec; + const char *name; + } aCodecName [] = { + { cudaVideoCodec_MPEG1, "MPEG-1" }, + { cudaVideoCodec_MPEG2, "MPEG-2" }, + { cudaVideoCodec_MPEG4, "MPEG-4 (ASP)" }, + { cudaVideoCodec_VC1, "VC-1/WMV" }, + { cudaVideoCodec_H264, "AVC/H.264" }, + { cudaVideoCodec_JPEG, "M-JPEG" }, + { cudaVideoCodec_H264_SVC, "H.264/SVC" }, + { cudaVideoCodec_H264_MVC, "H.264/MVC" }, + { cudaVideoCodec_HEVC, "H.265/HEVC" }, + { cudaVideoCodec_VP8, "VP8" }, + { cudaVideoCodec_VP9, "VP9" }, + { cudaVideoCodec_NumCodecs, "Invalid" }, + { cudaVideoCodec_YUV420, "YUV 4:2:0" }, + { cudaVideoCodec_YV12, "YV12 4:2:0" }, + { cudaVideoCodec_NV12, "NV12 4:2:0" }, + { cudaVideoCodec_YUYV, "YUYV 4:2:2" }, + { cudaVideoCodec_UYVY, "UYVY 4:2:2" }, + }; + + if (eCodec >= 0 && eCodec <= cudaVideoCodec_NumCodecs) { + return aCodecName[eCodec].name; + } + for (int i = cudaVideoCodec_NumCodecs + 1; i < sizeof(aCodecName) / sizeof(aCodecName[0]); i++) { + if (eCodec == aCodecName[i].eCodec) { + return aCodecName[eCodec].name; + } + } + return "Unknown"; +} + +static const char * GetVideoChromaFormatString(cudaVideoChromaFormat eChromaFormat) { + static struct { + cudaVideoChromaFormat eChromaFormat; + const char *name; + } aChromaFormatName[] = { + { cudaVideoChromaFormat_Monochrome, "YUV 400 (Monochrome)" }, + { cudaVideoChromaFormat_420, "YUV 420" }, + { cudaVideoChromaFormat_422, "YUV 422" }, + { cudaVideoChromaFormat_444, "YUV 444" }, + }; + + if (eChromaFormat >= 0 && eChromaFormat < sizeof(aChromaFormatName) / sizeof(aChromaFormatName[0])) { + return aChromaFormatName[eChromaFormat].name; + } + return "Unknown"; +} + +static float GetChromaHeightFactor(cudaVideoChromaFormat eChromaFormat) +{ + float factor = 0.5; + switch (eChromaFormat) + { + case cudaVideoChromaFormat_Monochrome: + factor = 0.0; + break; + case cudaVideoChromaFormat_420: + factor = 0.5; + break; + case cudaVideoChromaFormat_422: + factor = 1.0; + break; + case cudaVideoChromaFormat_444: + factor = 1.0; + break; + } + + return factor; +} + +static int GetChromaPlaneCount(cudaVideoChromaFormat eChromaFormat) +{ + int numPlane = 1; + switch (eChromaFormat) + { + case cudaVideoChromaFormat_Monochrome: + numPlane = 0; + break; + case cudaVideoChromaFormat_420: + numPlane = 1; + break; + case cudaVideoChromaFormat_444: + numPlane = 2; + break; + } + + return 
numPlane; +} + +/* Return value from HandleVideoSequence() are interpreted as : +* 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces while creating parser) +*/ +int NvDecoder::HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat) +{ + START_TIMER + m_videoInfo.str(""); + m_videoInfo.clear(); + m_videoInfo << "Video Input Information" << std::endl + << "\tCodec : " << GetVideoCodecString(pVideoFormat->codec) << std::endl + << "\tFrame rate : " << pVideoFormat->frame_rate.numerator << "/" << pVideoFormat->frame_rate.denominator + << " = " << 1.0 * pVideoFormat->frame_rate.numerator / pVideoFormat->frame_rate.denominator << " fps" << std::endl + << "\tSequence : " << (pVideoFormat->progressive_sequence ? "Progressive" : "Interlaced") << std::endl + << "\tCoded size : [" << pVideoFormat->coded_width << ", " << pVideoFormat->coded_height << "]" << std::endl + << "\tDisplay area : [" << pVideoFormat->display_area.left << ", " << pVideoFormat->display_area.top << ", " + << pVideoFormat->display_area.right << ", " << pVideoFormat->display_area.bottom << "]" << std::endl + << "\tChroma : " << GetVideoChromaFormatString(pVideoFormat->chroma_format) << std::endl + << "\tBit depth : " << pVideoFormat->bit_depth_luma_minus8 + 8 + ; + m_videoInfo << std::endl; + + int nDecodeSurface = pVideoFormat->min_num_decode_surfaces; + + CUVIDDECODECAPS decodecaps; + memset(&decodecaps, 0, sizeof(decodecaps)); + + decodecaps.eCodecType = pVideoFormat->codec; + decodecaps.eChromaFormat = pVideoFormat->chroma_format; + decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + + if(!decodecaps.bIsSupported){ + NVDEC_THROW_ERROR("Codec not supported on this GPU", CUDA_ERROR_NOT_SUPPORTED); + return nDecodeSurface; + } + + if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) || + (pVideoFormat->coded_height > decodecaps.nMaxHeight)){ + + std::ostringstream errorString; + errorString << std::endl + << "Resolution : " << pVideoFormat->coded_width << "x" << pVideoFormat->coded_height << std::endl + << "Max Supported (wxh) : " << decodecaps.nMaxWidth << "x" << decodecaps.nMaxHeight << std::endl + << "Resolution not supported on this GPU"; + + const std::string cErr = errorString.str(); + NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED); + return nDecodeSurface; + } + + if ((pVideoFormat->coded_width>>4)*(pVideoFormat->coded_height>>4) > decodecaps.nMaxMBCount){ + + std::ostringstream errorString; + errorString << std::endl + << "MBCount : " << (pVideoFormat->coded_width >> 4)*(pVideoFormat->coded_height >> 4) << std::endl + << "Max Supported mbcnt : " << decodecaps.nMaxMBCount << std::endl + << "MBCount not supported on this GPU"; + + const std::string cErr = errorString.str(); + NVDEC_THROW_ERROR(cErr, CUDA_ERROR_NOT_SUPPORTED); + return nDecodeSurface; + } + + if (m_nWidth && m_nLumaHeight && m_nChromaHeight) { + + // cuvidCreateDecoder() has been called before, and now there's possible config change + return ReconfigureDecoder(pVideoFormat); + } + + // eCodec has been set in the constructor (for parser). Here it's set again for potential correction + m_eCodec = pVideoFormat->codec; + m_eChromaFormat = pVideoFormat->chroma_format; + m_nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; + m_nBPP = m_nBitDepthMinus8 > 0 ? 
2 : 1; + + if (m_eChromaFormat == cudaVideoChromaFormat_420) + m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_P016 : cudaVideoSurfaceFormat_NV12; + else if (m_eChromaFormat == cudaVideoChromaFormat_444) + m_eOutputFormat = pVideoFormat->bit_depth_luma_minus8 ? cudaVideoSurfaceFormat_YUV444_16Bit : cudaVideoSurfaceFormat_YUV444; + + m_videoFormat = *pVideoFormat; + + CUVIDDECODECREATEINFO videoDecodeCreateInfo = { 0 }; + videoDecodeCreateInfo.CodecType = pVideoFormat->codec; + videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format; + videoDecodeCreateInfo.OutputFormat = m_eOutputFormat; + videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8; + if (pVideoFormat->progressive_sequence) + videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave; + else + videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Adaptive; + videoDecodeCreateInfo.ulNumOutputSurfaces = 2; + // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by NVDEC hardware + videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID; + videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface; + videoDecodeCreateInfo.vidLock = m_ctxLock; + videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width; + videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height; + if (m_nMaxWidth < (int)pVideoFormat->coded_width) + m_nMaxWidth = pVideoFormat->coded_width; + if (m_nMaxHeight < (int)pVideoFormat->coded_height) + m_nMaxHeight = pVideoFormat->coded_height; + videoDecodeCreateInfo.ulMaxWidth = m_nMaxWidth; + videoDecodeCreateInfo.ulMaxHeight = m_nMaxHeight; + + if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) { + m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left; + m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top; + videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width; + videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height; + } else { + if (m_resizeDim.w && m_resizeDim.h) { + videoDecodeCreateInfo.display_area.left = pVideoFormat->display_area.left; + videoDecodeCreateInfo.display_area.top = pVideoFormat->display_area.top; + videoDecodeCreateInfo.display_area.right = pVideoFormat->display_area.right; + videoDecodeCreateInfo.display_area.bottom = pVideoFormat->display_area.bottom; + m_nWidth = m_resizeDim.w; + m_nLumaHeight = m_resizeDim.h; + } + + if (m_cropRect.r && m_cropRect.b) { + videoDecodeCreateInfo.display_area.left = m_cropRect.l; + videoDecodeCreateInfo.display_area.top = m_cropRect.t; + videoDecodeCreateInfo.display_area.right = m_cropRect.r; + videoDecodeCreateInfo.display_area.bottom = m_cropRect.b; + m_nWidth = m_cropRect.r - m_cropRect.l; + m_nLumaHeight = m_cropRect.b - m_cropRect.t; + } + videoDecodeCreateInfo.ulTargetWidth = m_nWidth; + videoDecodeCreateInfo.ulTargetHeight = m_nLumaHeight; + } + + m_nChromaHeight = (int)(m_nLumaHeight * GetChromaHeightFactor(videoDecodeCreateInfo.ChromaFormat)); + m_nNumChromaPlanes = GetChromaPlaneCount(videoDecodeCreateInfo.ChromaFormat); + m_nSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight; + m_nSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth; + m_displayRect.b = videoDecodeCreateInfo.display_area.bottom; + m_displayRect.t = videoDecodeCreateInfo.display_area.top; + m_displayRect.l = videoDecodeCreateInfo.display_area.left; + m_displayRect.r = videoDecodeCreateInfo.display_area.right; + + m_videoInfo << "Video Decoding Params:" << std::endl + << 
"\tNum Surfaces : " << videoDecodeCreateInfo.ulNumDecodeSurfaces << std::endl + << "\tCrop : [" << videoDecodeCreateInfo.display_area.left << ", " << videoDecodeCreateInfo.display_area.top << ", " + << videoDecodeCreateInfo.display_area.right << ", " << videoDecodeCreateInfo.display_area.bottom << "]" << std::endl + << "\tResize : " << videoDecodeCreateInfo.ulTargetWidth << "x" << videoDecodeCreateInfo.ulTargetHeight << std::endl + << "\tDeinterlace : " << std::vector{"Weave", "Bob", "Adaptive"}[videoDecodeCreateInfo.DeinterlaceMode] + ; + m_videoInfo << std::endl; + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + NVDEC_API_CALL(cuvidCreateDecoder(&m_hDecoder, &videoDecodeCreateInfo)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + STOP_TIMER("Session Initialization Time: "); + return nDecodeSurface; +} + +int NvDecoder::ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat) +{ + if (pVideoFormat->bit_depth_luma_minus8 != m_videoFormat.bit_depth_luma_minus8 || pVideoFormat->bit_depth_chroma_minus8 != m_videoFormat.bit_depth_chroma_minus8){ + + NVDEC_THROW_ERROR("Reconfigure Not supported for bit depth change", CUDA_ERROR_NOT_SUPPORTED); + } + + if (pVideoFormat->chroma_format != m_videoFormat.chroma_format) { + + NVDEC_THROW_ERROR("Reconfigure Not supported for chroma format change", CUDA_ERROR_NOT_SUPPORTED); + } + + bool bDecodeResChange = !(pVideoFormat->coded_width == m_videoFormat.coded_width && pVideoFormat->coded_height == m_videoFormat.coded_height); + bool bDisplayRectChange = !(pVideoFormat->display_area.bottom == m_videoFormat.display_area.bottom && pVideoFormat->display_area.top == m_videoFormat.display_area.top \ + && pVideoFormat->display_area.left == m_videoFormat.display_area.left && pVideoFormat->display_area.right == m_videoFormat.display_area.right); + + int nDecodeSurface = pVideoFormat->min_num_decode_surfaces; + + if ((pVideoFormat->coded_width > m_nMaxWidth) || (pVideoFormat->coded_height > m_nMaxHeight)) { + // For VP9, let driver handle the change if new width/height > maxwidth/maxheight + if ((m_eCodec != cudaVideoCodec_VP9) || m_bReconfigExternal) + { + NVDEC_THROW_ERROR("Reconfigure Not supported when width/height > maxwidth/maxheight", CUDA_ERROR_NOT_SUPPORTED); + } + return 1; + } + + if (!bDecodeResChange && !m_bReconfigExtPPChange) { + // if the coded_width/coded_height hasn't changed but display resolution has changed, then need to update width/height for + // correct output without cropping. Example : 1920x1080 vs 1920x1088 + if (bDisplayRectChange) + { + m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left; + m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top; + m_nChromaHeight = int(m_nLumaHeight * GetChromaHeightFactor(pVideoFormat->chroma_format)); + m_nNumChromaPlanes = GetChromaPlaneCount(pVideoFormat->chroma_format); + } + + // no need for reconfigureDecoder(). Just return + return 1; + } + + CUVIDRECONFIGUREDECODERINFO reconfigParams = { 0 }; + + reconfigParams.ulWidth = m_videoFormat.coded_width = pVideoFormat->coded_width; + reconfigParams.ulHeight = m_videoFormat.coded_height = pVideoFormat->coded_height; + + // Dont change display rect and get scaled output from decoder. 
This will help display app to present apps smoothly + reconfigParams.display_area.bottom = m_displayRect.b; + reconfigParams.display_area.top = m_displayRect.t; + reconfigParams.display_area.left = m_displayRect.l; + reconfigParams.display_area.right = m_displayRect.r; + reconfigParams.ulTargetWidth = m_nSurfaceWidth; + reconfigParams.ulTargetHeight = m_nSurfaceHeight; + + // If external reconfigure is called along with resolution change even if post processing params is not changed, + // do full reconfigure params update + if ((m_bReconfigExternal && bDecodeResChange) || m_bReconfigExtPPChange) { + // update display rect and target resolution if requested explicitely + m_bReconfigExternal = false; + m_bReconfigExtPPChange = false; + m_videoFormat = *pVideoFormat; + if (!(m_cropRect.r && m_cropRect.b) && !(m_resizeDim.w && m_resizeDim.h)) { + m_nWidth = pVideoFormat->display_area.right - pVideoFormat->display_area.left; + m_nLumaHeight = pVideoFormat->display_area.bottom - pVideoFormat->display_area.top; + reconfigParams.ulTargetWidth = pVideoFormat->coded_width; + reconfigParams.ulTargetHeight = pVideoFormat->coded_height; + } + else { + if (m_resizeDim.w && m_resizeDim.h) { + reconfigParams.display_area.left = pVideoFormat->display_area.left; + reconfigParams.display_area.top = pVideoFormat->display_area.top; + reconfigParams.display_area.right = pVideoFormat->display_area.right; + reconfigParams.display_area.bottom = pVideoFormat->display_area.bottom; + m_nWidth = m_resizeDim.w; + m_nLumaHeight = m_resizeDim.h; + } + + if (m_cropRect.r && m_cropRect.b) { + reconfigParams.display_area.left = m_cropRect.l; + reconfigParams.display_area.top = m_cropRect.t; + reconfigParams.display_area.right = m_cropRect.r; + reconfigParams.display_area.bottom = m_cropRect.b; + m_nWidth = m_cropRect.r - m_cropRect.l; + m_nLumaHeight = m_cropRect.b - m_cropRect.t; + } + reconfigParams.ulTargetWidth = m_nWidth; + reconfigParams.ulTargetHeight = m_nLumaHeight; + } + + m_nChromaHeight = int(m_nLumaHeight * GetChromaHeightFactor(pVideoFormat->chroma_format)); + m_nNumChromaPlanes = GetChromaPlaneCount(pVideoFormat->chroma_format); + m_nSurfaceHeight = reconfigParams.ulTargetHeight; + m_nSurfaceWidth = reconfigParams.ulTargetWidth; + m_displayRect.b = reconfigParams.display_area.bottom; + m_displayRect.t = reconfigParams.display_area.top; + m_displayRect.l = reconfigParams.display_area.left; + m_displayRect.r = reconfigParams.display_area.right; + } + + reconfigParams.ulNumDecodeSurfaces = nDecodeSurface; + + START_TIMER + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + NVDEC_API_CALL(cuvidReconfigureDecoder(m_hDecoder, &reconfigParams)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + STOP_TIMER("Session Reconfigure Time: "); + + return nDecodeSurface; +} + +int NvDecoder::setReconfigParams(const Rect *pCropRect, const Dim *pResizeDim) +{ + m_bReconfigExternal = true; + m_bReconfigExtPPChange = false; + if (pCropRect) + { + if (!((pCropRect->t == m_cropRect.t) && (pCropRect->l == m_cropRect.l) && + (pCropRect->b == m_cropRect.b) && (pCropRect->r == m_cropRect.r))) + { + m_bReconfigExtPPChange = true; + m_cropRect = *pCropRect; + } + } + if (pResizeDim) + { + if (!((pResizeDim->w == m_resizeDim.w) && (pResizeDim->h == m_resizeDim.h))) + { + m_bReconfigExtPPChange = true; + m_resizeDim = *pResizeDim; + } + } + + // Clear existing output buffers of different size + uint8_t *pFrame = NULL; + while (!m_vpFrame.empty()) + { + pFrame = m_vpFrame.back(); + m_vpFrame.pop_back(); + if (m_bUseDeviceFrame) + { + 
CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + CUDA_DRVAPI_CALL(cuMemFree((CUdeviceptr)pFrame)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + } + else + { + delete pFrame; + } + } + m_vpFrameRet.clear(); + + return 1; +} + +/* Return value from HandlePictureDecode() are interpreted as: +* 0: fail, >=1: succeeded +*/ +int NvDecoder::HandlePictureDecode(CUVIDPICPARAMS *pPicParams) { + if (!m_hDecoder) + { + NVDEC_THROW_ERROR("Decoder not initialized.", CUDA_ERROR_NOT_INITIALIZED); + return false; + } + m_nPicNumInDecodeOrder[pPicParams->CurrPicIdx] = m_nDecodePicCnt++; + NVDEC_API_CALL(cuvidDecodePicture(m_hDecoder, pPicParams)); + return 1; +} + +/* Return value from HandlePictureDisplay() are interpreted as: +* 0: fail, >=1: succeeded +*/ +int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) { + CUVIDPROCPARAMS videoProcessingParameters = {}; + videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame; + videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1; + videoProcessingParameters.top_field_first = pDispInfo->top_field_first; + videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0; + videoProcessingParameters.output_stream = m_cuvidStream; + + CUdeviceptr dpSrcFrame = 0; + unsigned int nSrcPitch = 0; + NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, &dpSrcFrame, + &nSrcPitch, &videoProcessingParameters)); + + CUVIDGETDECODESTATUS DecodeStatus; + memset(&DecodeStatus, 0, sizeof(DecodeStatus)); + CUresult result = cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus); + if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed)) + { + printf("Decode Error occurred for picture %d\n", m_nPicNumInDecodeOrder[pDispInfo->picture_index]); + } + + uint8_t *pDecodedFrame = nullptr; + { + std::lock_guard lock(m_mtxVPFrame); + if ((unsigned)++m_nDecodedFrame > m_vpFrame.size()) + { + // Not enough frames in stock + m_nFrameAlloc++; + uint8_t *pFrame = NULL; + if (m_bUseDeviceFrame) + { + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + if (m_bDeviceFramePitched) + { + CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, m_nWidth * m_nBPP, m_nLumaHeight + (m_nChromaHeight * m_nNumChromaPlanes), 16)); + } + else + { + CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize())); + } + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + } + else + { + pFrame = new uint8_t[GetFrameSize()]; + } + m_vpFrame.push_back(pFrame); + } + pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1]; + } + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + CUDA_MEMCPY2D m = { 0 }; + m.srcMemoryType = CU_MEMORYTYPE_DEVICE; + m.srcDevice = dpSrcFrame; + m.srcPitch = nSrcPitch; + m.dstMemoryType = m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST; + m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame); + m.dstPitch = m_nDeviceFramePitch ? 
m_nDeviceFramePitch : m_nWidth * m_nBPP; + m.WidthInBytes = m_nWidth * m_nBPP; + m.Height = m_nLumaHeight; + CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream)); + + m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight); + m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight); + m.Height = m_nChromaHeight; + CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream)); + + if (m_nNumChromaPlanes == 2) + { + m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight * 2); + m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nLumaHeight * 2); + m.Height = m_nChromaHeight; + CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream)); + } + CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream)); + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + + if ((int)m_vTimestamp.size() < m_nDecodedFrame) { + m_vTimestamp.resize(m_vpFrame.size()); + } + m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp; + + NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame)); + return 1; +} + +NvDecoder::NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, std::mutex *pMutex, + bool bLowLatency, bool bDeviceFramePitched, const Rect *pCropRect, const Dim *pResizeDim, int maxWidth, int maxHeight) : + m_cuContext(cuContext), m_bUseDeviceFrame(bUseDeviceFrame), m_eCodec(eCodec), m_pMutex(pMutex), m_bDeviceFramePitched(bDeviceFramePitched), + m_nMaxWidth (maxWidth), m_nMaxHeight(maxHeight) +{ + if (pCropRect) m_cropRect = *pCropRect; + if (pResizeDim) m_resizeDim = *pResizeDim; + + NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext)); + + CUVIDPARSERPARAMS videoParserParameters = {}; + videoParserParameters.CodecType = eCodec; + videoParserParameters.ulMaxNumDecodeSurfaces = 1; + videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 
0 : 1; + videoParserParameters.pUserData = this; + videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc; + videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc; + videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc; + if (m_pMutex) m_pMutex->lock(); + NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters)); + if (m_pMutex) m_pMutex->unlock(); +} + +NvDecoder::~NvDecoder() { + + START_TIMER + cuCtxPushCurrent(m_cuContext); + cuCtxPopCurrent(NULL); + + if (m_hParser) { + cuvidDestroyVideoParser(m_hParser); + } + + if (m_hDecoder) { + if (m_pMutex) m_pMutex->lock(); + cuvidDestroyDecoder(m_hDecoder); + if (m_pMutex) m_pMutex->unlock(); + } + + std::lock_guard lock(m_mtxVPFrame); + if (m_vpFrame.size() != m_nFrameAlloc) + { + //LOG(WARNING) << "nFrameAlloc(" << m_nFrameAlloc << ") != m_vpFrame.size()(" << m_vpFrame.size() << ")"; + } + for (uint8_t *pFrame : m_vpFrame) + { + if (m_bUseDeviceFrame) + { + if (m_pMutex) m_pMutex->lock(); + cuCtxPushCurrent(m_cuContext); + cuMemFree((CUdeviceptr)pFrame); + cuCtxPopCurrent(NULL); + if (m_pMutex) m_pMutex->unlock(); + } + else + { + delete[] pFrame; + } + } + cuvidCtxLockDestroy(m_ctxLock); + STOP_TIMER("Session Deinitialization Time: "); +} + +bool NvDecoder::Decode(const uint8_t *pData, int nSize, uint8_t ***pppFrame, int *pnFrameReturned, uint32_t flags, int64_t **ppTimestamp, int64_t timestamp, CUstream stream) +{ + if (!m_hParser) + { + NVDEC_THROW_ERROR("Parser not initialized.", CUDA_ERROR_NOT_INITIALIZED); + return false; + } + + m_nDecodedFrame = 0; + CUVIDSOURCEDATAPACKET packet = {0}; + packet.payload = pData; + packet.payload_size = nSize; + packet.flags = flags | CUVID_PKT_TIMESTAMP; + packet.timestamp = timestamp; + if (!pData || nSize == 0) { + packet.flags |= CUVID_PKT_ENDOFSTREAM; + } + m_cuvidStream = stream; + if (m_pMutex) m_pMutex->lock(); + NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet)); + if (m_pMutex) m_pMutex->unlock(); + m_cuvidStream = 0; + + if (m_nDecodedFrame > 0) + { + if (pppFrame) + { + m_vpFrameRet.clear(); + std::lock_guard lock(m_mtxVPFrame); + m_vpFrameRet.insert(m_vpFrameRet.begin(), m_vpFrame.begin(), m_vpFrame.begin() + m_nDecodedFrame); + *pppFrame = &m_vpFrameRet[0]; + } + if (ppTimestamp) + { + *ppTimestamp = &m_vTimestamp[0]; + } + } + if (pnFrameReturned) + { + *pnFrameReturned = m_nDecodedFrame; + } + return true; +} + +bool NvDecoder::DecodeLockFrame(const uint8_t *pData, int nSize, uint8_t ***pppFrame, int *pnFrameReturned, uint32_t flags, int64_t **ppTimestamp, int64_t timestamp, CUstream stream) +{ + bool ret = Decode(pData, nSize, pppFrame, pnFrameReturned, flags, ppTimestamp, timestamp, stream); + std::lock_guard lock(m_mtxVPFrame); + m_vpFrame.erase(m_vpFrame.begin(), m_vpFrame.begin() + m_nDecodedFrame); + return true; +} + +void NvDecoder::UnlockFrame(uint8_t **ppFrame, int nFrame) +{ + std::lock_guard lock(m_mtxVPFrame); + m_vpFrame.insert(m_vpFrame.end(), &ppFrame[0], &ppFrame[nFrame]); +} diff --git a/NvCodec/NvCodec/NvDecoder/NvDecoder.h b/NvCodec/NvCodec/NvDecoder/NvDecoder.h new file mode 100644 index 0000000..1eaed1f --- /dev/null +++ b/NvCodec/NvCodec/NvDecoder/NvDecoder.h @@ -0,0 +1,280 @@ +/* +* Copyright 2017-2018 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. 
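A minimal usage sketch of the NvDecoder::Decode() path defined above (not part of the patch): host-memory NV12 output from an H.264 elementary stream. ReadNextAccessUnit() and ConsumeNV12Frame() are placeholder hooks for the caller's packet source and frame sink; everything else is taken from the class as added here.

#include <cstdint>
#include <cuda.h>
#include "NvDecoder/NvDecoder.h"

// Placeholders supplied by the caller (not part of the SDK).
bool ReadNextAccessUnit(const uint8_t** ppData, int* pnSize);
void ConsumeNV12Frame(const uint8_t* pFrame, int nBytes);

void DecodeH264Stream() {
  CUdevice cuDevice = 0;
  CUcontext cuContext = nullptr;
  cuInit(0);
  cuDeviceGet(&cuDevice, 0);
  cuCtxCreate(&cuContext, 0, cuDevice);

  // bUseDeviceFrame = false: decoded frames are copied back to host memory.
  NvDecoder dec(cuContext, false, cudaVideoCodec_H264);

  const uint8_t* pData = nullptr;
  int nSize = 0;
  uint8_t** ppFrames = nullptr;
  int nFrames = 0;
  while (ReadNextAccessUnit(&pData, &nSize)) {
    dec.Decode(pData, nSize, &ppFrames, &nFrames);
    for (int i = 0; i < nFrames; i++) {
      // Each entry points to dec.GetFrameSize() bytes of NV12 data.
      ConsumeNV12Frame(ppFrames[i], dec.GetFrameSize());
    }
  }
  dec.Decode(nullptr, 0, &ppFrames, &nFrames);  // flush: sends CUVID_PKT_ENDOFSTREAM
  for (int i = 0; i < nFrames; i++) {
    ConsumeNV12Frame(ppFrames[i], dec.GetFrameSize());
  }
  cuCtxDestroy(cuContext);
}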
Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include "nvcuvid.h" + +/** +* @brief Exception class for error reporting from the decode API. +*/ +class NVDECException : public std::exception +{ +public: + NVDECException(const std::string& errorStr, const CUresult errorCode) + : m_errorString(errorStr), m_errorCode(errorCode) {} + + virtual ~NVDECException() throw() {} + virtual const char* what() const throw() { return m_errorString.c_str(); } + CUresult getErrorCode() const { return m_errorCode; } + const std::string& getErrorString() const { return m_errorString; } + static NVDECException makeNVDECException(const std::string& errorStr, const CUresult errorCode, + const std::string& functionName, const std::string& fileName, int lineNo); +private: + std::string m_errorString; + CUresult m_errorCode; +}; + +inline NVDECException NVDECException::makeNVDECException(const std::string& errorStr, const CUresult errorCode, const std::string& functionName, + const std::string& fileName, int lineNo) +{ + std::ostringstream errorLog; + errorLog << functionName << " : " << errorStr << " at " << fileName << ":" << lineNo << std::endl; + NVDECException exception(errorLog.str(), errorCode); + return exception; +} + +#define NVDEC_THROW_ERROR( errorStr, errorCode ) \ + do \ + { \ + throw NVDECException::makeNVDECException(errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \ + } while (0) + + +#define NVDEC_API_CALL( cuvidAPI ) \ + do \ + { \ + CUresult errorCode = cuvidAPI; \ + if( errorCode != CUDA_SUCCESS) \ + { \ + std::ostringstream errorLog; \ + errorLog << #cuvidAPI << " returned error " << errorCode; \ + throw NVDECException::makeNVDECException(errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \ + } \ + } while (0) + +struct Rect { + int l, t, r, b; +}; + +struct Dim { + int w, h; +}; + +/** +* @brief Base class for decoder interface. +*/ +class NvDecoder { + +public: + /** + * @brief This function is used to initialize the decoder session. + * Application must call this function to initialize the decoder, before + * starting to decode any frames. + */ + NvDecoder(CUcontext cuContext, bool bUseDeviceFrame, cudaVideoCodec eCodec, std::mutex *pMutex = NULL, bool bLowLatency = false, + bool bDeviceFramePitched = false, const Rect *pCropRect = NULL, const Dim *pResizeDim = NULL, + int maxWidth = 0, int maxHeight = 0); + ~NvDecoder(); + + /** + * @brief This function is used to get the current CUDA context. + */ + CUcontext GetContext() { return m_cuContext; } + + /** + * @brief This function is used to get the current decode width. + */ + int GetWidth() { assert(m_nWidth); return m_nWidth; } + + /** + * @brief This function is used to get the current decode height (Luma height). + */ + int GetHeight() { assert(m_nLumaHeight); return m_nLumaHeight; } + + /** + * @brief This function is used to get the current chroma height. + */ + int GetChromaHeight() { assert(m_nChromaHeight); return m_nChromaHeight; } + + /** + * @brief This function is used to get the number of chroma planes. + */ + int GetNumChromaPlanes() { assert(m_nNumChromaPlanes); return m_nNumChromaPlanes; } + + /** + * @brief This function is used to get the current frame size based on pixel format. 
+ */ + int GetFrameSize() { assert(m_nWidth); return m_nWidth * (m_nLumaHeight + m_nChromaHeight * m_nNumChromaPlanes) * m_nBPP; } + + /** + * @brief This function is used to get the pitch of the device buffer holding the decoded frame. + */ + int GetDeviceFramePitch() { assert(m_nWidth); return m_nDeviceFramePitch ? (int)m_nDeviceFramePitch : m_nWidth * m_nBPP; } + + /** + * @brief This function is used to get the bit depth associated with the pixel format. + */ + int GetBitDepth() { assert(m_nWidth); return m_nBitDepthMinus8 + 8; } + + /** + * @brief This function is used to get the bytes used per pixel. + */ + int GetBPP() { assert(m_nWidth); return m_nBPP; } + + /** + * @brief This function is used to get the YUV chroma format + */ + cudaVideoSurfaceFormat GetOutputFormat() { return m_eOutputFormat; } + + /** + * @brief This function is used to get information about the video stream (codec, display parameters etc) + */ + CUVIDEOFORMAT GetVideoFormatInfo() { assert(m_nWidth); return m_videoFormat; } + + /** + * @brief This function is used to print information about the video stream + */ + std::string GetVideoInfo() const { return m_videoInfo.str(); } + + /** + * @brief This function decodes a frame and returns frames that are available for display. + The frames should be used or buffered before making subsequent calls to the Decode function again + * @param pData - pointer to the data buffer that is to be decoded + * @param nSize - size of the data buffer in bytes + * @param pppFrame - CUvideopacketflags for setting decode options + * @param pnFrameReturned - pointer to array of decoded frames that are returned + * @param flags - CUvideopacketflags for setting decode options + * @param ppTimestamp - pointer to array of timestamps for decoded frames that are returned + * @param timestamp - presentation timestamp + * @param stream - CUstream to be used for post-processing operations + */ + bool Decode(const uint8_t *pData, int nSize, uint8_t ***pppFrame, int *pnFrameReturned, uint32_t flags = 0, int64_t **ppTimestamp = NULL, int64_t timestamp = 0, CUstream stream = 0); + + /** + * @brief This function decodes a frame and returns the locked frame buffers + * This makes the buffers available for use by the application without the buffers + * getting overwritten, even if subsequent decode calls are made. 
The frame buffers + * remain locked, until ::UnlockFrame() is called + * @param pData - pointer to the data buffer that is to be decoded + * @param nSize - size of the data buffer in bytes + * @param pppFrame - CUvideopacketflags for setting decode options + * @param pnFrameReturned - pointer to array of decoded frames that are returned + * @param flags - CUvideopacketflags for setting decode options + * @param ppTimestamp - pointer to array of timestamps for decoded frames that are returned + * @param timestamp - presentation timestamp + * @param stream - CUstream to be used for post-processing operations + */ + bool DecodeLockFrame(const uint8_t *pData, int nSize, uint8_t ***pppFrame, int *pnFrameReturned, uint32_t flags = 0, int64_t **ppTimestamp = NULL, int64_t timestamp = 0, CUstream stream = 0); + + /** + * @brief This function unlocks the frame buffer and makes the frame buffers available for write again + * @param ppFrame - pointer to array of frames that are to be unlocked + * @param nFrame - number of frames to be unlocked + */ + void UnlockFrame(uint8_t **ppFrame, int nFrame); + + /** + * @brief This function allow app to set decoder reconfig params + * @param pCropRect - cropping rectangle coordinates + * @param pResizeDim - width and height of resized output + */ + int setReconfigParams(const Rect * pCropRect, const Dim * pResizeDim); + +private: + /** + * @brief Callback function to be registered for getting a callback when decoding of sequence starts + */ + static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat) { return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat); } + + /** + * @brief Callback function to be registered for getting a callback when a decoded frame is ready to be decoded + */ + static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams) { return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams); } + + /** + * @brief Callback function to be registered for getting a callback when a decoded frame is available for display + */ + static int CUDAAPI HandlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo) { return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo); } + + /** + * @brief This function gets called when a sequence is ready to be decoded. The function also gets called + when there is format change + */ + int HandleVideoSequence(CUVIDEOFORMAT *pVideoFormat); + + /** + * @brief This function gets called when a picture is ready to be decoded. cuvidDecodePicture is called from this function + * to decode the picture + */ + int HandlePictureDecode(CUVIDPICPARAMS *pPicParams); + + /** + * @brief This function gets called after a picture is decoded and available for display. Frames are fetched and stored in + internal buffer + */ + int HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo); + + /** + * @brief This function reconfigure decoder if there is a change in sequence params. 
+ */ + int ReconfigureDecoder(CUVIDEOFORMAT *pVideoFormat); + +private: + CUcontext m_cuContext = NULL; + CUvideoctxlock m_ctxLock; + std::mutex *m_pMutex; + CUvideoparser m_hParser = NULL; + CUvideodecoder m_hDecoder = NULL; + bool m_bUseDeviceFrame; + // dimension of the output + unsigned int m_nWidth = 0, m_nLumaHeight = 0, m_nChromaHeight = 0; + unsigned int m_nNumChromaPlanes = 0; + // height of the mapped surface + int m_nSurfaceHeight = 0; + int m_nSurfaceWidth = 0; + cudaVideoCodec m_eCodec = cudaVideoCodec_NumCodecs; + cudaVideoChromaFormat m_eChromaFormat; + cudaVideoSurfaceFormat m_eOutputFormat; + int m_nBitDepthMinus8 = 0; + int m_nBPP = 1; + CUVIDEOFORMAT m_videoFormat = {}; + Rect m_displayRect = {}; + // stock of frames + std::vector m_vpFrame; + // decoded frames for return + std::vector m_vpFrameRet; + // timestamps of decoded frames + std::vector m_vTimestamp; + int m_nDecodedFrame = 0, m_nDecodedFrameReturned = 0; + int m_nDecodePicCnt = 0, m_nPicNumInDecodeOrder[32]; + bool m_bEndDecodeDone = false; + std::mutex m_mtxVPFrame; + int m_nFrameAlloc = 0; + CUstream m_cuvidStream = 0; + bool m_bDeviceFramePitched = false; + size_t m_nDeviceFramePitch = 0; + Rect m_cropRect = {}; + Dim m_resizeDim = {}; + + std::ostringstream m_videoInfo; + unsigned int m_nMaxWidth = 0, m_nMaxHeight = 0; + bool m_bReconfigExternal = false; + bool m_bReconfigExtPPChange = false; +}; diff --git a/NvCodec/NvCodec/NvEncoder/NvEncoder.cpp b/NvCodec/NvCodec/NvEncoder/NvEncoder.cpp new file mode 100644 index 0000000..edbe62e --- /dev/null +++ b/NvCodec/NvCodec/NvEncoder/NvEncoder.cpp @@ -0,0 +1,1023 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. 
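The DecodeLockFrame()/UnlockFrame() pair documented in the header above keeps returned buffers out of the decoder's reuse pool until the caller hands them back. A small sketch of that pattern (an illustration, not part of the patch); the copied pointer array is what must eventually be passed to UnlockFrame():

#include <cstdint>
#include <vector>
#include "NvDecoder/NvDecoder.h"

// Decode one packet but keep ownership of the returned buffers; they stay
// valid across later Decode/DecodeLockFrame calls until released.
std::vector<uint8_t*> DecodeAndHold(NvDecoder& dec, const uint8_t* pData, int nSize) {
  uint8_t** ppFrames = nullptr;
  int nFrames = 0;
  dec.DecodeLockFrame(pData, nSize, &ppFrames, &nFrames);
  return std::vector<uint8_t*>(ppFrames, ppFrames + nFrames);
}

// Later, once the held frames have been consumed:
//   dec.UnlockFrame(held.data(), static_cast<int>(held.size()));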
+* +*/ + +#include "NvEncoder/NvEncoder.h" +#if !defined(_WIN32) +#include +#endif + +#ifndef _WIN32 +#include +static inline bool operator==(const GUID& guid1, const GUID& guid2) { + return !memcmp(&guid1, &guid2, sizeof(GUID)); +} + +static inline bool operator!=(const GUID& guid1, const GUID& guid2) { + return !(guid1 == guid2); +} +#endif + +NvEncoder::NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, + void* pDevice, + uint32_t nWidth, + uint32_t nHeight, + NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nExtraOutputDelay, + bool bMotionEstimationOnly, + bool bOutputInVideoMemory) + : m_pDevice(pDevice), + m_eDeviceType(eDeviceType), + m_nWidth(nWidth), + m_nHeight(nHeight), + m_nMaxEncodeWidth(nWidth), + m_nMaxEncodeHeight(nHeight), + m_eBufferFormat(eBufferFormat), + m_bMotionEstimationOnly(bMotionEstimationOnly), + m_bOutputInVideoMemory(bOutputInVideoMemory), + m_nExtraOutputDelay(nExtraOutputDelay), + m_hEncoder(nullptr) { + LoadNvEncApi(); + + if (!m_nvenc.nvEncOpenEncodeSession) { + m_nEncoderBuffer = 0; + NVENC_THROW_ERROR("EncodeAPI not found", NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS encodeSessionExParams = { + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER}; + encodeSessionExParams.device = m_pDevice; + encodeSessionExParams.deviceType = m_eDeviceType; + encodeSessionExParams.apiVersion = NVENCAPI_VERSION; + void* hEncoder = NULL; + NVENC_API_CALL( + m_nvenc.nvEncOpenEncodeSessionEx(&encodeSessionExParams, &hEncoder)); + m_hEncoder = hEncoder; +} + +void NvEncoder::TryLoadNvEncApi() { +#if defined(_WIN32) +#if defined(_WIN64) + HMODULE hModule = LoadLibrary(TEXT("nvEncodeAPI64.dll")); +#else + HMODULE hModule = LoadLibrary(TEXT("nvEncodeAPI.dll")); +#endif +#else + void* hModule = dlopen("libnvidia-encode.so.1", RTLD_LAZY); +#endif + + if (hModule == NULL) { + NVENC_THROW_ERROR( + "NVENC library file is not found. Please ensure NV driver is installed", + NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + typedef NVENCSTATUS(NVENCAPI * + NvEncodeAPIGetMaxSupportedVersion_Type)(uint32_t*); +#if defined(_WIN32) + NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion = + (NvEncodeAPIGetMaxSupportedVersion_Type)GetProcAddress( + hModule, "NvEncodeAPIGetMaxSupportedVersion"); +#else + NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion = + (NvEncodeAPIGetMaxSupportedVersion_Type)dlsym( + hModule, "NvEncodeAPIGetMaxSupportedVersion"); +#endif + + uint32_t version = 0; + uint32_t currentVersion = + (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION; + NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version)); + if (currentVersion > version) { +#if defined(_WIN32) + FreeLibrary((HMODULE)hModule); +#else + dlclose(hModule); +#endif + NVENC_THROW_ERROR( + "Current Driver Version does not support this NvEncodeAPI version, " + "please upgrade driver", + NV_ENC_ERR_INVALID_VERSION); + } +#if defined(_WIN32) + FreeLibrary((HMODULE)hModule); +#else + dlclose(hModule); +#endif +} + +void NvEncoder::LoadNvEncApi() { +#if defined(_WIN32) +#if defined(_WIN64) + HMODULE hModule = LoadLibrary(TEXT("nvEncodeAPI64.dll")); +#else + HMODULE hModule = LoadLibrary(TEXT("nvEncodeAPI.dll")); +#endif +#else + void* hModule = dlopen("libnvidia-encode.so.1", RTLD_LAZY); +#endif + + if (hModule == NULL) { + NVENC_THROW_ERROR( + "NVENC library file is not found. 
Please ensure NV driver is installed", + NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + m_hModule = hModule; + + typedef NVENCSTATUS(NVENCAPI * + NvEncodeAPIGetMaxSupportedVersion_Type)(uint32_t*); +#if defined(_WIN32) + NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion = + (NvEncodeAPIGetMaxSupportedVersion_Type)GetProcAddress( + hModule, "NvEncodeAPIGetMaxSupportedVersion"); +#else + NvEncodeAPIGetMaxSupportedVersion_Type NvEncodeAPIGetMaxSupportedVersion = + (NvEncodeAPIGetMaxSupportedVersion_Type)dlsym( + hModule, "NvEncodeAPIGetMaxSupportedVersion"); +#endif + + uint32_t version = 0; + uint32_t currentVersion = + (NVENCAPI_MAJOR_VERSION << 4) | NVENCAPI_MINOR_VERSION; + NVENC_API_CALL(NvEncodeAPIGetMaxSupportedVersion(&version)); + if (currentVersion > version) { + NVENC_THROW_ERROR( + "Current Driver Version does not support this NvEncodeAPI version, " + "please upgrade driver", + NV_ENC_ERR_INVALID_VERSION); + } + + typedef NVENCSTATUS(NVENCAPI * NvEncodeAPICreateInstance_Type)( + NV_ENCODE_API_FUNCTION_LIST*); +#if defined(_WIN32) + NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance = + (NvEncodeAPICreateInstance_Type)GetProcAddress( + hModule, "NvEncodeAPICreateInstance"); +#else + NvEncodeAPICreateInstance_Type NvEncodeAPICreateInstance = + (NvEncodeAPICreateInstance_Type)dlsym(hModule, + "NvEncodeAPICreateInstance"); +#endif + + if (!NvEncodeAPICreateInstance) { + NVENC_THROW_ERROR( + "Cannot find NvEncodeAPICreateInstance() entry in NVENC library", + NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + m_nvenc = {NV_ENCODE_API_FUNCTION_LIST_VER}; + NVENC_API_CALL(NvEncodeAPICreateInstance(&m_nvenc)); +} + +NvEncoder::~NvEncoder() { + DestroyHWEncoder(); + + if (m_hModule) { +#if defined(_WIN32) + FreeLibrary((HMODULE)m_hModule); +#else + dlclose(m_hModule); +#endif + m_hModule = nullptr; + } +} + +void NvEncoder::CreateDefaultEncoderParams( + NV_ENC_INITIALIZE_PARAMS* pIntializeParams, + GUID codecGuid, + GUID presetGuid) { + if (!m_hEncoder) { + NVENC_THROW_ERROR("Encoder Initialization failed", + NV_ENC_ERR_NO_ENCODE_DEVICE); + return; + } + + if (pIntializeParams == nullptr || + pIntializeParams->encodeConfig == nullptr) { + NVENC_THROW_ERROR( + "pInitializeParams and pInitializeParams->encodeConfig can't be NULL", + NV_ENC_ERR_INVALID_PTR); + } + + memset(pIntializeParams->encodeConfig, 0, sizeof(NV_ENC_CONFIG)); + auto pEncodeConfig = pIntializeParams->encodeConfig; + memset(pIntializeParams, 0, sizeof(NV_ENC_INITIALIZE_PARAMS)); + pIntializeParams->encodeConfig = pEncodeConfig; + + pIntializeParams->encodeConfig->version = NV_ENC_CONFIG_VER; + pIntializeParams->version = NV_ENC_INITIALIZE_PARAMS_VER; + + pIntializeParams->encodeGUID = codecGuid; + pIntializeParams->presetGUID = presetGuid; + pIntializeParams->encodeWidth = m_nWidth; + pIntializeParams->encodeHeight = m_nHeight; + pIntializeParams->darWidth = m_nWidth; + pIntializeParams->darHeight = m_nHeight; + pIntializeParams->frameRateNum = 30; + pIntializeParams->frameRateDen = 1; + pIntializeParams->enablePTD = 1; + pIntializeParams->reportSliceOffsets = 0; + pIntializeParams->enableSubFrameWrite = 0; + pIntializeParams->maxEncodeWidth = m_nWidth; + pIntializeParams->maxEncodeHeight = m_nHeight; + pIntializeParams->enableMEOnlyMode = m_bMotionEstimationOnly; + pIntializeParams->enableOutputInVidmem = m_bOutputInVideoMemory; +#if defined(_WIN32) + if (!m_bOutputInVideoMemory) { + pIntializeParams->enableEncodeAsync = + GetCapabilityValue(codecGuid, NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT); + } +#endif + + 
NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER, + {NV_ENC_CONFIG_VER}}; + m_nvenc.nvEncGetEncodePresetConfig(m_hEncoder, codecGuid, presetGuid, + &presetConfig); + memcpy(pIntializeParams->encodeConfig, &presetConfig.presetCfg, + sizeof(NV_ENC_CONFIG)); + pIntializeParams->encodeConfig->frameIntervalP = 1; + pIntializeParams->encodeConfig->gopLength = NVENC_INFINITE_GOPLENGTH; + + pIntializeParams->encodeConfig->rcParams.rateControlMode = + NV_ENC_PARAMS_RC_CONSTQP; + + if (pIntializeParams->presetGUID != NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID && + pIntializeParams->presetGUID != NV_ENC_PRESET_LOSSLESS_HP_GUID) { + pIntializeParams->encodeConfig->rcParams.constQP = {28, 31, 25}; + } + + if (pIntializeParams->encodeGUID == NV_ENC_CODEC_H264_GUID) { + if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || + m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + pIntializeParams->encodeConfig->encodeCodecConfig.h264Config + .chromaFormatIDC = 3; + } + pIntializeParams->encodeConfig->encodeCodecConfig.h264Config.idrPeriod = + pIntializeParams->encodeConfig->gopLength; + } else if (pIntializeParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) { + pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig + .pixelBitDepthMinus8 = + (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || + m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) + ? 2 + : 0; + if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || + m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig + .chromaFormatIDC = 3; + } + pIntializeParams->encodeConfig->encodeCodecConfig.hevcConfig.idrPeriod = + pIntializeParams->encodeConfig->gopLength; + } + + return; +} + +void NvEncoder::CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncoderParams) { + if (!m_hEncoder) { + NVENC_THROW_ERROR("Encoder Initialization failed", + NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + if (!pEncoderParams) { + NVENC_THROW_ERROR("Invalid NV_ENC_INITIALIZE_PARAMS ptr", + NV_ENC_ERR_INVALID_PTR); + } + + if (pEncoderParams->encodeWidth == 0 || pEncoderParams->encodeHeight == 0) { + NVENC_THROW_ERROR("Invalid encoder width and height", + NV_ENC_ERR_INVALID_PARAM); + } + + if (pEncoderParams->encodeGUID != NV_ENC_CODEC_H264_GUID && + pEncoderParams->encodeGUID != NV_ENC_CODEC_HEVC_GUID) { + NVENC_THROW_ERROR("Invalid codec guid", NV_ENC_ERR_INVALID_PARAM); + } + + if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) { + if (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || + m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + NVENC_THROW_ERROR("10-bit format isn't supported by H264 encoder", + NV_ENC_ERR_INVALID_PARAM); + } + } + + // set other necessary params if not set yet + if (pEncoderParams->encodeGUID == NV_ENC_CODEC_H264_GUID) { + if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444) && + (pEncoderParams->encodeConfig->encodeCodecConfig.h264Config + .chromaFormatIDC != 3)) { + NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM); + } + } + + if (pEncoderParams->encodeGUID == NV_ENC_CODEC_HEVC_GUID) { + bool yuv10BitFormat = + (m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || + m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) + ? 
true + : false; + if (yuv10BitFormat && pEncoderParams->encodeConfig->encodeCodecConfig + .hevcConfig.pixelBitDepthMinus8 != 2) { + NVENC_THROW_ERROR("Invalid PixelBitdepth", NV_ENC_ERR_INVALID_PARAM); + } + + if ((m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444 || + m_eBufferFormat == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) && + (pEncoderParams->encodeConfig->encodeCodecConfig.hevcConfig + .chromaFormatIDC != 3)) { + NVENC_THROW_ERROR("Invalid ChromaFormatIDC", NV_ENC_ERR_INVALID_PARAM); + } + } + + memcpy(&m_initializeParams, pEncoderParams, sizeof(m_initializeParams)); + m_initializeParams.version = NV_ENC_INITIALIZE_PARAMS_VER; + + if (pEncoderParams->encodeConfig) { + memcpy(&m_encodeConfig, pEncoderParams->encodeConfig, + sizeof(m_encodeConfig)); + m_encodeConfig.version = NV_ENC_CONFIG_VER; + } else { + NV_ENC_PRESET_CONFIG presetConfig = {NV_ENC_PRESET_CONFIG_VER, + {NV_ENC_CONFIG_VER}}; + m_nvenc.nvEncGetEncodePresetConfig(m_hEncoder, pEncoderParams->encodeGUID, + NV_ENC_PRESET_DEFAULT_GUID, + &presetConfig); + memcpy(&m_encodeConfig, &presetConfig.presetCfg, sizeof(NV_ENC_CONFIG)); + m_encodeConfig.version = NV_ENC_CONFIG_VER; + m_encodeConfig.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP; + m_encodeConfig.rcParams.constQP = {28, 31, 25}; + } + m_initializeParams.encodeConfig = &m_encodeConfig; + + NVENC_API_CALL( + m_nvenc.nvEncInitializeEncoder(m_hEncoder, &m_initializeParams)); + + m_bEncoderInitialized = true; + m_nWidth = m_initializeParams.encodeWidth; + m_nHeight = m_initializeParams.encodeHeight; + m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth; + m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight; + + m_nEncoderBuffer = m_encodeConfig.frameIntervalP + + m_encodeConfig.rcParams.lookaheadDepth + + m_nExtraOutputDelay; + m_nOutputDelay = m_nEncoderBuffer - 1; + m_vMappedInputBuffers.resize(m_nEncoderBuffer, nullptr); + + if (!m_bOutputInVideoMemory) { + m_vpCompletionEvent.resize(m_nEncoderBuffer, nullptr); + } + +#if defined(_WIN32) + for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) { + m_vpCompletionEvent[i] = CreateEvent(NULL, FALSE, FALSE, NULL); + NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER}; + eventParams.completionEvent = m_vpCompletionEvent[i]; + m_nvenc.nvEncRegisterAsyncEvent(m_hEncoder, &eventParams); + } +#endif + + if (m_bMotionEstimationOnly) { + m_vMappedRefBuffers.resize(m_nEncoderBuffer, nullptr); + + if (!m_bOutputInVideoMemory) { + InitializeMVOutputBuffer(); + } + } else { + if (!m_bOutputInVideoMemory) { + m_vBitstreamOutputBuffer.resize(m_nEncoderBuffer, nullptr); + InitializeBitstreamBuffer(); + } + } + + AllocateInputBuffers(m_nEncoderBuffer); +} + +void NvEncoder::DestroyEncoder() { + if (!m_hEncoder) { + return; + } + + ReleaseInputBuffers(); + + DestroyHWEncoder(); +} + +void NvEncoder::DestroyHWEncoder() { + if (!m_hEncoder) { + return; + } + +#if defined(_WIN32) + for (uint32_t i = 0; i < m_vpCompletionEvent.size(); i++) { + if (m_vpCompletionEvent[i]) { + NV_ENC_EVENT_PARAMS eventParams = {NV_ENC_EVENT_PARAMS_VER}; + eventParams.completionEvent = m_vpCompletionEvent[i]; + m_nvenc.nvEncUnregisterAsyncEvent(m_hEncoder, &eventParams); + CloseHandle(m_vpCompletionEvent[i]); + } + } + m_vpCompletionEvent.clear(); +#endif + + if (m_bMotionEstimationOnly) { + DestroyMVOutputBuffer(); + } else { + DestroyBitstreamBuffer(); + } + + m_nvenc.nvEncDestroyEncoder(m_hEncoder); + + m_hEncoder = nullptr; + + m_bEncoderInitialized = false; +} + +const NvEncInputFrame* NvEncoder::GetNextInputFrame() { + int i = m_iToSend % 
m_nEncoderBuffer; + return &m_vInputFrames[i]; +} + +const NvEncInputFrame* NvEncoder::GetNextReferenceFrame() { + int i = m_iToSend % m_nEncoderBuffer; + return &m_vReferenceFrames[i]; +} + +void NvEncoder::MapResources(uint32_t bfrIdx) { + NV_ENC_MAP_INPUT_RESOURCE mapInputResource = {NV_ENC_MAP_INPUT_RESOURCE_VER}; + + mapInputResource.registeredResource = m_vRegisteredResources[bfrIdx]; + NVENC_API_CALL(m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource)); + m_vMappedInputBuffers[bfrIdx] = mapInputResource.mappedResource; + + if (m_bMotionEstimationOnly) { + mapInputResource.registeredResource = + m_vRegisteredResourcesForReference[bfrIdx]; + NVENC_API_CALL( + m_nvenc.nvEncMapInputResource(m_hEncoder, &mapInputResource)); + m_vMappedRefBuffers[bfrIdx] = mapInputResource.mappedResource; + } +} + +void NvEncoder::EncodeFrame(std::vector>& vPacket, + NV_ENC_PIC_PARAMS* pPicParams) { + vPacket.clear(); + if (!IsHWEncoderInitialized()) { + NVENC_THROW_ERROR("Encoder device not found", NV_ENC_ERR_NO_ENCODE_DEVICE); + } + + int bfrIdx = m_iToSend % m_nEncoderBuffer; + + MapResources(bfrIdx); + + NVENCSTATUS nvStatus = DoEncode(m_vMappedInputBuffers[bfrIdx], + m_vBitstreamOutputBuffer[bfrIdx], pPicParams); + + if (nvStatus == NV_ENC_SUCCESS || nvStatus == NV_ENC_ERR_NEED_MORE_INPUT) { + m_iToSend++; + GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, true); + } else { + NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus); + } +} + +void NvEncoder::RunMotionEstimation(std::vector& mvData) { + if (!m_hEncoder) { + NVENC_THROW_ERROR("Encoder Initialization failed", + NV_ENC_ERR_NO_ENCODE_DEVICE); + return; + } + + const uint32_t bfrIdx = m_iToSend % m_nEncoderBuffer; + + MapResources(bfrIdx); + + NVENCSTATUS nvStatus = DoMotionEstimation(m_vMappedInputBuffers[bfrIdx], + m_vMappedRefBuffers[bfrIdx], + m_vMVDataOutputBuffer[bfrIdx]); + + if (nvStatus == NV_ENC_SUCCESS) { + m_iToSend++; + std::vector> vPacket; + GetEncodedPacket(m_vMVDataOutputBuffer, vPacket, true); + if (vPacket.size() != 1) { + NVENC_THROW_ERROR( + "GetEncodedPacket() doesn't return one (and only one) MVData", + NV_ENC_ERR_GENERIC); + } + mvData = vPacket[0]; + } else { + NVENC_THROW_ERROR("nvEncEncodePicture API failed", nvStatus); + } +} + +void NvEncoder::GetSequenceParams(std::vector& seqParams) { + uint8_t spsppsData[1024]; // Assume maximum spspps data is 1KB or less + memset(spsppsData, 0, sizeof(spsppsData)); + NV_ENC_SEQUENCE_PARAM_PAYLOAD payload = {NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER}; + uint32_t spsppsSize = 0; + + payload.spsppsBuffer = spsppsData; + payload.inBufferSize = sizeof(spsppsData); + payload.outSPSPPSPayloadSize = &spsppsSize; + NVENC_API_CALL(m_nvenc.nvEncGetSequenceParams(m_hEncoder, &payload)); + seqParams.clear(); + seqParams.insert(seqParams.end(), &spsppsData[0], &spsppsData[spsppsSize]); +} + +NVENCSTATUS NvEncoder::DoEncode(NV_ENC_INPUT_PTR inputBuffer, + NV_ENC_OUTPUT_PTR outputBuffer, + NV_ENC_PIC_PARAMS* pPicParams) { + NV_ENC_PIC_PARAMS picParams = {}; + if (pPicParams) { + picParams = *pPicParams; + } + picParams.version = NV_ENC_PIC_PARAMS_VER; + picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME; + picParams.inputBuffer = inputBuffer; + picParams.bufferFmt = GetPixelFormat(); + picParams.inputWidth = GetEncodeWidth(); + picParams.inputHeight = GetEncodeHeight(); + picParams.outputBitstream = outputBuffer; + picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer); + NVENCSTATUS nvStatus = m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams); + + return 
nvStatus; +} + +void NvEncoder::SendEOS() { + NV_ENC_PIC_PARAMS picParams = {NV_ENC_PIC_PARAMS_VER}; + picParams.encodePicFlags = NV_ENC_PIC_FLAG_EOS; + picParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer); + NVENC_API_CALL(m_nvenc.nvEncEncodePicture(m_hEncoder, &picParams)); +} + +void NvEncoder::EndEncode(std::vector>& vPacket) { + vPacket.clear(); + if (!IsHWEncoderInitialized()) { + NVENC_THROW_ERROR("Encoder device not initialized", + NV_ENC_ERR_ENCODER_NOT_INITIALIZED); + } + + SendEOS(); + + GetEncodedPacket(m_vBitstreamOutputBuffer, vPacket, false); +} + +void NvEncoder::GetEncodedPacket(std::vector& vOutputBuffer, + std::vector>& vPacket, + bool bOutputDelay) { + unsigned i = 0; + int iEnd = bOutputDelay ? m_iToSend - m_nOutputDelay : m_iToSend; + for (; m_iGot < iEnd; m_iGot++) { + WaitForCompletionEvent(m_iGot % m_nEncoderBuffer); + NV_ENC_LOCK_BITSTREAM lockBitstreamData = {NV_ENC_LOCK_BITSTREAM_VER}; + lockBitstreamData.outputBitstream = + vOutputBuffer[m_iGot % m_nEncoderBuffer]; + lockBitstreamData.doNotWait = false; + NVENC_API_CALL(m_nvenc.nvEncLockBitstream(m_hEncoder, &lockBitstreamData)); + + uint8_t* pData = (uint8_t*)lockBitstreamData.bitstreamBufferPtr; + if (vPacket.size() < i + 1) { + vPacket.push_back(std::vector()); + } + vPacket[i].clear(); + vPacket[i].insert(vPacket[i].end(), &pData[0], + &pData[lockBitstreamData.bitstreamSizeInBytes]); + i++; + + NVENC_API_CALL(m_nvenc.nvEncUnlockBitstream( + m_hEncoder, lockBitstreamData.outputBitstream)); + + if (m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer]) { + NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource( + m_hEncoder, m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer])); + m_vMappedInputBuffers[m_iGot % m_nEncoderBuffer] = nullptr; + } + + if (m_bMotionEstimationOnly && + m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer]) { + NVENC_API_CALL(m_nvenc.nvEncUnmapInputResource( + m_hEncoder, m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer])); + m_vMappedRefBuffers[m_iGot % m_nEncoderBuffer] = nullptr; + } + } +} + +bool NvEncoder::Reconfigure( + const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams) { + NVENC_API_CALL(m_nvenc.nvEncReconfigureEncoder( + m_hEncoder, const_cast(pReconfigureParams))); + + memcpy(&m_initializeParams, &(pReconfigureParams->reInitEncodeParams), + sizeof(m_initializeParams)); + if (pReconfigureParams->reInitEncodeParams.encodeConfig) { + memcpy(&m_encodeConfig, pReconfigureParams->reInitEncodeParams.encodeConfig, + sizeof(m_encodeConfig)); + } + + m_nWidth = m_initializeParams.encodeWidth; + m_nHeight = m_initializeParams.encodeHeight; + m_nMaxEncodeWidth = m_initializeParams.maxEncodeWidth; + m_nMaxEncodeHeight = m_initializeParams.maxEncodeHeight; + + return true; +} + +NV_ENC_REGISTERED_PTR NvEncoder::RegisterResource( + void* pBuffer, + NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, + int height, + int pitch, + NV_ENC_BUFFER_FORMAT bufferFormat, + NV_ENC_BUFFER_USAGE bufferUsage) { + NV_ENC_REGISTER_RESOURCE registerResource = {NV_ENC_REGISTER_RESOURCE_VER}; + registerResource.resourceType = eResourceType; + registerResource.resourceToRegister = pBuffer; + registerResource.width = width; + registerResource.height = height; + registerResource.pitch = pitch; + registerResource.bufferFormat = bufferFormat; + registerResource.bufferUsage = bufferUsage; + NVENC_API_CALL(m_nvenc.nvEncRegisterResource(m_hEncoder, ®isterResource)); + + return registerResource.registeredResource; +} + +void NvEncoder::RegisterInputResources(std::vector inputframes, + NV_ENC_INPUT_RESOURCE_TYPE 
eResourceType, + int width, + int height, + int pitch, + NV_ENC_BUFFER_FORMAT bufferFormat, + bool bReferenceFrame) { + for (uint32_t i = 0; i < inputframes.size(); ++i) { + NV_ENC_REGISTERED_PTR registeredPtr = + RegisterResource(inputframes[i], eResourceType, width, height, pitch, + bufferFormat, NV_ENC_INPUT_IMAGE); + + std::vector _chromaOffsets; + NvEncoder::GetChromaSubPlaneOffsets(bufferFormat, pitch, height, + _chromaOffsets); + NvEncInputFrame inputframe = {}; + inputframe.inputPtr = (void*)inputframes[i]; + inputframe.chromaOffsets[0] = 0; + inputframe.chromaOffsets[1] = 0; + for (uint32_t ch = 0; ch < _chromaOffsets.size(); ch++) { + inputframe.chromaOffsets[ch] = _chromaOffsets[ch]; + } + inputframe.numChromaPlanes = NvEncoder::GetNumChromaPlanes(bufferFormat); + inputframe.pitch = pitch; + inputframe.chromaPitch = NvEncoder::GetChromaPitch(bufferFormat, pitch); + inputframe.bufferFormat = bufferFormat; + inputframe.resourceType = eResourceType; + + if (bReferenceFrame) { + m_vRegisteredResourcesForReference.push_back(registeredPtr); + m_vReferenceFrames.push_back(inputframe); + } else { + m_vRegisteredResources.push_back(registeredPtr); + m_vInputFrames.push_back(inputframe); + } + } +} + +void NvEncoder::FlushEncoder() { + if (!m_bMotionEstimationOnly && !m_bOutputInVideoMemory) { + // Incase of error it is possible for buffers still mapped to encoder. + // flush the encoder queue and then unmapped it if any surface is still mapped + try { + std::vector> vPacket; + EndEncode(vPacket); + } catch (...) { + } + } +} + +void NvEncoder::UnregisterInputResources() { + FlushEncoder(); + + if (m_bMotionEstimationOnly) { + for (uint32_t i = 0; i < m_vMappedRefBuffers.size(); ++i) { + if (m_vMappedRefBuffers[i]) { + m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedRefBuffers[i]); + } + } + } + m_vMappedRefBuffers.clear(); + + for (uint32_t i = 0; i < m_vMappedInputBuffers.size(); ++i) { + if (m_vMappedInputBuffers[i]) { + m_nvenc.nvEncUnmapInputResource(m_hEncoder, m_vMappedInputBuffers[i]); + } + } + m_vMappedInputBuffers.clear(); + + for (uint32_t i = 0; i < m_vRegisteredResources.size(); ++i) { + if (m_vRegisteredResources[i]) { + m_nvenc.nvEncUnregisterResource(m_hEncoder, m_vRegisteredResources[i]); + } + } + m_vRegisteredResources.clear(); + + for (uint32_t i = 0; i < m_vRegisteredResourcesForReference.size(); ++i) { + if (m_vRegisteredResourcesForReference[i]) { + m_nvenc.nvEncUnregisterResource(m_hEncoder, + m_vRegisteredResourcesForReference[i]); + } + } + m_vRegisteredResourcesForReference.clear(); +} + +void NvEncoder::WaitForCompletionEvent(int iEvent) { +#if defined(_WIN32) + // Check if we are in async mode. 
If not, don't wait for event; + NV_ENC_CONFIG sEncodeConfig = {0}; + NV_ENC_INITIALIZE_PARAMS sInitializeParams = {0}; + sInitializeParams.encodeConfig = &sEncodeConfig; + GetInitializeParams(&sInitializeParams); + + if (0U == sInitializeParams.enableEncodeAsync) { + return; + } +#ifdef DEBUG + WaitForSingleObject(m_vpCompletionEvent[iEvent], INFINITE); +#else + // wait for 20s which is infinite on terms of gpu time + if (WaitForSingleObject(m_vpCompletionEvent[iEvent], 20000) == WAIT_FAILED) { + NVENC_THROW_ERROR("Failed to encode frame", NV_ENC_ERR_GENERIC); + } +#endif +#endif +} + +uint32_t NvEncoder::GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t width) { + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_YUV444: + return width; + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return width * 2; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return width * 4; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return 0; + } +} + +uint32_t NvEncoder::GetNumChromaPlanes( + const NV_ENC_BUFFER_FORMAT bufferFormat) { + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return 1; + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return 2; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return -1; + } +} + +uint32_t NvEncoder::GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t lumaPitch) { + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return lumaPitch; + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + return (lumaPitch + 1) / 2; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return -1; + } +} + +void NvEncoder::GetChromaSubPlaneOffsets( + const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t pitch, + const uint32_t height, + std::vector& chromaOffsets) { + chromaOffsets.clear(); + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + chromaOffsets.push_back(pitch * height); + return; + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + chromaOffsets.push_back(pitch * height); + chromaOffsets.push_back(chromaOffsets[0] + + (NvEncoder::GetChromaPitch(bufferFormat, pitch) * + GetChromaHeight(bufferFormat, height))); + return; + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + chromaOffsets.push_back(pitch * height); + chromaOffsets.push_back(chromaOffsets[0] + (pitch * height)); + return; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case 
NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return; + } +} + +uint32_t NvEncoder::GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t lumaHeight) { + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_NV12: + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return (lumaHeight + 1) / 2; + case NV_ENC_BUFFER_FORMAT_YUV444: + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return lumaHeight; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return 0; + } +} + +uint32_t NvEncoder::GetChromaWidthInBytes( + const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t lumaWidth) { + switch (bufferFormat) { + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + return (lumaWidth + 1) / 2; + case NV_ENC_BUFFER_FORMAT_NV12: + return lumaWidth; + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return 2 * lumaWidth; + case NV_ENC_BUFFER_FORMAT_YUV444: + return lumaWidth; + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return 2 * lumaWidth; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 0; + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return 0; + } +} + +int NvEncoder::GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery) { + if (!m_hEncoder) { + return 0; + } + NV_ENC_CAPS_PARAM capsParam = {NV_ENC_CAPS_PARAM_VER}; + capsParam.capsToQuery = capsToQuery; + int v; + m_nvenc.nvEncGetEncodeCaps(m_hEncoder, guidCodec, &capsParam, &v); + return v; +} + +int NvEncoder::GetFrameSize() const { + switch (GetPixelFormat()) { + case NV_ENC_BUFFER_FORMAT_YV12: + case NV_ENC_BUFFER_FORMAT_IYUV: + case NV_ENC_BUFFER_FORMAT_NV12: + return GetEncodeWidth() * + (GetEncodeHeight() + (GetEncodeHeight() + 1) / 2); + case NV_ENC_BUFFER_FORMAT_YUV420_10BIT: + return 2 * GetEncodeWidth() * + (GetEncodeHeight() + (GetEncodeHeight() + 1) / 2); + case NV_ENC_BUFFER_FORMAT_YUV444: + return GetEncodeWidth() * GetEncodeHeight() * 3; + case NV_ENC_BUFFER_FORMAT_YUV444_10BIT: + return 2 * GetEncodeWidth() * GetEncodeHeight() * 3; + case NV_ENC_BUFFER_FORMAT_ARGB: + case NV_ENC_BUFFER_FORMAT_ARGB10: + case NV_ENC_BUFFER_FORMAT_AYUV: + case NV_ENC_BUFFER_FORMAT_ABGR: + case NV_ENC_BUFFER_FORMAT_ABGR10: + return 4 * GetEncodeWidth() * GetEncodeHeight(); + default: + NVENC_THROW_ERROR("Invalid Buffer format", NV_ENC_ERR_INVALID_PARAM); + return 0; + } +} + +void NvEncoder::GetInitializeParams( + NV_ENC_INITIALIZE_PARAMS* pInitializeParams) { + if (!pInitializeParams || !pInitializeParams->encodeConfig) { + NVENC_THROW_ERROR( + "Both pInitializeParams and pInitializeParams->encodeConfig can't be " + "NULL", + NV_ENC_ERR_INVALID_PTR); + } + NV_ENC_CONFIG* pEncodeConfig = pInitializeParams->encodeConfig; + *pEncodeConfig = m_encodeConfig; + *pInitializeParams = m_initializeParams; + pInitializeParams->encodeConfig = pEncodeConfig; +} + +void NvEncoder::InitializeBitstreamBuffer() { + for (int i = 0; i < m_nEncoderBuffer; i++) { + NV_ENC_CREATE_BITSTREAM_BUFFER createBitstreamBuffer = { + NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; + NVENC_API_CALL( + 
m_nvenc.nvEncCreateBitstreamBuffer(m_hEncoder, &createBitstreamBuffer)); + m_vBitstreamOutputBuffer[i] = createBitstreamBuffer.bitstreamBuffer; + } +} + +void NvEncoder::DestroyBitstreamBuffer() { + for (uint32_t i = 0; i < m_vBitstreamOutputBuffer.size(); i++) { + if (m_vBitstreamOutputBuffer[i]) { + m_nvenc.nvEncDestroyBitstreamBuffer(m_hEncoder, + m_vBitstreamOutputBuffer[i]); + } + } + + m_vBitstreamOutputBuffer.clear(); +} + +void NvEncoder::InitializeMVOutputBuffer() { + for (int i = 0; i < m_nEncoderBuffer; i++) { + NV_ENC_CREATE_MV_BUFFER createMVBuffer = {NV_ENC_CREATE_MV_BUFFER_VER}; + NVENC_API_CALL(m_nvenc.nvEncCreateMVBuffer(m_hEncoder, &createMVBuffer)); + m_vMVDataOutputBuffer.push_back(createMVBuffer.mvBuffer); + } +} + +void NvEncoder::DestroyMVOutputBuffer() { + for (uint32_t i = 0; i < m_vMVDataOutputBuffer.size(); i++) { + if (m_vMVDataOutputBuffer[i]) { + m_nvenc.nvEncDestroyMVBuffer(m_hEncoder, m_vMVDataOutputBuffer[i]); + } + } + + m_vMVDataOutputBuffer.clear(); +} + +NVENCSTATUS NvEncoder::DoMotionEstimation( + NV_ENC_INPUT_PTR inputBuffer, + NV_ENC_INPUT_PTR inputBufferForReference, + NV_ENC_OUTPUT_PTR outputBuffer) { + NV_ENC_MEONLY_PARAMS meParams = {NV_ENC_MEONLY_PARAMS_VER}; + meParams.inputBuffer = inputBuffer; + meParams.referenceFrame = inputBufferForReference; + meParams.inputWidth = GetEncodeWidth(); + meParams.inputHeight = GetEncodeHeight(); + meParams.mvBuffer = outputBuffer; + meParams.completionEvent = GetCompletionEvent(m_iToSend % m_nEncoderBuffer); + NVENCSTATUS nvStatus = + m_nvenc.nvEncRunMotionEstimationOnly(m_hEncoder, &meParams); + + return nvStatus; +} diff --git a/NvCodec/NvCodec/NvEncoder/NvEncoder.h b/NvCodec/NvCodec/NvEncoder/NvEncoder.h new file mode 100644 index 0000000..7d75ba0 --- /dev/null +++ b/NvCodec/NvCodec/NvEncoder/NvEncoder.h @@ -0,0 +1,478 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include "nvEncodeAPI.h" + +/** +* @brief Exception class for error reporting from NvEncodeAPI calls. 
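+*
+* A minimal usage sketch (illustrative only; enc is assumed to be an already
+* created NvEncoder-derived instance, and <iostream> is included):
+*
+*   std::vector<std::vector<uint8_t>> packets;
+*   try {
+*     enc.EncodeFrame(packets);  // any failing NvEncodeAPI call throws
+*   } catch (const NVENCException& e) {
+*     std::cerr << e.what() << " (status " << e.getErrorCode() << ")" << std::endl;
+*   }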
+*/ +class NVENCException : public std::exception { + public: + NVENCException(const std::string& errorStr, const NVENCSTATUS errorCode) + : m_errorString(errorStr), m_errorCode(errorCode) {} + + virtual ~NVENCException() throw() {} + virtual const char* what() const throw() { return m_errorString.c_str(); } + NVENCSTATUS getErrorCode() const { return m_errorCode; } + const std::string& getErrorString() const { return m_errorString; } + static NVENCException makeNVENCException(const std::string& errorStr, + const NVENCSTATUS errorCode, + const std::string& functionName, + const std::string& fileName, + int lineNo); + + private: + std::string m_errorString; + NVENCSTATUS m_errorCode; +}; + +inline NVENCException NVENCException::makeNVENCException( + const std::string& errorStr, + const NVENCSTATUS errorCode, + const std::string& functionName, + const std::string& fileName, + int lineNo) { + std::ostringstream errorLog; + errorLog << functionName << " : " << errorStr << " at " << fileName << ":" + << lineNo << std::endl; + NVENCException exception(errorLog.str(), errorCode); + return exception; +} + +#define NVENC_THROW_ERROR(errorStr, errorCode) \ + do { \ + throw NVENCException::makeNVENCException( \ + errorStr, errorCode, __FUNCTION__, __FILE__, __LINE__); \ + } while (0) + +#define NVENC_API_CALL(nvencAPI) \ + do { \ + NVENCSTATUS errorCode = nvencAPI; \ + if (errorCode != NV_ENC_SUCCESS) { \ + std::ostringstream errorLog; \ + errorLog << #nvencAPI << " returned error " << errorCode; \ + throw NVENCException::makeNVENCException( \ + errorLog.str(), errorCode, __FUNCTION__, __FILE__, __LINE__); \ + } \ + } while (0) + +struct NvEncInputFrame { + void* inputPtr = nullptr; + uint32_t chromaOffsets[2]; + uint32_t numChromaPlanes; + uint32_t pitch; + uint32_t chromaPitch; + NV_ENC_BUFFER_FORMAT bufferFormat; + NV_ENC_INPUT_RESOURCE_TYPE resourceType; +}; + +/** +* @brief Shared base class for different encoder interfaces. +*/ +class NvEncoder { + public: + static void TryLoadNvEncApi(); + + /** + * @brief This function is used to initialize the encoder session. + * Application must call this function to initialize the encoder, before + * starting to encode any frames. + */ + void CreateEncoder(const NV_ENC_INITIALIZE_PARAMS* pEncodeParams); + + /** + * @brief This function is used to destroy the encoder session. + * Application must call this function to destroy the encoder session and + * clean up any allocated resources. The application must call EndEncode() + * function to get any queued encoded frames before calling DestroyEncoder(). + */ + void DestroyEncoder(); + + /** + * @brief This function is used to reconfigure an existing encoder session. + * Application can use this function to dynamically change the bitrate, + * resolution and other QOS parameters. If the application changes the + * resolution, it must set NV_ENC_RECONFIGURE_PARAMS::forceIDR. + */ + bool Reconfigure(const NV_ENC_RECONFIGURE_PARAMS* pReconfigureParams); + + /** + * @brief This function is used to get the next available input buffer. + * Applications must call this function to obtain a pointer to the next + * input buffer. The application must copy the uncompressed data to the + * input buffer and then call EncodeFrame() function to encode it. + */ + const NvEncInputFrame* GetNextInputFrame(); + + /** + * @brief This function is used to encode a frame. 
+ * Applications must call EncodeFrame() function to encode the uncompressed + * data, which has been copied to an input buffer obtained from the + * GetNextInputFrame() function. + */ + void EncodeFrame(std::vector>& vPacket, + NV_ENC_PIC_PARAMS* pPicParams = nullptr); + + /** + * @brief This function to flush the encoder queue. + * The encoder might be queuing frames for B picture encoding or lookahead; + * the application must call EndEncode() to get all the queued encoded frames + * from the encoder. The application must call this function before destroying + * an encoder session. + */ + void EndEncode(std::vector>& vPacket); + + /** + * @brief This function is used to query hardware encoder capabilities. + * Applications can call this function to query capabilities like maximum encode + * dimensions, support for lookahead or the ME-only mode etc. + */ + int GetCapabilityValue(GUID guidCodec, NV_ENC_CAPS capsToQuery); + + /** + * @brief This function is used to get the current device on which encoder is running. + */ + void* GetDevice() const { return m_pDevice; } + + /** + * @brief This function is used to get the current device type which encoder is running. + */ + NV_ENC_DEVICE_TYPE GetDeviceType() const { return m_eDeviceType; } + + /** + * @brief This function is used to get the current encode width. + * The encode width can be modified by Reconfigure() function. + */ + int GetEncodeWidth() const { return m_nWidth; } + + /** + * @brief This function is used to get the current encode height. + * The encode height can be modified by Reconfigure() function. + */ + int GetEncodeHeight() const { return m_nHeight; } + + /** + * @brief This function is used to get the current frame size based on pixel format. + */ + int GetFrameSize() const; + + /** + * @brief This function is used to initialize config parameters based on + * given codec and preset guids. + * The application can call this function to get the default configuration + * for a certain preset. The application can either use these parameters + * directly or override them with application-specific settings before + * using them in CreateEncoder() function. + */ + void CreateDefaultEncoderParams(NV_ENC_INITIALIZE_PARAMS* pIntializeParams, + GUID codecGuid, + GUID presetGuid); + + /** + * @brief This function is used to get the current initialization parameters, + * which had been used to configure the encoder session. + * The initialization parameters are modified if the application calls + * Reconfigure() function. + */ + void GetInitializeParams(NV_ENC_INITIALIZE_PARAMS* pInitializeParams); + + /** + * @brief This function is used to run motion estimation + * This is used to run motion estimation on a a pair of frames. The + * application must copy the reference frame data to the buffer obtained + * by calling GetNextReferenceFrame(), and copy the input frame data to + * the buffer obtained by calling GetNextInputFrame() before calling the + * RunMotionEstimation() function. + */ + void RunMotionEstimation(std::vector& mvData); + + /** + * @brief This function is used to get an available reference frame. + * Application must call this function to get a pointer to reference buffer, + * to be used in the subsequent RunMotionEstimation() function. + */ + const NvEncInputFrame* GetNextReferenceFrame(); + + /** + * @brief This function is used to get sequence and picture parameter headers. + * Application can call this function after encoder is initialized to get SPS and PPS + * nalus for the current encoder instance. 
The sequence header data might change when + * application calls Reconfigure() function. + */ + void GetSequenceParams(std::vector& seqParams); + + /** + * @brief NvEncoder class virtual destructor. + */ + virtual ~NvEncoder(); + + public: + /** + * @brief This a static function to get chroma offsets for YUV planar formats. + */ + static void GetChromaSubPlaneOffsets(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t pitch, + const uint32_t height, + std::vector& chromaOffsets); + /** + * @brief This a static function to get the chroma plane pitch for YUV planar formats. + */ + static uint32_t GetChromaPitch(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t lumaPitch); + + /** + * @brief This a static function to get the number of chroma planes for YUV planar formats. + */ + static uint32_t GetNumChromaPlanes(const NV_ENC_BUFFER_FORMAT bufferFormat); + + /** + * @brief This a static function to get the chroma plane width in bytes for YUV planar formats. + */ + static uint32_t GetChromaWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t lumaWidth); + + /** + * @brief This a static function to get the chroma planes height in bytes for YUV planar formats. + */ + static uint32_t GetChromaHeight(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t lumaHeight); + + /** + * @brief This a static function to get the width in bytes for the frame. + * For YUV planar format this is the width in bytes of the luma plane. + */ + static uint32_t GetWidthInBytes(const NV_ENC_BUFFER_FORMAT bufferFormat, + const uint32_t width); + + protected: + /** + * @brief NvEncoder class constructor. + * NvEncoder class constructor cannot be called directly by the application. + */ + NvEncoder(NV_ENC_DEVICE_TYPE eDeviceType, + void* pDevice, + uint32_t nWidth, + uint32_t nHeight, + NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nOutputDelay, + bool bMotionEstimationOnly, + bool bOutputInVideoMemory = false); + + /** + * @brief This function is used to check if hardware encoder is properly initialized. + */ + bool IsHWEncoderInitialized() const { + return m_hEncoder != NULL && m_bEncoderInitialized; + } + + /** + * @brief This function is used to register CUDA, D3D or OpenGL input buffers with NvEncodeAPI. + * This is non public function and is called by derived class for allocating + * and registering input buffers. + */ + void RegisterInputResources(std::vector inputframes, + NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, + int height, + int pitch, + NV_ENC_BUFFER_FORMAT bufferFormat, + bool bReferenceFrame = false); + + /** + * @brief This function is used to unregister resources which had been previously registered for encoding + * using RegisterInputResources() function. + */ + void UnregisterInputResources(); + + /** + * @brief This function is used to register CUDA, D3D or OpenGL input or output buffers with NvEncodeAPI. + */ + NV_ENC_REGISTERED_PTR RegisterResource( + void* pBuffer, + NV_ENC_INPUT_RESOURCE_TYPE eResourceType, + int width, + int height, + int pitch, + NV_ENC_BUFFER_FORMAT bufferFormat, + NV_ENC_BUFFER_USAGE bufferUsage = NV_ENC_INPUT_IMAGE); + + /** + * @brief This function returns maximum width used to open the encoder session. + * All encode input buffers are allocated using maximum dimensions. + */ + uint32_t GetMaxEncodeWidth() const { return m_nMaxEncodeWidth; } + + /** + * @brief This function returns maximum height used to open the encoder session. + * All encode input buffers are allocated using maximum dimensions. 
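+ *
+ * For example, a session that should stay reconfigurable up to 4K could be
+ * opened with (illustrative sketch; pParams is assumed to point at the
+ * NV_ENC_INITIALIZE_PARAMS that will be passed to CreateEncoder()):
+ *
+ *   pParams->encodeWidth     = 1280;
+ *   pParams->encodeHeight    = 720;
+ *   pParams->maxEncodeWidth  = 3840;  // later returned by GetMaxEncodeWidth()
+ *   pParams->maxEncodeHeight = 2160;  // later returned by GetMaxEncodeHeight()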
+ */ + uint32_t GetMaxEncodeHeight() const { return m_nMaxEncodeHeight; } + + /** + * @brief This function returns the completion event. + */ + void* GetCompletionEvent(uint32_t eventIdx) { + return (m_vpCompletionEvent.size() == m_nEncoderBuffer) + ? m_vpCompletionEvent[eventIdx] + : nullptr; + } + + /** + * @brief This function returns the current pixel format. + */ + NV_ENC_BUFFER_FORMAT GetPixelFormat() const { return m_eBufferFormat; } + + /** + * @brief This function is used to submit the encode commands to the + * NVENC hardware. + */ + NVENCSTATUS DoEncode(NV_ENC_INPUT_PTR inputBuffer, + NV_ENC_OUTPUT_PTR outputBuffer, + NV_ENC_PIC_PARAMS* pPicParams); + + /** + * @brief This function is used to submit the encode commands to the + * NVENC hardware for ME only mode. + */ + NVENCSTATUS DoMotionEstimation(NV_ENC_INPUT_PTR inputBuffer, + NV_ENC_INPUT_PTR inputBufferForReference, + NV_ENC_OUTPUT_PTR outputBuffer); + + /** + * @brief This function is used to map the input buffers to NvEncodeAPI. + */ + void MapResources(uint32_t bfrIdx); + + /** + * @brief This function is used to wait for completion of encode command. + */ + void WaitForCompletionEvent(int iEvent); + + /** + * @brief This function is used to send EOS to HW encoder. + */ + void SendEOS(); + + private: + /** + * @brief This is a private function which is used to check if there is any + buffering done by encoder. + * The encoder generally buffers data to encode B frames or for lookahead + * or pipelining. + */ + bool IsZeroDelay() { return m_nOutputDelay == 0; } + + /** + * @brief This is a private function which is used to load the encode api shared library. + */ + void LoadNvEncApi(); + + /** + * @brief This is a private function which is used to get the output packets + * from the encoder HW. + * This is called by DoEncode() function. If there is buffering enabled, + * this may return without any output data. + */ + void GetEncodedPacket(std::vector& vOutputBuffer, + std::vector>& vPacket, + bool bOutputDelay); + + /** + * @brief This is a private function which is used to initialize the bitstream buffers. + * This is only used in the encoding mode. + */ + void InitializeBitstreamBuffer(); + + /** + * @brief This is a private function which is used to destroy the bitstream buffers. + * This is only used in the encoding mode. + */ + void DestroyBitstreamBuffer(); + + /** + * @brief This is a private function which is used to initialize MV output buffers. + * This is only used in ME-only Mode. + */ + void InitializeMVOutputBuffer(); + + /** + * @brief This is a private function which is used to destroy MV output buffers. + * This is only used in ME-only Mode. + */ + void DestroyMVOutputBuffer(); + + /** + * @brief This is a private function which is used to destroy HW encoder. + */ + void DestroyHWEncoder(); + + /** + * @brief This function is used to flush the encoder queue. + */ + void FlushEncoder(); + + private: + /** + * @brief This is a pure virtual function which is used to allocate input buffers. + * The derived classes must implement this function. + */ + virtual void AllocateInputBuffers(int32_t numInputBuffers) = 0; + + /** + * @brief This is a pure virtual function which is used to destroy input buffers. + * The derived classes must implement this function. 
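+ *
+ * A derived class typically unregisters the resources first and then frees
+ * whatever its AllocateInputBuffers() created, roughly (sketch only; see
+ * NvEncoderCuda::ReleaseInputBuffers() and NvEncoderD3D11::ReleaseInputBuffers()
+ * in this change for the real implementations):
+ *
+ *   void MyEncoder::ReleaseInputBuffers() {  // MyEncoder is hypothetical
+ *     UnregisterInputResources();            // unmap/unregister before freeing
+ *     // ... free the device buffers referenced by m_vInputFrames ...
+ *     m_vInputFrames.clear();
+ *   }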
+ */ + virtual void ReleaseInputBuffers() = 0; + + protected: + bool m_bMotionEstimationOnly = false; + bool m_bOutputInVideoMemory = false; + void* m_hEncoder = nullptr; + NV_ENCODE_API_FUNCTION_LIST m_nvenc; + std::vector m_vInputFrames; + std::vector m_vRegisteredResources; + std::vector m_vReferenceFrames; + std::vector m_vRegisteredResourcesForReference; + std::vector m_vMappedInputBuffers; + std::vector m_vMappedRefBuffers; + std::vector m_vpCompletionEvent; + + int32_t m_iToSend = 0; + int32_t m_iGot = 0; + int32_t m_nEncoderBuffer = 0; + int32_t m_nOutputDelay = 0; + + private: + uint32_t m_nWidth; + uint32_t m_nHeight; + NV_ENC_BUFFER_FORMAT m_eBufferFormat; + void* m_pDevice; + NV_ENC_DEVICE_TYPE m_eDeviceType; + NV_ENC_INITIALIZE_PARAMS m_initializeParams = {}; + NV_ENC_CONFIG m_encodeConfig = {}; + bool m_bEncoderInitialized = false; + uint32_t m_nExtraOutputDelay = + 3; // To ensure encode and graphics can work in parallel, m_nExtraOutputDelay should be set to at least 1 + std::vector m_vBitstreamOutputBuffer; + std::vector m_vMVDataOutputBuffer; + uint32_t m_nMaxEncodeWidth = 0; + uint32_t m_nMaxEncodeHeight = 0; + void* m_hModule = nullptr; +}; diff --git a/NvCodec/NvCodec/NvEncoder/NvEncoderCuda.cpp b/NvCodec/NvCodec/NvEncoder/NvEncoderCuda.cpp new file mode 100644 index 0000000..82c6f3c --- /dev/null +++ b/NvCodec/NvCodec/NvEncoder/NvEncoderCuda.cpp @@ -0,0 +1,289 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#include "NvEncoder/NvEncoderCuda.h" + + +NvEncoderCuda::NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nExtraOutputDelay, bool bMotionEstimationOnly, bool bOutputInVideoMemory): + NvEncoder(NV_ENC_DEVICE_TYPE_CUDA, cuContext, nWidth, nHeight, eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly, bOutputInVideoMemory), + m_cuContext(cuContext) +{ + if (!m_hEncoder) + { + NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE); + } + + if (!m_cuContext) + { + NVENC_THROW_ERROR("Invalid Cuda Context", NV_ENC_ERR_INVALID_DEVICE); + } +} + +NvEncoderCuda::~NvEncoderCuda() +{ + ReleaseCudaResources(); +} + +void NvEncoderCuda::AllocateInputBuffers(int32_t numInputBuffers) +{ + if (!IsHWEncoderInitialized()) + { + NVENC_THROW_ERROR("Encoder intialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED); + } + + // for MEOnly mode we need to allocate seperate set of buffers for reference frame + int numCount = m_bMotionEstimationOnly ? 
2 : 1; + + for (int count = 0; count < numCount; count++) + { + CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); + std::vector inputFrames; + for (int i = 0; i < numInputBuffers; i++) + { + CUdeviceptr pDeviceFrame; + uint32_t chromaHeight = GetNumChromaPlanes(GetPixelFormat()) * GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); + if (GetPixelFormat() == NV_ENC_BUFFER_FORMAT_YV12 || GetPixelFormat() == NV_ENC_BUFFER_FORMAT_IYUV) + chromaHeight = GetChromaHeight(GetPixelFormat(), GetMaxEncodeHeight()); + CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pDeviceFrame, + &m_cudaPitch, + GetWidthInBytes(GetPixelFormat(), GetMaxEncodeWidth()), + GetMaxEncodeHeight() + chromaHeight, 16)); + inputFrames.push_back((void*)pDeviceFrame); + } + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); + + RegisterInputResources(inputFrames, + NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR, + GetMaxEncodeWidth(), + GetMaxEncodeHeight(), + (int)m_cudaPitch, + GetPixelFormat(), + (count == 1) ? true : false); + } +} + +void NvEncoderCuda::SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream) +{ + NVENC_API_CALL(m_nvenc.nvEncSetIOCudaStreams(m_hEncoder, inputStream, outputStream)); +} + +void NvEncoderCuda::ReleaseInputBuffers() +{ + ReleaseCudaResources(); +} + +void NvEncoderCuda::ReleaseCudaResources() +{ + if (!m_hEncoder) + { + return; + } + + if (!m_cuContext) + { + return; + } + + UnregisterInputResources(); + + cuCtxPushCurrent(m_cuContext); + + for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) + { + if (m_vInputFrames[i].inputPtr) + { + cuMemFree(reinterpret_cast(m_vInputFrames[i].inputPtr)); + } + } + m_vInputFrames.clear(); + + for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) + { + if (m_vReferenceFrames[i].inputPtr) + { + cuMemFree(reinterpret_cast(m_vReferenceFrames[i].inputPtr)); + } + } + m_vReferenceFrames.clear(); + + cuCtxPopCurrent(NULL); + m_cuContext = nullptr; +} + +void NvEncoderCuda::CopyToDeviceFrame(CUcontext device, + void* pSrcFrame, + uint32_t nSrcPitch, + CUdeviceptr pDstFrame, + uint32_t dstPitch, + int width, + int height, + CUmemorytype srcMemoryType, + NV_ENC_BUFFER_FORMAT pixelFormat, + const uint32_t dstChromaOffsets[], + uint32_t numChromaPlanes, + bool bUnAlignedDeviceCopy, + CUstream stream) +{ + if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE) + { + NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM); + } + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(device)); + + uint32_t srcPitch = nSrcPitch ? nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); + CUDA_MEMCPY2D m = { 0 }; + m.srcMemoryType = srcMemoryType; + if (srcMemoryType == CU_MEMORYTYPE_HOST) + { + m.srcHost = pSrcFrame; + } + else + { + m.srcDevice = (CUdeviceptr)pSrcFrame; + } + m.srcPitch = srcPitch; + m.dstMemoryType = CU_MEMORYTYPE_DEVICE; + m.dstDevice = pDstFrame; + m.dstPitch = dstPitch; + m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); + m.Height = height; + if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) + { + CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); + } + else + { + CUDA_DRVAPI_CALL(stream == NULL? 
cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream)); + } + + std::vector srcChromaOffsets; + NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets); + uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height); + uint32_t destChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, dstPitch); + uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch); + uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width); + + for (uint32_t i = 0; i < numChromaPlanes; ++i) + { + if (chromaHeight) + { + if (srcMemoryType == CU_MEMORYTYPE_HOST) + { + m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]); + } + else + { + m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]); + } + m.srcPitch = srcChromaPitch; + + m.dstDevice = (CUdeviceptr)((uint8_t *)pDstFrame + dstChromaOffsets[i]); + m.dstPitch = destChromaPitch; + m.WidthInBytes = chromaWidthInBytes; + m.Height = chromaHeight; + if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) + { + CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); + } + else + { + CUDA_DRVAPI_CALL(stream == NULL? cuMemcpy2D(&m) : cuMemcpy2DAsync(&m, stream)); + } + } + } + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); +} + +void NvEncoderCuda::CopyToDeviceFrame(CUcontext device, + void* pSrcFrame, + uint32_t nSrcPitch, + CUdeviceptr pDstFrame, + uint32_t dstPitch, + int width, + int height, + CUmemorytype srcMemoryType, + NV_ENC_BUFFER_FORMAT pixelFormat, + CUdeviceptr dstChromaDevicePtrs[], + uint32_t dstChromaPitch, + uint32_t numChromaPlanes, + bool bUnAlignedDeviceCopy) +{ + if (srcMemoryType != CU_MEMORYTYPE_HOST && srcMemoryType != CU_MEMORYTYPE_DEVICE) + { + NVENC_THROW_ERROR("Invalid source memory type for copy", NV_ENC_ERR_INVALID_PARAM); + } + + CUDA_DRVAPI_CALL(cuCtxPushCurrent(device)); + + uint32_t srcPitch = nSrcPitch ? 
nSrcPitch : NvEncoder::GetWidthInBytes(pixelFormat, width); + CUDA_MEMCPY2D m = { 0 }; + m.srcMemoryType = srcMemoryType; + if (srcMemoryType == CU_MEMORYTYPE_HOST) + { + m.srcHost = pSrcFrame; + } + else + { + m.srcDevice = (CUdeviceptr)pSrcFrame; + } + m.srcPitch = srcPitch; + m.dstMemoryType = CU_MEMORYTYPE_DEVICE; + m.dstDevice = pDstFrame; + m.dstPitch = dstPitch; + m.WidthInBytes = NvEncoder::GetWidthInBytes(pixelFormat, width); + m.Height = height; + if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) + { + CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); + } + else + { + CUDA_DRVAPI_CALL(cuMemcpy2D(&m)); + } + + std::vector srcChromaOffsets; + NvEncoder::GetChromaSubPlaneOffsets(pixelFormat, srcPitch, height, srcChromaOffsets); + uint32_t chromaHeight = NvEncoder::GetChromaHeight(pixelFormat, height); + uint32_t srcChromaPitch = NvEncoder::GetChromaPitch(pixelFormat, srcPitch); + uint32_t chromaWidthInBytes = NvEncoder::GetChromaWidthInBytes(pixelFormat, width); + + for (uint32_t i = 0; i < numChromaPlanes; ++i) + { + if (chromaHeight) + { + if (srcMemoryType == CU_MEMORYTYPE_HOST) + { + m.srcHost = ((uint8_t *)pSrcFrame + srcChromaOffsets[i]); + } + else + { + m.srcDevice = (CUdeviceptr)((uint8_t *)pSrcFrame + srcChromaOffsets[i]); + } + m.srcPitch = srcChromaPitch; + + m.dstDevice = dstChromaDevicePtrs[i]; + m.dstPitch = dstChromaPitch; + m.WidthInBytes = chromaWidthInBytes; + m.Height = chromaHeight; + if (bUnAlignedDeviceCopy && srcMemoryType == CU_MEMORYTYPE_DEVICE) + { + CUDA_DRVAPI_CALL(cuMemcpy2DUnaligned(&m)); + } + else + { + CUDA_DRVAPI_CALL(cuMemcpy2D(&m)); + } + } + } + CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); +} diff --git a/NvCodec/NvCodec/NvEncoder/NvEncoderCuda.h b/NvCodec/NvCodec/NvEncoder/NvEncoderCuda.h new file mode 100644 index 0000000..e31b562 --- /dev/null +++ b/NvCodec/NvCodec/NvEncoder/NvEncoderCuda.h @@ -0,0 +1,111 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include +#include +#include +#include +#include "NvEncoder.h" + +#define CUDA_DRVAPI_CALL( call ) \ + do \ + { \ + CUresult err__ = call; \ + if (err__ != CUDA_SUCCESS) \ + { \ + const char *szErrName = NULL; \ + cuGetErrorName(err__, &szErrName); \ + std::ostringstream errorLog; \ + errorLog << "CUDA driver API error " << szErrName ; \ + throw NVENCException::makeNVENCException(errorLog.str(), NV_ENC_ERR_GENERIC, __FUNCTION__, __FILE__, __LINE__); \ + } \ + } \ + while (0) + +/** +* @brief Encoder for CUDA device memory. +*/ +class NvEncoderCuda : public NvEncoder +{ +public: + NvEncoderCuda(CUcontext cuContext, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nExtraOutputDelay = 3, bool bMotionEstimationOnly = false, bool bOPInVideoMemory = false); + virtual ~NvEncoderCuda(); + + /** + * @brief This is a static function to copy input data from host memory to device memory. + * This function assumes YUV plane is a single contiguous memory segment. 
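+ *
+ * Typical call pattern (a hedged sketch, not part of the SDK; cuContext, a host
+ * NV12 buffer pHostNv12 and an NvEncoderCuda instance enc are assumed):
+ *
+ *   const NvEncInputFrame* frame = enc.GetNextInputFrame();
+ *   NvEncoderCuda::CopyToDeviceFrame(cuContext, pHostNv12,
+ *       0,                            // 0: source pitch derived from width
+ *       (CUdeviceptr)frame->inputPtr, frame->pitch,
+ *       enc.GetEncodeWidth(), enc.GetEncodeHeight(),
+ *       CU_MEMORYTYPE_HOST, frame->bufferFormat,
+ *       frame->chromaOffsets, frame->numChromaPlanes);
+ *   std::vector<std::vector<uint8_t>> packets;
+ *   enc.EncodeFrame(packets);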
+ */ + static void CopyToDeviceFrame(CUcontext device, + void* pSrcFrame, + uint32_t nSrcPitch, + CUdeviceptr pDstFrame, + uint32_t dstPitch, + int width, + int height, + CUmemorytype srcMemoryType, + NV_ENC_BUFFER_FORMAT pixelFormat, + const uint32_t dstChromaOffsets[], + uint32_t numChromaPlanes, + bool bUnAlignedDeviceCopy = false, + CUstream stream = NULL); + + /** + * @brief This is a static function to copy input data from host memory to device memory. + * Application must pass a seperate device pointer for each YUV plane. + */ + static void CopyToDeviceFrame(CUcontext device, + void* pSrcFrame, + uint32_t nSrcPitch, + CUdeviceptr pDstFrame, + uint32_t dstPitch, + int width, + int height, + CUmemorytype srcMemoryType, + NV_ENC_BUFFER_FORMAT pixelFormat, + CUdeviceptr dstChromaPtr[], + uint32_t dstChromaPitch, + uint32_t numChromaPlanes, + bool bUnAlignedDeviceCopy = false); + + /** + * @brief This function sets input and output CUDA streams + */ + void SetIOCudaStreams(NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream); + +protected: + /** + * @brief This function is used to release the input buffers allocated for encoding. + * This function is an override of virtual function NvEncoder::ReleaseInputBuffers(). + */ + virtual void ReleaseInputBuffers() override; + +private: + /** + * @brief This function is used to allocate input buffers for encoding. + * This function is an override of virtual function NvEncoder::AllocateInputBuffers(). + */ + virtual void AllocateInputBuffers(int32_t numInputBuffers) override; + +private: + /** + * @brief This is a private function to release CUDA device memory used for encoding. + */ + void ReleaseCudaResources(); + +protected: + CUcontext m_cuContext; + +private: + size_t m_cudaPitch = 0; +}; diff --git a/NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.cpp b/NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.cpp new file mode 100644 index 0000000..029bee1 --- /dev/null +++ b/NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.cpp @@ -0,0 +1,147 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. 
+* +*/ + + +#ifndef WIN32 +#include +#endif +#include "NvEncoder/NvEncoderD3D11.h" +#include + +#ifndef MAKEFOURCC +#define MAKEFOURCC(a,b,c,d) (((unsigned int)a) | (((unsigned int)b)<< 8) | (((unsigned int)c)<<16) | (((unsigned int)d)<<24) ) +#endif + +DXGI_FORMAT GetD3D11Format(NV_ENC_BUFFER_FORMAT eBufferFormat) +{ + switch (eBufferFormat) + { + case NV_ENC_BUFFER_FORMAT_NV12: + return DXGI_FORMAT_NV12; + case NV_ENC_BUFFER_FORMAT_ARGB: + return DXGI_FORMAT_B8G8R8A8_UNORM; + default: + return DXGI_FORMAT_UNKNOWN; + } +} + +NvEncoderD3D11::NvEncoderD3D11(ID3D11Device* pD3D11Device, uint32_t nWidth, uint32_t nHeight, + NV_ENC_BUFFER_FORMAT eBufferFormat, uint32_t nExtraOutputDelay, bool bMotionEstimationOnly, bool bOutputInVideoMemory) : + NvEncoder(NV_ENC_DEVICE_TYPE_DIRECTX, pD3D11Device, nWidth, nHeight, eBufferFormat, nExtraOutputDelay, bMotionEstimationOnly, bOutputInVideoMemory) +{ + if (!pD3D11Device) + { + NVENC_THROW_ERROR("Bad d3d11device ptr", NV_ENC_ERR_INVALID_PTR); + return; + } + + if (GetD3D11Format(GetPixelFormat()) == DXGI_FORMAT_UNKNOWN) + { + NVENC_THROW_ERROR("Unsupported Buffer format", NV_ENC_ERR_INVALID_PARAM); + } + + if (!m_hEncoder) + { + NVENC_THROW_ERROR("Encoder Initialization failed", NV_ENC_ERR_INVALID_DEVICE); + } + + m_pD3D11Device = pD3D11Device; + m_pD3D11Device->AddRef(); + m_pD3D11Device->GetImmediateContext(&m_pD3D11DeviceContext); +} + +NvEncoderD3D11::~NvEncoderD3D11() +{ + ReleaseD3D11Resources(); +} + +void NvEncoderD3D11::AllocateInputBuffers(int32_t numInputBuffers) +{ + if (!IsHWEncoderInitialized()) + { + NVENC_THROW_ERROR("Encoder intialization failed", NV_ENC_ERR_ENCODER_NOT_INITIALIZED); + } + + // for MEOnly mode we need to allocate seperate set of buffers for reference frame + int numCount = m_bMotionEstimationOnly ? 2 : 1; + for (int count = 0; count < numCount; count++) + { + std::vector inputFrames; + for (int i = 0; i < numInputBuffers; i++) + { + ID3D11Texture2D *pInputTextures = NULL; + D3D11_TEXTURE2D_DESC desc; + ZeroMemory(&desc, sizeof(D3D11_TEXTURE2D_DESC)); + desc.Width = GetMaxEncodeWidth(); + desc.Height = GetMaxEncodeHeight(); + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = GetD3D11Format(GetPixelFormat()); + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_DEFAULT; + desc.BindFlags = D3D11_BIND_RENDER_TARGET; + desc.CPUAccessFlags = 0; + if (m_pD3D11Device->CreateTexture2D(&desc, NULL, &pInputTextures) != S_OK) + { + NVENC_THROW_ERROR("Failed to create d3d11textures", NV_ENC_ERR_OUT_OF_MEMORY); + } + inputFrames.push_back(pInputTextures); + } + RegisterInputResources(inputFrames, NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX, + GetMaxEncodeWidth(), GetMaxEncodeHeight(), 0, GetPixelFormat(), count == 1 ? 
true : false); + } +} + +void NvEncoderD3D11::ReleaseInputBuffers() +{ + ReleaseD3D11Resources(); +} + +void NvEncoderD3D11::ReleaseD3D11Resources() +{ + if (!m_hEncoder) + { + return; + } + + UnregisterInputResources(); + + for (uint32_t i = 0; i < m_vInputFrames.size(); ++i) + { + if (m_vInputFrames[i].inputPtr) + { + reinterpret_cast(m_vInputFrames[i].inputPtr)->Release(); + } + } + m_vInputFrames.clear(); + + for (uint32_t i = 0; i < m_vReferenceFrames.size(); ++i) + { + if (m_vReferenceFrames[i].inputPtr) + { + reinterpret_cast(m_vReferenceFrames[i].inputPtr)->Release(); + } + } + m_vReferenceFrames.clear(); + + if (m_pD3D11DeviceContext) + { + m_pD3D11DeviceContext->Release(); + m_pD3D11DeviceContext = nullptr; + } + + if (m_pD3D11Device) + { + m_pD3D11Device->Release(); + m_pD3D11Device = nullptr; + } +} + diff --git a/NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.h b/NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.h new file mode 100644 index 0000000..68b83a4 --- /dev/null +++ b/NvCodec/NvCodec/NvEncoder/NvEncoderD3D11.h @@ -0,0 +1,55 @@ +/* +* Copyright 2017-2019 NVIDIA Corporation. All rights reserved. +* +* Please refer to the NVIDIA end user license agreement (EULA) associated +* with this source code for terms and conditions that govern your use of +* this software. Any use, reproduction, disclosure, or distribution of +* this software and related documentation outside the terms of the EULA +* is strictly prohibited. +* +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include "NvEncoder.h" + +class NvEncoderD3D11 : public NvEncoder +{ +public: + NvEncoderD3D11(ID3D11Device* pD3D11Device, uint32_t nWidth, uint32_t nHeight, NV_ENC_BUFFER_FORMAT eBufferFormat, + uint32_t nExtraOutputDelay = 3, bool bMotionEstimationOnly = false, bool bOPInVideoMemory = false); + virtual ~NvEncoderD3D11(); + +protected: + /** + * @brief This function is used to release the input buffers allocated for encoding. + * This function is an override of virtual function NvEncoder::ReleaseInputBuffers(). + */ + virtual void ReleaseInputBuffers() override; + +private: + /** + * @brief This function is used to allocate input buffers for encoding. + * This function is an override of virtual function NvEncoder::AllocateInputBuffers(). + * This function creates ID3D11Texture2D textures which is used to accept input data. + * To obtain handle to input buffers application must call NvEncoder::GetNextInputFrame() + */ + virtual void AllocateInputBuffers(int32_t numInputBuffers) override; + +private: + /** + * @brief This is a private function to release ID3D11Texture2D textures used for encoding. 
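+ *
+ * For context, the textures released here are the ones handed out by
+ * GetNextInputFrame(). A typical encode sequence that exercises them looks
+ * roughly like this (hedged sketch; pDevice, pContext and pSrcTexture are
+ * assumed to exist, and the codec/preset GUIDs come from nvEncodeAPI.h):
+ *
+ *   NvEncoderD3D11 enc(pDevice, 1280, 720, NV_ENC_BUFFER_FORMAT_ARGB);
+ *   NV_ENC_INITIALIZE_PARAMS params = { NV_ENC_INITIALIZE_PARAMS_VER };
+ *   NV_ENC_CONFIG cfg = { NV_ENC_CONFIG_VER };
+ *   params.encodeConfig = &cfg;
+ *   enc.CreateDefaultEncoderParams(&params, NV_ENC_CODEC_H264_GUID,
+ *                                  NV_ENC_PRESET_LOW_LATENCY_HQ_GUID);
+ *   enc.CreateEncoder(&params);
+ *   const NvEncInputFrame* frame = enc.GetNextInputFrame();
+ *   ID3D11Texture2D* pInputTex = (ID3D11Texture2D*)frame->inputPtr;
+ *   pContext->CopyResource(pInputTex, pSrcTexture);  // fill the input texture
+ *   std::vector<std::vector<uint8_t>> packets;
+ *   enc.EncodeFrame(packets);   // encoded NAL units land in packets
+ *   enc.EndEncode(packets);     // flush queued frames before DestroyEncoder()
+ *   enc.DestroyEncoder();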
+ */ + void ReleaseD3D11Resources(); + +protected: + ID3D11Device *m_pD3D11Device = nullptr; + +private: + ID3D11DeviceContext* m_pD3D11DeviceContext = nullptr; +}; diff --git a/NvCodec/include/cuviddec.h b/NvCodec/include/cuviddec.h new file mode 100644 index 0000000..33d2ffd --- /dev/null +++ b/NvCodec/include/cuviddec.h @@ -0,0 +1,1002 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2019 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*****************************************************************************************************/ +//! \file cuviddec.h +//! NVDECODE API provides video decoding interface to NVIDIA GPU devices. +//! \date 2015-2019 +//! This file contains constants, structure definitions and function prototypes used for decoding. +/*****************************************************************************************************/ + +#if !defined(__CUDA_VIDEO_H__) +#define __CUDA_VIDEO_H__ + +#ifndef __cuda_cuda_h__ +#include +#endif // __cuda_cuda_h__ + +#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020)) +#define __CUVID_DEVPTR64 +#endif +#endif + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + +typedef void *CUvideodecoder; +typedef struct _CUcontextlock_st *CUvideoctxlock; + +/*********************************************************************************/ +//! \enum cudaVideoCodec +//! Video codec enums +//! 
These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures +/*********************************************************************************/ +typedef enum cudaVideoCodec_enum { + cudaVideoCodec_MPEG1=0, /**< MPEG1 */ + cudaVideoCodec_MPEG2, /**< MPEG2 */ + cudaVideoCodec_MPEG4, /**< MPEG4 */ + cudaVideoCodec_VC1, /**< VC1 */ + cudaVideoCodec_H264, /**< H264 */ + cudaVideoCodec_JPEG, /**< JPEG */ + cudaVideoCodec_H264_SVC, /**< H264-SVC */ + cudaVideoCodec_H264_MVC, /**< H264-MVC */ + cudaVideoCodec_HEVC, /**< HEVC */ + cudaVideoCodec_VP8, /**< VP8 */ + cudaVideoCodec_VP9, /**< VP9 */ + cudaVideoCodec_NumCodecs, /**< Max codecs */ + // Uncompressed YUV + cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< Y,U,V (4:2:0) */ + cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,V,U (4:2:0) */ + cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,UV (4:2:0) */ + cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), /**< YUYV/YUY2 (4:2:2) */ + cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) /**< UYVY (4:2:2) */ +} cudaVideoCodec; + +/*********************************************************************************/ +//! \enum cudaVideoSurfaceFormat +//! Video surface format enums used for output format of decoded output +//! These enums are used in CUVIDDECODECREATEINFO structure +/*********************************************************************************/ +typedef enum cudaVideoSurfaceFormat_enum { + cudaVideoSurfaceFormat_NV12=0, /**< Semi-Planar YUV [Y plane followed by interleaved UV plane] */ + cudaVideoSurfaceFormat_P016=1, /**< 16 bit Semi-Planar YUV [Y plane followed by interleaved UV plane]. + Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ + cudaVideoSurfaceFormat_YUV444=2, /**< Planar YUV [Y plane followed by U and V planes] */ + cudaVideoSurfaceFormat_YUV444_16Bit=3, /**< 16 bit Planar YUV [Y plane followed by U and V planes]. + Can be used for 10 bit(6LSB bits 0), 12 bit (4LSB bits 0) */ +} cudaVideoSurfaceFormat; + +/******************************************************************************************************************/ +//! \enum cudaVideoDeinterlaceMode +//! Deinterlacing mode enums +//! These enums are used in CUVIDDECODECREATEINFO structure +//! Use cudaVideoDeinterlaceMode_Weave for progressive content and for content that doesn't need deinterlacing +//! cudaVideoDeinterlaceMode_Adaptive needs more video memory than other DImodes +/******************************************************************************************************************/ +typedef enum cudaVideoDeinterlaceMode_enum { + cudaVideoDeinterlaceMode_Weave=0, /**< Weave both fields (no deinterlacing) */ + cudaVideoDeinterlaceMode_Bob, /**< Drop one field */ + cudaVideoDeinterlaceMode_Adaptive /**< Adaptive deinterlacing */ +} cudaVideoDeinterlaceMode; + +/**************************************************************************************************************/ +//! \enum cudaVideoChromaFormat +//! Chroma format enums +//! 
These enums are used in CUVIDDECODECREATEINFO and CUVIDDECODECAPS structures +/**************************************************************************************************************/ +typedef enum cudaVideoChromaFormat_enum { + cudaVideoChromaFormat_Monochrome=0, /**< MonoChrome */ + cudaVideoChromaFormat_420, /**< YUV 4:2:0 */ + cudaVideoChromaFormat_422, /**< YUV 4:2:2 */ + cudaVideoChromaFormat_444 /**< YUV 4:4:4 */ +} cudaVideoChromaFormat; + +/*************************************************************************************************************/ +//! \enum cudaVideoCreateFlags +//! Decoder flag enums to select preferred decode path +//! cudaVideoCreate_Default and cudaVideoCreate_PreferCUVID are most optimized, use these whenever possible +/*************************************************************************************************************/ +typedef enum cudaVideoCreateFlags_enum { + cudaVideoCreate_Default = 0x00, /**< Default operation mode: use dedicated video engines */ + cudaVideoCreate_PreferCUDA = 0x01, /**< Use CUDA-based decoder (requires valid vidLock object for multi-threading) */ + cudaVideoCreate_PreferDXVA = 0x02, /**< Go through DXVA internally if possible (requires D3D9 interop) */ + cudaVideoCreate_PreferCUVID = 0x04 /**< Use dedicated video engines directly */ +} cudaVideoCreateFlags; + + +/*************************************************************************/ +//! \enum cuvidDecodeStatus +//! Decode status enums +//! These enums are used in CUVIDGETDECODESTATUS structure +/*************************************************************************/ +typedef enum cuvidDecodeStatus_enum +{ + cuvidDecodeStatus_Invalid = 0, // Decode status is not valid + cuvidDecodeStatus_InProgress = 1, // Decode is in progress + cuvidDecodeStatus_Success = 2, // Decode is completed without any errors + // 3 to 7 enums are reserved for future use + cuvidDecodeStatus_Error = 8, // Decode is completed with an error (error is not concealed) + cuvidDecodeStatus_Error_Concealed = 9, // Decode is completed with an error and error is concealed +} cuvidDecodeStatus; + +/**************************************************************************************************************/ +//! \struct CUVIDDECODECAPS; +//! 
This structure is used in cuvidGetDecoderCaps API +/**************************************************************************************************************/ +typedef struct _CUVIDDECODECAPS +{ + cudaVideoCodec eCodecType; /**< IN: cudaVideoCodec_XXX */ + cudaVideoChromaFormat eChromaFormat; /**< IN: cudaVideoChromaFormat_XXX */ + unsigned int nBitDepthMinus8; /**< IN: The Value "BitDepth minus 8" */ + unsigned int reserved1[3]; /**< Reserved for future use - set to zero */ + + unsigned char bIsSupported; /**< OUT: 1 if codec supported, 0 if not supported */ + unsigned char reserved2; /**< Reserved for future use - set to zero */ + unsigned short nOutputFormatMask; /**< OUT: each bit represents corresponding cudaVideoSurfaceFormat enum */ + unsigned int nMaxWidth; /**< OUT: Max supported coded width in pixels */ + unsigned int nMaxHeight; /**< OUT: Max supported coded height in pixels */ + unsigned int nMaxMBCount; /**< OUT: Max supported macroblock count + CodedWidth*CodedHeight/256 must be <= nMaxMBCount */ + unsigned short nMinWidth; /**< OUT: Min supported coded width in pixels */ + unsigned short nMinHeight; /**< OUT: Min supported coded height in pixels */ + unsigned int reserved3[11]; /**< Reserved for future use - set to zero */ +} CUVIDDECODECAPS; + +/**************************************************************************************************************/ +//! \struct CUVIDDECODECREATEINFO +//! This structure is used in cuvidCreateDecoder API +/**************************************************************************************************************/ +typedef struct _CUVIDDECODECREATEINFO +{ + unsigned long ulWidth; /**< IN: Coded sequence width in pixels */ + unsigned long ulHeight; /**< IN: Coded sequence height in pixels */ + unsigned long ulNumDecodeSurfaces; /**< IN: Maximum number of internal decode surfaces */ + cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */ + cudaVideoChromaFormat ChromaFormat; /**< IN: cudaVideoChromaFormat_XXX */ + unsigned long ulCreationFlags; /**< IN: Decoder creation flags (cudaVideoCreateFlags_XXX) */ + unsigned long bitDepthMinus8; /**< IN: The value "BitDepth minus 8" */ + unsigned long ulIntraDecodeOnly; /**< IN: Set 1 only if video has all intra frames (default value is 0). This will + optimize video memory for Intra frames only decoding. The support is limited + to specific codecs - H264, HEVC, VP9, the flag will be ignored for codecs which + are not supported. However decoding might fail if the flag is enabled in case + of supported codecs for regular bit streams having P and/or B frames. 
*/ + unsigned long ulMaxWidth; /**< IN: Coded sequence max width in pixels used with reconfigure Decoder */ + unsigned long ulMaxHeight; /**< IN: Coded sequence max height in pixels used with reconfigure Decoder */ + unsigned long Reserved1; /**< Reserved for future use - set to zero */ + /** + * IN: area of the frame that should be displayed + */ + struct { + short left; + short top; + short right; + short bottom; + } display_area; + + cudaVideoSurfaceFormat OutputFormat; /**< IN: cudaVideoSurfaceFormat_XXX */ + cudaVideoDeinterlaceMode DeinterlaceMode; /**< IN: cudaVideoDeinterlaceMode_XXX */ + unsigned long ulTargetWidth; /**< IN: Post-processed output width (Should be aligned to 2) */ + unsigned long ulTargetHeight; /**< IN: Post-processed output height (Should be aligned to 2) */ + unsigned long ulNumOutputSurfaces; /**< IN: Maximum number of output surfaces simultaneously mapped */ + CUvideoctxlock vidLock; /**< IN: If non-NULL, context lock used for synchronizing ownership of + the cuda context. Needed for cudaVideoCreate_PreferCUDA decode */ + /** + * IN: target rectangle in the output frame (for aspect ratio conversion) + * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used + */ + struct { + short left; + short top; + short right; + short bottom; + } target_rect; + unsigned long Reserved2[5]; /**< Reserved for future use - set to zero */ +} CUVIDDECODECREATEINFO; + +/*********************************************************/ +//! \struct CUVIDH264DPBENTRY +//! H.264 DPB entry +//! This structure is used in CUVIDH264PICPARAMS structure +/*********************************************************/ +typedef struct _CUVIDH264DPBENTRY +{ + int PicIdx; /**< picture index of reference frame */ + int FrameIdx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */ + int is_long_term; /**< 0=short term reference, 1=long term reference */ + int not_existing; /**< non-existing reference frame (corresponding PicIdx should be set to -1) */ + int used_for_reference; /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */ + int FieldOrderCnt[2]; /**< field order count of top and bottom fields */ +} CUVIDH264DPBENTRY; + +/************************************************************/ +//! \struct CUVIDH264MVCEXT +//! H.264 MVC picture parameters ext +//! This structure is used in CUVIDH264PICPARAMS structure +/************************************************************/ +typedef struct _CUVIDH264MVCEXT +{ + int num_views_minus1; /**< Max number of coded views minus 1 in video : Range - 0 to 1023 */ + int view_id; /**< view identifier */ + unsigned char inter_view_flag; /**< 1 if used for inter-view prediction, 0 if not */ + unsigned char num_inter_view_refs_l0; /**< number of inter-view ref pics in RefPicList0 */ + unsigned char num_inter_view_refs_l1; /**< number of inter-view ref pics in RefPicList1 */ + unsigned char MVCReserved8Bits; /**< Reserved bits */ + int InterViewRefsL0[16]; /**< view id of the i-th view component for inter-view prediction in RefPicList0 */ + int InterViewRefsL1[16]; /**< view id of the i-th view component for inter-view prediction in RefPicList1 */ +} CUVIDH264MVCEXT; + +/*********************************************************/ +//! \struct CUVIDH264SVCEXT +//! H.264 SVC picture parameters ext +//! 
This structure is used in CUVIDH264PICPARAMS structure +/*********************************************************/ +typedef struct _CUVIDH264SVCEXT +{ + unsigned char profile_idc; + unsigned char level_idc; + unsigned char DQId; + unsigned char DQIdMax; + unsigned char disable_inter_layer_deblocking_filter_idc; + unsigned char ref_layer_chroma_phase_y_plus1; + signed char inter_layer_slice_alpha_c0_offset_div2; + signed char inter_layer_slice_beta_offset_div2; + + unsigned short DPBEntryValidFlag; + unsigned char inter_layer_deblocking_filter_control_present_flag; + unsigned char extended_spatial_scalability_idc; + unsigned char adaptive_tcoeff_level_prediction_flag; + unsigned char slice_header_restriction_flag; + unsigned char chroma_phase_x_plus1_flag; + unsigned char chroma_phase_y_plus1; + + unsigned char tcoeff_level_prediction_flag; + unsigned char constrained_intra_resampling_flag; + unsigned char ref_layer_chroma_phase_x_plus1_flag; + unsigned char store_ref_base_pic_flag; + unsigned char Reserved8BitsA; + unsigned char Reserved8BitsB; + + short scaled_ref_layer_left_offset; + short scaled_ref_layer_top_offset; + short scaled_ref_layer_right_offset; + short scaled_ref_layer_bottom_offset; + unsigned short Reserved16Bits; + struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. + Linked list ends at the target layer. */ + int bRefBaseLayer; /**< whether to store ref base pic */ +} CUVIDH264SVCEXT; + +/******************************************************/ +//! \struct CUVIDH264PICPARAMS +//! H.264 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/******************************************************/ +typedef struct _CUVIDH264PICPARAMS +{ + // SPS + int log2_max_frame_num_minus4; + int pic_order_cnt_type; + int log2_max_pic_order_cnt_lsb_minus4; + int delta_pic_order_always_zero_flag; + int frame_mbs_only_flag; + int direct_8x8_inference_flag; + int num_ref_frames; // NOTE: shall meet level 4.1 restrictions + unsigned char residual_colour_transform_flag; + unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit supported) + unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit supported) + unsigned char qpprime_y_zero_transform_bypass_flag; + // PPS + int entropy_coding_mode_flag; + int pic_order_present_flag; + int num_ref_idx_l0_active_minus1; + int num_ref_idx_l1_active_minus1; + int weighted_pred_flag; + int weighted_bipred_idc; + int pic_init_qp_minus26; + int deblocking_filter_control_present_flag; + int redundant_pic_cnt_present_flag; + int transform_8x8_mode_flag; + int MbaffFrameFlag; + int constrained_intra_pred_flag; + int chroma_qp_index_offset; + int second_chroma_qp_index_offset; + int ref_pic_flag; + int frame_num; + int CurrFieldOrderCnt[2]; + // DPB + CUVIDH264DPBENTRY dpb[16]; // List of reference frames within the DPB + // Quantization Matrices (raster-order) + unsigned char WeightScale4x4[6][16]; + unsigned char WeightScale8x8[2][64]; + // FMO/ASO + unsigned char fmo_aso_enable; + unsigned char num_slice_groups_minus1; + unsigned char slice_group_map_type; + signed char pic_init_qs_minus26; + unsigned int slice_group_change_rate_minus1; + union + { + unsigned long long slice_group_map_addr; + const unsigned char *pMb2SliceGroupMap; + } fmo; + unsigned int Reserved[12]; + // SVC/MVC + union + { + CUVIDH264MVCEXT mvcext; + CUVIDH264SVCEXT svcext; + }; +} CUVIDH264PICPARAMS; + + +/********************************************************/ +//! \struct CUVIDMPEG2PICPARAMS +//! 
MPEG-2 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/********************************************************/ +typedef struct _CUVIDMPEG2PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + int picture_coding_type; + int full_pel_forward_vector; + int full_pel_backward_vector; + int f_code[2][2]; + int intra_dc_precision; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int top_field_first; + // Quantization matrices (raster order) + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; +} CUVIDMPEG2PICPARAMS; + +// MPEG-4 has VOP types instead of Picture types +#define I_VOP 0 +#define P_VOP 1 +#define B_VOP 2 +#define S_VOP 3 + +/*******************************************************/ +//! \struct CUVIDMPEG4PICPARAMS +//! MPEG-4 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/*******************************************************/ +typedef struct _CUVIDMPEG4PICPARAMS +{ + int ForwardRefIdx; // Picture index of forward reference (P/B-frames) + int BackwardRefIdx; // Picture index of backward reference (B-frames) + // VOL + int video_object_layer_width; + int video_object_layer_height; + int vop_time_increment_bitcount; + int top_field_first; + int resync_marker_disable; + int quant_type; + int quarter_sample; + int short_video_header; + int divx_flags; + // VOP + int vop_coding_type; + int vop_coded; + int vop_rounding_type; + int alternate_vertical_scan_flag; + int interlaced; + int vop_fcode_forward; + int vop_fcode_backward; + int trd[2]; + int trb[2]; + // Quantization matrices (raster order) + unsigned char QuantMatrixIntra[64]; + unsigned char QuantMatrixInter[64]; + int gmc_enabled; +} CUVIDMPEG4PICPARAMS; + +/********************************************************/ +//! \struct CUVIDVC1PICPARAMS +//! VC1 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/********************************************************/ +typedef struct _CUVIDVC1PICPARAMS +{ + int ForwardRefIdx; /**< Picture index of forward reference (P/B-frames) */ + int BackwardRefIdx; /**< Picture index of backward reference (B-frames) */ + int FrameWidth; /**< Actual frame width */ + int FrameHeight; /**< Actual frame height */ + // PICTURE + int intra_pic_flag; /**< Set to 1 for I,BI frames */ + int ref_pic_flag; /**< Set to 1 for I,P frames */ + int progressive_fcm; /**< Progressive frame */ + // SEQUENCE + int profile; + int postprocflag; + int pulldown; + int interlace; + int tfcntrflag; + int finterpflag; + int psf; + int multires; + int syncmarker; + int rangered; + int maxbframes; + // ENTRYPOINT + int panscan_flag; + int refdist_flag; + int extended_mv; + int dquant; + int vstransform; + int loopfilter; + int fastuvmc; + int overlap; + int quantizer; + int extended_dmv; + int range_mapy_flag; + int range_mapy; + int range_mapuv_flag; + int range_mapuv; + int rangeredfrm; // range reduction state +} CUVIDVC1PICPARAMS; + +/***********************************************************/ +//! \struct CUVIDJPEGPICPARAMS +//! JPEG picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/***********************************************************/ +typedef struct _CUVIDJPEGPICPARAMS +{ + int Reserved; +} CUVIDJPEGPICPARAMS; + + +/*******************************************************/ +//! 
\struct CUVIDHEVCPICPARAMS +//! HEVC picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/*******************************************************/ +typedef struct _CUVIDHEVCPICPARAMS +{ + // sps + int pic_width_in_luma_samples; + int pic_height_in_luma_samples; + unsigned char log2_min_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_luma_coding_block_size; + unsigned char log2_min_transform_block_size_minus2; + unsigned char log2_diff_max_min_transform_block_size; + unsigned char pcm_enabled_flag; + unsigned char log2_min_pcm_luma_coding_block_size_minus3; + unsigned char log2_diff_max_min_pcm_luma_coding_block_size; + unsigned char pcm_sample_bit_depth_luma_minus1; + + unsigned char pcm_sample_bit_depth_chroma_minus1; + unsigned char pcm_loop_filter_disabled_flag; + unsigned char strong_intra_smoothing_enabled_flag; + unsigned char max_transform_hierarchy_depth_intra; + unsigned char max_transform_hierarchy_depth_inter; + unsigned char amp_enabled_flag; + unsigned char separate_colour_plane_flag; + unsigned char log2_max_pic_order_cnt_lsb_minus4; + + unsigned char num_short_term_ref_pic_sets; + unsigned char long_term_ref_pics_present_flag; + unsigned char num_long_term_ref_pics_sps; + unsigned char sps_temporal_mvp_enabled_flag; + unsigned char sample_adaptive_offset_enabled_flag; + unsigned char scaling_list_enable_flag; + unsigned char IrapPicFlag; + unsigned char IdrPicFlag; + + unsigned char bit_depth_luma_minus8; + unsigned char bit_depth_chroma_minus8; + //sps/pps extension fields + unsigned char log2_max_transform_skip_block_size_minus2; + unsigned char log2_sao_offset_scale_luma; + unsigned char log2_sao_offset_scale_chroma; + unsigned char high_precision_offsets_enabled_flag; + unsigned char reserved1[10]; + + // pps + unsigned char dependent_slice_segments_enabled_flag; + unsigned char slice_segment_header_extension_present_flag; + unsigned char sign_data_hiding_enabled_flag; + unsigned char cu_qp_delta_enabled_flag; + unsigned char diff_cu_qp_delta_depth; + signed char init_qp_minus26; + signed char pps_cb_qp_offset; + signed char pps_cr_qp_offset; + + unsigned char constrained_intra_pred_flag; + unsigned char weighted_pred_flag; + unsigned char weighted_bipred_flag; + unsigned char transform_skip_enabled_flag; + unsigned char transquant_bypass_enabled_flag; + unsigned char entropy_coding_sync_enabled_flag; + unsigned char log2_parallel_merge_level_minus2; + unsigned char num_extra_slice_header_bits; + + unsigned char loop_filter_across_tiles_enabled_flag; + unsigned char loop_filter_across_slices_enabled_flag; + unsigned char output_flag_present_flag; + unsigned char num_ref_idx_l0_default_active_minus1; + unsigned char num_ref_idx_l1_default_active_minus1; + unsigned char lists_modification_present_flag; + unsigned char cabac_init_present_flag; + unsigned char pps_slice_chroma_qp_offsets_present_flag; + + unsigned char deblocking_filter_override_enabled_flag; + unsigned char pps_deblocking_filter_disabled_flag; + signed char pps_beta_offset_div2; + signed char pps_tc_offset_div2; + unsigned char tiles_enabled_flag; + unsigned char uniform_spacing_flag; + unsigned char num_tile_columns_minus1; + unsigned char num_tile_rows_minus1; + + unsigned short column_width_minus1[21]; + unsigned short row_height_minus1[21]; + + // sps and pps extension HEVC-main 444 + unsigned char sps_range_extension_flag; + unsigned char transform_skip_rotation_enabled_flag; + unsigned char transform_skip_context_enabled_flag; + unsigned char 
implicit_rdpcm_enabled_flag; + + unsigned char explicit_rdpcm_enabled_flag; + unsigned char extended_precision_processing_flag; + unsigned char intra_smoothing_disabled_flag; + unsigned char persistent_rice_adaptation_enabled_flag; + + unsigned char cabac_bypass_alignment_enabled_flag; + unsigned char pps_range_extension_flag; + unsigned char cross_component_prediction_enabled_flag; + unsigned char chroma_qp_offset_list_enabled_flag; + + unsigned char diff_cu_chroma_qp_offset_depth; + unsigned char chroma_qp_offset_list_len_minus1; + signed char cb_qp_offset_list[6]; + + signed char cr_qp_offset_list[6]; + unsigned char reserved2[2]; + + unsigned int reserved3[8]; + + // RefPicSets + int NumBitsForShortTermRPSInSlice; + int NumDeltaPocsOfRefRpsIdx; + int NumPocTotalCurr; + int NumPocStCurrBefore; + int NumPocStCurrAfter; + int NumPocLtCurr; + int CurrPicOrderCntVal; + int RefPicIdx[16]; // [refpic] Indices of valid reference pictures (-1 if unused for reference) + int PicOrderCntVal[16]; // [refpic] + unsigned char IsLongTerm[16]; // [refpic] 0=not a long-term reference, 1=long-term reference + unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15) + unsigned char RefPicSetStCurrAfter[8]; // [0..NumPocStCurrAfter-1] -> refpic (0..15) + unsigned char RefPicSetLtCurr[8]; // [0..NumPocLtCurr-1] -> refpic (0..15) + unsigned char RefPicSetInterLayer0[8]; + unsigned char RefPicSetInterLayer1[8]; + unsigned int reserved4[12]; + + // scaling lists (diag order) + unsigned char ScalingList4x4[6][16]; // [matrixId][i] + unsigned char ScalingList8x8[6][64]; // [matrixId][i] + unsigned char ScalingList16x16[6][64]; // [matrixId][i] + unsigned char ScalingList32x32[2][64]; // [matrixId][i] + unsigned char ScalingListDCCoeff16x16[6]; // [matrixId] + unsigned char ScalingListDCCoeff32x32[2]; // [matrixId] +} CUVIDHEVCPICPARAMS; + + +/***********************************************************/ +//! \struct CUVIDVP8PICPARAMS +//! VP8 picture parameters +//! This structure is used in CUVIDPICPARAMS structure +/***********************************************************/ +typedef struct _CUVIDVP8PICPARAMS +{ + int width; + int height; + unsigned int first_partition_size; + //Frame Indexes + unsigned char LastRefIdx; + unsigned char GoldenRefIdx; + unsigned char AltRefIdx; + union { + struct { + unsigned char frame_type : 1; /**< 0 = KEYFRAME, 1 = INTERFRAME */ + unsigned char version : 3; + unsigned char show_frame : 1; + unsigned char update_mb_segmentation_data : 1; /**< Must be 0 if segmentation is not enabled */ + unsigned char Reserved2Bits : 2; + }vp8_frame_tag; + unsigned char wFrameTagFlags; + }; + unsigned char Reserved1[4]; + unsigned int Reserved2[3]; +} CUVIDVP8PICPARAMS; + +/***********************************************************/ +//! \struct CUVIDVP9PICPARAMS +//! VP9 picture parameters +//! 
This structure is used in CUVIDPICPARAMS structure +/***********************************************************/ +typedef struct _CUVIDVP9PICPARAMS +{ + unsigned int width; + unsigned int height; + + //Frame Indices + unsigned char LastRefIdx; + unsigned char GoldenRefIdx; + unsigned char AltRefIdx; + unsigned char colorSpace; + + unsigned short profile : 3; + unsigned short frameContextIdx : 2; + unsigned short frameType : 1; + unsigned short showFrame : 1; + unsigned short errorResilient : 1; + unsigned short frameParallelDecoding : 1; + unsigned short subSamplingX : 1; + unsigned short subSamplingY : 1; + unsigned short intraOnly : 1; + unsigned short allow_high_precision_mv : 1; + unsigned short refreshEntropyProbs : 1; + unsigned short reserved2Bits : 2; + + unsigned short reserved16Bits; + + unsigned char refFrameSignBias[4]; + + unsigned char bitDepthMinus8Luma; + unsigned char bitDepthMinus8Chroma; + unsigned char loopFilterLevel; + unsigned char loopFilterSharpness; + + unsigned char modeRefLfEnabled; + unsigned char log2_tile_columns; + unsigned char log2_tile_rows; + + unsigned char segmentEnabled : 1; + unsigned char segmentMapUpdate : 1; + unsigned char segmentMapTemporalUpdate : 1; + unsigned char segmentFeatureMode : 1; + unsigned char reserved4Bits : 4; + + + unsigned char segmentFeatureEnable[8][4]; + short segmentFeatureData[8][4]; + unsigned char mb_segment_tree_probs[7]; + unsigned char segment_pred_probs[3]; + unsigned char reservedSegment16Bits[2]; + + int qpYAc; + int qpYDc; + int qpChDc; + int qpChAc; + + unsigned int activeRefIdx[3]; + unsigned int resetFrameContext; + unsigned int mcomp_filter_type; + unsigned int mbRefLfDelta[4]; + unsigned int mbModeLfDelta[2]; + unsigned int frameTagSize; + unsigned int offsetToDctParts; + unsigned int reserved128Bits[4]; + +} CUVIDVP9PICPARAMS; + + +/******************************************************************************************/ +//! \struct CUVIDPICPARAMS +//! Picture parameters for decoding +//! This structure is used in cuvidDecodePicture API +//! 
IN for cuvidDecodePicture +/******************************************************************************************/ +typedef struct _CUVIDPICPARAMS +{ + int PicWidthInMbs; /**< IN: Coded frame size in macroblocks */ + int FrameHeightInMbs; /**< IN: Coded frame height in macroblocks */ + int CurrPicIdx; /**< IN: Output index of the current picture */ + int field_pic_flag; /**< IN: 0=frame picture, 1=field picture */ + int bottom_field_flag; /**< IN: 0=top field, 1=bottom field (ignored if field_pic_flag=0) */ + int second_field; /**< IN: Second field of a complementary field pair */ + // Bitstream data + unsigned int nBitstreamDataLen; /**< IN: Number of bytes in bitstream data buffer */ + const unsigned char *pBitstreamData; /**< IN: Ptr to bitstream data for this picture (slice-layer) */ + unsigned int nNumSlices; /**< IN: Number of slices in this picture */ + const unsigned int *pSliceDataOffsets; /**< IN: nNumSlices entries, contains offset of each slice within + the bitstream data buffer */ + int ref_pic_flag; /**< IN: This picture is a reference picture */ + int intra_pic_flag; /**< IN: This picture is entirely intra coded */ + unsigned int Reserved[30]; /**< Reserved for future use */ + // IN: Codec-specific data + union { + CUVIDMPEG2PICPARAMS mpeg2; /**< Also used for MPEG-1 */ + CUVIDH264PICPARAMS h264; + CUVIDVC1PICPARAMS vc1; + CUVIDMPEG4PICPARAMS mpeg4; + CUVIDJPEGPICPARAMS jpeg; + CUVIDHEVCPICPARAMS hevc; + CUVIDVP8PICPARAMS vp8; + CUVIDVP9PICPARAMS vp9; + unsigned int CodecReserved[1024]; + } CodecSpecific; +} CUVIDPICPARAMS; + + +/******************************************************/ +//! \struct CUVIDPROCPARAMS +//! Picture parameters for postprocessing +//! This structure is used in cuvidMapVideoFrame API +/******************************************************/ +typedef struct _CUVIDPROCPARAMS +{ + int progressive_frame; /**< IN: Input is progressive (deinterlace_mode will be ignored) */ + int second_field; /**< IN: Output the second field (ignored if deinterlace mode is Weave) */ + int top_field_first; /**< IN: Input frame is top field first (1st field is top, 2nd field is bottom) */ + int unpaired_field; /**< IN: Input only contains one field (2nd field is invalid) */ + // The fields below are used for raw YUV input + unsigned int reserved_flags; /**< Reserved for future use (set to zero) */ + unsigned int reserved_zero; /**< Reserved (set to zero) */ + unsigned long long raw_input_dptr; /**< IN: Input CUdeviceptr for raw YUV extensions */ + unsigned int raw_input_pitch; /**< IN: pitch in bytes of raw YUV input (should be aligned appropriately) */ + unsigned int raw_input_format; /**< IN: Input YUV format (cudaVideoCodec_enum) */ + unsigned long long raw_output_dptr; /**< IN: Output CUdeviceptr for raw YUV extensions */ + unsigned int raw_output_pitch; /**< IN: pitch in bytes of raw YUV output (should be aligned appropriately) */ + unsigned int Reserved1; /**< Reserved for future use (set to zero) */ + CUstream output_stream; /**< IN: stream object used by cuvidMapVideoFrame */ + unsigned int Reserved[46]; /**< Reserved for future use (set to zero) */ + void *Reserved2[2]; /**< Reserved for future use (set to zero) */ +} CUVIDPROCPARAMS; + +/*********************************************************************************************************/ +//! \struct CUVIDGETDECODESTATUS +//! Struct for reporting decode status. +//! This structure is used in cuvidGetDecodeStatus API. 
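+//! A minimal illustrative sketch (not the only valid pattern), assuming an open decoder
+//! hDecoder and a picture index nPicIdx already submitted via cuvidDecodePicture():
+//!   CUVIDGETDECODESTATUS status;
+//!   memset(&status, 0, sizeof(status));
+//!   if (cuvidGetDecodeStatus(hDecoder, nPicIdx, &status) == CUDA_SUCCESS &&
+//!       (status.decodeStatus == cuvidDecodeStatus_Error ||
+//!        status.decodeStatus == cuvidDecodeStatus_Error_Concealed)) {
+//!       // handle or log the decode error for this picture
+//!   }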
+/*********************************************************************************************************/ +typedef struct _CUVIDGETDECODESTATUS +{ + cuvidDecodeStatus decodeStatus; + unsigned int reserved[31]; + void *pReserved[8]; +} CUVIDGETDECODESTATUS; + +/****************************************************/ +//! \struct CUVIDRECONFIGUREDECODERINFO +//! Struct for decoder reset +//! This structure is used in cuvidReconfigureDecoder() API +/****************************************************/ +typedef struct _CUVIDRECONFIGUREDECODERINFO +{ + unsigned int ulWidth; /**< IN: Coded sequence width in pixels, MUST be < = ulMaxWidth defined at CUVIDDECODECREATEINFO */ + unsigned int ulHeight; /**< IN: Coded sequence height in pixels, MUST be < = ulMaxHeight defined at CUVIDDECODECREATEINFO */ + unsigned int ulTargetWidth; /**< IN: Post processed output width */ + unsigned int ulTargetHeight; /**< IN: Post Processed output height */ + unsigned int ulNumDecodeSurfaces; /**< IN: Maximum number of internal decode surfaces */ + unsigned int reserved1[12]; /**< Reserved for future use. Set to Zero */ + /** + * IN: Area of frame to be displayed. Use-case : Source Cropping + */ + struct { + short left; + short top; + short right; + short bottom; + } display_area; + /** + * IN: Target Rectangle in the OutputFrame. Use-case : Aspect ratio Conversion + */ + struct { + short left; + short top; + short right; + short bottom; + } target_rect; + unsigned int reserved2[11]; /**< Reserved for future use. Set to Zero */ +} CUVIDRECONFIGUREDECODERINFO; + + +/***********************************************************************************************************/ +//! VIDEO_DECODER +//! +//! In order to minimize decode latencies, there should be always at least 2 pictures in the decode +//! queue at any time, in order to make sure that all decode engines are always busy. +//! +//! Overall data flow: +//! - cuvidGetDecoderCaps(...) +//! - cuvidCreateDecoder(...) +//! - For each picture: +//! + cuvidDecodePicture(N) +//! + cuvidMapVideoFrame(N-4) +//! + do some processing in cuda +//! + cuvidUnmapVideoFrame(N-4) +//! + cuvidDecodePicture(N+1) +//! + cuvidMapVideoFrame(N-3) +//! + ... +//! - cuvidDestroyDecoder(...) +//! +//! NOTE: +//! - When the cuda context is created from a D3D device, the D3D device must also be created +//! with the D3DCREATE_MULTITHREADED flag. +//! - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces) +//! - cuvidDecodePicture may block the calling thread if there are too many pictures pending +//! in the decode queue +/***********************************************************************************************************/ + + +/**********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc) +//! Queries decode capabilities of NVDEC-HW based on CodecType, ChromaFormat and BitDepthMinus8 parameters. +//! 1. Application fills IN parameters CodecType, ChromaFormat and BitDepthMinus8 of CUVIDDECODECAPS structure +//! 2. On calling cuvidGetDecoderCaps, driver fills OUT parameters if the IN parameters are supported +//! If IN parameters passed to the driver are not supported by NVDEC-HW, then all OUT params are set to 0. +//! E.g. on Geforce GTX 960: +//! App fills - eCodecType = cudaVideoCodec_H264; eChromaFormat = cudaVideoChromaFormat_420; nBitDepthMinus8 = 0; +//! 
Given IN parameters are supported, hence driver fills: bIsSupported = 1; nMinWidth = 48; nMinHeight = 16; +//! nMaxWidth = 4096; nMaxHeight = 4096; nMaxMBCount = 65536; +//! CodedWidth*CodedHeight/256 must be less than or equal to nMaxMBCount +/**********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS *pdc); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci) +//! Create the decoder object based on pdci. A handle to the created decoder is returned +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder) +//! Destroy the decoder object +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams) +//! Decode a single picture (field or frame) +//! Kicks off HW decoding +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams); + +/************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidGetDecodeStatus(CUvideodecoder hDecoder, int nPicIdx); +//! Get the decode status for frame corresponding to nPicIdx +/************************************************************************************************************/ +extern CUresult CUDAAPI cuvidGetDecodeStatus(CUvideodecoder hDecoder, int nPicIdx, CUVIDGETDECODESTATUS* pDecodeStatus); + +/*********************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidReconfigureDecoder(CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams) +//! Used to reuse single decoder for multiple clips. Currently supports resolution change, resize params, display area +//! params, target area params change for same codec. Must be called during CUVIDPARSERPARAMS::pfnSequenceCallback +/*********************************************************************************************************/ +extern CUresult CUDAAPI cuvidReconfigureDecoder(CUvideodecoder hDecoder, CUVIDRECONFIGUREDECODERINFO *pDecReconfigParams); + + +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) +/************************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, +//! unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); +//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated +//! 
pitch of the video frame +/************************************************************************************************************************/ +extern CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, + unsigned int *pDevPtr, unsigned int *pPitch, + CUVIDPROCPARAMS *pVPP); + +/*****************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr) +//! Unmap a previously mapped video frame +/*****************************************************************************************************/ +extern CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr); +#endif + +#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) +/****************************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, +//! unsigned int * pPitch, CUVIDPROCPARAMS *pVPP); +//! Post-process and map video frame corresponding to nPicIdx for use in cuda. Returns cuda device pointer and associated +//! pitch of the video frame +/****************************************************************************************************************************/ +extern CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, + unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); + +/**************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); +//! Unmap a previously mapped video frame +/**************************************************************************************************/ +extern CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); + +#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) +#define cuvidMapVideoFrame cuvidMapVideoFrame64 +#define cuvidUnmapVideoFrame cuvidUnmapVideoFrame64 +#endif +#endif + + +/********************************************************************************************************************/ +//! +//! Context-locking: to facilitate multi-threaded implementations, the following 4 functions +//! provide a simple mutex-style host synchronization. If a non-NULL context is specified +//! in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given +//! context before making any cuda calls. +//! A multi-threaded application could create a lock associated with a context handle so that +//! multiple threads can safely share the same cuda context: +//! - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context +//! that can be passed to cuvidCtxLockCreate. +//! - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section. +//! +//! NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video +//! decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls). 
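+//! Illustrative sketch, assuming cuCtx is a floating CUcontext created by the application
+//! (identifiers other than the cuvid*/CCtxAutoLock ones are placeholders):
+//!   CUvideoctxlock lock = NULL;
+//!   cuvidCtxLockCreate(&lock, cuCtx);   // also pass the lock as CUVIDDECODECREATEINFO::vidLock
+//!   {
+//!       CCtxAutoLock guard(lock);       // acquires the lock for this scope (see class below)
+//!       // ... cuda and cuvidMapVideoFrame/cuvidUnmapVideoFrame calls ...
+//!   }                                   // lock released by the guard's destructor
+//!   cuvidCtxLockDestroy(lock);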
+/********************************************************************************************************************/ + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx) +//! This API is used to create CtxLock object +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck) +//! This API is used to free CtxLock object +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck); + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags) +//! This API is used to acquire ctxlock +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); + +/********************************************************************************************************************/ +//! \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags) +//! This API is used to release ctxlock +/********************************************************************************************************************/ +extern CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); + +/**********************************************************************************************/ + + +#if defined(__cplusplus) +} +// Auto-lock helper for C++ applications +class CCtxAutoLock +{ +private: + CUvideoctxlock m_ctx; +public: + CCtxAutoLock(CUvideoctxlock ctx):m_ctx(ctx) { cuvidCtxLock(m_ctx,0); } + ~CCtxAutoLock() { cuvidCtxUnlock(m_ctx,0); } +}; +#endif /* __cplusplus */ + +#endif // __CUDA_VIDEO_H__ + diff --git a/NvCodec/include/nvEncodeAPI.h b/NvCodec/include/nvEncodeAPI.h new file mode 100644 index 0000000..4b9ba11 --- /dev/null +++ b/NvCodec/include/nvEncodeAPI.h @@ -0,0 +1,3634 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2019 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file nvEncodeAPI.h + * NVIDIA GPUs - beginning with the Kepler generation - contain a hardware-based encoder + * (referred to as NVENC) which provides fully-accelerated hardware-based video encoding. + * NvEncodeAPI provides the interface for NVIDIA video encoder (NVENC). + * \date 2011-2019 + * This file contains the interface constants, structure definitions and function prototypes. + */ + +#ifndef _NV_ENCODEAPI_H_ +#define _NV_ENCODEAPI_H_ + +#include + +#ifdef _WIN32 +#include +#endif + +#ifdef _MSC_VER +#ifndef _STDINT +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +typedef signed char int8_t; +typedef unsigned char uint8_t; +typedef short int16_t; +typedef unsigned short uint16_t; +#endif +#else +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \addtogroup ENCODER_STRUCTURE NvEncodeAPI Data structures + * @{ + */ + +#ifdef _WIN32 +#define NVENCAPI __stdcall +typedef RECT NVENC_RECT; +#else +#define NVENCAPI +// ========================================================================================= +#ifndef GUID +/*! + * \struct GUID + * Abstracts the GUID structure for non-windows platforms. + */ +// ========================================================================================= +typedef struct +{ + uint32_t Data1; /**< [in]: Specifies the first 8 hexadecimal digits of the GUID. */ + uint16_t Data2; /**< [in]: Specifies the first group of 4 hexadecimal digits. */ + uint16_t Data3; /**< [in]: Specifies the second group of 4 hexadecimal digits. */ + uint8_t Data4[8]; /**< [in]: Array of 8 bytes. The first 2 bytes contain the third group of 4 hexadecimal digits. + The remaining 6 bytes contain the final 12 hexadecimal digits. */ +} GUID; +#endif // GUID + +/** + * \struct _NVENC_RECT + * Defines a Rectangle. Used in ::NV_ENC_PREPROCESS_FRAME. + */ +typedef struct _NVENC_RECT +{ + uint32_t left; /**< [in]: X coordinate of the upper left corner of rectangular area to be specified. */ + uint32_t top; /**< [in]: Y coordinate of the upper left corner of the rectangular area to be specified. */ + uint32_t right; /**< [in]: X coordinate of the bottom right corner of the rectangular area to be specified. */ + uint32_t bottom; /**< [in]: Y coordinate of the bottom right corner of the rectangular area to be specified. */ +} NVENC_RECT; + +#endif // _WIN32 + +/** @} */ /* End of GUID and NVENC_RECT structure grouping*/ + +typedef void* NV_ENC_INPUT_PTR; /**< NVENCODE API input buffer */ +typedef void* NV_ENC_OUTPUT_PTR; /**< NVENCODE API output buffer*/ +typedef void* NV_ENC_REGISTERED_PTR; /**< A Resource that has been registered with NVENCODE API*/ +typedef void* NV_ENC_CUSTREAM_PTR; /**< Pointer to CUstream*/ + +#define NVENCAPI_MAJOR_VERSION 9 +#define NVENCAPI_MINOR_VERSION 1 + +#define NVENCAPI_VERSION (NVENCAPI_MAJOR_VERSION | (NVENCAPI_MINOR_VERSION << 24)) + +/** + * Macro to generate per-structure version for use with API. 
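+ * With NVENCAPI_MAJOR_VERSION 9 and NVENCAPI_MINOR_VERSION 1 above, NVENCAPI_VERSION is
+ * 0x01000009, so e.g. NVENCAPI_STRUCT_VERSION(1) evaluates to
+ * 0x01000009 | (1 << 16) | (0x7 << 28) == 0x71010009 (worked expansion for illustration).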
+ */ +#define NVENCAPI_STRUCT_VERSION(ver) ((uint32_t)NVENCAPI_VERSION | ((ver)<<16) | (0x7 << 28)) + + +#define NVENC_INFINITE_GOPLENGTH 0xffffffff + +#define NV_MAX_SEQ_HDR_LEN (512) + +// ========================================================================================= +// Encode Codec GUIDS supported by the NvEncodeAPI interface. +// ========================================================================================= + +// {6BC82762-4E63-4ca4-AA85-1E50F321F6BF} +static const GUID NV_ENC_CODEC_H264_GUID = +{ 0x6bc82762, 0x4e63, 0x4ca4, { 0xaa, 0x85, 0x1e, 0x50, 0xf3, 0x21, 0xf6, 0xbf } }; + +// {790CDC88-4522-4d7b-9425-BDA9975F7603} +static const GUID NV_ENC_CODEC_HEVC_GUID = +{ 0x790cdc88, 0x4522, 0x4d7b, { 0x94, 0x25, 0xbd, 0xa9, 0x97, 0x5f, 0x76, 0x3 } }; + + + +// ========================================================================================= +// * Encode Profile GUIDS supported by the NvEncodeAPI interface. +// ========================================================================================= + +// {BFD6F8E7-233C-4341-8B3E-4818523803F4} +static const GUID NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID = +{ 0xbfd6f8e7, 0x233c, 0x4341, { 0x8b, 0x3e, 0x48, 0x18, 0x52, 0x38, 0x3, 0xf4 } }; + +// {0727BCAA-78C4-4c83-8C2F-EF3DFF267C6A} +static const GUID NV_ENC_H264_PROFILE_BASELINE_GUID = +{ 0x727bcaa, 0x78c4, 0x4c83, { 0x8c, 0x2f, 0xef, 0x3d, 0xff, 0x26, 0x7c, 0x6a } }; + +// {60B5C1D4-67FE-4790-94D5-C4726D7B6E6D} +static const GUID NV_ENC_H264_PROFILE_MAIN_GUID = +{ 0x60b5c1d4, 0x67fe, 0x4790, { 0x94, 0xd5, 0xc4, 0x72, 0x6d, 0x7b, 0x6e, 0x6d } }; + +// {E7CBC309-4F7A-4b89-AF2A-D537C92BE310} +static const GUID NV_ENC_H264_PROFILE_HIGH_GUID = +{ 0xe7cbc309, 0x4f7a, 0x4b89, { 0xaf, 0x2a, 0xd5, 0x37, 0xc9, 0x2b, 0xe3, 0x10 } }; + +// {7AC663CB-A598-4960-B844-339B261A7D52} +static const GUID NV_ENC_H264_PROFILE_HIGH_444_GUID = +{ 0x7ac663cb, 0xa598, 0x4960, { 0xb8, 0x44, 0x33, 0x9b, 0x26, 0x1a, 0x7d, 0x52 } }; + +// {40847BF5-33F7-4601-9084-E8FE3C1DB8B7} +static const GUID NV_ENC_H264_PROFILE_STEREO_GUID = +{ 0x40847bf5, 0x33f7, 0x4601, { 0x90, 0x84, 0xe8, 0xfe, 0x3c, 0x1d, 0xb8, 0xb7 } }; + +// {CE788D20-AAA9-4318-92BB-AC7E858C8D36} +static const GUID NV_ENC_H264_PROFILE_SVC_TEMPORAL_SCALABILTY = +{ 0xce788d20, 0xaaa9, 0x4318, { 0x92, 0xbb, 0xac, 0x7e, 0x85, 0x8c, 0x8d, 0x36 } }; + +// {B405AFAC-F32B-417B-89C4-9ABEED3E5978} +static const GUID NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID = +{ 0xb405afac, 0xf32b, 0x417b, { 0x89, 0xc4, 0x9a, 0xbe, 0xed, 0x3e, 0x59, 0x78 } }; + +// {AEC1BD87-E85B-48f2-84C3-98BCA6285072} +static const GUID NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID = +{ 0xaec1bd87, 0xe85b, 0x48f2, { 0x84, 0xc3, 0x98, 0xbc, 0xa6, 0x28, 0x50, 0x72 } }; + +// {B514C39A-B55B-40fa-878F-F1253B4DFDEC} +static const GUID NV_ENC_HEVC_PROFILE_MAIN_GUID = +{ 0xb514c39a, 0xb55b, 0x40fa, { 0x87, 0x8f, 0xf1, 0x25, 0x3b, 0x4d, 0xfd, 0xec } }; + +// {fa4d2b6c-3a5b-411a-8018-0a3f5e3c9be5} +static const GUID NV_ENC_HEVC_PROFILE_MAIN10_GUID = +{ 0xfa4d2b6c, 0x3a5b, 0x411a, { 0x80, 0x18, 0x0a, 0x3f, 0x5e, 0x3c, 0x9b, 0xe5 } }; + +// For HEVC Main 444 8 bit and HEVC Main 444 10 bit profiles only +// {51ec32b5-1b4c-453c-9cbd-b616bd621341} +static const GUID NV_ENC_HEVC_PROFILE_FREXT_GUID = +{ 0x51ec32b5, 0x1b4c, 0x453c, { 0x9c, 0xbd, 0xb6, 0x16, 0xbd, 0x62, 0x13, 0x41 } }; + +// ========================================================================================= +// * Preset GUIDS supported by the NvEncodeAPI interface. 
+// ========================================================================================= +// {B2DFB705-4EBD-4C49-9B5F-24A777D3E587} +static const GUID NV_ENC_PRESET_DEFAULT_GUID = +{ 0xb2dfb705, 0x4ebd, 0x4c49, { 0x9b, 0x5f, 0x24, 0xa7, 0x77, 0xd3, 0xe5, 0x87 } }; + +// {60E4C59F-E846-4484-A56D-CD45BE9FDDF6} +static const GUID NV_ENC_PRESET_HP_GUID = +{ 0x60e4c59f, 0xe846, 0x4484, { 0xa5, 0x6d, 0xcd, 0x45, 0xbe, 0x9f, 0xdd, 0xf6 } }; + +// {34DBA71D-A77B-4B8F-9C3E-B6D5DA24C012} +static const GUID NV_ENC_PRESET_HQ_GUID = +{ 0x34dba71d, 0xa77b, 0x4b8f, { 0x9c, 0x3e, 0xb6, 0xd5, 0xda, 0x24, 0xc0, 0x12 } }; + +// {82E3E450-BDBB-4e40-989C-82A90DF9EF32} +static const GUID NV_ENC_PRESET_BD_GUID = +{ 0x82e3e450, 0xbdbb, 0x4e40, { 0x98, 0x9c, 0x82, 0xa9, 0xd, 0xf9, 0xef, 0x32 } }; + +// {49DF21C5-6DFA-4feb-9787-6ACC9EFFB726} +static const GUID NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID = +{ 0x49df21c5, 0x6dfa, 0x4feb, { 0x97, 0x87, 0x6a, 0xcc, 0x9e, 0xff, 0xb7, 0x26 } }; + +// {C5F733B9-EA97-4cf9-BEC2-BF78A74FD105} +static const GUID NV_ENC_PRESET_LOW_LATENCY_HQ_GUID = +{ 0xc5f733b9, 0xea97, 0x4cf9, { 0xbe, 0xc2, 0xbf, 0x78, 0xa7, 0x4f, 0xd1, 0x5 } }; + +// {67082A44-4BAD-48FA-98EA-93056D150A58} +static const GUID NV_ENC_PRESET_LOW_LATENCY_HP_GUID = +{ 0x67082a44, 0x4bad, 0x48fa, { 0x98, 0xea, 0x93, 0x5, 0x6d, 0x15, 0xa, 0x58 } }; + +// {D5BFB716-C604-44e7-9BB8-DEA5510FC3AC} +static const GUID NV_ENC_PRESET_LOSSLESS_DEFAULT_GUID = +{ 0xd5bfb716, 0xc604, 0x44e7, { 0x9b, 0xb8, 0xde, 0xa5, 0x51, 0xf, 0xc3, 0xac } }; + +// {149998E7-2364-411d-82EF-179888093409} +static const GUID NV_ENC_PRESET_LOSSLESS_HP_GUID = +{ 0x149998e7, 0x2364, 0x411d, { 0x82, 0xef, 0x17, 0x98, 0x88, 0x9, 0x34, 0x9 } }; + +/** + * \addtogroup ENCODER_STRUCTURE NvEncodeAPI Data structures + * @{ + */ + +/** + * Input frame encode modes + */ +typedef enum _NV_ENC_PARAMS_FRAME_FIELD_MODE +{ + NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME = 0x01, /**< Frame mode */ + NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD = 0x02, /**< Field mode */ + NV_ENC_PARAMS_FRAME_FIELD_MODE_MBAFF = 0x03 /**< MB adaptive frame/field */ +} NV_ENC_PARAMS_FRAME_FIELD_MODE; + +/** + * Rate Control Modes + */ +typedef enum _NV_ENC_PARAMS_RC_MODE +{ + NV_ENC_PARAMS_RC_CONSTQP = 0x0, /**< Constant QP mode */ + NV_ENC_PARAMS_RC_VBR = 0x1, /**< Variable bitrate mode */ + NV_ENC_PARAMS_RC_CBR = 0x2, /**< Constant bitrate mode */ + NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ = 0x8, /**< low-delay CBR, high quality */ + NV_ENC_PARAMS_RC_CBR_HQ = 0x10, /**< CBR, high quality (slower) */ + NV_ENC_PARAMS_RC_VBR_HQ = 0x20 /**< VBR, high quality (slower) */ +} NV_ENC_PARAMS_RC_MODE; + +/** + * Emphasis Levels + */ +typedef enum _NV_ENC_EMPHASIS_MAP_LEVEL +{ + NV_ENC_EMPHASIS_MAP_LEVEL_0 = 0x0, /**< Emphasis Map Level 0, for zero Delta QP value */ + NV_ENC_EMPHASIS_MAP_LEVEL_1 = 0x1, /**< Emphasis Map Level 1, for very low Delta QP value */ + NV_ENC_EMPHASIS_MAP_LEVEL_2 = 0x2, /**< Emphasis Map Level 2, for low Delta QP value */ + NV_ENC_EMPHASIS_MAP_LEVEL_3 = 0x3, /**< Emphasis Map Level 3, for medium Delta QP value */ + NV_ENC_EMPHASIS_MAP_LEVEL_4 = 0x4, /**< Emphasis Map Level 4, for high Delta QP value */ + NV_ENC_EMPHASIS_MAP_LEVEL_5 = 0x5 /**< Emphasis Map Level 5, for very high Delta QP value */ +} NV_ENC_EMPHASIS_MAP_LEVEL; + +/** + * QP MAP MODE + */ +typedef enum _NV_ENC_QP_MAP_MODE +{ + NV_ENC_QP_MAP_DISABLED = 0x0, /**< Value in NV_ENC_PIC_PARAMS::qpDeltaMap have no effect. */ + NV_ENC_QP_MAP_EMPHASIS = 0x1, /**< Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as Empasis level. 
Currently this is only supported for H264 */ + NV_ENC_QP_MAP_DELTA = 0x2, /**< Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as QP delta map. */ + NV_ENC_QP_MAP = 0x3, /**< Currently This is not supported. Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as QP value. */ +} NV_ENC_QP_MAP_MODE; + +#define NV_ENC_PARAMS_RC_VBR_MINQP (NV_ENC_PARAMS_RC_MODE)0x4 /**< Deprecated */ +#define NV_ENC_PARAMS_RC_2_PASS_QUALITY NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ /**< Deprecated */ +#define NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP NV_ENC_PARAMS_RC_CBR_HQ /**< Deprecated */ +#define NV_ENC_PARAMS_RC_2_PASS_VBR NV_ENC_PARAMS_RC_VBR_HQ /**< Deprecated */ +#define NV_ENC_PARAMS_RC_CBR2 NV_ENC_PARAMS_RC_CBR /**< Deprecated */ + +/** + * Input picture structure + */ +typedef enum _NV_ENC_PIC_STRUCT +{ + NV_ENC_PIC_STRUCT_FRAME = 0x01, /**< Progressive frame */ + NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM = 0x02, /**< Field encoding top field first */ + NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP = 0x03 /**< Field encoding bottom field first */ +} NV_ENC_PIC_STRUCT; + +/** + * Input picture type + */ +typedef enum _NV_ENC_PIC_TYPE +{ + NV_ENC_PIC_TYPE_P = 0x0, /**< Forward predicted */ + NV_ENC_PIC_TYPE_B = 0x01, /**< Bi-directionally predicted picture */ + NV_ENC_PIC_TYPE_I = 0x02, /**< Intra predicted picture */ + NV_ENC_PIC_TYPE_IDR = 0x03, /**< IDR picture */ + NV_ENC_PIC_TYPE_BI = 0x04, /**< Bi-directionally predicted with only Intra MBs */ + NV_ENC_PIC_TYPE_SKIPPED = 0x05, /**< Picture is skipped */ + NV_ENC_PIC_TYPE_INTRA_REFRESH = 0x06, /**< First picture in intra refresh cycle */ + NV_ENC_PIC_TYPE_NONREF_P = 0x07, /**< Non reference P picture */ + NV_ENC_PIC_TYPE_UNKNOWN = 0xFF /**< Picture type unknown */ +} NV_ENC_PIC_TYPE; + +/** + * Motion vector precisions + */ +typedef enum _NV_ENC_MV_PRECISION +{ + NV_ENC_MV_PRECISION_DEFAULT = 0x0, /** (if lookahead is enabled, input frames must remain available to the encoder until encode completion) */ + uint32_t disableIadapt :1; /**< [in]: Set this to 1 to disable adaptive I-frame insertion at scene cuts (only has an effect when lookahead is enabled) */ + uint32_t disableBadapt :1; /**< [in]: Set this to 1 to disable adaptive B-frame decision (only has an effect when lookahead is enabled) */ + uint32_t enableTemporalAQ :1; /**< [in]: Set this to 1 to enable temporal AQ */ + uint32_t zeroReorderDelay :1; /**< [in]: Set this to 1 to indicate zero latency operation (no reordering delay, num_reorder_frames=0) */ + uint32_t enableNonRefP :1; /**< [in]: Set this to 1 to enable automatic insertion of non-reference P-frames (no effect if enablePTD=0) */ + uint32_t strictGOPTarget :1; /**< [in]: Set this to 1 to minimize GOP-to-GOP rate fluctuations */ + uint32_t aqStrength :4; /**< [in]: When AQ (Spatial) is enabled (i.e. NV_ENC_RC_PARAMS::enableAQ is set), this field is used to specify AQ strength. AQ strength scale is from 1 (low) - 15 (aggressive). If not set, strength is autoselected by driver. */ + uint32_t reservedBitFields :16; /**< [in]: Reserved bitfields and must be set to 0 */ + NV_ENC_QP minQP; /**< [in]: Specifies the minimum QP used for rate control. Client must set NV_ENC_CONFIG::enableMinQP to 1. */ + NV_ENC_QP maxQP; /**< [in]: Specifies the maximum QP used for rate control. Client must set NV_ENC_CONFIG::enableMaxQP to 1. */ + NV_ENC_QP initialRCQP; /**< [in]: Specifies the initial QP used for rate control. Client must set NV_ENC_CONFIG::enableInitialRCQP to 1. 
*/ + uint32_t temporallayerIdxMask; /**< [in]: Specifies the temporal layers (as a bitmask) whose QPs have changed. Valid max bitmask is [2^NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS - 1] */ + uint8_t temporalLayerQP[8]; /**< [in]: Specifies the temporal layer QPs used for rate control. Temporal layer index is used as as the array index */ + uint8_t targetQuality; /**< [in]: Target CQ (Constant Quality) level for VBR mode (range 0-51 with 0-automatic) */ + uint8_t targetQualityLSB; /**< [in]: Fractional part of target quality (as 8.8 fixed point format) */ + uint16_t lookaheadDepth; /**< [in]: Maximum depth of lookahead with range 0-32 (only used if enableLookahead=1) */ + uint32_t reserved1; + NV_ENC_QP_MAP_MODE qpMapMode; /**< [in]: This flag is used to interpret values in array specified by NV_ENC_PIC_PARAMS::qpDeltaMap. + Set this to NV_ENC_QP_MAP_EMPHASIS to treat values specified by NV_ENC_PIC_PARAMS::qpDeltaMap as Emphasis Level Map. + Emphasis Level can be assigned any value specified in enum NV_ENC_EMPHASIS_MAP_LEVEL. + Emphasis Level Map is used to specify regions to be encoded at varying levels of quality. + The hardware encoder adjusts the quantization within the image as per the provided emphasis map, + by adjusting the quantization parameter (QP) assigned to each macroblock. This adjustment is commonly called “Delta QP”. + The adjustment depends on the absolute QP decided by the rate control algorithm, and is applied after the rate control has decided each macroblock’s QP. + Since the Delta QP overrides rate control, enabling Emphasis Level Map may violate bitrate and VBV buffer size constraints. + Emphasis Level Map is useful in situations where client has a priori knowledge of the image complexity (e.g. via use of NVFBC's Classification feature) and encoding those high-complexity areas at higher quality (lower QP) is important, even at the possible cost of violating bitrate/VBV buffer size constraints + This feature is not supported when AQ( Spatial/Temporal) is enabled. + This feature is only supported for H264 codec currently. + + Set this to NV_ENC_QP_MAP_DELTA to treat values specified by NV_ENC_PIC_PARAMS::qpDeltaMap as QPDelta. This specifies QP modifier to be applied on top of the QP chosen by rate control + + Set this to NV_ENC_QP_MAP_DISABLED to ignore NV_ENC_PIC_PARAMS::qpDeltaMap values. In this case, qpDeltaMap should be set to NULL. + + Other values are reserved for future use.*/ + uint32_t reserved[7]; + } NV_ENC_RC_PARAMS; + +/** macro for constructing the version field of ::_NV_ENC_RC_PARAMS */ +#define NV_ENC_RC_PARAMS_VER NVENCAPI_STRUCT_VERSION(1) + + + +/** + * \struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS + * H264 Video Usability Info parameters + */ +typedef struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS +{ + uint32_t overscanInfoPresentFlag; /**< [in]: if set to 1 , it specifies that the overscanInfo is present */ + uint32_t overscanInfo; /**< [in]: Specifies the overscan info(as defined in Annex E of the ITU-T Specification). */ + uint32_t videoSignalTypePresentFlag; /**< [in]: If set to 1, it specifies that the videoFormat, videoFullRangeFlag and colourDescriptionPresentFlag are present. */ + uint32_t videoFormat; /**< [in]: Specifies the source video format(as defined in Annex E of the ITU-T Specification).*/ + uint32_t videoFullRangeFlag; /**< [in]: Specifies the output range of the luma and chroma samples(as defined in Annex E of the ITU-T Specification). 
*/ + uint32_t colourDescriptionPresentFlag; /**< [in]: If set to 1, it specifies that the colourPrimaries, transferCharacteristics and colourMatrix are present. */ + uint32_t colourPrimaries; /**< [in]: Specifies color primaries for converting to RGB(as defined in Annex E of the ITU-T Specification) */ + uint32_t transferCharacteristics; /**< [in]: Specifies the opto-electronic transfer characteristics to use (as defined in Annex E of the ITU-T Specification) */ + uint32_t colourMatrix; /**< [in]: Specifies the matrix coefficients used in deriving the luma and chroma from the RGB primaries (as defined in Annex E of the ITU-T Specification). */ + uint32_t chromaSampleLocationFlag; /**< [in]: if set to 1 , it specifies that the chromaSampleLocationTop and chromaSampleLocationBot are present.*/ + uint32_t chromaSampleLocationTop; /**< [in]: Specifies the chroma sample location for top field(as defined in Annex E of the ITU-T Specification) */ + uint32_t chromaSampleLocationBot; /**< [in]: Specifies the chroma sample location for bottom field(as defined in Annex E of the ITU-T Specification) */ + uint32_t bitstreamRestrictionFlag; /**< [in]: if set to 1, it specifies the bitstream restriction parameters are present in the bitstream.*/ + uint32_t reserved[15]; +}NV_ENC_CONFIG_H264_VUI_PARAMETERS; + +typedef NV_ENC_CONFIG_H264_VUI_PARAMETERS NV_ENC_CONFIG_HEVC_VUI_PARAMETERS; + +/** + * \struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE + * External motion vector hint counts per block type. + * H264 supports multiple hint while HEVC supports one hint for each valid candidate. + */ +typedef struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE +{ + uint32_t numCandsPerBlk16x16 : 4; /**< [in]: Supported for H264,HEVC.It Specifies the number of candidates per 16x16 block. */ + uint32_t numCandsPerBlk16x8 : 4; /**< [in]: Supported for H264 only.Specifies the number of candidates per 16x8 block. */ + uint32_t numCandsPerBlk8x16 : 4; /**< [in]: Supported for H264 only.Specifies the number of candidates per 8x16 block. */ + uint32_t numCandsPerBlk8x8 : 4; /**< [in]: Supported for H264,HEVC.Specifies the number of candidates per 8x8 block. */ + uint32_t reserved : 16; /**< [in]: Reserved for padding. */ + uint32_t reserved1[3]; /**< [in]: Reserved for future use. */ +} NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE; + + +/** + * \struct _NVENC_EXTERNAL_ME_HINT + * External Motion Vector hint structure. + */ +typedef struct _NVENC_EXTERNAL_ME_HINT +{ + int32_t mvx : 12; /**< [in]: Specifies the x component of integer pixel MV (relative to current MB) S12.0. */ + int32_t mvy : 10; /**< [in]: Specifies the y component of integer pixel MV (relative to current MB) S10.0 .*/ + int32_t refidx : 5; /**< [in]: Specifies the reference index (31=invalid). Current we support only 1 reference frame per direction for external hints, so \p refidx must be 0. */ + int32_t dir : 1; /**< [in]: Specifies the direction of motion estimation . 0=L0 1=L1.*/ + int32_t partType : 2; /**< [in]: Specifies the block partition type.0=16x16 1=16x8 2=8x16 3=8x8 (blocks in partition must be consecutive).*/ + int32_t lastofPart : 1; /**< [in]: Set to 1 for the last MV of (sub) partition */ + int32_t lastOfMB : 1; /**< [in]: Set to 1 for the last MV of macroblock. 
*/ +} NVENC_EXTERNAL_ME_HINT; + + +/** + * \struct _NV_ENC_CONFIG_H264 + * H264 encoder configuration parameters + */ +typedef struct _NV_ENC_CONFIG_H264 +{ + uint32_t reserved :1; /**< [in]: Reserved and must be set to 0 */ + uint32_t enableStereoMVC :1; /**< [in]: Set to 1 to enable stereo MVC*/ + uint32_t hierarchicalPFrames :1; /**< [in]: Set to 1 to enable hierarchical PFrames */ + uint32_t hierarchicalBFrames :1; /**< [in]: Set to 1 to enable hierarchical BFrames */ + uint32_t outputBufferingPeriodSEI :1; /**< [in]: Set to 1 to write SEI buffering period syntax in the bitstream */ + uint32_t outputPictureTimingSEI :1; /**< [in]: Set to 1 to write SEI picture timing syntax in the bitstream. When set for following rateControlMode : NV_ENC_PARAMS_RC_CBR, NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ, + NV_ENC_PARAMS_RC_CBR_HQ, filler data is inserted if needed to achieve hrd bitrate */ + uint32_t outputAUD :1; /**< [in]: Set to 1 to write access unit delimiter syntax in bitstream */ + uint32_t disableSPSPPS :1; /**< [in]: Set to 1 to disable writing of Sequence and Picture parameter info in bitstream */ + uint32_t outputFramePackingSEI :1; /**< [in]: Set to 1 to enable writing of frame packing arrangement SEI messages to bitstream */ + uint32_t outputRecoveryPointSEI :1; /**< [in]: Set to 1 to enable writing of recovery point SEI message */ + uint32_t enableIntraRefresh :1; /**< [in]: Set to 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored */ + uint32_t enableConstrainedEncoding :1; /**< [in]: Set this to 1 to enable constrainedFrame encoding where each slice in the constarined picture is independent of other slices + Check support for constrained encoding using ::NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING caps. */ + uint32_t repeatSPSPPS :1; /**< [in]: Set to 1 to enable writing of Sequence and Picture parameter for every IDR frame */ + uint32_t enableVFR :1; /**< [in]: Set to 1 to enable variable frame rate. */ + uint32_t enableLTR :1; /**< [in]: Set to 1 to enable LTR (Long Term Reference) frame support. LTR can be used in two modes: "LTR Trust" mode and "LTR Per Picture" mode. + LTR Trust mode: In this mode, ltrNumFrames pictures after IDR are automatically marked as LTR. This mode is enabled by setting ltrTrustMode = 1. + Use of LTR Trust mode is strongly discouraged as this mode may be deprecated in future. + LTR Per Picture mode: In this mode, client can control whether the current picture should be marked as LTR. Enable this mode by setting + ltrTrustMode = 0 and ltrMarkFrame = 1 for the picture to be marked as LTR. This is the preferred mode + for using LTR. + Note that LTRs are not supported if encoding session is configured with B-frames */ + uint32_t qpPrimeYZeroTransformBypassFlag :1; /**< [in]: To enable lossless encode set this to 1, set QP to 0 and RC_mode to NV_ENC_PARAMS_RC_CONSTQP and profile to HIGH_444_PREDICTIVE_PROFILE. + Check support for lossless encoding using ::NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE caps. */ + uint32_t useConstrainedIntraPred :1; /**< [in]: Set 1 to enable constrained intra prediction. */ + uint32_t enableFillerDataInsertion :1; /**< [in]: Set to 1 to enable insertion of filler data in the bitstream. + This flag will take effect only when one of the CBR rate + control modes (NV_ENC_PARAMS_RC_CBR, NV_ENC_PARAMS_RC_CBR_HQ, + NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ) is in use and both + NV_ENC_INITIALIZE_PARAMS::frameRateNum and + NV_ENC_INITIALIZE_PARAMS::frameRateDen are set to non-zero + values. 
Setting this field when + NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is also set + is currently not supported and will make ::NvEncInitializeEncoder() + return an error. */ + uint32_t reservedBitFields :14; /**< [in]: Reserved bitfields and must be set to 0 */ + uint32_t level; /**< [in]: Specifies the encoding level. Client is recommended to set this to NV_ENC_LEVEL_AUTOSELECT in order to enable the NvEncodeAPI interface to select the correct level. */ + uint32_t idrPeriod; /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */ + uint32_t separateColourPlaneFlag; /**< [in]: Set to 1 to enable 4:4:4 separate colour planes */ + uint32_t disableDeblockingFilterIDC; /**< [in]: Specifies the deblocking filter mode. Permissible value range: [0,2] */ + uint32_t numTemporalLayers; /**< [in]: Specifies max temporal layers to be used for hierarchical coding. Valid value range is [1,::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS] */ + uint32_t spsId; /**< [in]: Specifies the SPS id of the sequence header */ + uint32_t ppsId; /**< [in]: Specifies the PPS id of the picture header */ + NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE adaptiveTransformMode; /**< [in]: Specifies the AdaptiveTransform Mode. Check support for AdaptiveTransform mode using ::NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM caps. */ + NV_ENC_H264_FMO_MODE fmoMode; /**< [in]: Specified the FMO Mode. Check support for FMO using ::NV_ENC_CAPS_SUPPORT_FMO caps. */ + NV_ENC_H264_BDIRECT_MODE bdirectMode; /**< [in]: Specifies the BDirect mode. Check support for BDirect mode using ::NV_ENC_CAPS_SUPPORT_BDIRECT_MODE caps.*/ + NV_ENC_H264_ENTROPY_CODING_MODE entropyCodingMode; /**< [in]: Specifies the entropy coding mode. Check support for CABAC mode using ::NV_ENC_CAPS_SUPPORT_CABAC caps. */ + NV_ENC_STEREO_PACKING_MODE stereoMode; /**< [in]: Specifies the stereo frame packing mode which is to be signalled in frame packing arrangement SEI */ + uint32_t intraRefreshPeriod; /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set. + Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH. */ + uint32_t intraRefreshCnt; /**< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod */ + uint32_t maxNumRefFrames; /**< [in]: Specifies the DPB size used for encoding. Setting it to 0 will let driver use the default dpb size. + The low latency application which wants to invalidate reference frame as an error resilience tool + is recommended to use a large DPB size so that the encoder can keep old reference frames which can be used if recent + frames are invalidated. */ + uint32_t sliceMode; /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices + sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3 numSlices in Picture. + When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting + When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */ + uint32_t sliceModeData; /**< [in]: Specifies the parameter needed for sliceMode. 
For: + sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice) + sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice) + sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice) + sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */ + NV_ENC_CONFIG_H264_VUI_PARAMETERS h264VUIParameters; /**< [in]: Specifies the H264 video usability info pamameters */ + uint32_t ltrNumFrames; /**< [in]: Specifies the number of LTR frames. This parameter has different meaning in two LTR modes. + In "LTR Trust" mode (ltrTrustMode = 1), encoder will mark the first ltrNumFrames base layer reference frames within each IDR interval as LTR. + In "LTR Per Picture" mode (ltrTrustMode = 0 and ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB. */ + uint32_t ltrTrustMode; /**< [in]: Specifies the LTR operating mode. See comments near NV_ENC_CONFIG_H264::enableLTR for description of the two modes. + Set to 1 to use "LTR Trust" mode of LTR operation. Clients are discouraged to use "LTR Trust" mode as this mode may + be deprecated in future releases. + Set to 0 when using "LTR Per Picture" mode of LTR operation. */ + uint32_t chromaFormatIDC; /**< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input. + Check support for YUV444 encoding using ::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE caps.*/ + uint32_t maxTemporalLayers; /**< [in]: Specifies the max temporal layer used for hierarchical coding. */ + NV_ENC_BFRAME_REF_MODE useBFramesAsRef; /**< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps.*/ + NV_ENC_NUM_REF_FRAMES numRefL0; /**< [in]: Specifies max number of reference frames in reference picture list L0, that can be used by hardware for prediction of a frame. + Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */ + NV_ENC_NUM_REF_FRAMES numRefL1; /**< [in]: Specifies max number of reference frames in reference picture list L1, that can be used by hardware for prediction of a frame. + Check support for numRefL1 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */ + uint32_t reserved1[267]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_CONFIG_H264; + +/** + * \struct _NV_ENC_CONFIG_HEVC + * HEVC encoder configuration parameters to be set during initialization. + */ +typedef struct _NV_ENC_CONFIG_HEVC +{ + uint32_t level; /**< [in]: Specifies the level of the encoded bitstream.*/ + uint32_t tier; /**< [in]: Specifies the level tier of the encoded bitstream.*/ + NV_ENC_HEVC_CUSIZE minCUSize; /**< [in]: Specifies the minimum size of luma coding unit.*/ + NV_ENC_HEVC_CUSIZE maxCUSize; /**< [in]: Specifies the maximum size of luma coding unit. Currently NVENC SDK only supports maxCUSize equal to NV_ENC_HEVC_CUSIZE_32x32.*/ + uint32_t useConstrainedIntraPred :1; /**< [in]: Set 1 to enable constrained intra prediction. 
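+            As an illustrative sketch only (not part of the NVIDIA API contract): the NV_ENC_CONFIG_H264 structure above is reached through NV_ENC_CONFIG::encodeCodecConfig.h264Config (NV_ENC_CONFIG is defined further below), and a low-latency streaming setup, assuming 'cfg' is the NV_ENC_CONFIG passed to NvEncInitializeEncoder(), often looks like
+              cfg.gopLength = NVENC_INFINITE_GOPLENGTH;            // no periodic IDR frames
+              cfg.frameIntervalP = 1;                              // IPPP... structure, no B frames
+              NV_ENC_CONFIG_H264 &h264 = cfg.encodeCodecConfig.h264Config;
+              h264.idrPeriod = NVENC_INFINITE_GOPLENGTH;
+              h264.repeatSPSPPS = 1;                               // resend SPS/PPS with every IDR
+              h264.enableIntraRefresh = 1;                         // recover without large IDR frames
+              h264.intraRefreshPeriod = 300;
+              h264.intraRefreshCnt = 30;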
*/ + uint32_t disableDeblockAcrossSliceBoundary :1; /**< [in]: Set 1 to disable in loop filtering across slice boundary.*/ + uint32_t outputBufferingPeriodSEI :1; /**< [in]: Set 1 to write SEI buffering period syntax in the bitstream */ + uint32_t outputPictureTimingSEI :1; /**< [in]: Set 1 to write SEI picture timing syntax in the bitstream */ + uint32_t outputAUD :1; /**< [in]: Set 1 to write Access Unit Delimiter syntax. */ + uint32_t enableLTR :1; /**< [in]: Set to 1 to enable LTR (Long Term Reference) frame support. LTR can be used in two modes: "LTR Trust" mode and "LTR Per Picture" mode. + LTR Trust mode: In this mode, ltrNumFrames pictures after IDR are automatically marked as LTR. This mode is enabled by setting ltrTrustMode = 1. + Use of LTR Trust mode is strongly discouraged as this mode may be deprecated in future releases. + LTR Per Picture mode: In this mode, client can control whether the current picture should be marked as LTR. Enable this mode by setting + ltrTrustMode = 0 and ltrMarkFrame = 1 for the picture to be marked as LTR. This is the preferred mode + for using LTR. + Note that LTRs are not supported if encoding session is configured with B-frames */ + uint32_t disableSPSPPS :1; /**< [in]: Set 1 to disable VPS,SPS and PPS signalling in the bitstream. */ + uint32_t repeatSPSPPS :1; /**< [in]: Set 1 to output VPS,SPS and PPS for every IDR frame.*/ + uint32_t enableIntraRefresh :1; /**< [in]: Set 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored */ + uint32_t chromaFormatIDC :2; /**< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input.*/ + uint32_t pixelBitDepthMinus8 :3; /**< [in]: Specifies pixel bit depth minus 8. Should be set to 0 for 8 bit input, 2 for 10 bit input.*/ + uint32_t enableFillerDataInsertion :1; /**< [in]: Set to 1 to enable insertion of filler data in the bitstream. + This flag will take effect only when one of the CBR rate + control modes (NV_ENC_PARAMS_RC_CBR, NV_ENC_PARAMS_RC_CBR_HQ, + NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ) is in use and both + NV_ENC_INITIALIZE_PARAMS::frameRateNum and + NV_ENC_INITIALIZE_PARAMS::frameRateDen are set to non-zero + values. Setting this field when + NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is also set + is currently not supported and will make ::NvEncInitializeEncoder() + return an error. */ + uint32_t reserved :17; /**< [in]: Reserved bitfields.*/ + uint32_t idrPeriod; /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */ + uint32_t intraRefreshPeriod; /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set. + Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH. */ + uint32_t intraRefreshCnt; /**< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod */ + uint32_t maxNumRefFramesInDPB; /**< [in]: Specifies the maximum number of references frames in the DPB.*/ + uint32_t ltrNumFrames; /**< [in]: This parameter has different meaning in two LTR modes. + In "LTR Trust" mode (ltrTrustMode = 1), encoder will mark the first ltrNumFrames base layer reference frames within each IDR interval as LTR. 
+ In "LTR Per Picture" mode (ltrTrustMode = 0 and ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB. */ + uint32_t vpsId; /**< [in]: Specifies the VPS id of the video parameter set */ + uint32_t spsId; /**< [in]: Specifies the SPS id of the sequence header */ + uint32_t ppsId; /**< [in]: Specifies the PPS id of the picture header */ + uint32_t sliceMode; /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices + sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture + When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */ + uint32_t sliceModeData; /**< [in]: Specifies the parameter needed for sliceMode. For: + sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice) + sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice) + sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice) + sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */ + uint32_t maxTemporalLayersMinus1; /**< [in]: Specifies the max temporal layer used for hierarchical coding. */ + NV_ENC_CONFIG_HEVC_VUI_PARAMETERS hevcVUIParameters; /**< [in]: Specifies the HEVC video usability info pamameters */ + uint32_t ltrTrustMode; /**< [in]: Specifies the LTR operating mode. See comments near NV_ENC_CONFIG_HEVC::enableLTR for description of the two modes. + Set to 1 to use "LTR Trust" mode of LTR operation. Clients are discouraged to use "LTR Trust" mode as this mode may + be deprecated in future releases. + Set to 0 when using "LTR Per Picture" mode of LTR operation. */ + NV_ENC_BFRAME_REF_MODE useBFramesAsRef; /**< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps.*/ + NV_ENC_NUM_REF_FRAMES numRefL0; /**< [in]: Specifies max number of reference frames in reference picture list L0, that can be used by hardware for prediction of a frame. + Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */ + NV_ENC_NUM_REF_FRAMES numRefL1; /**< [in]: Specifies max number of reference frames in reference picture list L1, that can be used by hardware for prediction of a frame. + Check support for numRefL1 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. 
*/ + uint32_t reserved1[214]; /**< [in]: Reserved and must be set to 0.*/ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_CONFIG_HEVC; + +/** + * \struct _NV_ENC_CONFIG_H264_MEONLY + * H264 encoder configuration parameters for ME only Mode + * + */ +typedef struct _NV_ENC_CONFIG_H264_MEONLY +{ + uint32_t disablePartition16x16 :1; /**< [in]: Disable MotionEstimation on 16x16 blocks*/ + uint32_t disablePartition8x16 :1; /**< [in]: Disable MotionEstimation on 8x16 blocks*/ + uint32_t disablePartition16x8 :1; /**< [in]: Disable MotionEstimation on 16x8 blocks*/ + uint32_t disablePartition8x8 :1; /**< [in]: Disable MotionEstimation on 8x8 blocks*/ + uint32_t disableIntraSearch :1; /**< [in]: Disable Intra search during MotionEstimation*/ + uint32_t bStereoEnable :1; /**< [in]: Enable Stereo Mode for Motion Estimation where each view is independently executed*/ + uint32_t reserved :26; /**< [in]: Reserved and must be set to 0 */ + uint32_t reserved1 [255]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_CONFIG_H264_MEONLY; + + +/** + * \struct _NV_ENC_CONFIG_HEVC_MEONLY + * HEVC encoder configuration parameters for ME only Mode + * + */ +typedef struct _NV_ENC_CONFIG_HEVC_MEONLY +{ + uint32_t reserved [256]; /**< [in]: Reserved and must be set to 0 */ + void* reserved1[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_CONFIG_HEVC_MEONLY; + +/** + * \struct _NV_ENC_CODEC_CONFIG + * Codec-specific encoder configuration parameters to be set during initialization. + */ +typedef union _NV_ENC_CODEC_CONFIG +{ + NV_ENC_CONFIG_H264 h264Config; /**< [in]: Specifies the H.264-specific encoder configuration. */ + NV_ENC_CONFIG_HEVC hevcConfig; /**< [in]: Specifies the HEVC-specific encoder configuration. */ + NV_ENC_CONFIG_H264_MEONLY h264MeOnlyConfig; /**< [in]: Specifies the H.264-specific ME only encoder configuration. */ + NV_ENC_CONFIG_HEVC_MEONLY hevcMeOnlyConfig; /**< [in]: Specifies the HEVC-specific ME only encoder configuration. */ + uint32_t reserved[320]; /**< [in]: Reserved and must be set to 0 */ +} NV_ENC_CODEC_CONFIG; + + +/** + * \struct _NV_ENC_CONFIG + * Encoder configuration parameters to be set during initialization. + */ +typedef struct _NV_ENC_CONFIG +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_CONFIG_VER. */ + GUID profileGUID; /**< [in]: Specifies the codec profile guid. If client specifies \p NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID the NvEncodeAPI interface will select the appropriate codec profile. */ + uint32_t gopLength; /**< [in]: Specifies the number of pictures in one GOP. Low latency application client can set goplength to NVENC_INFINITE_GOPLENGTH so that keyframes are not inserted automatically. */ + int32_t frameIntervalP; /**< [in]: Specifies the GOP pattern as follows: \p frameIntervalP = 0: I, 1: IPP, 2: IBP, 3: IBBP If goplength is set to NVENC_INFINITE_GOPLENGTH \p frameIntervalP should be set to 1. */ + uint32_t monoChromeEncoding; /**< [in]: Set this to 1 to enable monochrome encoding for this session. */ + NV_ENC_PARAMS_FRAME_FIELD_MODE frameFieldMode; /**< [in]: Specifies the frame/field mode. + Check support for field encoding using ::NV_ENC_CAPS_SUPPORT_FIELD_ENCODING caps. + Using a frameFieldMode other than NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME for RGB input is not supported. */ + NV_ENC_MV_PRECISION mvPrecision; /**< [in]: Specifies the desired motion vector prediction precision. 
*/ + NV_ENC_RC_PARAMS rcParams; /**< [in]: Specifies the rate control parameters for the current encoding session. */ + NV_ENC_CODEC_CONFIG encodeCodecConfig; /**< [in]: Specifies the codec specific config parameters through this union. */ + uint32_t reserved [278]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_CONFIG; + +/** macro for constructing the version field of ::_NV_ENC_CONFIG */ +#define NV_ENC_CONFIG_VER (NVENCAPI_STRUCT_VERSION(7) | ( 1<<31 )) + + +/** + * \struct _NV_ENC_INITIALIZE_PARAMS + * Encode Session Initialization parameters. + */ +typedef struct _NV_ENC_INITIALIZE_PARAMS +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER. */ + GUID encodeGUID; /**< [in]: Specifies the Encode GUID for which the encoder is being created. ::NvEncInitializeEncoder() API will fail if this is not set, or set to unsupported value. */ + GUID presetGUID; /**< [in]: Specifies the preset for encoding. If the preset GUID is set then , the preset configuration will be applied before any other parameter. */ + uint32_t encodeWidth; /**< [in]: Specifies the encode width. If not set ::NvEncInitializeEncoder() API will fail. */ + uint32_t encodeHeight; /**< [in]: Specifies the encode height. If not set ::NvEncInitializeEncoder() API will fail. */ + uint32_t darWidth; /**< [in]: Specifies the display aspect ratio Width. */ + uint32_t darHeight; /**< [in]: Specifies the display aspect ratio height. */ + uint32_t frameRateNum; /**< [in]: Specifies the numerator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */ + uint32_t frameRateDen; /**< [in]: Specifies the denominator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen ). */ + uint32_t enableEncodeAsync; /**< [in]: Set this to 1 to enable asynchronous mode and is expected to use events to get picture completion notification. */ + uint32_t enablePTD; /**< [in]: Set this to 1 to enable the Picture Type Decision is be taken by the NvEncodeAPI interface. */ + uint32_t reportSliceOffsets :1; /**< [in]: Set this to 1 to enable reporting slice offsets in ::_NV_ENC_LOCK_BITSTREAM. NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync must be set to 0 to use this feature. Client must set this to 0 if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs */ + uint32_t enableSubFrameWrite :1; /**< [in]: Set this to 1 to write out available bitstream to memory at subframe intervals */ + uint32_t enableExternalMEHints :1; /**< [in]: Set to 1 to enable external ME hints for the current frame. For NV_ENC_INITIALIZE_PARAMS::enablePTD=1 with B frames, programming L1 hints is optional for B frames since Client doesn't know internal GOP structure. + NV_ENC_PIC_PARAMS::meHintRefPicDist should preferably be set with enablePTD=1. */ + uint32_t enableMEOnlyMode :1; /**< [in]: Set to 1 to enable ME Only Mode .*/ + uint32_t enableWeightedPrediction :1; /**< [in]: Set this to 1 to enable weighted prediction. Not supported if encode session is configured for B-Frames( 'frameIntervalP' in NV_ENC_CONFIG is greater than 1).*/ + uint32_t enableOutputInVidmem :1; /**< [in]: Set this to 1 to enable output of NVENC in video memory buffer created by application. This feature is not supported for HEVC ME only mode. 
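+            As an illustrative sketch only (assuming 'nvenc' is an NV_ENCODE_API_FUNCTION_LIST filled in by NvEncodeAPICreateInstance() and 'hEncoder' an already opened encode session; the codec and preset GUIDs are declared earlier in this header), a typical H.264 session initialization looks like
+              NV_ENC_INITIALIZE_PARAMS init = { NV_ENC_INITIALIZE_PARAMS_VER };
+              init.encodeGUID = NV_ENC_CODEC_H264_GUID;
+              init.presetGUID = NV_ENC_PRESET_LOW_LATENCY_HQ_GUID;
+              init.encodeWidth  = init.darWidth  = 1280;
+              init.encodeHeight = init.darHeight = 720;
+              init.frameRateNum = 30; init.frameRateDen = 1;
+              init.enablePTD = 1;                                   // let NVENC decide picture types
+              NV_ENC_PRESET_CONFIG preset = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
+              nvenc.nvEncGetEncodePresetConfig(hEncoder, init.encodeGUID, init.presetGUID, &preset);
+              NV_ENC_CONFIG cfg = preset.presetCfg;                 // start from the preset, then override
+              cfg.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
+              cfg.rcParams.averageBitRate  = 2 * 1000 * 1000;
+              init.encodeConfig = &cfg;
+              NVENCSTATUS st = nvenc.nvEncInitializeEncoder(hEncoder, &init);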
*/ + uint32_t reservedBitFields :26; /**< [in]: Reserved bitfields and must be set to 0 */ + uint32_t privDataSize; /**< [in]: Reserved private data buffer size and must be set to 0 */ + void* privData; /**< [in]: Reserved private data buffer and must be set to NULL */ + NV_ENC_CONFIG* encodeConfig; /**< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters. + Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfig() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS. + Even in this case client is recommended to pass the same preset guid it has used in ::NvEncGetEncodePresetConfig() API to query the config structure; as NV_ENC_INITIALIZE_PARAMS::presetGUID. This will not override the custom config structure but will be used to determine other Encoder HW specific parameters not exposed in the API. */ + uint32_t maxEncodeWidth; /**< [in]: Maximum encode width to be used for current Encode session. + Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encoder will not allow dynamic resolution change. */ + uint32_t maxEncodeHeight; /**< [in]: Maximum encode height to be allowed for current Encode session. + Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encode will not allow dynamic resolution change. */ + NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE maxMEHintCountsPerBlock[2]; /**< [in]: If Client wants to pass external motion vectors in NV_ENC_PIC_PARAMS::meExternalHints buffer it must specify the maximum number of hint candidates per block per direction for the encode session. + The NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[0] is for L0 predictors and NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[1] is for L1 predictors. + This client must also set NV_ENC_INITIALIZE_PARAMS::enableExternalMEHints to 1. */ + uint32_t reserved [289]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_INITIALIZE_PARAMS; + +/** macro for constructing the version field of ::_NV_ENC_INITIALIZE_PARAMS */ +#define NV_ENC_INITIALIZE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(5) | ( 1<<31 )) + + +/** + * \struct _NV_ENC_RECONFIGURE_PARAMS + * Encode Session Reconfigured parameters. + */ +typedef struct _NV_ENC_RECONFIGURE_PARAMS +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_RECONFIGURE_PARAMS_VER. */ + NV_ENC_INITIALIZE_PARAMS reInitEncodeParams; /**< [in]: Encoder session re-initialization parameters. + If reInitEncodeParams.encodeConfig is NULL and + reInitEncodeParams.presetGUID is the same as the preset + GUID specified on the call to NvEncInitializeEncoder(), + EncodeAPI will continue to use the existing encode + configuration. + If reInitEncodeParams.encodeConfig is NULL and + reInitEncodeParams.presetGUID is different from the preset + GUID specified on the call to NvEncInitializeEncoder(), + EncodeAPI will try to use the default configuration for + the preset specified by reInitEncodeParams.presetGUID. 
+ In this case, reconfiguration may fail if the new + configuration is incompatible with the existing + configuration (e.g. the new configuration results in + a change in the GOP structure). */ + uint32_t resetEncoder :1; /**< [in]: This resets the rate control states and other internal encoder states. This should be used only with an IDR frame. + If NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1, encoder will force the frame type to IDR */ + uint32_t forceIDR :1; /**< [in]: Encode the current picture as an IDR picture. This flag is only valid when Picture type decision is taken by the Encoder + [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]. */ + uint32_t reserved :30; + +}NV_ENC_RECONFIGURE_PARAMS; + +/** macro for constructing the version field of ::_NV_ENC_RECONFIGURE_PARAMS */ +#define NV_ENC_RECONFIGURE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(1) | ( 1<<31 )) + +/** + * \struct _NV_ENC_PRESET_CONFIG + * Encoder preset config + */ +typedef struct _NV_ENC_PRESET_CONFIG +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_PRESET_CONFIG_VER. */ + NV_ENC_CONFIG presetCfg; /**< [out]: preset config returned by the Nvidia Video Encoder interface. */ + uint32_t reserved1[255]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +}NV_ENC_PRESET_CONFIG; + +/** macro for constructing the version field of ::_NV_ENC_PRESET_CONFIG */ +#define NV_ENC_PRESET_CONFIG_VER (NVENCAPI_STRUCT_VERSION(4) | ( 1<<31 )) + + +/** + * \struct _NV_ENC_PIC_PARAMS_MVC + * MVC-specific parameters to be sent on a per-frame basis. + */ +typedef struct _NV_ENC_PIC_PARAMS_MVC +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_MVC_VER. */ + uint32_t viewID; /**< [in]: Specifies the view ID associated with the current input view. */ + uint32_t temporalID; /**< [in]: Specifies the temporal ID associated with the current input view. */ + uint32_t priorityID; /**< [in]: Specifies the priority ID associated with the current input view. Reserved and ignored by the NvEncodeAPI interface. */ + uint32_t reserved1[12]; /**< [in]: Reserved and must be set to 0. */ + void* reserved2[8]; /**< [in]: Reserved and must be set to NULL. */ +}NV_ENC_PIC_PARAMS_MVC; + +/** macro for constructing the version field of ::_NV_ENC_PIC_PARAMS_MVC */ +#define NV_ENC_PIC_PARAMS_MVC_VER NVENCAPI_STRUCT_VERSION(1) + + +/** + * \union _NV_ENC_PIC_PARAMS_H264_EXT + * H264 extension picture parameters + */ +typedef union _NV_ENC_PIC_PARAMS_H264_EXT +{ + NV_ENC_PIC_PARAMS_MVC mvcPicParams; /**< [in]: Specifies the MVC picture parameters. */ + uint32_t reserved1[32]; /**< [in]: Reserved and must be set to 0. */ +}NV_ENC_PIC_PARAMS_H264_EXT; + +/** + * \struct _NV_ENC_SEI_PAYLOAD + * User SEI message + */ +typedef struct _NV_ENC_SEI_PAYLOAD +{ + uint32_t payloadSize; /**< [in] SEI payload size in bytes. SEI payload must be byte aligned, as described in Annex D */ + uint32_t payloadType; /**< [in] SEI payload types and syntax can be found in Annex D of the H.264 Specification. */ + uint8_t *payload; /**< [in] pointer to user data */ +} NV_ENC_SEI_PAYLOAD; + +#define NV_ENC_H264_SEI_PAYLOAD NV_ENC_SEI_PAYLOAD + +/** + * \struct _NV_ENC_PIC_PARAMS_H264 + * H264 specific enc pic params. sent on a per frame basis. + */ +typedef struct _NV_ENC_PIC_PARAMS_H264 +{ + uint32_t displayPOCSyntax; /**< [in]: Specifies the display POC syntax This is required to be set if client is handling the picture type decision. 
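+              A usage sketch for NV_ENC_RECONFIGURE_PARAMS above (illustrative only; 'nvenc' and 'hEncoder' as above, 'init' and 'cfg' being the NV_ENC_INITIALIZE_PARAMS and NV_ENC_CONFIG used at NvEncInitializeEncoder() time): lowering the bitrate mid-stream can look like
+                NV_ENC_RECONFIGURE_PARAMS re = { NV_ENC_RECONFIGURE_PARAMS_VER };
+                re.reInitEncodeParams = init;                 // keep resolution, frame rate, etc.
+                cfg.rcParams.averageBitRate = 500 * 1000;     // new target bitrate
+                re.reInitEncodeParams.encodeConfig = &cfg;
+                re.resetEncoder = 1;                          // also reset rate-control state
+                re.forceIDR = 1;                              // apply the change on an IDR picture
+                nvenc.nvEncReconfigureEncoder(hEncoder, &re);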
*/ + uint32_t reserved3; /**< [in]: Reserved and must be set to 0 */ + uint32_t refPicFlag; /**< [in]: Set to 1 for a reference picture. This is ignored if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1. */ + uint32_t colourPlaneId; /**< [in]: Specifies the colour plane ID associated with the current input. */ + uint32_t forceIntraRefreshWithFrameCnt; /**< [in]: Forces an intra refresh with duration equal to intraRefreshFrameCnt. + When outputRecoveryPointSEI is set this is value is used for recovery_frame_cnt in recovery point SEI message + forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the GOP structure specified */ + uint32_t constrainedFrame :1; /**< [in]: Set to 1 if client wants to encode this frame with each slice completely independent of other slices in the frame. + NV_ENC_INITIALIZE_PARAMS::enableConstrainedEncoding should be set to 1 */ + uint32_t sliceModeDataUpdate :1; /**< [in]: Set to 1 if client wants to change the sliceModeData field to specify new sliceSize Parameter + When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting */ + uint32_t ltrMarkFrame :1; /**< [in]: Set to 1 if client wants to mark this frame as LTR */ + uint32_t ltrUseFrames :1; /**< [in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap */ + uint32_t reservedBitFields :28; /**< [in]: Reserved bit fields and must be set to 0 */ + uint8_t* sliceTypeData; /**< [in]: Deprecated. */ + uint32_t sliceTypeArrayCnt; /**< [in]: Deprecated. */ + uint32_t seiPayloadArrayCnt; /**< [in]: Specifies the number of elements allocated in seiPayloadArray array. */ + NV_ENC_SEI_PAYLOAD* seiPayloadArray; /**< [in]: Array of SEI payloads which will be inserted for this frame. */ + uint32_t sliceMode; /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices + sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3, numSlices in Picture + When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting + When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */ + uint32_t sliceModeData; /**< [in]: Specifies the parameter needed for sliceMode. For: + sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice) + sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice) + sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice) + sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */ + uint32_t ltrMarkFrameIdx; /**< [in]: Specifies the long term referenceframe index to use for marking this frame as LTR.*/ + uint32_t ltrUseFrameBitmap; /**< [in]: Specifies the the associated bitmap of LTR frame indices to use when encoding this frame. */ + uint32_t ltrUsageMode; /**< [in]: Not supported. Reserved for future use and must be set to 0. */ + uint32_t forceIntraSliceCount; /**< [in]: Specfies the number of slices to be forced to Intra in the current picture. + This option along with forceIntraSliceIdx[] array needs to be used with sliceMode = 3 only */ + uint32_t *forceIntraSliceIdx; /**< [in]: Slice indices to be forced to intra in the current picture. Each slice index should be <= num_slices_in_picture -1. Index starts from 0 for first slice. 
+ The number of entries in this array should be equal to forceIntraSliceCount */ + NV_ENC_PIC_PARAMS_H264_EXT h264ExtPicParams; /**< [in]: Specifies the H264 extension config parameters using this config. */ + uint32_t reserved [210]; /**< [in]: Reserved and must be set to 0. */ + void* reserved2[61]; /**< [in]: Reserved and must be set to NULL. */ +} NV_ENC_PIC_PARAMS_H264; + +/** + * \struct _NV_ENC_PIC_PARAMS_HEVC + * HEVC specific enc pic params. sent on a per frame basis. + */ +typedef struct _NV_ENC_PIC_PARAMS_HEVC +{ + uint32_t displayPOCSyntax; /**< [in]: Specifies the display POC syntax This is required to be set if client is handling the picture type decision. */ + uint32_t refPicFlag; /**< [in]: Set to 1 for a reference picture. This is ignored if NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1. */ + uint32_t temporalId; /**< [in]: Specifies the temporal id of the picture */ + uint32_t forceIntraRefreshWithFrameCnt; /**< [in]: Forces an intra refresh with duration equal to intraRefreshFrameCnt. + When outputRecoveryPointSEI is set this is value is used for recovery_frame_cnt in recovery point SEI message + forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the GOP structure specified */ + uint32_t constrainedFrame :1; /**< [in]: Set to 1 if client wants to encode this frame with each slice completely independent of other slices in the frame. + NV_ENC_INITIALIZE_PARAMS::enableConstrainedEncoding should be set to 1 */ + uint32_t sliceModeDataUpdate :1; /**< [in]: Set to 1 if client wants to change the sliceModeData field to specify new sliceSize Parameter + When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting */ + uint32_t ltrMarkFrame :1; /**< [in]: Set to 1 if client wants to mark this frame as LTR */ + uint32_t ltrUseFrames :1; /**< [in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap */ + uint32_t reservedBitFields :28; /**< [in]: Reserved bit fields and must be set to 0 */ + uint8_t* sliceTypeData; /**< [in]: Array which specifies the slice type used to force intra slice for a particular slice. Currently supported only for NV_ENC_CONFIG_H264::sliceMode == 3. + Client should allocate array of size sliceModeData where sliceModeData is specified in field of ::_NV_ENC_CONFIG_H264 + Array element with index n corresponds to nth slice. To force a particular slice to intra client should set corresponding array element to NV_ENC_SLICE_TYPE_I + all other array elements should be set to NV_ENC_SLICE_TYPE_DEFAULT */ + uint32_t sliceTypeArrayCnt; /**< [in]: Client should set this to the number of elements allocated in sliceTypeData array. If sliceTypeData is NULL then this should be set to 0 */ + uint32_t sliceMode; /**< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices + sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture + When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting + When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice */ + uint32_t sliceModeData; /**< [in]: Specifies the parameter needed for sliceMode. 
For: + sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice) + sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice) + sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice) + sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally */ + uint32_t ltrMarkFrameIdx; /**< [in]: Specifies the long term reference frame index to use for marking this frame as LTR.*/ + uint32_t ltrUseFrameBitmap; /**< [in]: Specifies the associated bitmap of LTR frame indices to use when encoding this frame. */ + uint32_t ltrUsageMode; /**< [in]: Not supported. Reserved for future use and must be set to 0. */ + uint32_t seiPayloadArrayCnt; /**< [in]: Specifies the number of elements allocated in seiPayloadArray array. */ + uint32_t reserved; /**< [in]: Reserved and must be set to 0. */ + NV_ENC_SEI_PAYLOAD* seiPayloadArray; /**< [in]: Array of SEI payloads which will be inserted for this frame. */ + uint32_t reserved2 [244]; /**< [in]: Reserved and must be set to 0. */ + void* reserved3[61]; /**< [in]: Reserved and must be set to NULL. */ +} NV_ENC_PIC_PARAMS_HEVC; + +/** + * Codec specific per-picture encoding parameters. + */ +typedef union _NV_ENC_CODEC_PIC_PARAMS +{ + NV_ENC_PIC_PARAMS_H264 h264PicParams; /**< [in]: H264 encode picture params. */ + NV_ENC_PIC_PARAMS_HEVC hevcPicParams; /**< [in]: HEVC encode picture params. */ + uint32_t reserved[256]; /**< [in]: Reserved and must be set to 0. */ +} NV_ENC_CODEC_PIC_PARAMS; + +/** + * \struct _NV_ENC_PIC_PARAMS + * Encoding parameters that need to be sent on a per frame basis. + */ +typedef struct _NV_ENC_PIC_PARAMS +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_VER. */ + uint32_t inputWidth; /**< [in]: Specifies the input buffer width */ + uint32_t inputHeight; /**< [in]: Specifies the input buffer height */ + uint32_t inputPitch; /**< [in]: Specifies the input buffer pitch. If pitch value is not known, set this to inputWidth. */ + uint32_t encodePicFlags; /**< [in]: Specifies bit-wise OR`ed encode pic flags. See ::NV_ENC_PIC_FLAGS enum. */ + uint32_t frameIdx; /**< [in]: Specifies the frame index associated with the input frame [optional]. */ + uint64_t inputTimeStamp; /**< [in]: Specifies presentation timestamp associated with the input picture. */ + uint64_t inputDuration; /**< [in]: Specifies duration of the input picture */ + NV_ENC_INPUT_PTR inputBuffer; /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.*/ + NV_ENC_OUTPUT_PTR outputBitstream; /**< [in]: Specifies the output buffer pointer. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, specifies the pointer to output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for NV_ENC_ENCODE_OUT_PARAMS struct and encoded bitstream data. Client + should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this output buffer and assign it to NV_ENC_PIC_PARAMS::outputBitstream. + First 256 bytes of this buffer should be interpreted as NV_ENC_ENCODE_OUT_PARAMS struct followed by encoded bitstream data. Recommended size for output buffer is sum of size of + NV_ENC_ENCODE_OUT_PARAMS struct and twice the input frame size for lower resolution eg. 
CIF and 1.5 times the input frame size for higher resolutions. If encoded bitstream size is + greater than the allocated buffer size for encoded bitstream, then the output buffer will have encoded bitstream data equal to buffer size. All CUDA operations on this buffer must use + the default stream. */ + void* completionEvent; /**< [in]: Specifies an event to be signalled on completion of encoding of this Frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer. */ + NV_ENC_BUFFER_FORMAT bufferFmt; /**< [in]: Specifies the input buffer format. */ + NV_ENC_PIC_STRUCT pictureStruct; /**< [in]: Specifies structure of the input picture. */ + NV_ENC_PIC_TYPE pictureType; /**< [in]: Specifies input picture type. Client required to be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder. */ + NV_ENC_CODEC_PIC_PARAMS codecPicParams; /**< [in]: Specifies the codec specific per-picture encoding parameters. */ + NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE meHintCountsPerBlock[2]; /**< [in]: Specifies the number of hint candidates per block per direction for the current frame. meHintCountsPerBlock[0] is for L0 predictors and meHintCountsPerBlock[1] is for L1 predictors. + The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder intialization. */ + NVENC_EXTERNAL_ME_HINT *meExternalHints; /**< [in]: Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock. + The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8 + + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */ + uint32_t reserved1[6]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[2]; /**< [in]: Reserved and must be set to NULL */ + int8_t *qpDeltaMap; /**< [in]: Specifies the pointer to signed byte array containing value per MB in raster scan order for the current picture, which will be interpreted depending on NV_ENC_RC_PARAMS::qpMapMode. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DELTA, qpDeltaMap specifies QP modifier per MB. This QP modifier will be applied on top of the QP chosen by rate control. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_EMPHASIS, qpDeltaMap specifies Emphasis Level Map per MB. This level value along with QP chosen by rate control is used to + compute the QP modifier, which in turn is applied on top of QP chosen by rate control. + If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DISABLED, value in qpDeltaMap will be ignored.*/ + uint32_t qpDeltaMapSize; /**< [in]: Specifies the size in bytes of qpDeltaMap surface allocated by client and pointed to by NV_ENC_PIC_PARAMS::qpDeltaMap. Surface (array) should be picWidthInMbs * picHeightInMbs */ + uint32_t reservedBitFields; /**< [in]: Reserved bitfields and must be set to 0 */ + uint16_t meHintRefPicDist[2]; /**< [in]: Specifies temporal distance for reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITALIZE_PARAMS::enablePTD = 1 . 
meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints. + If not set, will internally infer distance of 1. Ignored for NV_ENC_INITALIZE_PARAMS::enablePTD = 0 */ + uint32_t reserved3[286]; /**< [in]: Reserved and must be set to 0 */ + void* reserved4[60]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_PIC_PARAMS; + +/** Macro for constructing the version field of ::_NV_ENC_PIC_PARAMS */ +#define NV_ENC_PIC_PARAMS_VER (NVENCAPI_STRUCT_VERSION(4) | ( 1<<31 )) + + +/** + * \struct _NV_ENC_MEONLY_PARAMS + * MEOnly parameters that need to be sent on a per motion estimation basis. + * NV_ENC_MEONLY_PARAMS::meExternalHints is supported for H264 only. + */ +typedef struct _NV_ENC_MEONLY_PARAMS +{ + uint32_t version; /**< [in]: Struct version. Must be set to NV_ENC_MEONLY_PARAMS_VER.*/ + uint32_t inputWidth; /**< [in]: Specifies the input buffer width */ + uint32_t inputHeight; /**< [in]: Specifies the input buffer height */ + NV_ENC_INPUT_PTR inputBuffer; /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from NvEncCreateInputBuffer() or NvEncMapInputResource() APIs. */ + NV_ENC_INPUT_PTR referenceFrame; /**< [in]: Specifies the reference frame pointer */ + NV_ENC_OUTPUT_PTR mvBuffer; /**< [in]: Specifies the output buffer pointer. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, specifies the pointer to motion vector data buffer allocated by NvEncCreateMVBuffer. + Client must lock mvBuffer using ::NvEncLockBitstream() API to get the motion vector data. + If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for storing the motion vector data. The size of this buffer must + be equal to total number of macroblocks multiplied by size of NV_ENC_H264_MV_DATA struct. Client should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this + output buffer and assign it to NV_ENC_MEONLY_PARAMS::mvBuffer. All CUDA operations on this buffer must use the default stream. */ + NV_ENC_BUFFER_FORMAT bufferFmt; /**< [in]: Specifies the input buffer format. */ + void* completionEvent; /**< [in]: Specifies an event to be signalled on completion of motion estimation + of this Frame [only if operating in Asynchronous mode]. + Each output buffer should be associated with a distinct event pointer. */ + uint32_t viewID; /**< [in]: Specifies left,right viewID if NV_ENC_CONFIG_H264_MEONLY::bStereoEnable is set. + viewID can be 0,1 if bStereoEnable is set, 0 otherwise. */ + NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE + meHintCountsPerBlock[2]; /**< [in]: Specifies the number of hint candidates per block for the current frame. meHintCountsPerBlock[0] is for L0 predictors. + The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder intialization. */ + NVENC_EXTERNAL_ME_HINT *meExternalHints; /**< [in]: Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock. + The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8 + + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. 
For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */ + uint32_t reserved1[243]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[59]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_MEONLY_PARAMS; + +/** NV_ENC_MEONLY_PARAMS struct version*/ +#define NV_ENC_MEONLY_PARAMS_VER NVENCAPI_STRUCT_VERSION(3) + + +/** + * \struct _NV_ENC_LOCK_BITSTREAM + * Bitstream buffer lock parameters. + */ +typedef struct _NV_ENC_LOCK_BITSTREAM +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_LOCK_BITSTREAM_VER. */ + uint32_t doNotWait :1; /**< [in]: If this flag is set, the NvEncodeAPI interface will return buffer pointer even if operation is not completed. If not set, the call will block until operation completes. */ + uint32_t ltrFrame :1; /**< [out]: Flag indicating this frame is marked as LTR frame */ + uint32_t getRCStats :1; /**< [in]: If this flag is set then lockBitstream call will add additional intra-inter MB count and average MVX, MVY */ + uint32_t reservedBitFields :29; /**< [in]: Reserved bit fields and must be set to 0 */ + void* outputBitstream; /**< [in]: Pointer to the bitstream buffer being locked. */ + uint32_t* sliceOffsets; /**< [in,out]: Array which receives the slice offsets. This is not supported if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs. Array size must be equal to size of frame in MBs. */ + uint32_t frameIdx; /**< [out]: Frame no. for which the bitstream is being retrieved. */ + uint32_t hwEncodeStatus; /**< [out]: The NvEncodeAPI interface status for the locked picture. */ + uint32_t numSlices; /**< [out]: Number of slices in the encoded picture. Will be reported only if NV_ENC_INITIALIZE_PARAMS::reportSliceOffsets set to 1. */ + uint32_t bitstreamSizeInBytes; /**< [out]: Actual number of bytes generated and copied to the memory pointed by bitstreamBufferPtr. */ + uint64_t outputTimeStamp; /**< [out]: Presentation timestamp associated with the encoded output. */ + uint64_t outputDuration; /**< [out]: Presentation duration associates with the encoded output. */ + void* bitstreamBufferPtr; /**< [out]: Pointer to the generated output bitstream. + For MEOnly mode _NV_ENC_LOCK_BITSTREAM::bitstreamBufferPtr should be typecast to + NV_ENC_H264_MV_DATA/NV_ENC_HEVC_MV_DATA pointer respectively for H264/HEVC */ + NV_ENC_PIC_TYPE pictureType; /**< [out]: Picture type of the encoded picture. */ + NV_ENC_PIC_STRUCT pictureStruct; /**< [out]: Structure of the generated output picture. */ + uint32_t frameAvgQP; /**< [out]: Average QP of the frame. */ + uint32_t frameSatd; /**< [out]: Total SATD cost for whole frame. */ + uint32_t ltrFrameIdx; /**< [out]: Frame index associated with this LTR frame. */ + uint32_t ltrFrameBitmap; /**< [out]: Bitmap of LTR frames indices which were used for encoding this frame. Value of 0 if no LTR frames were used. */ + uint32_t reserved[13]; /**< [in]: Reserved and must be set to 0 */ + uint32_t intraMBCount; /**< [out]: For H264, Number of Intra MBs in the encoded frame. For HEVC, Number of Intra CTBs in the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */ + uint32_t interMBCount; /**< [out]: For H264, Number of Inter MBs in the encoded frame, includes skip MBs. For HEVC, Number of Inter CTBs in the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. 
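+              A usage sketch combining NV_ENC_PIC_PARAMS (above) with this structure (illustrative only; 'nvenc' and 'hEncoder' as above, 'inputBuffer' a created or mapped input surface, 'bitstreamBuffer' an output buffer from NvEncCreateBitstreamBuffer()): encoding one frame and reading back the result typically looks like
+                NV_ENC_PIC_PARAMS pic = { NV_ENC_PIC_PARAMS_VER };
+                pic.inputBuffer     = inputBuffer;
+                pic.outputBitstream = bitstreamBuffer;
+                pic.bufferFmt       = NV_ENC_BUFFER_FORMAT_NV12;
+                pic.inputWidth      = 1280;
+                pic.inputHeight     = 720;
+                pic.pictureStruct   = NV_ENC_PIC_STRUCT_FRAME;
+                if (nvenc.nvEncEncodePicture(hEncoder, &pic) == NV_ENC_SUCCESS) {
+                  NV_ENC_LOCK_BITSTREAM lock = { NV_ENC_LOCK_BITSTREAM_VER };
+                  lock.outputBitstream = bitstreamBuffer;
+                  nvenc.nvEncLockBitstream(hEncoder, &lock);
+                  // encoded bytes are at lock.bitstreamBufferPtr, length lock.bitstreamSizeInBytes
+                  nvenc.nvEncUnlockBitstream(hEncoder, bitstreamBuffer);
+                }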
*/ + int32_t averageMVX; /**< [out]: Average Motion Vector in X direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */ + int32_t averageMVY; /**< [out]: Average Motion Vector in y direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */ + uint32_t reserved1[219]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_LOCK_BITSTREAM; + +/** Macro for constructing the version field of ::_NV_ENC_LOCK_BITSTREAM */ +#define NV_ENC_LOCK_BITSTREAM_VER NVENCAPI_STRUCT_VERSION(1) + + +/** + * \struct _NV_ENC_LOCK_INPUT_BUFFER + * Uncompressed Input Buffer lock parameters. + */ +typedef struct _NV_ENC_LOCK_INPUT_BUFFER +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_LOCK_INPUT_BUFFER_VER. */ + uint32_t doNotWait :1; /**< [in]: Set to 1 to make ::NvEncLockInputBuffer() a unblocking call. If the encoding is not completed, driver will return ::NV_ENC_ERR_ENCODER_BUSY error code. */ + uint32_t reservedBitFields :31; /**< [in]: Reserved bitfields and must be set to 0 */ + NV_ENC_INPUT_PTR inputBuffer; /**< [in]: Pointer to the input buffer to be locked, client should pass the pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource API. */ + void* bufferDataPtr; /**< [out]: Pointed to the locked input buffer data. Client can only access input buffer using the \p bufferDataPtr. */ + uint32_t pitch; /**< [out]: Pitch of the locked input buffer. */ + uint32_t reserved1[251]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_LOCK_INPUT_BUFFER; + +/** Macro for constructing the version field of ::_NV_ENC_LOCK_INPUT_BUFFER */ +#define NV_ENC_LOCK_INPUT_BUFFER_VER NVENCAPI_STRUCT_VERSION(1) + + +/** + * \struct _NV_ENC_MAP_INPUT_RESOURCE + * Map an input resource to a Nvidia Encoder Input Buffer + */ +typedef struct _NV_ENC_MAP_INPUT_RESOURCE +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_MAP_INPUT_RESOURCE_VER. */ + uint32_t subResourceIndex; /**< [in]: Deprecated. Do not use. */ + void* inputResource; /**< [in]: Deprecated. Do not use. */ + NV_ENC_REGISTERED_PTR registeredResource; /**< [in]: The Registered resource handle obtained by calling NvEncRegisterInputResource. */ + NV_ENC_INPUT_PTR mappedResource; /**< [out]: Mapped pointer corresponding to the registeredResource. This pointer must be used in NV_ENC_PIC_PARAMS::inputBuffer parameter in ::NvEncEncodePicture() API. */ + NV_ENC_BUFFER_FORMAT mappedBufferFmt; /**< [out]: Buffer format of the outputResource. This buffer format must be used in NV_ENC_PIC_PARAMS::bufferFmt if client using the above mapped resource pointer. */ + uint32_t reserved1[251]; /**< [in]: Reserved and must be set to 0. */ + void* reserved2[63]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_MAP_INPUT_RESOURCE; + +/** Macro for constructing the version field of ::_NV_ENC_MAP_INPUT_RESOURCE */ +#define NV_ENC_MAP_INPUT_RESOURCE_VER NVENCAPI_STRUCT_VERSION(4) + +/** + * \struct _NV_ENC_INPUT_RESOURCE_OPENGL_TEX + * NV_ENC_REGISTER_RESOURCE::resourceToRegister must be a pointer to a variable of this type, + * when NV_ENC_REGISTER_RESOURCE::resourceType is NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX + */ +typedef struct _NV_ENC_INPUT_RESOURCE_OPENGL_TEX +{ + uint32_t texture; /**< [in]: The name of the texture to be used. 
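+              More generally, a usage sketch for resource registration (illustrative only; it uses NV_ENC_MAP_INPUT_RESOURCE above and NV_ENC_REGISTER_RESOURCE below; 'pTexture' is assumed to be an ID3D11Texture2D* of the encode size, 'nvenc' and 'hEncoder' as above): a Direct3D 11 texture, as used on the Windows encoder path, is registered and mapped roughly like
+                NV_ENC_REGISTER_RESOURCE reg = { NV_ENC_REGISTER_RESOURCE_VER };
+                reg.resourceType       = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+                reg.resourceToRegister = pTexture;
+                reg.width  = 1280;
+                reg.height = 720;
+                reg.pitch  = 0;                                // 0 for DirectX resources
+                reg.bufferFormat = NV_ENC_BUFFER_FORMAT_ARGB;
+                reg.bufferUsage  = NV_ENC_INPUT_IMAGE;
+                nvenc.nvEncRegisterResource(hEncoder, &reg);
+                NV_ENC_MAP_INPUT_RESOURCE map = { NV_ENC_MAP_INPUT_RESOURCE_VER };
+                map.registeredResource = reg.registeredResource;
+                nvenc.nvEncMapInputResource(hEncoder, &map);
+                // map.mappedResource can now be passed as NV_ENC_PIC_PARAMS::inputBuffer;
+                // unmap with nvEncUnmapInputResource() once the frame has been consumed.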
*/ + uint32_t target; /**< [in]: Accepted values are GL_TEXTURE_RECTANGLE and GL_TEXTURE_2D. */ +} NV_ENC_INPUT_RESOURCE_OPENGL_TEX; + +/** + * \struct _NV_ENC_REGISTER_RESOURCE + * Register a resource for future use with the Nvidia Video Encoder Interface. + */ +typedef struct _NV_ENC_REGISTER_RESOURCE +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_REGISTER_RESOURCE_VER. */ + NV_ENC_INPUT_RESOURCE_TYPE resourceType; /**< [in]: Specifies the type of resource to be registered. + Supported values are + ::NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX, + ::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR, + ::NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX */ + uint32_t width; /**< [in]: Input buffer Width. */ + uint32_t height; /**< [in]: Input buffer Height. */ + uint32_t pitch; /**< [in]: Input buffer Pitch. + For ::NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX resources, set this to 0. + For ::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR resources, set this to + the pitch as obtained from cuMemAllocPitch(), or to the width in + bytes (if this resource was created by using cuMemAlloc()). This + value must be a multiple of 4. + For ::NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY resources, set this to the + width of the allocation in bytes (i.e. + CUDA_ARRAY3D_DESCRIPTOR::Width * CUDA_ARRAY3D_DESCRIPTOR::NumChannels). + For ::NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX resources, set this to the + texture width multiplied by the number of components in the texture + format. */ + uint32_t subResourceIndex; /**< [in]: Subresource Index of the DirectX resource to be registered. Should be set to 0 for other interfaces. */ + void* resourceToRegister; /**< [in]: Handle to the resource that is being registered. */ + NV_ENC_REGISTERED_PTR registeredResource; /**< [out]: Registered resource handle. This should be used in future interactions with the Nvidia Video Encoder Interface. */ + NV_ENC_BUFFER_FORMAT bufferFormat; /**< [in]: Buffer format of resource to be registered. */ + NV_ENC_BUFFER_USAGE bufferUsage; /**< [in]: Usage of resource to be registered. */ + uint32_t reserved1[247]; /**< [in]: Reserved and must be set to 0. */ + void* reserved2[62]; /**< [in]: Reserved and must be set to NULL. */ +} NV_ENC_REGISTER_RESOURCE; + +/** Macro for constructing the version field of ::_NV_ENC_REGISTER_RESOURCE */ +#define NV_ENC_REGISTER_RESOURCE_VER NVENCAPI_STRUCT_VERSION(3) + +/** + * \struct _NV_ENC_STAT + * Encode Stats structure. + */ +typedef struct _NV_ENC_STAT +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_STAT_VER. */ + uint32_t reserved; /**< [in]: Reserved and must be set to 0 */ + NV_ENC_OUTPUT_PTR outputBitStream; /**< [out]: Specifies the pointer to output bitstream. */ + uint32_t bitStreamSize; /**< [out]: Size of generated bitstream in bytes. */ + uint32_t picType; /**< [out]: Picture type of encoded picture. See ::NV_ENC_PIC_TYPE. */ + uint32_t lastValidByteOffset; /**< [out]: Offset of last valid bytes of completed bitstream */ + uint32_t sliceOffsets[16]; /**< [out]: Offsets of each slice */ + uint32_t picIdx; /**< [out]: Picture number */ + uint32_t reserved1[233]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_STAT; + +/** Macro for constructing the version field of ::_NV_ENC_STAT */ +#define NV_ENC_STAT_VER NVENCAPI_STRUCT_VERSION(1) + + +/** + * \struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD + * Sequence and picture paramaters payload. 
+ */ +typedef struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER. */ + uint32_t inBufferSize; /**< [in]: Specifies the size of the spsppsBuffer provied by the client */ + uint32_t spsId; /**< [in]: Specifies the SPS id to be used in sequence header. Default value is 0. */ + uint32_t ppsId; /**< [in]: Specifies the PPS id to be used in picture header. Default value is 0. */ + void* spsppsBuffer; /**< [in]: Specifies bitstream header pointer of size NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize. It is the client's responsibility to manage this memory. */ + uint32_t* outSPSPPSPayloadSize; /**< [out]: Size of the sequence and picture header in bytes written by the NvEncodeAPI interface to the SPSPPSBuffer. */ + uint32_t reserved [250]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_SEQUENCE_PARAM_PAYLOAD; + +/** Macro for constructing the version field of ::_NV_ENC_SEQUENCE_PARAM_PAYLOAD */ +#define NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER NVENCAPI_STRUCT_VERSION(1) + + +/** + * Event registration/unregistration parameters. + */ +typedef struct _NV_ENC_EVENT_PARAMS +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_EVENT_PARAMS_VER. */ + uint32_t reserved; /**< [in]: Reserved and must be set to 0 */ + void* completionEvent; /**< [in]: Handle to event to be registered/unregistered with the NvEncodeAPI interface. */ + uint32_t reserved1[253]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_EVENT_PARAMS; + +/** Macro for constructing the version field of ::_NV_ENC_EVENT_PARAMS */ +#define NV_ENC_EVENT_PARAMS_VER NVENCAPI_STRUCT_VERSION(1) + +/** + * Encoder Session Creation parameters + */ +typedef struct _NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS +{ + uint32_t version; /**< [in]: Struct version. Must be set to ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER. */ + NV_ENC_DEVICE_TYPE deviceType; /**< [in]: Specified the device Type */ + void* device; /**< [in]: Pointer to client device. */ + void* reserved; /**< [in]: Reserved and must be set to 0. */ + uint32_t apiVersion; /**< [in]: API version. Should be set to NVENCAPI_VERSION. */ + uint32_t reserved1[253]; /**< [in]: Reserved and must be set to 0 */ + void* reserved2[64]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS; +/** Macro for constructing the version field of ::_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS */ +#define NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER NVENCAPI_STRUCT_VERSION(1) + +/** @} */ /* END ENCODER_STRUCTURE */ + + +/** + * \addtogroup ENCODE_FUNC NvEncodeAPI Functions + * @{ + */ + +// NvEncOpenEncodeSession +/** + * \brief Opens an encoding session. + * + * Deprecated. + * + * \return + * ::NV_ENC_ERR_INVALID_CALL\n + * + */ +NVENCSTATUS NVENCAPI NvEncOpenEncodeSession (void* device, uint32_t deviceType, void** encoder); + +// NvEncGetEncodeGuidCount +/** + * \brief Retrieves the number of supported encode GUIDs. + * + * The function returns the number of codec guids supported by the NvEncodeAPI + * interface. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [out] encodeGUIDCount + * Number of supported encode GUIDs. 
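+ *
+ * A usage sketch (illustrative only; 'nvenc' and 'hEncoder' as above, std::vector assumed available):
+ *
+ *     uint32_t count = 0;
+ *     nvenc.nvEncGetEncodeGUIDCount(hEncoder, &count);
+ *     std::vector<GUID> guids(count);
+ *     uint32_t returned = 0;
+ *     nvenc.nvEncGetEncodeGUIDs(hEncoder, guids.data(), count, &returned);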
+ * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodeGUIDCount (void* encoder, uint32_t* encodeGUIDCount); + + +// NvEncGetEncodeGUIDs +/** + * \brief Retrieves an array of supported encoder codec GUIDs. + * + * The function returns an array of codec guids supported by the NvEncodeAPI interface. + * The client must allocate an array where the NvEncodeAPI interface can + * fill the supported guids and pass the pointer in \p *GUIDs parameter. + * The size of the array can be determined by using ::NvEncGetEncodeGUIDCount() API. + * The Nvidia Encoding interface returns the number of codec guids it has actually + * filled in the guid array in the \p GUIDCount parameter. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] guidArraySize + * Number of GUIDs to retrieved. Should be set to the number retrieved using + * ::NvEncGetEncodeGUIDCount. + * \param [out] GUIDs + * Array of supported Encode GUIDs. + * \param [out] GUIDCount + * Number of supported Encode GUIDs. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodeGUIDs (void* encoder, GUID* GUIDs, uint32_t guidArraySize, uint32_t* GUIDCount); + + +// NvEncGetEncodeProfileGuidCount +/** + * \brief Retrieves the number of supported profile GUIDs. + * + * The function returns the number of profile GUIDs supported for a given codec. + * The client must first enumerate the codec guids supported by the NvEncodeAPI + * interface. After determining the codec guid, it can query the NvEncodeAPI + * interface to determine the number of profile guids supported for a particular + * codec guid. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * The codec guid for which the profile guids are being enumerated. + * \param [out] encodeProfileGUIDCount + * Number of encode profiles supported for the given encodeGUID. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodeProfileGUIDCount (void* encoder, GUID encodeGUID, uint32_t* encodeProfileGUIDCount); + + +// NvEncGetEncodeProfileGUIDs +/** + * \brief Retrieves an array of supported encode profile GUIDs. + * + * The function returns an array of supported profile guids for a particular + * codec guid. The client must allocate an array where the NvEncodeAPI interface + * can populate the profile guids. The client can determine the array size using + * ::NvEncGetEncodeProfileGUIDCount() API. The client must also validiate that the + * NvEncodeAPI interface supports the GUID the client wants to pass as \p encodeGUID + * parameter. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * The encode guid whose profile guids are being enumerated. 
+ * \param [in] guidArraySize + * Number of GUIDs to be retrieved. Should be set to the number retrieved using + * ::NvEncGetEncodeProfileGUIDCount. + * \param [out] profileGUIDs + * Array of supported Encode Profile GUIDs + * \param [out] GUIDCount + * Number of valid encode profile GUIDs in \p profileGUIDs array. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodeProfileGUIDs (void* encoder, GUID encodeGUID, GUID* profileGUIDs, uint32_t guidArraySize, uint32_t* GUIDCount); + +// NvEncGetInputFormatCount +/** + * \brief Retrieve the number of supported Input formats. + * + * The function returns the number of supported input formats. The client must + * query the NvEncodeAPI interface to determine the supported input formats + * before creating the input surfaces. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * Encode GUID, corresponding to which the number of supported input formats + * is to be retrieved. + * \param [out] inputFmtCount + * Number of input formats supported for specified Encode GUID. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + */ +NVENCSTATUS NVENCAPI NvEncGetInputFormatCount (void* encoder, GUID encodeGUID, uint32_t* inputFmtCount); + + +// NvEncGetInputFormats +/** + * \brief Retrieves an array of supported Input formats + * + * Returns an array of supported input formats The client must use the input + * format to create input surface using ::NvEncCreateInputBuffer() API. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * Encode GUID, corresponding to which the number of supported input formats + * is to be retrieved. + *\param [in] inputFmtArraySize + * Size input format count array passed in \p inputFmts. + *\param [out] inputFmts + * Array of input formats supported for this Encode GUID. + *\param [out] inputFmtCount + * The number of valid input format types returned by the NvEncodeAPI + * interface in \p inputFmts array. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetInputFormats (void* encoder, GUID encodeGUID, NV_ENC_BUFFER_FORMAT* inputFmts, uint32_t inputFmtArraySize, uint32_t* inputFmtCount); + + +// NvEncGetEncodeCaps +/** + * \brief Retrieves the capability value for a specified encoder attribute. + * + * The function returns the capability value for a given encoder attribute. The + * client must validate the encodeGUID using ::NvEncGetEncodeGUIDs() API before + * calling this function. The encoder attribute being queried are enumerated in + * ::NV_ENC_CAPS_PARAM enum. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * Encode GUID, corresponding to which the capability attribute is to be retrieved. 
+ * \param [in] capsParam + * Used to specify attribute being queried. Refer ::NV_ENC_CAPS_PARAM for more + * details. + * \param [out] capsVal + * The value corresponding to the capability attribute being queried. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + */ +NVENCSTATUS NVENCAPI NvEncGetEncodeCaps (void* encoder, GUID encodeGUID, NV_ENC_CAPS_PARAM* capsParam, int* capsVal); + + +// NvEncGetEncodePresetCount +/** + * \brief Retrieves the number of supported preset GUIDs. + * + * The function returns the number of preset GUIDs available for a given codec. + * The client must validate the codec guid using ::NvEncGetEncodeGUIDs() API + * before calling this function. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * Encode GUID, corresponding to which the number of supported presets is to + * be retrieved. + * \param [out] encodePresetGUIDCount + * Receives the number of supported preset GUIDs. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodePresetCount (void* encoder, GUID encodeGUID, uint32_t* encodePresetGUIDCount); + + +// NvEncGetEncodePresetGUIDs +/** + * \brief Receives an array of supported encoder preset GUIDs. + * + * The function returns an array of encode preset guids available for a given codec. + * The client can directly use one of the preset guids based upon the use case + * or target device. The preset guid chosen can be directly used in + * NV_ENC_INITIALIZE_PARAMS::presetGUID parameter to ::NvEncEncodePicture() API. + * Alternately client can also use the preset guid to retrieve the encoding config + * parameters being used by NvEncodeAPI interface for that given preset, using + * ::NvEncGetEncodePresetConfig() API. It can then modify preset config parameters + * as per its use case and send it to NvEncodeAPI interface as part of + * NV_ENC_INITIALIZE_PARAMS::encodeConfig parameter for NvEncInitializeEncoder() + * API. + * + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * Encode GUID, corresponding to which the list of supported presets is to be + * retrieved. + * \param [in] guidArraySize + * Size of array of preset guids passed in \p preset GUIDs + * \param [out] presetGUIDs + * Array of supported Encode preset GUIDs from the NvEncodeAPI interface + * to client. + * \param [out] encodePresetGUIDCount + * Receives the number of preset GUIDs returned by the NvEncodeAPI + * interface. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodePresetGUIDs (void* encoder, GUID encodeGUID, GUID* presetGUIDs, uint32_t guidArraySize, uint32_t* encodePresetGUIDCount); + + +// NvEncGetEncodePresetConfig +/** + * \brief Returns a preset config structure supported for given preset GUID. 
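+ *
+ * A rough usage sketch (the codec and preset GUIDs shown are only examples and are
+ * assumed to have been validated beforehand; error handling omitted):
+ *\code
+    NV_ENC_PRESET_CONFIG presetConfig = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } };
+    NvEncGetEncodePresetConfig(encoder, NV_ENC_CODEC_H264_GUID,
+                               NV_ENC_PRESET_LOW_LATENCY_HQ_GUID, &presetConfig);
+    presetConfig.presetCfg.gopLength = 30;   // example tweak before initialization
+    // later: initializeParams.encodeConfig = &presetConfig.presetCfg;
+ * \endcode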
+ * + * The function returns a preset config structure for a given preset guid. Before + * using this function the client must enumerate the preset guids available for + * a given codec. The preset config structure can be modified by the client depending + * upon its use case and can be then used to initialize the encoder using + * ::NvEncInitializeEncoder() API. The client can use this function only if it + * wants to modify the NvEncodeAPI preset configuration, otherwise it can + * directly use the preset guid. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] encodeGUID + * Encode GUID, corresponding to which the list of supported presets is to be + * retrieved. + * \param [in] presetGUID + * Preset GUID, corresponding to which the Encoding configurations is to be + * retrieved. + * \param [out] presetConfig + * The requested Preset Encoder Attribute set. Refer ::_NV_ENC_CONFIG for +* more details. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodePresetConfig (void* encoder, GUID encodeGUID, GUID presetGUID, NV_ENC_PRESET_CONFIG* presetConfig); + +// NvEncInitializeEncoder +/** + * \brief Initialize the encoder. + * + * This API must be used to initialize the encoder. The initialization parameter + * is passed using \p *createEncodeParams The client must send the following + * fields of the _NV_ENC_INITIALIZE_PARAMS structure with a valid value. + * - NV_ENC_INITIALIZE_PARAMS::encodeGUID + * - NV_ENC_INITIALIZE_PARAMS::encodeWidth + * - NV_ENC_INITIALIZE_PARAMS::encodeHeight + * + * The client can pass a preset guid directly to the NvEncodeAPI interface using + * NV_ENC_INITIALIZE_PARAMS::presetGUID field. If the client doesn't pass + * NV_ENC_INITIALIZE_PARAMS::encodeConfig structure, the codec specific parameters + * will be selected based on the preset guid. The preset guid must have been + * validated by the client using ::NvEncGetEncodePresetGUIDs() API. + * If the client passes a custom ::_NV_ENC_CONFIG structure through + * NV_ENC_INITIALIZE_PARAMS::encodeConfig , it will override the codec specific parameters + * based on the preset guid. It is recommended that even if the client passes a custom config, + * it should also send a preset guid. In this case, the preset guid passed by the client + * will not override any of the custom config parameters programmed by the client, + * it is only used as a hint by the NvEncodeAPI interface to determine certain encoder parameters + * which are not exposed to the client. + * + * There are two modes of operation for the encoder namely: + * - Asynchronous mode + * - Synchronous mode + * + * The client can select asynchronous or synchronous mode by setting the \p + * enableEncodeAsync field in ::_NV_ENC_INITIALIZE_PARAMS to 1 or 0 respectively. + *\par Asynchronous mode of operation: + * The Asynchronous mode can be enabled by setting NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 1. + * The client operating in asynchronous mode must allocate completion event object + * for each output buffer and pass the completion event object in the + * ::NvEncEncodePicture() API. 
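+ *
+ * For illustration, a minimal set of initialization parameters for asynchronous
+ * operation might look like the following (most optional fields are left at their
+ * defaults and error handling is omitted):
+ *\code
+    NV_ENC_INITIALIZE_PARAMS initParams = { NV_ENC_INITIALIZE_PARAMS_VER };
+    initParams.encodeGUID        = NV_ENC_CODEC_H264_GUID;     // illustrative codec
+    initParams.presetGUID        = NV_ENC_PRESET_DEFAULT_GUID; // illustrative preset
+    initParams.encodeWidth       = 1280;
+    initParams.encodeHeight      = 720;
+    initParams.frameRateNum      = 30;
+    initParams.frameRateDen      = 1;
+    initParams.enableEncodeAsync = 1;
+    initParams.enablePTD         = 1;
+    NvEncInitializeEncoder(encoder, &initParams);
+ * \endcode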
The client can create another thread and wait on + * the event object to be signalled by NvEncodeAPI interface on completion of the + * encoding process for the output frame. This should unblock the main thread from + * submitting work to the encoder. When the event is signalled the client can call + * NvEncodeAPI interfaces to copy the bitstream data using ::NvEncLockBitstream() + * API. This is the preferred mode of operation. + * + * NOTE: Asynchronous mode is not supported on Linux. + * + *\par Synchronous mode of operation: + * The client can select synchronous mode by setting NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 0. + * The client working in synchronous mode can work in a single threaded or multi + * threaded mode. The client need not allocate any event objects. The client can + * only lock the bitstream data after NvEncodeAPI interface has returned + * ::NV_ENC_SUCCESS from encode picture. The NvEncodeAPI interface can return + * ::NV_ENC_ERR_NEED_MORE_INPUT error code from ::NvEncEncodePicture() API. The + * client must not lock the output buffer in such case but should send the next + * frame for encoding. The client must keep on calling ::NvEncEncodePicture() API + * until it returns ::NV_ENC_SUCCESS. \n + * The client must always lock the bitstream data in order in which it has submitted. + * This is true for both asynchronous and synchronous mode. + * + *\par Picture type decision: + * If the client is taking the picture type decision and it must disable the picture + * type decision module in NvEncodeAPI by setting NV_ENC_INITIALIZE_PARAMS::enablePTD + * to 0. In this case the client is required to send the picture in encoding + * order to NvEncodeAPI by doing the re-ordering for B frames. \n + * If the client doesn't want to take the picture type decision it can enable + * picture type decision module in the NvEncodeAPI interface by setting + * NV_ENC_INITIALIZE_PARAMS::enablePTD to 1 and send the input pictures in display + * order. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] createEncodeParams + * Refer ::_NV_ENC_INITIALIZE_PARAMS for details. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncInitializeEncoder (void* encoder, NV_ENC_INITIALIZE_PARAMS* createEncodeParams); + + +// NvEncCreateInputBuffer +/** + * \brief Allocates Input buffer. + * + * This function is used to allocate an input buffer. The client must enumerate + * the input buffer format before allocating the input buffer resources. The + * NV_ENC_INPUT_PTR returned by the NvEncodeAPI interface in the + * NV_ENC_CREATE_INPUT_BUFFER::inputBuffer field can be directly used in + * ::NvEncEncodePicture() API. The number of input buffers to be allocated by the + * client must be at least 4 more than the number of B frames being used for encoding. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] createInputBufferParams + * Pointer to the ::NV_ENC_CREATE_INPUT_BUFFER structure. 
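+ *
+ * A minimal allocation sketch for a single buffer (a real client allocates a pool,
+ * as described above; error handling omitted):
+ *\code
+    NV_ENC_CREATE_INPUT_BUFFER createIn = { NV_ENC_CREATE_INPUT_BUFFER_VER };
+    createIn.width     = 1280;
+    createIn.height    = 720;
+    createIn.bufferFmt = NV_ENC_BUFFER_FORMAT_NV12;
+    NvEncCreateInputBuffer(encoder, &createIn);
+    NV_ENC_INPUT_PTR inputBuffer = createIn.inputBuffer;
+ * \endcode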
+ * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncCreateInputBuffer (void* encoder, NV_ENC_CREATE_INPUT_BUFFER* createInputBufferParams); + + +// NvEncDestroyInputBuffer +/** + * \brief Release an input buffers. + * + * This function is used to free an input buffer. If the client has allocated + * any input buffer using ::NvEncCreateInputBuffer() API, it must free those + * input buffers by calling this function. The client must release the input + * buffers before destroying the encoder using ::NvEncDestroyEncoder() API. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] inputBuffer + * Pointer to the input buffer to be released. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncDestroyInputBuffer (void* encoder, NV_ENC_INPUT_PTR inputBuffer); + +// NvEncSetIOCudaStreams +/** + * \brief Set input and output CUDA stream for specified encoder attribute. + * + * Encoding may involve CUDA pre-processing on the input and post-processing on encoded output. + * This function is used to set input and output CUDA streams to pipeline the CUDA pre-processing + * and post-processing tasks. Clients should call this function before the call to + * NvEncUnlockInputBuffer(). If this function is not called, the default CUDA stream is used for + * input and output processing. After a successful call to this function, the streams specified + * in that call will replace the previously-used streams. + * This API is supported for NVCUVID interface only. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] inputStream + * Pointer to CUstream which is used to process ::NV_ENC_PIC_PARAMS::inputFrame for encode. + * In case of ME-only mode, inputStream is used to process ::NV_ENC_MEONLY_PARAMS::inputBuffer and + * ::NV_ENC_MEONLY_PARAMS::referenceFrame + * \param [in] outputStream + * Pointer to CUstream which is used to process ::NV_ENC_PIC_PARAMS::outputBuffer for encode. + * In case of ME-only mode, outputStream is used to process ::NV_ENC_MEONLY_PARAMS::mvBuffer + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_GENERIC \n + */ +NVENCSTATUS NVENCAPI NvEncSetIOCudaStreams (void* encoder, NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream); + + +// NvEncCreateBitstreamBuffer +/** + * \brief Allocates an output bitstream buffer + * + * This function is used to allocate an output bitstream buffer and returns a + * NV_ENC_OUTPUT_PTR to bitstream buffer to the client in the + * NV_ENC_CREATE_BITSTREAM_BUFFER::bitstreamBuffer field. + * The client can only call this function after the encoder session has been + * initialized using ::NvEncInitializeEncoder() API. 
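+ *
+ * An individual buffer is allocated roughly as follows (the required number of
+ * buffers is described below):
+ *\code
+    NV_ENC_CREATE_BITSTREAM_BUFFER createOut = { NV_ENC_CREATE_BITSTREAM_BUFFER_VER };
+    NvEncCreateBitstreamBuffer(encoder, &createOut);
+    NV_ENC_OUTPUT_PTR bitstreamBuffer = createOut.bitstreamBuffer;
+ * \endcode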
The minimum number of output + * buffers allocated by the client must be at least 4 more than the number of B + * B frames being used for encoding. The client can only access the output + * bitsteam data by locking the \p bitstreamBuffer using the ::NvEncLockBitstream() + * function. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] createBitstreamBufferParams + * Pointer ::NV_ENC_CREATE_BITSTREAM_BUFFER for details. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncCreateBitstreamBuffer (void* encoder, NV_ENC_CREATE_BITSTREAM_BUFFER* createBitstreamBufferParams); + + +// NvEncDestroyBitstreamBuffer +/** + * \brief Release a bitstream buffer. + * + * This function is used to release the output bitstream buffer allocated using + * the ::NvEncCreateBitstreamBuffer() function. The client must release the output + * bitstreamBuffer using this function before destroying the encoder session. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] bitstreamBuffer + * Pointer to the bitstream buffer being released. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncDestroyBitstreamBuffer (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer); + +// NvEncEncodePicture +/** + * \brief Submit an input picture for encoding. + * + * This function is used to submit an input picture buffer for encoding. The + * encoding parameters are passed using \p *encodePicParams which is a pointer + * to the ::_NV_ENC_PIC_PARAMS structure. + * + * If the client has set NV_ENC_INITIALIZE_PARAMS::enablePTD to 0, then it must + * send a valid value for the following fields. + * - NV_ENC_PIC_PARAMS::pictureType + * - NV_ENC_PIC_PARAMS_H264::displayPOCSyntax (H264 only) + * - NV_ENC_PIC_PARAMS_H264::frameNumSyntax(H264 only) + * - NV_ENC_PIC_PARAMS_H264::refPicFlag(H264 only) + * + *\par MVC Encoding: + * For MVC encoding the client must call encode picture api for each view separately + * and must pass valid view id in NV_ENC_PIC_PARAMS_MVC::viewID field. Currently + * NvEncodeAPI only support stereo MVC so client must send viewID as 0 for base + * view and view ID as 1 for dependent view. + * + *\par Asynchronous Encoding + * If the client has enabled asynchronous mode of encoding by setting + * NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 1 in the ::NvEncInitializeEncoder() + * API ,then the client must send a valid NV_ENC_PIC_PARAMS::completionEvent. + * Incase of asynchronous mode of operation, client can queue the ::NvEncEncodePicture() + * API commands from the main thread and then queue output buffers to be processed + * to a secondary worker thread. Before the locking the output buffers in the + * secondary thread , the client must wait on NV_ENC_PIC_PARAMS::completionEvent + * it has queued in ::NvEncEncodePicture() API call. 
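+ *
+ * A single asynchronous submission can be set up roughly as follows (the input
+ * buffer, output buffer and completion event are assumed to have been created and
+ * registered beforehand; error handling omitted):
+ *\code
+    NV_ENC_PIC_PARAMS picParams = { NV_ENC_PIC_PARAMS_VER };
+    picParams.inputWidth      = 1280;
+    picParams.inputHeight     = 720;
+    picParams.inputBuffer     = inputBuffer;
+    picParams.outputBitstream = bitstreamBuffer;
+    picParams.completionEvent = completionEvent;
+    picParams.bufferFmt       = NV_ENC_BUFFER_FORMAT_NV12;
+    picParams.pictureStruct   = NV_ENC_PIC_STRUCT_FRAME;
+    NvEncEncodePicture(encoder, &picParams);
+ * \endcode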
The client must always process + * completion event and the output buffer in the same order in which they have been + * submitted for encoding. The NvEncodeAPI interface is responsible for any + * re-ordering required for B frames and will always ensure that encoded bitstream + * data is written in the same order in which output buffer is submitted. + *\code + The below example shows how asynchronous encoding in case of 1 B frames + ------------------------------------------------------------------------ + Suppose the client allocated 4 input buffers(I1,I2..), 4 output buffers(O1,O2..) + and 4 completion events(E1, E2, ...). The NvEncodeAPI interface will need to + keep a copy of the input buffers for re-ordering and it allocates following + internal buffers (NvI1, NvI2...). These internal buffers are managed by NvEncodeAPI + and the client is not responsible for the allocating or freeing the memory of + the internal buffers. + + a) The client main thread will queue the following encode frame calls. + Note the picture type is unknown to the client, the decision is being taken by + NvEncodeAPI interface. The client should pass ::_NV_ENC_PIC_PARAMS parameter + consisting of allocated input buffer, output buffer and output events in successive + ::NvEncEncodePicture() API calls along with other required encode picture params. + For example: + 1st EncodePicture parameters - (I1, O1, E1) + 2nd EncodePicture parameters - (I2, O2, E2) + 3rd EncodePicture parameters - (I3, O3, E3) + + b) NvEncodeAPI SW will receive the following encode Commands from the client. + The left side shows input from client in the form (Input buffer, Output Buffer, + Output Event). The right hand side shows a possible picture type decision take by + the NvEncodeAPI interface. + (I1, O1, E1) ---P1 Frame + (I2, O2, E2) ---B2 Frame + (I3, O3, E3) ---P3 Frame + + c) NvEncodeAPI interface will make a copy of the input buffers to its internal + buffersfor re-ordering. These copies are done as part of nvEncEncodePicture + function call from the client and NvEncodeAPI interface is responsible for + synchronization of copy operation with the actual encoding operation. + I1 --> NvI1 + I2 --> NvI2 + I3 --> NvI3 + + d) After returning from ::NvEncEncodePicture() call , the client must queue the output + bitstream processing work to the secondary thread. The output bitstream processing + for asynchronous mode consist of first waiting on completion event(E1, E2..) + and then locking the output bitstream buffer(O1, O2..) for reading the encoded + data. The work queued to the secondary thread by the client is in the following order + (I1, O1, E1) + (I2, O2, E2) + (I3, O3, E3) + Note they are in the same order in which client calls ::NvEncEncodePicture() API + in \p step a). + + e) NvEncodeAPI interface will do the re-ordering such that Encoder HW will receive + the following encode commands: + (NvI1, O1, E1) ---P1 Frame + (NvI3, O2, E2) ---P3 Frame + (NvI2, O3, E3) ---B2 frame + + f) After the encoding operations are completed, the events will be signalled + by NvEncodeAPI interface in the following order : + (O1, E1) ---P1 Frame ,output bitstream copied to O1 and event E1 signalled. + (O2, E2) ---P3 Frame ,output bitstream copied to O2 and event E2 signalled. + (O3, E3) ---B2 Frame ,output bitstream copied to O3 and event E3 signalled. 
+ + g) The client must lock the bitstream data using ::NvEncLockBitstream() API in + the order O1,O2,O3 to read the encoded data, after waiting for the events + to be signalled in the same order i.e E1, E2 and E3.The output processing is + done in the secondary thread in the following order: + Waits on E1, copies encoded bitstream from O1 + Waits on E2, copies encoded bitstream from O2 + Waits on E3, copies encoded bitstream from O3 + + -Note the client will receive the events signalling and output buffer in the + same order in which they have submitted for encoding. + -Note the LockBitstream will have picture type field which will notify the + output picture type to the clients. + -Note the input, output buffer and the output completion event are free to be + reused once NvEncodeAPI interfaced has signalled the event and the client has + copied the data from the output buffer. + + * \endcode + * + *\par Synchronous Encoding + * The client can enable synchronous mode of encoding by setting + * NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 0 in ::NvEncInitializeEncoder() API. + * The NvEncodeAPI interface may return ::NV_ENC_ERR_NEED_MORE_INPUT error code for + * some ::NvEncEncodePicture() API calls when NV_ENC_INITIALIZE_PARAMS::enablePTD + * is set to 1, but the client must not treat it as a fatal error. The NvEncodeAPI + * interface might not be able to submit an input picture buffer for encoding + * immediately due to re-ordering for B frames. The NvEncodeAPI interface cannot + * submit the input picture which is decided to be encoded as B frame as it waits + * for backward reference from temporally subsequent frames. This input picture + * is buffered internally and waits for more input picture to arrive. The client + * must not call ::NvEncLockBitstream() API on the output buffers whose + * ::NvEncEncodePicture() API returns ::NV_ENC_ERR_NEED_MORE_INPUT. The client must + * wait for the NvEncodeAPI interface to return ::NV_ENC_SUCCESS before locking the + * output bitstreams to read the encoded bitstream data. The following example + * explains the scenario with synchronous encoding with 2 B frames. + *\code + The below example shows how synchronous encoding works in case of 1 B frames + ----------------------------------------------------------------------------- + Suppose the client allocated 4 input buffers(I1,I2..), 4 output buffers(O1,O2..) + and 4 completion events(E1, E2, ...). The NvEncodeAPI interface will need to + keep a copy of the input buffers for re-ordering and it allocates following + internal buffers (NvI1, NvI2...). These internal buffers are managed by NvEncodeAPI + and the client is not responsible for the allocating or freeing the memory of + the internal buffers. + + The client calls ::NvEncEncodePicture() API with input buffer I1 and output buffer O1. + The NvEncodeAPI decides to encode I1 as P frame and submits it to encoder + HW and returns ::NV_ENC_SUCCESS. + The client can now read the encoded data by locking the output O1 by calling + NvEncLockBitstream API. + + The client calls ::NvEncEncodePicture() API with input buffer I2 and output buffer O2. + The NvEncodeAPI decides to encode I2 as B frame and buffers I2 by copying it + to internal buffer and returns ::NV_ENC_ERR_NEED_MORE_INPUT. + The error is not fatal and it notifies client that it cannot read the encoded + data by locking the output O2 by calling ::NvEncLockBitstream() API without submitting + more work to the NvEncodeAPI interface. 
+ + The client calls ::NvEncEncodePicture() with input buffer I3 and output buffer O3. + The NvEncodeAPI decides to encode I3 as P frame and it first submits I3 for + encoding which will be used as backward reference frame for I2. + The NvEncodeAPI then submits I2 for encoding and returns ::NV_ENC_SUCESS. Both + the submission are part of the same ::NvEncEncodePicture() function call. + The client can now read the encoded data for both the frames by locking the output + O2 followed by O3 ,by calling ::NvEncLockBitstream() API. + + The client must always lock the output in the same order in which it has submitted + to receive the encoded bitstream in correct encoding order. + + * \endcode + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] encodePicParams + * Pointer to the ::_NV_ENC_PIC_PARAMS structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_ENCODER_BUSY \n + * ::NV_ENC_ERR_NEED_MORE_INPUT \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncEncodePicture (void* encoder, NV_ENC_PIC_PARAMS* encodePicParams); + + +// NvEncLockBitstream +/** + * \brief Lock output bitstream buffer + * + * This function is used to lock the bitstream buffer to read the encoded data. + * The client can only access the encoded data by calling this function. + * The pointer to client accessible encoded data is returned in the + * NV_ENC_LOCK_BITSTREAM::bitstreamBufferPtr field. The size of the encoded data + * in the output buffer is returned in the NV_ENC_LOCK_BITSTREAM::bitstreamSizeInBytes + * The NvEncodeAPI interface also returns the output picture type and picture structure + * of the encoded frame in NV_ENC_LOCK_BITSTREAM::pictureType and + * NV_ENC_LOCK_BITSTREAM::pictureStruct fields respectively. If the client has + * set NV_ENC_LOCK_BITSTREAM::doNotWait to 1, the function might return + * ::NV_ENC_ERR_LOCK_BUSY if client is operating in synchronous mode. This is not + * a fatal failure if NV_ENC_LOCK_BITSTREAM::doNotWait is set to 1. In the above case the client can + * retry the function after few milliseconds. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] lockBitstreamBufferParams + * Pointer to the ::_NV_ENC_LOCK_BITSTREAM structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_LOCK_BUSY \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncLockBitstream (void* encoder, NV_ENC_LOCK_BITSTREAM* lockBitstreamBufferParams); + + +// NvEncUnlockBitstream +/** + * \brief Unlock the output bitstream buffer + * + * This function is used to unlock the output bitstream buffer after the client + * has read the encoded data from output buffer. The client must call this function + * to unlock the output buffer which it has previously locked using ::NvEncLockBitstream() + * function. Using a locked bitstream buffer in ::NvEncEncodePicture() API will cause + * the function to fail. 
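+ *
+ * Together with ::NvEncLockBitstream(), a typical read-back sequence looks roughly
+ * like this (error handling omitted):
+ *\code
+    NV_ENC_LOCK_BITSTREAM lockParams = { NV_ENC_LOCK_BITSTREAM_VER };
+    lockParams.outputBitstream = bitstreamBuffer;
+    NvEncLockBitstream(encoder, &lockParams);
+    // encoded data is at lockParams.bitstreamBufferPtr,
+    // size is lockParams.bitstreamSizeInBytes; copy it out here
+    NvEncUnlockBitstream(encoder, bitstreamBuffer);
+ * \endcode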
+ * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] bitstreamBuffer + * bitstream buffer pointer being unlocked + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncUnlockBitstream (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer); + + +// NvLockInputBuffer +/** + * \brief Locks an input buffer + * + * This function is used to lock the input buffer to load the uncompressed YUV + * pixel data into input buffer memory. The client must pass the NV_ENC_INPUT_PTR + * it had previously allocated using ::NvEncCreateInputBuffer()in the + * NV_ENC_LOCK_INPUT_BUFFER::inputBuffer field. + * The NvEncodeAPI interface returns pointer to client accessible input buffer + * memory in NV_ENC_LOCK_INPUT_BUFFER::bufferDataPtr field. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] lockInputBufferParams + * Pointer to the ::_NV_ENC_LOCK_INPUT_BUFFER structure + * + * \return + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_LOCK_BUSY \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncLockInputBuffer (void* encoder, NV_ENC_LOCK_INPUT_BUFFER* lockInputBufferParams); + + +// NvUnlockInputBuffer +/** + * \brief Unlocks the input buffer + * + * This function is used to unlock the input buffer memory previously locked for + * uploading YUV pixel data. The input buffer must be unlocked before being used + * again for encoding, otherwise NvEncodeAPI will fail the ::NvEncEncodePicture() + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] inputBuffer + * Pointer to the input buffer that is being unlocked. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + * + */ +NVENCSTATUS NVENCAPI NvEncUnlockInputBuffer (void* encoder, NV_ENC_INPUT_PTR inputBuffer); + + +// NvEncGetEncodeStats +/** + * \brief Get encoding statistics. + * + * This function is used to retrieve the encoding statistics. + * This API is not supported when encode device type is CUDA. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] encodeStats + * Pointer to the ::_NV_ENC_STAT structure. 
+ * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetEncodeStats (void* encoder, NV_ENC_STAT* encodeStats); + + +// NvEncGetSequenceParams +/** + * \brief Get encoded sequence and picture header. + * + * This function can be used to retrieve the sequence and picture header out of + * band. The client must call this function only after the encoder has been + * initialized using ::NvEncInitializeEncoder() function. The client must + * allocate the memory where the NvEncodeAPI interface can copy the bitstream + * header and pass the pointer to the memory in NV_ENC_SEQUENCE_PARAM_PAYLOAD::spsppsBuffer. + * The size of buffer is passed in the field NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize. + * The NvEncodeAPI interface will copy the bitstream header payload and returns + * the actual size of the bitstream header in the field + * NV_ENC_SEQUENCE_PARAM_PAYLOAD::outSPSPPSPayloadSize. + * The client must call ::NvEncGetSequenceParams() function from the same thread which is + * being used to call ::NvEncEncodePicture() function. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] sequenceParamPayload + * Pointer to the ::_NV_ENC_SEQUENCE_PARAM_PAYLOAD structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncGetSequenceParams (void* encoder, NV_ENC_SEQUENCE_PARAM_PAYLOAD* sequenceParamPayload); + + +// NvEncRegisterAsyncEvent +/** + * \brief Register event for notification to encoding completion. + * + * This function is used to register the completion event with NvEncodeAPI + * interface. The event is required when the client has configured the encoder to + * work in asynchronous mode. In this mode the client needs to send a completion + * event with every output buffer. The NvEncodeAPI interface will signal the + * completion of the encoding process using this event. Only after the event is + * signalled the client can get the encoded data using ::NvEncLockBitstream() function. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] eventParams + * Pointer to the ::_NV_ENC_EVENT_PARAMS structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncRegisterAsyncEvent (void* encoder, NV_ENC_EVENT_PARAMS* eventParams); + + +// NvEncUnregisterAsyncEvent +/** + * \brief Unregister completion event. + * + * This function is used to unregister completion event which has been previously + * registered using ::NvEncRegisterAsyncEvent() function. 
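+ *
+ * On Windows the register/unregister pair is typically used as sketched below
+ * (one event per output buffer; error handling omitted):
+ *\code
+    NV_ENC_EVENT_PARAMS eventParams = { NV_ENC_EVENT_PARAMS_VER };
+    eventParams.completionEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+    NvEncRegisterAsyncEvent(encoder, &eventParams);
+    // ... encode, wait on the event, lock the bitstream ...
+    NvEncUnregisterAsyncEvent(encoder, &eventParams);
+    CloseHandle(eventParams.completionEvent);
+ * \endcode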
The client must unregister + * all events before destroying the encoder using ::NvEncDestroyEncoder() function. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] eventParams + * Pointer to the ::_NV_ENC_EVENT_PARAMS structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncUnregisterAsyncEvent (void* encoder, NV_ENC_EVENT_PARAMS* eventParams); + + +// NvEncMapInputResource +/** + * \brief Map an externally created input resource pointer for encoding. + * + * Maps an externally allocated input resource [using and returns a NV_ENC_INPUT_PTR + * which can be used for encoding in the ::NvEncEncodePicture() function. The + * mapped resource is returned in the field NV_ENC_MAP_INPUT_RESOURCE::outputResourcePtr. + * The NvEncodeAPI interface also returns the buffer format of the mapped resource + * in the field NV_ENC_MAP_INPUT_RESOURCE::outbufferFmt. + * This function provides synchronization guarantee that any graphics work submitted + * on the input buffer is completed before the buffer is used for encoding. This is + * also true for compute (i.e. CUDA) work, provided that the previous workload using + * the input resource was submitted to the default stream. + * The client should not access any input buffer while they are mapped by the encoder. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] mapInputResParams + * Pointer to the ::_NV_ENC_MAP_INPUT_RESOURCE structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \n + * ::NV_ENC_ERR_MAP_FAILED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncMapInputResource (void* encoder, NV_ENC_MAP_INPUT_RESOURCE* mapInputResParams); + + +// NvEncUnmapInputResource +/** + * \brief UnMaps a NV_ENC_INPUT_PTR which was mapped for encoding + * + * + * UnMaps an input buffer which was previously mapped using ::NvEncMapInputResource() + * API. The mapping created using ::NvEncMapInputResource() should be invalidated + * using this API before the external resource is destroyed by the client. The client + * must unmap the buffer after ::NvEncLockBitstream() API returns succuessfully for encode + * work submitted using the mapped input buffer. + * + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. 
+ * \param [in] mappedInputBuffer + * Pointer to the NV_ENC_INPUT_PTR + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \n + * ::NV_ENC_ERR_RESOURCE_NOT_MAPPED \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncUnmapInputResource (void* encoder, NV_ENC_INPUT_PTR mappedInputBuffer); + +// NvEncDestroyEncoder +/** + * \brief Destroy Encoding Session + * + * Destroys the encoder session previously created using ::NvEncOpenEncodeSession() + * function. The client must flush the encoder before freeing any resources. In order + * to flush the encoder the client must pass a NULL encode picture packet and either + * wait for the ::NvEncEncodePicture() function to return in synchronous mode or wait + * for the flush event to be signaled by the encoder in asynchronous mode. + * The client must free all the input and output resources created using the + * NvEncodeAPI interface before destroying the encoder. If the client is operating + * in asynchronous mode, it must also unregister the completion events previously + * registered. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncDestroyEncoder (void* encoder); + +// NvEncInvalidateRefFrames +/** + * \brief Invalidate reference frames + * + * Invalidates reference frame based on the time stamp provided by the client. + * The encoder marks any reference frames or any frames which have been reconstructed + * using the corrupt frame as invalid for motion estimation and uses older reference + * frames for motion estimation. The encoded forces the current frame to be encoded + * as an intra frame if no reference frames are left after invalidation process. + * This is useful for low latency application for error resiliency. The client + * is recommended to set NV_ENC_CONFIG_H264::maxNumRefFrames to a large value so + * that encoder can keep a backup of older reference frames in the DPB and can use them + * for motion estimation when the newer reference frames have been invalidated. + * This API can be called multiple times. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] invalidRefFrameTimeStamp + * Timestamp of the invalid reference frames which needs to be invalidated. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncInvalidateRefFrames(void* encoder, uint64_t invalidRefFrameTimeStamp); + +// NvEncOpenEncodeSessionEx +/** + * \brief Opens an encoding session. + * + * Opens an encoding session and returns a pointer to the encoder interface in + * the \p **encoder parameter. The client should start encoding process by calling + * this API first. 
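+ *
+ * A minimal session setup might look like this (shown with a client-created
+ * Direct3D device; error handling omitted):
+ *\code
+    void* encoder = NULL;
+    NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS sessionParams = { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER };
+    sessionParams.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
+    sessionParams.device     = d3dDevice;        // client-created Direct3D device
+    sessionParams.apiVersion = NVENCAPI_VERSION;
+    NvEncOpenEncodeSessionEx(&sessionParams, &encoder);
+ * \endcode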
+ * The client must pass a pointer to IDirect3DDevice9 device or CUDA context in the \p *device parameter. + * For the OpenGL interface, \p device must be NULL. An OpenGL context must be current when + * calling all NvEncodeAPI functions. + * If the creation of encoder session fails, the client must call ::NvEncDestroyEncoder API + * before exiting. + * + * \param [in] openSessionExParams + * Pointer to a ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS structure. + * \param [out] encoder + * Encode Session pointer to the NvEncodeAPI interface. + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_NO_ENCODE_DEVICE \n + * ::NV_ENC_ERR_UNSUPPORTED_DEVICE \n + * ::NV_ENC_ERR_INVALID_DEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncOpenEncodeSessionEx (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS *openSessionExParams, void** encoder); + +// NvEncRegisterResource +/** + * \brief Registers a resource with the Nvidia Video Encoder Interface. + * + * Registers a resource with the Nvidia Video Encoder Interface for book keeping. + * The client is expected to pass the registered resource handle as well, while calling ::NvEncMapInputResource API. + * + * \param [in] encoder + * Pointer to the NVEncodeAPI interface. + * + * \param [in] registerResParams + * Pointer to a ::_NV_ENC_REGISTER_RESOURCE structure + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_RESOURCE_REGISTER_FAILED \n + * ::NV_ENC_ERR_GENERIC \n + * ::NV_ENC_ERR_UNIMPLEMENTED \n + * + */ +NVENCSTATUS NVENCAPI NvEncRegisterResource (void* encoder, NV_ENC_REGISTER_RESOURCE* registerResParams); + +// NvEncUnregisterResource +/** + * \brief Unregisters a resource previously registered with the Nvidia Video Encoder Interface. + * + * Unregisters a resource previously registered with the Nvidia Video Encoder Interface. + * The client is expected to unregister any resource that it has registered with the + * Nvidia Video Encoder Interface before destroying the resource. + * + * \param [in] encoder + * Pointer to the NVEncodeAPI interface. + * + * \param [in] registeredResource + * The registered resource pointer that was returned in ::NvEncRegisterResource. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \n + * ::NV_ENC_ERR_GENERIC \n + * ::NV_ENC_ERR_UNIMPLEMENTED \n + * + */ +NVENCSTATUS NVENCAPI NvEncUnregisterResource (void* encoder, NV_ENC_REGISTERED_PTR registeredResource); + +// NvEncReconfigureEncoder +/** + * \brief Reconfigure an existing encoding session. + * + * Reconfigure an existing encoding session. + * The client should call this API to change/reconfigure the parameter passed during + * NvEncInitializeEncoder API call. + * Currently Reconfiguration of following are not supported. + * Change in GOP structure. + * Change in sync-Async mode. + * Change in MaxWidth & MaxHeight. 
+ * Change in PTDmode. + * + * Resolution change is possible only if maxEncodeWidth & maxEncodeHeight of NV_ENC_INITIALIZE_PARAMS + * is set while creating encoder session. + * + * \param [in] encoder + * Pointer to the NVEncodeAPI interface. + * + * \param [in] reInitEncodeParams + * Pointer to a ::NV_ENC_RECONFIGURE_PARAMS structure. + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_NO_ENCODE_DEVICE \n + * ::NV_ENC_ERR_UNSUPPORTED_DEVICE \n + * ::NV_ENC_ERR_INVALID_DEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_GENERIC \n + * + */ +NVENCSTATUS NVENCAPI NvEncReconfigureEncoder (void *encoder, NV_ENC_RECONFIGURE_PARAMS* reInitEncodeParams); + + + +// NvEncCreateMVBuffer +/** + * \brief Allocates output MV buffer for ME only mode. + * + * This function is used to allocate an output MV buffer. The size of the mvBuffer is + * dependent on the frame height and width of the last ::NvEncCreateInputBuffer() call. + * The NV_ENC_OUTPUT_PTR returned by the NvEncodeAPI interface in the + * ::NV_ENC_CREATE_MV_BUFFER::mvBuffer field should be used in + * ::NvEncRunMotionEstimationOnly() API. + * Client must lock ::NV_ENC_CREATE_MV_BUFFER::mvBuffer using ::NvEncLockBitstream() API to get the motion vector data. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in,out] createMVBufferParams + * Pointer to the ::NV_ENC_CREATE_MV_BUFFER structure. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_GENERIC \n + */ +NVENCSTATUS NVENCAPI NvEncCreateMVBuffer (void* encoder, NV_ENC_CREATE_MV_BUFFER* createMVBufferParams); + + +// NvEncDestroyMVBuffer +/** + * \brief Release an output MV buffer for ME only mode. + * + * This function is used to release the output MV buffer allocated using + * the ::NvEncCreateMVBuffer() function. The client must release the output + * mvBuffer using this function before destroying the encoder session. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] mvBuffer + * Pointer to the mvBuffer being released. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + */ +NVENCSTATUS NVENCAPI NvEncDestroyMVBuffer (void* encoder, NV_ENC_OUTPUT_PTR mvBuffer); + + +// NvEncRunMotionEstimationOnly +/** + * \brief Submit an input picture and reference frame for motion estimation in ME only mode. + * + * This function is used to submit the input frame and reference frame for motion + * estimation. The ME parameters are passed using *meOnlyParams which is a pointer + * to ::_NV_ENC_MEONLY_PARAMS structure. + * Client must lock ::NV_ENC_CREATE_MV_BUFFER::mvBuffer using ::NvEncLockBitstream() API to get the motion vector data. + * to get motion vector data. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * \param [in] meOnlyParams + * Pointer to the ::_NV_ENC_MEONLY_PARAMS structure. 
+ * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + * ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n + * ::NV_ENC_ERR_DEVICE_NOT_EXIST \n + * ::NV_ENC_ERR_UNSUPPORTED_PARAM \n + * ::NV_ENC_ERR_OUT_OF_MEMORY \n + * ::NV_ENC_ERR_INVALID_PARAM \n + * ::NV_ENC_ERR_INVALID_VERSION \n + * ::NV_ENC_ERR_NEED_MORE_INPUT \n + * ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n + * ::NV_ENC_ERR_GENERIC \n + */ +NVENCSTATUS NVENCAPI NvEncRunMotionEstimationOnly (void* encoder, NV_ENC_MEONLY_PARAMS* meOnlyParams); + +// NvEncodeAPIGetMaxSupportedVersion +/** + * \brief Get the largest NvEncodeAPI version supported by the driver. + * + * This function can be used by clients to determine if the driver supports + * the NvEncodeAPI header the application was compiled with. + * + * \param [out] version + * Pointer to the requested value. The 4 least significant bits in the returned + * indicate the minor version and the rest of the bits indicate the major + * version of the largest supported version. + * + * \return + * ::NV_ENC_SUCCESS \n + * ::NV_ENC_ERR_INVALID_PTR \n + */ +NVENCSTATUS NVENCAPI NvEncodeAPIGetMaxSupportedVersion (uint32_t* version); + + +// NvEncodeAPIGetLastErrorString +/** + * \brief Get the description of the last error reported by the API. + * + * This function returns a null-terminated string that can be used by clients to better understand the reason + * for failure of a previous API call. + * + * \param [in] encoder + * Pointer to the NvEncodeAPI interface. + * + * \return + * Pointer to buffer containing the details of the last error encountered by the API. + */ +const char * NVENCAPI NvEncGetLastErrorString (void* encoder); + + +/// \cond API PFN +/* + * Defines API function pointers + */ +typedef NVENCSTATUS (NVENCAPI* PNVENCOPENENCODESESSION) (void* device, uint32_t deviceType, void** encoder); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEGUIDCOUNT) (void* encoder, uint32_t* encodeGUIDCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEGUIDS) (void* encoder, GUID* GUIDs, uint32_t guidArraySize, uint32_t* GUIDCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPROFILEGUIDCOUNT) (void* encoder, GUID encodeGUID, uint32_t* encodeProfileGUIDCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPROFILEGUIDS) (void* encoder, GUID encodeGUID, GUID* profileGUIDs, uint32_t guidArraySize, uint32_t* GUIDCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETINPUTFORMATCOUNT) (void* encoder, GUID encodeGUID, uint32_t* inputFmtCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETINPUTFORMATS) (void* encoder, GUID encodeGUID, NV_ENC_BUFFER_FORMAT* inputFmts, uint32_t inputFmtArraySize, uint32_t* inputFmtCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODECAPS) (void* encoder, GUID encodeGUID, NV_ENC_CAPS_PARAM* capsParam, int* capsVal); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPRESETCOUNT) (void* encoder, GUID encodeGUID, uint32_t* encodePresetGUIDCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPRESETGUIDS) (void* encoder, GUID encodeGUID, GUID* presetGUIDs, uint32_t guidArraySize, uint32_t* encodePresetGUIDCount); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODEPRESETCONFIG) (void* encoder, GUID encodeGUID, GUID presetGUID, NV_ENC_PRESET_CONFIG* presetConfig); +typedef NVENCSTATUS (NVENCAPI* PNVENCINITIALIZEENCODER) (void* encoder, NV_ENC_INITIALIZE_PARAMS* createEncodeParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCCREATEINPUTBUFFER) (void* encoder, NV_ENC_CREATE_INPUT_BUFFER* createInputBufferParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYINPUTBUFFER) (void* encoder, 
NV_ENC_INPUT_PTR inputBuffer); +typedef NVENCSTATUS (NVENCAPI* PNVENCCREATEBITSTREAMBUFFER) (void* encoder, NV_ENC_CREATE_BITSTREAM_BUFFER* createBitstreamBufferParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYBITSTREAMBUFFER) (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer); +typedef NVENCSTATUS (NVENCAPI* PNVENCENCODEPICTURE) (void* encoder, NV_ENC_PIC_PARAMS* encodePicParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCLOCKBITSTREAM) (void* encoder, NV_ENC_LOCK_BITSTREAM* lockBitstreamBufferParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCUNLOCKBITSTREAM) (void* encoder, NV_ENC_OUTPUT_PTR bitstreamBuffer); +typedef NVENCSTATUS (NVENCAPI* PNVENCLOCKINPUTBUFFER) (void* encoder, NV_ENC_LOCK_INPUT_BUFFER* lockInputBufferParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCUNLOCKINPUTBUFFER) (void* encoder, NV_ENC_INPUT_PTR inputBuffer); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETENCODESTATS) (void* encoder, NV_ENC_STAT* encodeStats); +typedef NVENCSTATUS (NVENCAPI* PNVENCGETSEQUENCEPARAMS) (void* encoder, NV_ENC_SEQUENCE_PARAM_PAYLOAD* sequenceParamPayload); +typedef NVENCSTATUS (NVENCAPI* PNVENCREGISTERASYNCEVENT) (void* encoder, NV_ENC_EVENT_PARAMS* eventParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCUNREGISTERASYNCEVENT) (void* encoder, NV_ENC_EVENT_PARAMS* eventParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCMAPINPUTRESOURCE) (void* encoder, NV_ENC_MAP_INPUT_RESOURCE* mapInputResParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCUNMAPINPUTRESOURCE) (void* encoder, NV_ENC_INPUT_PTR mappedInputBuffer); +typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYENCODER) (void* encoder); +typedef NVENCSTATUS (NVENCAPI* PNVENCINVALIDATEREFFRAMES) (void* encoder, uint64_t invalidRefFrameTimeStamp); +typedef NVENCSTATUS (NVENCAPI* PNVENCOPENENCODESESSIONEX) (NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS *openSessionExParams, void** encoder); +typedef NVENCSTATUS (NVENCAPI* PNVENCREGISTERRESOURCE) (void* encoder, NV_ENC_REGISTER_RESOURCE* registerResParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCUNREGISTERRESOURCE) (void* encoder, NV_ENC_REGISTERED_PTR registeredRes); +typedef NVENCSTATUS (NVENCAPI* PNVENCRECONFIGUREENCODER) (void* encoder, NV_ENC_RECONFIGURE_PARAMS* reInitEncodeParams); + +typedef NVENCSTATUS (NVENCAPI* PNVENCCREATEMVBUFFER) (void* encoder, NV_ENC_CREATE_MV_BUFFER* createMVBufferParams); +typedef NVENCSTATUS (NVENCAPI* PNVENCDESTROYMVBUFFER) (void* encoder, NV_ENC_OUTPUT_PTR mvBuffer); +typedef NVENCSTATUS (NVENCAPI* PNVENCRUNMOTIONESTIMATIONONLY) (void* encoder, NV_ENC_MEONLY_PARAMS* meOnlyParams); +typedef const char * (NVENCAPI* PNVENCGETLASTERROR) (void* encoder); +typedef NVENCSTATUS (NVENCAPI* PNVENCSETIOCUDASTREAMS) (void* encoder, NV_ENC_CUSTREAM_PTR inputStream, NV_ENC_CUSTREAM_PTR outputStream); + + +/// \endcond + + +/** @} */ /* END ENCODE_FUNC */ + +/** + * \ingroup ENCODER_STRUCTURE + * NV_ENCODE_API_FUNCTION_LIST + */ +typedef struct _NV_ENCODE_API_FUNCTION_LIST +{ + uint32_t version; /**< [in]: Client should pass NV_ENCODE_API_FUNCTION_LIST_VER. */ + uint32_t reserved; /**< [in]: Reserved and should be set to 0. */ + PNVENCOPENENCODESESSION nvEncOpenEncodeSession; /**< [out]: Client should access ::NvEncOpenEncodeSession() API through this pointer. */ + PNVENCGETENCODEGUIDCOUNT nvEncGetEncodeGUIDCount; /**< [out]: Client should access ::NvEncGetEncodeGUIDCount() API through this pointer. 
*/ + PNVENCGETENCODEPRESETCOUNT nvEncGetEncodeProfileGUIDCount; /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDCount() API through this pointer.*/ + PNVENCGETENCODEPRESETGUIDS nvEncGetEncodeProfileGUIDs; /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDs() API through this pointer. */ + PNVENCGETENCODEGUIDS nvEncGetEncodeGUIDs; /**< [out]: Client should access ::NvEncGetEncodeGUIDs() API through this pointer. */ + PNVENCGETINPUTFORMATCOUNT nvEncGetInputFormatCount; /**< [out]: Client should access ::NvEncGetInputFormatCount() API through this pointer. */ + PNVENCGETINPUTFORMATS nvEncGetInputFormats; /**< [out]: Client should access ::NvEncGetInputFormats() API through this pointer. */ + PNVENCGETENCODECAPS nvEncGetEncodeCaps; /**< [out]: Client should access ::NvEncGetEncodeCaps() API through this pointer. */ + PNVENCGETENCODEPRESETCOUNT nvEncGetEncodePresetCount; /**< [out]: Client should access ::NvEncGetEncodePresetCount() API through this pointer. */ + PNVENCGETENCODEPRESETGUIDS nvEncGetEncodePresetGUIDs; /**< [out]: Client should access ::NvEncGetEncodePresetGUIDs() API through this pointer. */ + PNVENCGETENCODEPRESETCONFIG nvEncGetEncodePresetConfig; /**< [out]: Client should access ::NvEncGetEncodePresetConfig() API through this pointer. */ + PNVENCINITIALIZEENCODER nvEncInitializeEncoder; /**< [out]: Client should access ::NvEncInitializeEncoder() API through this pointer. */ + PNVENCCREATEINPUTBUFFER nvEncCreateInputBuffer; /**< [out]: Client should access ::NvEncCreateInputBuffer() API through this pointer. */ + PNVENCDESTROYINPUTBUFFER nvEncDestroyInputBuffer; /**< [out]: Client should access ::NvEncDestroyInputBuffer() API through this pointer. */ + PNVENCCREATEBITSTREAMBUFFER nvEncCreateBitstreamBuffer; /**< [out]: Client should access ::NvEncCreateBitstreamBuffer() API through this pointer. */ + PNVENCDESTROYBITSTREAMBUFFER nvEncDestroyBitstreamBuffer; /**< [out]: Client should access ::NvEncDestroyBitstreamBuffer() API through this pointer. */ + PNVENCENCODEPICTURE nvEncEncodePicture; /**< [out]: Client should access ::NvEncEncodePicture() API through this pointer. */ + PNVENCLOCKBITSTREAM nvEncLockBitstream; /**< [out]: Client should access ::NvEncLockBitstream() API through this pointer. */ + PNVENCUNLOCKBITSTREAM nvEncUnlockBitstream; /**< [out]: Client should access ::NvEncUnlockBitstream() API through this pointer. */ + PNVENCLOCKINPUTBUFFER nvEncLockInputBuffer; /**< [out]: Client should access ::NvEncLockInputBuffer() API through this pointer. */ + PNVENCUNLOCKINPUTBUFFER nvEncUnlockInputBuffer; /**< [out]: Client should access ::NvEncUnlockInputBuffer() API through this pointer. */ + PNVENCGETENCODESTATS nvEncGetEncodeStats; /**< [out]: Client should access ::NvEncGetEncodeStats() API through this pointer. */ + PNVENCGETSEQUENCEPARAMS nvEncGetSequenceParams; /**< [out]: Client should access ::NvEncGetSequenceParams() API through this pointer. */ + PNVENCREGISTERASYNCEVENT nvEncRegisterAsyncEvent; /**< [out]: Client should access ::NvEncRegisterAsyncEvent() API through this pointer. */ + PNVENCUNREGISTERASYNCEVENT nvEncUnregisterAsyncEvent; /**< [out]: Client should access ::NvEncUnregisterAsyncEvent() API through this pointer. */ + PNVENCMAPINPUTRESOURCE nvEncMapInputResource; /**< [out]: Client should access ::NvEncMapInputResource() API through this pointer. */ + PNVENCUNMAPINPUTRESOURCE nvEncUnmapInputResource; /**< [out]: Client should access ::NvEncUnmapInputResource() API through this pointer. 
*/ + PNVENCDESTROYENCODER nvEncDestroyEncoder; /**< [out]: Client should access ::NvEncDestroyEncoder() API through this pointer. */ + PNVENCINVALIDATEREFFRAMES nvEncInvalidateRefFrames; /**< [out]: Client should access ::NvEncInvalidateRefFrames() API through this pointer. */ + PNVENCOPENENCODESESSIONEX nvEncOpenEncodeSessionEx; /**< [out]: Client should access ::NvEncOpenEncodeSession() API through this pointer. */ + PNVENCREGISTERRESOURCE nvEncRegisterResource; /**< [out]: Client should access ::NvEncRegisterResource() API through this pointer. */ + PNVENCUNREGISTERRESOURCE nvEncUnregisterResource; /**< [out]: Client should access ::NvEncUnregisterResource() API through this pointer. */ + PNVENCRECONFIGUREENCODER nvEncReconfigureEncoder; /**< [out]: Client should access ::NvEncReconfigureEncoder() API through this pointer. */ + void* reserved1; + PNVENCCREATEMVBUFFER nvEncCreateMVBuffer; /**< [out]: Client should access ::NvEncCreateMVBuffer API through this pointer. */ + PNVENCDESTROYMVBUFFER nvEncDestroyMVBuffer; /**< [out]: Client should access ::NvEncDestroyMVBuffer API through this pointer. */ + PNVENCRUNMOTIONESTIMATIONONLY nvEncRunMotionEstimationOnly; /**< [out]: Client should access ::NvEncRunMotionEstimationOnly API through this pointer. */ + PNVENCGETLASTERROR nvEncGetLastErrorString; /**< [out]: Client should access ::nvEncGetLastErrorString API through this pointer. */ + PNVENCSETIOCUDASTREAMS nvEncSetIOCudaStreams; /**< [out]: Client should access ::nvEncSetIOCudaStreams API through this pointer. */ + void* reserved2[279]; /**< [in]: Reserved and must be set to NULL */ +} NV_ENCODE_API_FUNCTION_LIST; + +/** Macro for constructing the version field of ::_NV_ENCODEAPI_FUNCTION_LIST. */ +#define NV_ENCODE_API_FUNCTION_LIST_VER NVENCAPI_STRUCT_VERSION(2) + +// NvEncodeAPICreateInstance +/** + * \ingroup ENCODE_FUNC + * Entry Point to the NvEncodeAPI interface. + * + * Creates an instance of the NvEncodeAPI interface, and populates the + * pFunctionList with function pointers to the API routines implemented by the + * NvEncodeAPI interface. + * + * \param [out] functionList + * + * \return + * ::NV_ENC_SUCCESS + * ::NV_ENC_ERR_INVALID_PTR + */ +NVENCSTATUS NVENCAPI NvEncodeAPICreateInstance(NV_ENCODE_API_FUNCTION_LIST *functionList); + +#ifdef __cplusplus +} +#endif + + +#endif + diff --git a/NvCodec/include/nvcuvid.h b/NvCodec/include/nvcuvid.h new file mode 100644 index 0000000..3c393a6 --- /dev/null +++ b/NvCodec/include/nvcuvid.h @@ -0,0 +1,392 @@ +/* + * This copyright notice applies to this header file only: + * + * Copyright (c) 2010-2019 NVIDIA Corporation + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the software, and to permit persons to whom the + * software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/********************************************************************************************************************/ +//! \file nvcuvid.h +//! NVDECODE API provides video decoding interface to NVIDIA GPU devices. +//! \date 2015-2019 +//! This file contains the interface constants, structure definitions and function prototypes. +/********************************************************************************************************************/ + +#if !defined(__NVCUVID_H__) +#define __NVCUVID_H__ + +#include "cuviddec.h" + +#if defined(__cplusplus) +extern "C" { +#endif /* __cplusplus */ + + +/***********************************************/ +//! +//! High-level helper APIs for video sources +//! +/***********************************************/ + +typedef void *CUvideosource; +typedef void *CUvideoparser; +typedef long long CUvideotimestamp; + + +/************************************************************************/ +//! \enum cudaVideoState +//! Video source state enums +//! Used in cuvidSetVideoSourceState and cuvidGetVideoSourceState APIs +/************************************************************************/ +typedef enum { + cudaVideoState_Error = -1, /**< Error state (invalid source) */ + cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */ + cudaVideoState_Started = 1 /**< Source is running and delivering data */ +} cudaVideoState; + +/************************************************************************/ +//! \enum cudaAudioCodec +//! Audio compression enums +//! Used in CUAUDIOFORMAT structure +/************************************************************************/ +typedef enum { + cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */ + cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */ + cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */ + cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */ + cudaAudioCodec_LPCM, /**< PCM Audio */ + cudaAudioCodec_AAC, /**< AAC Audio */ +} cudaAudioCodec; + +/************************************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDEOFORMAT +//! Video format +//! Used in cuvidGetSourceVideoFormat API +/************************************************************************************************/ +typedef struct +{ + cudaVideoCodec codec; /**< OUT: Compression format */ + /** + * OUT: frame rate = numerator / denominator (for example: 30000/1001) + */ + struct { + /**< OUT: frame rate numerator (0 = unspecified or variable frame rate) */ + unsigned int numerator; + /**< OUT: frame rate denominator (0 = unspecified or variable frame rate) */ + unsigned int denominator; + } frame_rate; + unsigned char progressive_sequence; /**< OUT: 0=interlaced, 1=progressive */ + unsigned char bit_depth_luma_minus8; /**< OUT: high bit depth luma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */ + unsigned char bit_depth_chroma_minus8; /**< OUT: high bit depth chroma. E.g, 2 for 10-bitdepth, 4 for 12-bitdepth */ + unsigned char min_num_decode_surfaces; /**< OUT: Minimum number of decode surfaces to be allocated for correct + decoding. The client can send this value in ulNumDecodeSurfaces + (in CUVIDDECODECREATEINFO structure). 
+ This guarantees correct functionality and optimal video memory + usage but not necessarily the best performance, which depends on + the design of the overall application. The optimal number of + decode surfaces (in terms of performance and memory utilization) + should be decided by experimentation for each application, but it + cannot go below min_num_decode_surfaces. + If this value is used for ulNumDecodeSurfaces then it must be + returned to parser during sequence callback. */ + unsigned int coded_width; /**< OUT: coded frame width in pixels */ + unsigned int coded_height; /**< OUT: coded frame height in pixels */ + /** + * area of the frame that should be displayed + * typical example: + * coded_width = 1920, coded_height = 1088 + * display_area = { 0,0,1920,1080 } + */ + struct { + int left; /**< OUT: left position of display rect */ + int top; /**< OUT: top position of display rect */ + int right; /**< OUT: right position of display rect */ + int bottom; /**< OUT: bottom position of display rect */ + } display_area; + cudaVideoChromaFormat chroma_format; /**< OUT: Chroma format */ + unsigned int bitrate; /**< OUT: video bitrate (bps, 0=unknown) */ + /** + * OUT: Display Aspect Ratio = x:y (4:3, 16:9, etc) + */ + struct { + int x; + int y; + } display_aspect_ratio; + /** + * Video Signal Description + * Refer section E.2.1 (VUI parameters semantics) of H264 spec file + */ + struct { + unsigned char video_format : 3; /**< OUT: 0-Component, 1-PAL, 2-NTSC, 3-SECAM, 4-MAC, 5-Unspecified */ + unsigned char video_full_range_flag : 1; /**< OUT: indicates the black level and luma and chroma range */ + unsigned char reserved_zero_bits : 4; /**< Reserved bits */ + unsigned char color_primaries; /**< OUT: chromaticity coordinates of source primaries */ + unsigned char transfer_characteristics; /**< OUT: opto-electronic transfer characteristic of the source picture */ + unsigned char matrix_coefficients; /**< OUT: used in deriving luma and chroma signals from RGB primaries */ + } video_signal_description; + unsigned int seqhdr_data_length; /**< OUT: Additional bytes following (CUVIDEOFORMATEX) */ +} CUVIDEOFORMAT; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDEOFORMATEX +//! Video format including raw sequence header information +//! Used in cuvidGetSourceVideoFormat API +/****************************************************************/ +typedef struct +{ + CUVIDEOFORMAT format; /**< OUT: CUVIDEOFORMAT structure */ + unsigned char raw_seqhdr_data[1024]; /**< OUT: Sequence header data */ +} CUVIDEOFORMATEX; + +/****************************************************************/ +//! \ingroup STRUCTS +//! \struct CUAUDIOFORMAT +//! Audio formats +//! Used in cuvidGetSourceAudioFormat API +/****************************************************************/ +typedef struct +{ + cudaAudioCodec codec; /**< OUT: Compression format */ + unsigned int channels; /**< OUT: number of audio channels */ + unsigned int samplespersec; /**< OUT: sampling frequency */ + unsigned int bitrate; /**< OUT: For uncompressed, can also be used to determine bits per sample */ + unsigned int reserved1; /**< Reserved for future use */ + unsigned int reserved2; /**< Reserved for future use */ +} CUAUDIOFORMAT; + + +/***************************************************************/ +//! \enum CUvideopacketflags +//! Data packet flags +//! 
Used in CUVIDSOURCEDATAPACKET structure +/***************************************************************/ +typedef enum { + CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ + CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ + CUVID_PKT_DISCONTINUITY = 0x04, /**< Set when a discontinuity has to be signalled */ + CUVID_PKT_ENDOFPICTURE = 0x08, /**< Set when the packet contains exactly one frame or one field */ + CUVID_PKT_NOTIFY_EOS = 0x10, /**< If this flag is set along with CUVID_PKT_ENDOFSTREAM, an additional (dummy) + display callback will be invoked with null value of CUVIDPARSERDISPINFO which + should be interpreted as end of the stream. */ +} CUvideopacketflags; + +/*****************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDSOURCEDATAPACKET +//! Data Packet +//! Used in cuvidParseVideoData API +//! IN for cuvidParseVideoData +/*****************************************************************************/ +typedef struct _CUVIDSOURCEDATAPACKET +{ + unsigned long flags; /**< IN: Combination of CUVID_PKT_XXX flags */ + unsigned long payload_size; /**< IN: number of bytes in the payload (may be zero if EOS flag is set) */ + const unsigned char *payload; /**< IN: Pointer to packet payload data (may be NULL if EOS flag is set) */ + CUvideotimestamp timestamp; /**< IN: Presentation time stamp (10MHz clock), only valid if + CUVID_PKT_TIMESTAMP flag is set */ +} CUVIDSOURCEDATAPACKET; + +// Callback for packet delivery +typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); + +/**************************************************************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDSOURCEPARAMS +//! Describes parameters needed in cuvidCreateVideoSource API +//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported +//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. +/**************************************************************************************************************************/ +typedef struct _CUVIDSOURCEPARAMS +{ + unsigned int ulClockRate; /**< IN: Time stamp units in Hz (0=default=10000000Hz) */ + unsigned int uReserved1[7]; /**< Reserved for future use - set to zero */ + void *pUserData; /**< IN: User private data passed in to the data handlers */ + PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< IN: Called to deliver video packets */ + PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< IN: Called to deliver audio packets. */ + void *pvReserved2[8]; /**< Reserved for future use - set to NULL */ +} CUVIDSOURCEPARAMS; + + +/**********************************************/ +//! \ingroup ENUMS +//! \enum CUvideosourceformat_flags +//! CUvideosourceformat_flags +//! Used in cuvidGetSourceVideoFormat API +/**********************************************/ +typedef enum { + CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */ +} CUvideosourceformat_flags; + +#if !defined(__APPLE__) +/***************************************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams) +//! Create CUvideosource object. 
CUvideosource spawns demultiplexer thread that provides two callbacks: +//! pfnVideoDataHandler() and pfnAudioDataHandler() +//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported +//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. +/***************************************************************************************************************************/ +CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams); + +/***************************************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams) +//! Create video source +/***************************************************************************************************************************/ +CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); + +/********************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj) +//! Destroy video source +/********************************************************************/ +CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj); + +/******************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state) +//! Set video source state to: +//! cudaVideoState_Started - to signal the source to run and deliver data +//! cudaVideoState_Stopped - to stop the source from delivering the data +//! cudaVideoState_Error - invalid source +/******************************************************************************************/ +CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state); + +/******************************************************************************************/ +//! \ingroup FUNCTS +//! \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj) +//! Get video source state +//! Returns: +//! cudaVideoState_Started - if Source is running and delivering data +//! cudaVideoState_Stopped - if Source is stopped or reached end-of-stream +//! cudaVideoState_Error - if Source is in error state +/******************************************************************************************/ +cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj); + +/******************************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags) +//! Gets video source format in pvidfmt, flags is set to combination of CUvideosourceformat_flags as per requirement +/******************************************************************************************************************/ +CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags); + +/**************************************************************************************************************************/ +//! \ingroup FUNCTS +//! 
\fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags) +//! Get audio source format +//! NVDECODE API is intended for HW accelerated video decoding so CUvideosource doesn't have audio demuxer for all supported +//! containers. It's recommended to clients to use their own or third party demuxer if audio support is needed. +/**************************************************************************************************************************/ +CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags); + +#endif +/**********************************************************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDPARSERDISPINFO +//! Used in cuvidParseVideoData API with PFNVIDDISPLAYCALLBACK pfnDisplayPicture +/**********************************************************************************/ +typedef struct _CUVIDPARSERDISPINFO +{ + int picture_index; /**< OUT: Index of the current picture */ + int progressive_frame; /**< OUT: 1 if progressive frame; 0 otherwise */ + int top_field_first; /**< OUT: 1 if top field is displayed first; 0 otherwise */ + int repeat_first_field; /**< OUT: Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, + -1=unpaired field) */ + CUvideotimestamp timestamp; /**< OUT: Presentation time stamp */ +} CUVIDPARSERDISPINFO; + +/***********************************************************************************************************************/ +//! Parser callbacks +//! The parser will call these synchronously from within cuvidParseVideoData(), whenever there is sequence change or a picture +//! is ready to be decoded and/or displayed. First argument in functions is "void *pUserData" member of structure CUVIDSOURCEPARAMS +//! Return values from these callbacks are interpreted as below. If the callbacks return failure, it will be propagated by +//! cuvidParseVideoData() to the application. +//! PFNVIDSEQUENCECALLBACK : 0: fail, 1: succeeded, > 1: override dpb size of parser (set by CUVIDPARSERPARAMS::ulMaxNumDecodeSurfaces +//! while creating parser) +//! PFNVIDDECODECALLBACK : 0: fail, >=1: succeeded +//! PFNVIDDISPLAYCALLBACK : 0: fail, >=1: succeeded +/***********************************************************************************************************************/ +typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); +typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); +typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); + +/**************************************/ +//! \ingroup STRUCTS +//! \struct CUVIDPARSERPARAMS +//! 
Used in cuvidCreateVideoParser API +/**************************************/ +typedef struct _CUVIDPARSERPARAMS +{ + cudaVideoCodec CodecType; /**< IN: cudaVideoCodec_XXX */ + unsigned int ulMaxNumDecodeSurfaces; /**< IN: Max # of decode surfaces (parser will cycle through these) */ + unsigned int ulClockRate; /**< IN: Timestamp units in Hz (0=default=10000000Hz) */ + unsigned int ulErrorThreshold; /**< IN: % Error threshold (0-100) for calling pfnDecodePicture (100=always + IN: call pfnDecodePicture even if picture bitstream is fully corrupted) */ + unsigned int ulMaxDisplayDelay; /**< IN: Max display queue delay (improves pipelining of decode with display) + 0=no delay (recommended values: 2..4) */ + unsigned int uReserved1[5]; /**< IN: Reserved for future use - set to 0 */ + void *pUserData; /**< IN: User data for callbacks */ + PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< IN: Called before decoding frames and/or whenever there is a fmt change */ + PFNVIDDECODECALLBACK pfnDecodePicture; /**< IN: Called when a picture is ready to be decoded (decode order) */ + PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< IN: Called whenever a picture is ready to be displayed (display order) */ + void *pvReserved2[7]; /**< Reserved for future use - set to NULL */ + CUVIDEOFORMATEX *pExtVideoInfo; /**< IN: [Optional] sequence header data from system layer */ +} CUVIDPARSERPARAMS; + +/************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams) +//! Create video parser object and initialize +/************************************************************************************************/ +CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams); + +/************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket) +//! Parse the video data from source data packet in pPacket +//! Extracts parameter sets like SPS, PPS, bitstream etc. from pPacket and +//! calls back pfnDecodePicture with CUVIDPICPARAMS data for kicking of HW decoding +//! calls back pfnSequenceCallback with CUVIDEOFORMAT data for initial sequence header or when +//! the decoder encounters a video format change +//! calls back pfnDisplayPicture with CUVIDPARSERDISPINFO data to display a video frame +/************************************************************************************************/ +CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket); + +/************************************************************************************************/ +//! \ingroup FUNCTS +//! \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj) +//! 
Destroy the video parser
+/************************************************************************************************/
+CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj);
+
+/**********************************************************************************************/
+
+#if defined(__cplusplus)
+}
+#endif /* __cplusplus */
+
+#endif // __NVCUVID_H__
+
+
diff --git a/Sora/Sora.cs b/Sora/Sora.cs
index d0c899a..a679b35 100644
--- a/Sora/Sora.cs
+++ b/Sora/Sora.cs
@@ -131,10 +131,13 @@ public bool Connect(Config config)
             config.AudioBitrate) == 0;
     }
 
+    // Event to call once rendering has finished on the Unity side (after yield return new WaitForEndOfFrame())
+    // Used, for example, to render the image of the specified Unity camera into the texture on the Sora side
     public void OnRender()
     {
         UnityEngine.GL.IssuePluginEvent(sora_get_render_callback(), sora_get_render_callback_event_id(p));
     }
 
+    // Renders the video received for trackId into texture
    public void RenderTrackToTexture(uint trackId, UnityEngine.Texture texture)
    {
        commandBuffer.IssuePluginCustomTextureUpdateV2(sora_get_texture_update_callback(), texture, trackId);
@@ -321,6 +324,10 @@ public static DeviceInfo[] GetAudioPlayoutDevices()
        return list.ToArray();
    }
 
+    public static bool IsH264Supported() {
+        return sora_is_h264_supported();
+    }
+
     [DllImport("SoraUnitySdk")]
     private static extern IntPtr sora_create();
     [DllImport("SoraUnitySdk")]
@@ -371,4 +378,6 @@ private static extern int sora_connect(
     private static extern bool sora_device_enum_audio_recording(DeviceEnumCallbackDelegate f, IntPtr userdata);
     [DllImport("SoraUnitySdk")]
     private static extern bool sora_device_enum_audio_playout(DeviceEnumCallbackDelegate f, IntPtr userdata);
+    [DllImport("SoraUnitySdk")]
+    private static extern bool sora_is_h264_supported();
 }
diff --git a/doc/USE_H264.md b/doc/USE_H264.md
new file mode 100644
index 0000000..17f18f6
--- /dev/null
+++ b/doc/USE_H264.md
@@ -0,0 +1,19 @@
+# Using H.264 with the Sora Unity SDK
+
+The Sora Unity SDK does not provide software H.264 encoding or decoding.
+
+However, when a hardware H.264 encoder/decoder is available, the SDK makes full use of it.
+
+- On Windows, the [NVIDIA VIDEO CODEC SDK](https://developer.nvidia.com/nvidia-video-codec-sdk) is used if it is installed.
+  - When it is available, **only H.264 encoding is possible**; H.264 decoding is not supported.
+  - In other words, a single-stream connection only works as `Sora.Role.Upstream` or `Sora.Role.Sendonly`, and a multistream connection only works as `Sora.Role.Sendonly`.
+- On macOS, [VideoToolbox](https://developer.apple.com/documentation/videotoolbox) is used.
+  - VideoToolbox provides both H.264 encoding and decoding.
+
+## Checking whether H.264 is available
+
+Call the `Sora.IsH264Supported()` function to find out whether H.264 can be used.
+
+```
+bool h264Supported = Sora.IsH264Supported();
+```
diff --git a/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp b/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp
new file mode 100644
index 0000000..04d03e3
--- /dev/null
+++ b/src/hwenc_nvcodec/nvcodec_h264_encoder.cpp
@@ -0,0 +1,469 @@
+#include "nvcodec_h264_encoder.h"
+
+#include "libyuv.h"
+#include "modules/video_coding/codecs/h264/include/h264.h"
+#include "rtc_base/logging.h"
+
+#include "rtc/native_buffer.h"
+
+const int kLowH264QpThreshold = 34;
+const int kHighH264QpThreshold = 40;
+
+struct nal_entry {
+  size_t offset;
+  size_t size;
+};
+
+#ifdef _WIN32
+using Microsoft::WRL::ComPtr;
+#endif
+
+NvCodecH264Encoder::NvCodecH264Encoder(const cricket::VideoCodec& codec) {
+#ifdef _WIN32
+  ComPtr<IDXGIFactory1> idxgi_factory;
+  RTC_CHECK(!FAILED(CreateDXGIFactory1(__uuidof(IDXGIFactory1),
+                                       (void**)idxgi_factory.GetAddressOf())));
+  ComPtr<IDXGIAdapter> idxgi_adapter;
+  RTC_CHECK(
+      !FAILED(idxgi_factory->EnumAdapters(0, idxgi_adapter.GetAddressOf())));
+  RTC_CHECK(!FAILED(D3D11CreateDevice(
idxgi_adapter.Get(), D3D_DRIVER_TYPE_UNKNOWN, NULL, 0, NULL, 0, + D3D11_SDK_VERSION, id3d11_device_.GetAddressOf(), NULL, + id3d11_context_.GetAddressOf()))); + + // 以下デバイス名を取得するだけの処理 + DXGI_ADAPTER_DESC adapter_desc; + idxgi_adapter->GetDesc(&adapter_desc); + char szDesc[80]; + size_t result = 0; + wcstombs_s(&result, szDesc, adapter_desc.Description, sizeof(szDesc)); + RTC_LOG(INFO) << __FUNCTION__ << "GPU in use: " << szDesc; +#endif +#ifdef __linux__ + cuda_.reset(new NvCodecH264EncoderCuda()); +#endif +} + +NvCodecH264Encoder::~NvCodecH264Encoder() {} + +bool NvCodecH264Encoder::IsSupported() { + try { + NvEncoder::TryLoadNvEncApi(); + return true; + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + return false; + } +} + +int32_t NvCodecH264Encoder::InitEncode(const webrtc::VideoCodec* codec_settings, + int32_t number_of_cores, + size_t max_payload_size) { + RTC_DCHECK(codec_settings); + RTC_DCHECK_EQ(codec_settings->codecType, webrtc::kVideoCodecH264); + + int32_t release_ret = Release(); + if (release_ret != WEBRTC_VIDEO_CODEC_OK) { + return release_ret; + } + + width_ = codec_settings->width; + height_ = codec_settings->height; + target_bitrate_bps_ = codec_settings->startBitrate * 1000; + max_bitrate_bps_ = codec_settings->maxBitrate * 1000; + bitrate_adjuster_.SetTargetBitrateBps(target_bitrate_bps_); + framerate_ = codec_settings->maxFramerate; + mode_ = codec_settings->mode; + + RTC_LOG(LS_INFO) << "InitEncode " << target_bitrate_bps_ << "bit/sec"; + + // Initialize encoded image. Default buffer size: size of unencoded data. + encoded_image_._completeFrame = true; + encoded_image_._encodedWidth = 0; + encoded_image_._encodedHeight = 0; + encoded_image_.set_size(0); + encoded_image_.timing_.flags = + webrtc::VideoSendTiming::TimingFrameFlags::kInvalid; + encoded_image_.content_type_ = + (codec_settings->mode == webrtc::VideoCodecMode::kScreensharing) + ? 
webrtc::VideoContentType::SCREENSHARE + : webrtc::VideoContentType::UNSPECIFIED; + + return InitNvEnc(); +} + +int32_t NvCodecH264Encoder::RegisterEncodeCompleteCallback( + webrtc::EncodedImageCallback* callback) { + std::lock_guard lock(mutex_); + callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t NvCodecH264Encoder::Release() { + return ReleaseNvEnc(); +} + +int32_t NvCodecH264Encoder::Encode( + const webrtc::VideoFrame& frame, + const std::vector* frame_types) { + //RTC_LOG(LS_ERROR) << __FUNCTION__ << " Start"; + if (!nv_encoder_) { + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + if (!callback_) { + RTC_LOG(LS_WARNING) + << "InitEncode() has been called, but a callback function " + << "has not been set with RegisterEncodeCompleteCallback()"; + return WEBRTC_VIDEO_CODEC_UNINITIALIZED; + } + + if (frame.video_frame_buffer()->type() == + webrtc::VideoFrameBuffer::Type::kNative) { + if (!use_native_) { + ReleaseNvEnc(); + RTC_LOG(LS_INFO) << "Use Native"; + use_native_ = true; + InitNvEnc(); + } + } else { + if (use_native_) { + ReleaseNvEnc(); + RTC_LOG(LS_INFO) << "Unuse Native"; + use_native_ = false; + InitNvEnc(); + } + } + + bool send_key_frame = false; + + if (reconfigure_needed_) { + NV_ENC_RECONFIGURE_PARAMS reconfigure_params = { + NV_ENC_RECONFIGURE_PARAMS_VER}; + NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER}; + reconfigure_params.reInitEncodeParams.encodeConfig = &encode_config; + nv_encoder_->GetInitializeParams(&reconfigure_params.reInitEncodeParams); + + reconfigure_params.reInitEncodeParams.frameRateNum = framerate_; + + encode_config.rcParams.averageBitRate = + bitrate_adjuster_.GetAdjustedBitrateBps(); + encode_config.rcParams.maxBitRate = max_bitrate_bps_; + encode_config.rcParams.vbvBufferSize = + encode_config.rcParams.averageBitRate * 1 / framerate_; + encode_config.rcParams.vbvInitialDelay = + encode_config.rcParams.vbvBufferSize; + try { + //RTC_LOG(LS_ERROR) << __FUNCTION__ << " Reconfigure"; + nv_encoder_->Reconfigure(&reconfigure_params); + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + reconfigure_needed_ = false; + } + + if (frame_types != nullptr) { + // We only support a single stream. + RTC_DCHECK_EQ(frame_types->size(), static_cast(1)); + // Skip frame? + if ((*frame_types)[0] == webrtc::VideoFrameType::kEmptyFrame) { + return WEBRTC_VIDEO_CODEC_OK; + } + // Force key frame? 
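+    // A key-frame request from WebRTC is only remembered here; it is applied
+    // below by setting NV_ENC_PIC_FLAG_FORCEINTRA | NV_ENC_PIC_FLAG_FORCEIDR
+    // on pic_params.encodePicFlags so that NVENC emits an IDR picture for
+    // this frame.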
+ send_key_frame = + (*frame_types)[0] == webrtc::VideoFrameType::kVideoFrameKey; + } + + NV_ENC_PIC_PARAMS pic_params = {NV_ENC_PIC_PARAMS_VER}; + pic_params.encodePicFlags = 0; + if (send_key_frame) { + pic_params.encodePicFlags = + NV_ENC_PIC_FLAG_FORCEINTRA | NV_ENC_PIC_FLAG_FORCEIDR; + } + pic_params.inputWidth = width_; + pic_params.inputHeight = height_; + + v_packet_.clear(); + +#ifdef _WIN32 + const NvEncInputFrame* input_frame = nv_encoder_->GetNextInputFrame(); + D3D11_MAPPED_SUBRESOURCE map; + id3d11_context_->Map(id3d11_texture_.Get(), D3D11CalcSubresource(0, 0, 1), + D3D11_MAP_WRITE, 0, &map); + if (use_native_) { + const sora::NativeBuffer* frame_buffer = + dynamic_cast(frame.video_frame_buffer().get()); + for (int y = 0; y < frame_buffer->height(); y++) { + memcpy((uint8_t*)map.pData + y * map.RowPitch, + frame_buffer->Data() + frame_buffer->raw_width() * y, + frame_buffer->raw_width()); + } + } else { + rtc::scoped_refptr frame_buffer = + frame.video_frame_buffer()->ToI420(); + libyuv::I420ToNV12(frame_buffer->DataY(), frame_buffer->StrideY(), + frame_buffer->DataU(), frame_buffer->StrideU(), + frame_buffer->DataV(), frame_buffer->StrideV(), + (uint8_t*)map.pData, map.RowPitch, + ((uint8_t*)map.pData + height_ * map.RowPitch), + map.RowPitch, width_, height_); + } + id3d11_context_->Unmap(id3d11_texture_.Get(), D3D11CalcSubresource(0, 0, 1)); + ID3D11Texture2D* nv12_texture = + reinterpret_cast(input_frame->inputPtr); + id3d11_context_->CopyResource(nv12_texture, id3d11_texture_.Get()); +#endif +#ifdef __linux__ + if (frame.video_frame_buffer()->type() == + webrtc::VideoFrameBuffer::Type::kNative) { + NativeBuffer* native_buffer = + dynamic_cast(frame.video_frame_buffer().get()); + cuda_->CopyNative(nv_encoder_.get(), native_buffer->Data(), + native_buffer->length(), native_buffer->width(), + native_buffer->height()); + } else { + rtc::scoped_refptr frame_buffer = + frame.video_frame_buffer()->ToI420(); + cuda_->Copy(nv_encoder_.get(), frame_buffer->DataY(), frame_buffer->width(), + frame_buffer->height()); + } +#endif + + try { + nv_encoder_->EncodeFrame(v_packet_, &pic_params); + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + for (std::vector& packet : v_packet_) { + encoded_image_.set_buffer(packet.data(), packet.size()); + encoded_image_.set_size(packet.size()); + encoded_image_._completeFrame = true; + encoded_image_._encodedWidth = width_; + encoded_image_._encodedHeight = height_; + encoded_image_.content_type_ = + (mode_ == webrtc::VideoCodecMode::kScreensharing) + ? 
webrtc::VideoContentType::SCREENSHARE + : webrtc::VideoContentType::UNSPECIFIED; + encoded_image_.timing_.flags = webrtc::VideoSendTiming::kInvalid; + encoded_image_.SetTimestamp(frame.timestamp()); + encoded_image_.ntp_time_ms_ = frame.ntp_time_ms(); + encoded_image_.capture_time_ms_ = frame.render_time_ms(); + encoded_image_.rotation_ = frame.rotation(); + encoded_image_.SetColorSpace(frame.color_space()); + encoded_image_._frameType = webrtc::VideoFrameType::kVideoFrameDelta; + //RTC_LOG(LS_ERROR) << __FUNCTION__ << " packet.size():" << packet.size(); + + //printf("###########"); + uint8_t zero_count = 0; + size_t nal_start_idx = 0; + std::vector nals; + for (size_t i = 0; i < packet.size(); i++) { + uint8_t data = packet.data()[i]; + //if (i < 100) printf(" %02x", data); + if ((i != 0) && (i == nal_start_idx)) { + //printf("-header"); + if ((data & 0x1F) == 0x05) { + //printf("-IDR(%02x)", (data & 0x1F)); + encoded_image_._frameType = webrtc::VideoFrameType::kVideoFrameKey; + } + } + if (data == 0x01 && zero_count == 3) { + if (nal_start_idx != 0) { + nals.push_back({nal_start_idx, i - nal_start_idx + 1 - 4}); + //printf(" nal_size: %d ", i - nal_start_idx + 1 - 4); + } + nal_start_idx = i + 1; + //printf(" nal_start_idx: %d\n", nal_start_idx); + } + if (data == 0x00) { + zero_count++; + } else { + zero_count = 0; + } + } + if (nal_start_idx != 0) { + nals.push_back({nal_start_idx, packet.size() - nal_start_idx}); + //printf(" nal_size: %d packet.size(): %d \n", packet.size() - nal_start_idx , packet.size()); + } + //printf("\n"); + //nals.push_back({4, packet.size() - 4}); + + //RTC_LOG(LS_ERROR) << __FUNCTION__ << " nals.size():" << nals.size(); + + webrtc::RTPFragmentationHeader frag_header; + frag_header.VerifyAndAllocateFragmentationHeader(nals.size()); + for (size_t i = 0; i < nals.size(); i++) { + frag_header.fragmentationOffset[i] = nals[i].offset; + frag_header.fragmentationLength[i] = nals[i].size; + //RTC_LOG(LS_ERROR) << __FUNCTION__ << " i:" << i << " offset:" << nals[i].offset << " size:" << nals[i].size; + } + + webrtc::CodecSpecificInfo codec_specific; + codec_specific.codecType = webrtc::kVideoCodecH264; + codec_specific.codecSpecific.H264.packetization_mode = + webrtc::H264PacketizationMode::NonInterleaved; + + h264_bitstream_parser_.ParseBitstream(packet.data(), packet.size()); + h264_bitstream_parser_.GetLastSliceQp(&encoded_image_.qp_); + + webrtc::EncodedImageCallback::Result result = callback_->OnEncodedImage( + encoded_image_, &codec_specific, &frag_header); + if (result.error != webrtc::EncodedImageCallback::Result::OK) { + RTC_LOG(LS_ERROR) << __FUNCTION__ + << " OnEncodedImage failed error:" << result.error; + return WEBRTC_VIDEO_CODEC_ERROR; + } + bitrate_adjuster_.Update(packet.size()); + } + + return WEBRTC_VIDEO_CODEC_OK; +} + +void NvCodecH264Encoder::SetRates( + const webrtc::VideoEncoder::RateControlParameters& parameters) { + if (!nv_encoder_) { + RTC_LOG(LS_WARNING) << "SetRates() while uninitialized."; + return; + } + + if (parameters.framerate_fps < 1.0) { + RTC_LOG(LS_WARNING) << "Invalid frame rate: " << parameters.framerate_fps; + return; + } + + uint32_t new_framerate = (uint32_t)parameters.framerate_fps; + uint32_t new_bitrate = parameters.bitrate.get_sum_bps(); + RTC_LOG(INFO) << __FUNCTION__ << " framerate_:" << framerate_ + << " new_framerate: " << new_framerate + << " target_bitrate_bps_:" << target_bitrate_bps_ + << " new_bitrate:" << new_bitrate + << " max_bitrate_bps_:" << max_bitrate_bps_; + framerate_ = new_framerate; + 
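+  // Only the member variables are updated here; the encoder itself is
+  // reconfigured lazily. Setting reconfigure_needed_ makes the next Encode()
+  // call fill in NV_ENC_RECONFIGURE_PARAMS from these values and call
+  // Reconfigure() before encoding the frame.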
target_bitrate_bps_ = new_bitrate; + bitrate_adjuster_.SetTargetBitrateBps(target_bitrate_bps_); + reconfigure_needed_ = true; +} + +webrtc::VideoEncoder::EncoderInfo NvCodecH264Encoder::GetEncoderInfo() const { + webrtc::VideoEncoder::EncoderInfo info; + info.supports_native_handle = true; + info.implementation_name = "NvCodec H264"; + info.scaling_settings = webrtc::VideoEncoder::ScalingSettings( + kLowH264QpThreshold, kHighH264QpThreshold); + info.is_hardware_accelerated = true; + info.has_internal_source = false; + return info; +} + +int32_t NvCodecH264Encoder::InitNvEnc() { +#ifdef _WIN32 + DXGI_FORMAT dxgi_format = DXGI_FORMAT_NV12; + NV_ENC_BUFFER_FORMAT nvenc_format = NV_ENC_BUFFER_FORMAT_NV12; + if (use_native_) { + dxgi_format = DXGI_FORMAT_B8G8R8A8_UNORM; + nvenc_format = NV_ENC_BUFFER_FORMAT_ARGB; + } + D3D11_TEXTURE2D_DESC desc; + ZeroMemory(&desc, sizeof(D3D11_TEXTURE2D_DESC)); + desc.Width = width_; + desc.Height = height_; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = dxgi_format; + desc.SampleDesc.Count = 1; + desc.Usage = D3D11_USAGE_STAGING; + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + id3d11_device_->CreateTexture2D(&desc, NULL, id3d11_texture_.GetAddressOf()); + + // Driver が古いとかに気づくのはココ + try { + nv_encoder_.reset(new NvEncoderD3D11(id3d11_device_.Get(), width_, height_, + nvenc_format)); + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + return WEBRTC_VIDEO_CODEC_ERROR; + } +#endif + +#ifdef __linux__ + try { + nv_encoder_.reset(cuda_->CreateNvEncoder(width_, height_, use_native_)); + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + return WEBRTC_VIDEO_CODEC_ERROR; + } +#endif + + initialize_params_ = {NV_ENC_INITIALIZE_PARAMS_VER}; + NV_ENC_CONFIG encode_config = {NV_ENC_CONFIG_VER}; + initialize_params_.encodeConfig = &encode_config; + try { + nv_encoder_->CreateDefaultEncoderParams( + &initialize_params_, NV_ENC_CODEC_H264_GUID, + NV_ENC_PRESET_LOW_LATENCY_DEFAULT_GUID); + + //initialize_params_.enablePTD = 1; + initialize_params_.frameRateDen = 1; + initialize_params_.frameRateNum = framerate_; + initialize_params_.maxEncodeWidth = width_; + initialize_params_.maxEncodeHeight = height_; + + //encode_config.profileGUID = NV_ENC_H264_PROFILE_BASELINE_GUID; + encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR_LOWDELAY_HQ; + encode_config.rcParams.averageBitRate = target_bitrate_bps_; + encode_config.rcParams.maxBitRate = max_bitrate_bps_; + + encode_config.rcParams.disableBadapt = 1; + encode_config.rcParams.vbvBufferSize = + encode_config.rcParams.averageBitRate * + initialize_params_.frameRateDen / initialize_params_.frameRateNum; + encode_config.rcParams.vbvInitialDelay = + encode_config.rcParams.vbvBufferSize; + encode_config.gopLength = NVENC_INFINITE_GOPLENGTH; + encode_config.frameIntervalP = 1; + encode_config.rcParams.enableAQ = 1; + + //encode_config.encodeCodecConfig.h264Config.outputAUD = 1; + //encode_config.encodeCodecConfig.h264Config.level = NV_ENC_LEVEL_H264_31; + //encode_config.encodeCodecConfig.h264Config.entropyCodingMode = NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC; + encode_config.encodeCodecConfig.h264Config.idrPeriod = + NVENC_INFINITE_GOPLENGTH; + encode_config.encodeCodecConfig.h264Config.repeatSPSPPS = 1; + encode_config.encodeCodecConfig.h264Config.sliceMode = 0; + encode_config.encodeCodecConfig.h264Config.sliceModeData = 0; + + nv_encoder_->CreateEncoder(&initialize_params_); + + RTC_LOG(INFO) << __FUNCTION__ << " 
framerate_:" << framerate_ + << " bitrate_bps_:" << target_bitrate_bps_ + << " maxBitRate:" << encode_config.rcParams.maxBitRate; + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + return WEBRTC_VIDEO_CODEC_ERROR; + } + + reconfigure_needed_ = false; + + return WEBRTC_VIDEO_CODEC_OK; +} + +int32_t NvCodecH264Encoder::ReleaseNvEnc() { + RTC_LOG(LS_INFO) << __FUNCTION__; + if (nv_encoder_) { + try { + nv_encoder_->EndEncode(v_packet_); + nv_encoder_->DestroyEncoder(); + } catch (const NVENCException& e) { + RTC_LOG(LS_ERROR) << __FUNCTION__ << e.what(); + } + nv_encoder_ = nullptr; +#ifdef _WIN32 + id3d11_texture_.Reset(); +#endif + } + return WEBRTC_VIDEO_CODEC_OK; +} diff --git a/src/hwenc_nvcodec/nvcodec_h264_encoder.h b/src/hwenc_nvcodec/nvcodec_h264_encoder.h new file mode 100644 index 0000000..7664a6d --- /dev/null +++ b/src/hwenc_nvcodec/nvcodec_h264_encoder.h @@ -0,0 +1,81 @@ +#ifndef NVCODEC_H264_ENCODER_H_ +#define NVCODEC_H264_ENCODER_H_ + +#ifdef _WIN32 +#include +#include +#endif + +#include +#include +#include +#include + +#include "api/video_codecs/video_encoder.h" +#include "common_video/h264/h264_bitstream_parser.h" +#include "common_video/include/bitrate_adjuster.h" +#include "modules/video_coding/codecs/h264/include/h264.h" +#include "rtc_base/critical_section.h" + +// NvCodec +#ifdef _WIN32 +#include +#endif +#ifdef __linux__ +#include "nvcodec_h264_encoder_cuda.h" +#endif + +class NvCodecH264Encoder : public webrtc::VideoEncoder { + public: + NvCodecH264Encoder(const cricket::VideoCodec& codec); + ~NvCodecH264Encoder() override; + + static bool IsSupported(); + + int32_t InitEncode(const webrtc::VideoCodec* codec_settings, + int32_t number_of_cores, + size_t max_payload_size) override; + int32_t RegisterEncodeCompleteCallback( + webrtc::EncodedImageCallback* callback) override; + int32_t Release() override; + int32_t Encode( + const webrtc::VideoFrame& frame, + const std::vector* frame_types) override; + void SetRates( + const webrtc::VideoEncoder::RateControlParameters& parameters) override; + webrtc::VideoEncoder::EncoderInfo GetEncoderInfo() const override; + + private: + std::mutex mutex_; + webrtc::EncodedImageCallback* callback_ = nullptr; + webrtc::BitrateAdjuster bitrate_adjuster_ = + webrtc::BitrateAdjuster(0.5f, 0.95f); + uint32_t target_bitrate_bps_ = 0; + uint32_t max_bitrate_bps_ = 0; + + int32_t InitNvEnc(); + int32_t ReleaseNvEnc(); + webrtc::H264BitstreamParser h264_bitstream_parser_; + +#ifdef _WIN32 + Microsoft::WRL::ComPtr id3d11_device_; + Microsoft::WRL::ComPtr id3d11_context_; + Microsoft::WRL::ComPtr id3d11_texture_; + std::unique_ptr nv_encoder_; +#endif +#ifdef __linux__ + std::unique_ptr cuda_; + std::unique_ptr nv_encoder_; +#endif + bool reconfigure_needed_ = false; + bool use_native_ = false; + uint32_t width_ = 0; + uint32_t height_ = 0; + uint32_t framerate_ = 0; + webrtc::VideoCodecMode mode_ = webrtc::VideoCodecMode::kRealtimeVideo; + NV_ENC_INITIALIZE_PARAMS initialize_params_; + std::vector> v_packet_; + webrtc::EncodedImage encoded_image_; +}; + +#endif // NVCODEC_H264_ENCODER_H_ diff --git a/src/hwenc_nvcodec/nvcodec_h264_encoder_cuda.cpp b/src/hwenc_nvcodec/nvcodec_h264_encoder_cuda.cpp new file mode 100644 index 0000000..b309040 --- /dev/null +++ b/src/hwenc_nvcodec/nvcodec_h264_encoder_cuda.cpp @@ -0,0 +1,315 @@ +#include "nvcodec_h264_encoder_cuda.h" + +#include + +#include +#include + +#ifdef __cuda_cuda_h__ +inline bool check(CUresult e, int iLine, const char* szFile) { + if (e != 
CUDA_SUCCESS) { + const char* szErrName = NULL; + cuGetErrorName(e, &szErrName); + std::cerr << "CUDA driver API error " << szErrName << " at line " << iLine + << " in file " << szFile << std::endl; + return false; + } + return true; +} +#endif + +#ifdef __CUDA_RUNTIME_H__ +inline bool check(cudaError_t e, int iLine, const char* szFile) { + if (e != cudaSuccess) { + std::cerr << "CUDA runtime API error " << cudaGetErrorName(e) << " at line " + << iLine << " in file " << szFile << std::endl; + return false; + } + return true; +} +#endif + +#ifdef _NV_ENCODEAPI_H_ +inline bool check(NVENCSTATUS e, int iLine, const char* szFile) { + const char* aszErrName[] = { + "NV_ENC_SUCCESS", + "NV_ENC_ERR_NO_ENCODE_DEVICE", + "NV_ENC_ERR_UNSUPPORTED_DEVICE", + "NV_ENC_ERR_INVALID_ENCODERDEVICE", + "NV_ENC_ERR_INVALID_DEVICE", + "NV_ENC_ERR_DEVICE_NOT_EXIST", + "NV_ENC_ERR_INVALID_PTR", + "NV_ENC_ERR_INVALID_EVENT", + "NV_ENC_ERR_INVALID_PARAM", + "NV_ENC_ERR_INVALID_CALL", + "NV_ENC_ERR_OUT_OF_MEMORY", + "NV_ENC_ERR_ENCODER_NOT_INITIALIZED", + "NV_ENC_ERR_UNSUPPORTED_PARAM", + "NV_ENC_ERR_LOCK_BUSY", + "NV_ENC_ERR_NOT_ENOUGH_BUFFER", + "NV_ENC_ERR_INVALID_VERSION", + "NV_ENC_ERR_MAP_FAILED", + "NV_ENC_ERR_NEED_MORE_INPUT", + "NV_ENC_ERR_ENCODER_BUSY", + "NV_ENC_ERR_EVENT_NOT_REGISTERD", + "NV_ENC_ERR_GENERIC", + "NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY", + "NV_ENC_ERR_UNIMPLEMENTED", + "NV_ENC_ERR_RESOURCE_REGISTER_FAILED", + "NV_ENC_ERR_RESOURCE_NOT_REGISTERED", + "NV_ENC_ERR_RESOURCE_NOT_MAPPED", + }; + if (e != NV_ENC_SUCCESS) { + std::cerr << "NVENC error " << aszErrName[e] << " at line " << iLine + << " in file " << szFile << std::endl; + return false; + } + return true; +} +#endif + +#ifdef _WINERROR_ +inline bool check(HRESULT e, int iLine, const char* szFile) { + if (e != S_OK) { + std::cerr << "HRESULT error 0x" << (void*)e << " at line " << iLine + << " in file " << szFile << std::endl; + return false; + } + return true; +} +#endif + +#if defined(__gl_h_) || defined(__GL_H__) +inline bool check(GLenum e, int iLine, const char* szFile) { + if (e != 0) { + std::cerr << "GLenum error " << e << " at line " << iLine << " in file " + << szFile << std::endl; + return false; + } + return true; +} +#endif + +inline bool check(int e, int iLine, const char* szFile) { + if (e < 0) { + std::cerr << "General error " << e << " at line " << iLine << " in file " + << szFile << std::endl; + return false; + } + return true; +} + +#define ck(call) check(call, __LINE__, __FILE__) + +class NvCodecH264EncoderCudaImpl { + public: + NvCodecH264EncoderCudaImpl(); + ~NvCodecH264EncoderCudaImpl(); + + void Copy(NvEncoder* nv_encoder, + const void* ptr, + int width, + int height); + void CopyNative(NvEncoder* nv_encoder, + const void* ptr, + int size, + int width, + int height); + NvEncoder* CreateNvEncoder(int width, int height, bool use_native); + + private: + NvDecoder* nv_decoder_ = nullptr; + CUdevice cu_device_; + CUcontext cu_context_; +}; + +NvCodecH264EncoderCuda::NvCodecH264EncoderCuda() + : impl_(new NvCodecH264EncoderCudaImpl()) {} +NvCodecH264EncoderCuda::~NvCodecH264EncoderCuda() { + delete impl_; +} + +void NvCodecH264EncoderCuda::Copy(NvEncoder* nv_encoder, + const void* ptr, + int width, + int height) { + impl_->Copy(nv_encoder, ptr, width, height); +} +void NvCodecH264EncoderCuda::CopyNative(NvEncoder* nv_encoder, + const void* ptr, + int size, + int width, + int height) { + impl_->CopyNative(nv_encoder, ptr, size, width, height); +} +NvEncoder* NvCodecH264EncoderCuda::CreateNvEncoder(int width, + int height, 
+ bool use_native) { + return impl_->CreateNvEncoder(width, height, use_native); +} + +void ShowEncoderCapability() { + ck(cuInit(0)); + int nGpu = 0; + ck(cuDeviceGetCount(&nGpu)); + if (nGpu == 0) { + std::cerr << "CUDA Device not found" << std::endl; + exit(1); + } + std::cout << "Encoder Capability" << std::endl; + for (int iGpu = 0; iGpu < nGpu; iGpu++) { + CUdevice cuDevice = 0; + ck(cuDeviceGet(&cuDevice, iGpu)); + char szDeviceName[80]; + ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice)); + CUcontext cuContext = NULL; + ck(cuCtxCreate(&cuContext, 0, cuDevice)); + NvEncoderCuda enc(cuContext, 1280, 720, NV_ENC_BUFFER_FORMAT_NV12); + + std::cout << "GPU " << iGpu << " - " << szDeviceName << std::endl + << std::endl; + std::cout + << "\tH264:\t\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_H264_GUID, + NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES) + ? "yes" + : "no") + << std::endl + << "\tH264_444:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_H264_GUID, + NV_ENC_CAPS_SUPPORT_YUV444_ENCODE) + ? "yes" + : "no") + << std::endl + << "\tH264_ME:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_H264_GUID, + NV_ENC_CAPS_SUPPORT_MEONLY_MODE) + ? "yes" + : "no") + << std::endl + << "\tH264_WxH:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_H264_GUID, + NV_ENC_CAPS_WIDTH_MAX)) + << "*" + << (enc.GetCapabilityValue(NV_ENC_CODEC_H264_GUID, + NV_ENC_CAPS_HEIGHT_MAX)) + << std::endl + << "\tHEVC:\t\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES) + ? "yes" + : "no") + << std::endl + << "\tHEVC_Main10:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_SUPPORT_10BIT_ENCODE) + ? "yes" + : "no") + << std::endl + << "\tHEVC_Lossless:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE) + ? "yes" + : "no") + << std::endl + << "\tHEVC_SAO:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_SUPPORT_SAO) + ? "yes" + : "no") + << std::endl + << "\tHEVC_444:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_SUPPORT_YUV444_ENCODE) + ? "yes" + : "no") + << std::endl + << "\tHEVC_ME:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_SUPPORT_MEONLY_MODE) + ? 
"yes" + : "no") + << std::endl + << "\tHEVC_WxH:\t" + << " " + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_WIDTH_MAX)) + << "*" + << (enc.GetCapabilityValue(NV_ENC_CODEC_HEVC_GUID, + NV_ENC_CAPS_HEIGHT_MAX)) + << std::endl; + + std::cout << std::endl; + + enc.DestroyEncoder(); + ck(cuCtxDestroy(cuContext)); + } +} + +NvCodecH264EncoderCudaImpl::NvCodecH264EncoderCudaImpl() { + ShowEncoderCapability(); + + ck(cuInit(0)); + ck(cuDeviceGet(&cu_device_, 0)); + char device_name[80]; + ck(cuDeviceGetName(device_name, sizeof(device_name), cu_device_)); + std::cout << "GPU in use: " << device_name << std::endl; + ck(cuCtxCreate(&cu_context_, 0, cu_device_)); +} +NvCodecH264EncoderCudaImpl::~NvCodecH264EncoderCudaImpl() { + if (nv_decoder_ != nullptr) { + delete nv_decoder_; + } + cuCtxDestroy(cu_context_); +} +void NvCodecH264EncoderCudaImpl::Copy(NvEncoder* nv_encoder, + const void* ptr, + int width, + int height) { + const NvEncInputFrame* input_frame = nv_encoder->GetNextInputFrame(); + NvEncoderCuda::CopyToDeviceFrame( + cu_context_, (void*)ptr, 0, (CUdeviceptr)input_frame->inputPtr, + (int)input_frame->pitch, width, height, CU_MEMORYTYPE_HOST, + input_frame->bufferFormat, input_frame->chromaOffsets, + input_frame->numChromaPlanes); +} +void NvCodecH264EncoderCudaImpl::CopyNative(NvEncoder* nv_encoder, + const void* ptr, + int size, + int width, + int height) { + if (nv_decoder_ == nullptr) { + std::cout << "Use JPEG Decoder" << std::endl; + nv_decoder_ = new NvDecoder(cu_context_, true, cudaVideoCodec_JPEG, nullptr, + false, true); + } + uint8_t** frames = nullptr; + int frame_count = 0; + nv_decoder_->Decode((const uint8_t*)ptr, size, &frames, &frame_count); + + for (int i = 0; i < frame_count; i++) { + const NvEncInputFrame* input_frame = nv_encoder->GetNextInputFrame(); + NvEncoderCuda::CopyToDeviceFrame( + cu_context_, (void*)frames[i], nv_decoder_->GetDeviceFramePitch(), + (CUdeviceptr)input_frame->inputPtr, (int)input_frame->pitch, width, + height, CU_MEMORYTYPE_DEVICE, input_frame->bufferFormat, + input_frame->chromaOffsets, input_frame->numChromaPlanes); + } +} +NvEncoder* NvCodecH264EncoderCudaImpl::CreateNvEncoder(int width, + int height, + bool use_native) { + NV_ENC_BUFFER_FORMAT nvenc_format = + use_native ? 
NV_ENC_BUFFER_FORMAT_NV12 : NV_ENC_BUFFER_FORMAT_IYUV;
+  return new NvEncoderCuda(cu_context_, width, height, nvenc_format);
+}
diff --git a/src/hwenc_nvcodec/nvcodec_h264_encoder_cuda.h b/src/hwenc_nvcodec/nvcodec_h264_encoder_cuda.h
new file mode 100644
index 0000000..576a9b2
--- /dev/null
+++ b/src/hwenc_nvcodec/nvcodec_h264_encoder_cuda.h
@@ -0,0 +1,35 @@
+#ifndef NVCODEC_H264_ENCODER_CUDA_H_
+#define NVCODEC_H264_ENCODER_CUDA_H_
+
+// Mixing the CUDA and WebRTC headers in a single compile went very badly,
+// so this is a small CUDA-only file that does nothing but the CUDA work.
+
+// This header is included from the outside, so CUDA headers must not be #include'd here.
+// Likewise, WebRTC headers cannot be mixed into the CUDA side, so WebRTC headers must not be included either.
+
+#include
+
+class NvCodecH264EncoderCudaImpl;
+
+class NvCodecH264EncoderCuda {
+ public:
+  NvCodecH264EncoderCuda();
+  ~NvCodecH264EncoderCuda();
+
+  void Copy(NvEncoder* nv_encoder,
+            const void* ptr,
+            int width,
+            int height);
+  void CopyNative(NvEncoder* nv_encoder,
+                  const void* ptr,
+                  int size,
+                  int width,
+                  int height);
+  // Just to be safe this header is not included either; only the pointer is used.
+  NvEncoder* CreateNvEncoder(int width, int height, bool use_native);
+
+ private:
+  NvCodecH264EncoderCudaImpl* impl_;
+};
+
+#endif  // NVCODEC_H264_ENCODER_CUDA_H_
diff --git a/src/rtc/h264_format.cpp b/src/rtc/h264_format.cpp
new file mode 100644
index 0000000..effd346
--- /dev/null
+++ b/src/rtc/h264_format.cpp
@@ -0,0 +1,23 @@
+#include "h264_format.h"
+
+// webrtc
+#include "absl/types/optional.h"
+#include "api/video_codecs/sdp_video_format.h"
+
+namespace sora {
+
+// Adapted from modules/video_coding/codecs/h264/h264.cc
+webrtc::SdpVideoFormat CreateH264Format(webrtc::H264::Profile profile,
+                                        webrtc::H264::Level level,
+                                        const std::string& packetization_mode) {
+  const absl::optional<std::string> profile_string =
+      webrtc::H264::ProfileLevelIdToString(
+          webrtc::H264::ProfileLevelId(profile, level));
+  return webrtc::SdpVideoFormat(
+      cricket::kH264CodecName,
+      {{cricket::kH264FmtpProfileLevelId, *profile_string},
+       {cricket::kH264FmtpLevelAsymmetryAllowed, "1"},
+       {cricket::kH264FmtpPacketizationMode, packetization_mode}});
+}
+
+}
\ No newline at end of file
diff --git a/src/rtc/h264_format.h b/src/rtc/h264_format.h
new file mode 100644
index 0000000..d33d7a3
--- /dev/null
+++ b/src/rtc/h264_format.h
@@ -0,0 +1,16 @@
+#ifndef RTC_H264_FORMAT_H_
+#define RTC_H264_FORMAT_H_
+
+#include <string>
+
+#include "media/base/codec.h"
+#include "media/base/h264_profile_level_id.h"
+
+namespace sora {
+
+webrtc::SdpVideoFormat CreateH264Format(webrtc::H264::Profile profile,
+                                        webrtc::H264::Level level,
+                                        const std::string& packetization_mode);
+}
+
+#endif  // RTC_H264_FORMAT_H_
diff --git a/src/rtc/hw_video_encoder_factory.cpp b/src/rtc/hw_video_encoder_factory.cpp
new file mode 100644
index 0000000..e54a504
--- /dev/null
+++ b/src/rtc/hw_video_encoder_factory.cpp
@@ -0,0 +1,78 @@
+#include "hw_video_encoder_factory.h"
+
+#include "absl/memory/memory.h"
+#include "absl/strings/match.h"
+#include "api/video_codecs/sdp_video_format.h"
+#include "media/base/codec.h"
+#include "media/base/media_constants.h"
+#include "modules/video_coding/codecs/h264/include/h264.h"
+#include "modules/video_coding/codecs/vp8/include/vp8.h"
+#include "modules/video_coding/codecs/vp9/include/vp9.h"
+#include "rtc_base/logging.h"
+
+#include "h264_format.h"
+#if defined(_WIN32) || defined(__linux__)
+#include "hwenc_nvcodec/nvcodec_h264_encoder.h"
+#endif
+
+namespace sora {
+
+std::vector<webrtc::SdpVideoFormat> HWVideoEncoderFactory::GetSupportedFormats()
+    const {
+  std::vector<webrtc::SdpVideoFormat> supported_codecs;
+  supported_codecs.push_back(webrtc::SdpVideoFormat(cricket::kVp8CodecName));
+  for (const webrtc::SdpVideoFormat& format : webrtc::SupportedVP9Codecs())
+    supported_codecs.push_back(format);
+
+#if defined(_WIN32) || defined(__linux__)
+  if (NvCodecH264Encoder::IsSupported()) {
+    std::vector<webrtc::SdpVideoFormat> h264_codecs = {
+        CreateH264Format(webrtc::H264::kProfileBaseline,
+                         webrtc::H264::kLevel3_1, "1"),
+        CreateH264Format(webrtc::H264::kProfileBaseline,
+                         webrtc::H264::kLevel3_1, "0"),
+        CreateH264Format(webrtc::H264::kProfileConstrainedBaseline,
+                         webrtc::H264::kLevel3_1, "1"),
+        CreateH264Format(webrtc::H264::kProfileConstrainedBaseline,
+                         webrtc::H264::kLevel3_1, "0")};
+
+    for (const webrtc::SdpVideoFormat& format : h264_codecs)
+      supported_codecs.push_back(format);
+  }
+#endif
+
+  return supported_codecs;
+}
+
+webrtc::VideoEncoderFactory::CodecInfo HWVideoEncoderFactory::QueryVideoEncoder(
+    const webrtc::SdpVideoFormat& format) const {
+  CodecInfo info;
+  info.has_internal_source = false;
+  if (absl::EqualsIgnoreCase(format.name, cricket::kH264CodecName))
+    info.is_hardware_accelerated = true;
+  else
+    info.is_hardware_accelerated = false;
+  return info;
+}
+
+std::unique_ptr<webrtc::VideoEncoder> HWVideoEncoderFactory::CreateVideoEncoder(
+    const webrtc::SdpVideoFormat& format) {
+  if (absl::EqualsIgnoreCase(format.name, cricket::kVp8CodecName))
+    return webrtc::VP8Encoder::Create();
+
+  if (absl::EqualsIgnoreCase(format.name, cricket::kVp9CodecName))
+    return webrtc::VP9Encoder::Create(cricket::VideoCodec(format));
+
+#if defined(_WIN32) || defined(__linux__)
+  if (absl::EqualsIgnoreCase(format.name, cricket::kH264CodecName)) {
+    return std::unique_ptr<webrtc::VideoEncoder>(
+        absl::make_unique<NvCodecH264Encoder>(cricket::VideoCodec(format)));
+  }
+#endif
+
+  RTC_LOG(LS_ERROR) << "Trying to create encoder of unsupported format "
+                    << format.name;
+  return nullptr;
+}
+
+}  // namespace sora
diff --git a/src/rtc/hw_video_encoder_factory.h b/src/rtc/hw_video_encoder_factory.h
new file mode 100644
index 0000000..cd26c21
--- /dev/null
+++ b/src/rtc/hw_video_encoder_factory.h
@@ -0,0 +1,32 @@
+#ifndef HW_VIDEO_ENCODER_FACTORY_H_
+#define HW_VIDEO_ENCODER_FACTORY_H_
+
+#include <memory>
+#include <vector>
+
+#ifdef _WIN32
+#include
+#endif
+
+#include "api/video_codecs/sdp_video_format.h"
+#include "api/video_codecs/video_encoder.h"
+#include "api/video_codecs/video_encoder_factory.h"
+
+namespace sora {
+
+class HWVideoEncoderFactory : public webrtc::VideoEncoderFactory {
+ public:
+  virtual ~HWVideoEncoderFactory() {}
+
+  std::vector<webrtc::SdpVideoFormat> GetSupportedFormats() const override;
+
+  CodecInfo QueryVideoEncoder(
+      const webrtc::SdpVideoFormat& format) const override;
+
+  std::unique_ptr<webrtc::VideoEncoder> CreateVideoEncoder(
+      const webrtc::SdpVideoFormat& format) override;
+};
+
+}  // namespace sora
+
+#endif  // HW_VIDEO_ENCODER_FACTORY_H_
\ No newline at end of file
diff --git a/src/rtc/rtc_manager.cpp b/src/rtc/rtc_manager.cpp
index cd064af..c57f566 100644
--- a/src/rtc/rtc_manager.cpp
+++ b/src/rtc/rtc_manager.cpp
@@ -6,6 +6,8 @@
 #include "api/create_peerconnection_factory.h"
 #include "api/rtc_event_log/rtc_event_log_factory.h"
 #include "api/task_queue/default_task_queue_factory.h"
+#include "api/video_codecs/builtin_video_decoder_factory.h"
+#include "api/video_codecs/builtin_video_encoder_factory.h"
 #include "api/video_track_source_proxy.h"
 #include "media/engine/webrtc_media_engine.h"
 #include "modules/audio_device/include/audio_device.h"
@@ -16,12 +18,14 @@
 #include "rtc_base/logging.h"
 #include "rtc_base/ssl_adapter.h"
 
+#include "hw_video_encoder_factory.h"
 #include "observer.h"
 #include "rtc_manager.h"
 #include "scalable_track_source.h"
-#include "api/video_codecs/builtin_video_decoder_factory.h"
"api/video_codecs/builtin_video_decoder_factory.h" -#include "api/video_codecs/builtin_video_encoder_factory.h" +#ifdef __APPLE__ +#include "mac_helper/objc_codec_factory_helper.h" +#endif namespace { @@ -93,10 +97,15 @@ bool RTCManager::Init( webrtc::CreateBuiltinAudioEncoderFactory(); media_dependencies.audio_decoder_factory = webrtc::CreateBuiltinAudioDecoderFactory(); +#ifdef __APPLE__ + media_dependencies.video_encoder_factory = CreateObjCEncoderFactory(); + media_dependencies.video_decoder_factory = CreateObjCDecoderFactory(); +#else media_dependencies.video_encoder_factory = - webrtc::CreateBuiltinVideoEncoderFactory(); + absl::make_unique(); media_dependencies.video_decoder_factory = webrtc::CreateBuiltinVideoDecoderFactory(); +#endif media_dependencies.audio_mixer = nullptr; media_dependencies.audio_processing = webrtc::AudioProcessingBuilder().Create(); diff --git a/src/sora.cpp b/src/sora.cpp index 9824977..219690c 100644 --- a/src/sora.cpp +++ b/src/sora.cpp @@ -73,8 +73,7 @@ bool Sora::Connect(std::string unity_version, RTC_LOG(LS_INFO) << "Sora::Connect unity_version=" << unity_version << " signaling_url =" << signaling_url_ << " channel_id=" << channel_id_ << " metadata=" << metadata - << " role=" << role - << " multistream=" << multistream + << " role=" << role << " multistream=" << multistream << " capturer_type=" << capturer_type << " unity_camera_texture=0x" << unity_camera_texture << " video_capturer_device=" << video_capturer_device @@ -85,7 +84,8 @@ bool Sora::Connect(std::string unity_version, << " audio_recording_device=" << audio_recording_device << " audio_playout_device=" << audio_playout_device; - if (role != "upstream" && role != "downstream" && role != "sendonly" && role != "recvonly" && role != "sendrecv") { + if (role != "upstream" && role != "downstream" && role != "sendonly" && + role != "recvonly" && role != "sendrecv") { RTC_LOG(LS_ERROR) << "Invalid role: " << role; return false; } @@ -118,6 +118,10 @@ bool Sora::Connect(std::string unity_version, return false; } + RTCManagerConfig config; + config.audio_recording_device = audio_recording_device; + config.audio_playout_device = audio_playout_device; + if (role == "upstream" || role == "sendonly" || role == "sendrecv") { // 送信側は capturer を設定する。送信のみの場合は playout の設定はしない rtc::scoped_refptr capturer = CreateVideoCapturer( @@ -127,10 +131,7 @@ bool Sora::Connect(std::string unity_version, return false; } - RTCManagerConfig config; config.no_playout = role == "upstream" || role == "sendonly"; - config.audio_recording_device = audio_recording_device; - config.audio_playout_device = audio_playout_device; rtc_manager_ = RTCManager::Create(config, std::move(capturer), renderer_.get(), unity_adm_, std::move(task_queue_factory)); @@ -139,8 +140,6 @@ bool Sora::Connect(std::string unity_version, RTCManagerConfig config; config.no_recording = true; config.no_video = true; - config.audio_recording_device = audio_recording_device; - config.audio_playout_device = audio_playout_device; rtc_manager_ = RTCManager::Create(config, nullptr, renderer_.get(), unity_adm_, std::move(task_queue_factory)); @@ -151,11 +150,15 @@ bool Sora::Connect(std::string unity_version, << " channel_id=" << channel_id_; SoraSignalingConfig config; config.unity_version = unity_version; - config.role = - role == "upstream" ? SoraSignalingConfig::Role::Upstream : - role == "downstream" ? SoraSignalingConfig::Role::Downstream : - role == "sendonly" ? SoraSignalingConfig::Role::Sendonly : - role == "recvonly" ? 
+  config.role = role == "upstream"
+                    ? SoraSignalingConfig::Role::Upstream
+                    : role == "downstream"
+                          ? SoraSignalingConfig::Role::Downstream
+                          : role == "sendonly"
+                                ? SoraSignalingConfig::Role::Sendonly
+                                : role == "recvonly"
+                                      ? SoraSignalingConfig::Role::Recvonly
+                                      : SoraSignalingConfig::Role::Sendrecv;
   config.multistream = multistream;
   config.signaling_url = signaling_url_;
   config.channel_id = channel_id_;
@@ -221,10 +224,9 @@ int Sora::GetRenderCallbackEventID() const {
 }
 
 void Sora::RenderCallback() {
-  if (!unity_camera_capturer_) {
-    return;
+  if (unity_camera_capturer_) {
+    unity_camera_capturer_->OnRender();
   }
-  unity_camera_capturer_->OnRender();
 }
 void Sora::ProcessAudio(const void* p, int offset, int samples) {
   if (!unity_adm_) {
diff --git a/src/unity.cpp b/src/unity.cpp
index 816191e..25f8fa7 100644
--- a/src/unity.cpp
+++ b/src/unity.cpp
@@ -2,6 +2,10 @@
 #include "rtc/device_list.h"
 #include "sora.h"
 
+#if defined(_WIN32) || defined(__linux__)
+#include "hwenc_nvcodec/nvcodec_h264_encoder.h"
+#endif
+
 extern "C" {
 
 void* sora_create() {
@@ -63,13 +67,12 @@ int sora_connect(void* p,
                  const char* audio_codec,
                  int audio_bitrate) {
   auto sora = (sora::Sora*)p;
-  if (!sora->Connect(unity_version, signaling_url, channel_id, metadata,
-                     role, multistream, capturer_type,
-                     unity_camera_texture, video_capturer_device, video_width,
-                     video_height, video_codec, video_bitrate,
-                     unity_audio_input, unity_audio_output,
-                     audio_recording_device, audio_playout_device, audio_codec,
-                     audio_bitrate)) {
+  if (!sora->Connect(unity_version, signaling_url, channel_id, metadata, role,
+                     multistream, capturer_type, unity_camera_texture,
+                     video_capturer_device, video_width, video_height,
+                     video_codec, video_bitrate, unity_audio_input,
+                     unity_audio_output, audio_recording_device,
+                     audio_playout_device, audio_codec, audio_bitrate)) {
     return -1;
   }
   return 0;
@@ -122,6 +125,15 @@ bool sora_device_enum_audio_playout(device_enum_cb_t f, void* userdata) {
   });
 }
 
+bool sora_is_h264_supported() {
+#if defined(_WIN32) || defined(__linux__)
+  return NvCodecH264Encoder::IsSupported();
+#else
+  // On macOS VideoToolbox is always available, so always true
+  return true;
+#endif
+}
+
 void UNITY_INTERFACE_EXPORT UNITY_INTERFACE_API
 UnityPluginLoad(IUnityInterfaces* ifs) {
   sora::UnityContext::Instance().Init(ifs);
diff --git a/src/unity.h b/src/unity.h
index 75a8fac..2367863 100644
--- a/src/unity.h
+++ b/src/unity.h
@@ -72,6 +72,7 @@ UNITY_INTERFACE_EXPORT bool sora_device_enum_audio_recording(device_enum_cb_t f,
                                                              void* userdata);
 UNITY_INTERFACE_EXPORT bool sora_device_enum_audio_playout(device_enum_cb_t f,
                                                            void* userdata);
+UNITY_INTERFACE_EXPORT bool sora_is_h264_supported();
 
 #ifdef __cplusplus
 }
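For reference, `CreateH264Format` mirrors the helper in `modules/video_coding/codecs/h264/h264.cc`: it turns a profile/level pair into the `profile-level-id` fmtp value and pins `level-asymmetry-allowed` and `packetization-mode`. A minimal sketch of what one of the advertised H.264 entries looks like; it assumes the repository's `src/` directory is on the include path, and the value shown in the comments is the standard WebRTC encoding of constrained baseline level 3.1 (`42e01f`):

```cpp
#include <iostream>

#include "api/video_codecs/sdp_video_format.h"
#include "media/base/h264_profile_level_id.h"
#include "rtc/h264_format.h"

int main() {
  // One of the four formats HWVideoEncoderFactory advertises when
  // NvCodecH264Encoder::IsSupported() returns true.
  webrtc::SdpVideoFormat format = sora::CreateH264Format(
      webrtc::H264::kProfileConstrainedBaseline, webrtc::H264::kLevel3_1, "1");

  std::cout << format.name << std::endl;  // "H264"
  for (const auto& param : format.parameters) {
    // Expected parameters (map order may vary):
    //   level-asymmetry-allowed=1
    //   packetization-mode=1
    //   profile-level-id=42e01f
    std::cout << "  " << param.first << "=" << param.second << std::endl;
  }
  return 0;
}
```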
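The split between `Copy` and `CopyNative` in `nvcodec_h264_encoder_cuda.*` is the point of the pimpl-style wrapper: the WebRTC-side encoder never sees CUDA headers and only hands buffers across this boundary. Since `nvcodec_h264_encoder.cpp` itself is not part of this section, the following is only a sketch under the assumption that I420 frames go through `Copy` and native (MJPEG) capture buffers go through `CopyNative`; `EncodeOneFrame` and its parameters are hypothetical names, not code from this diff:

```cpp
#include <cstdint>
#include <vector>

#include "NvEncoder/NvEncoder.h"
#include "hwenc_nvcodec/nvcodec_h264_encoder_cuda.h"

// Hypothetical per-frame driver; the real nvcodec_h264_encoder.cpp is not
// shown in this diff.
void EncodeOneFrame(NvCodecH264EncoderCuda& cuda,
                    NvEncoder* nv_encoder,
                    const uint8_t* data,
                    int size,
                    int width,
                    int height,
                    bool native_mjpeg) {
  if (native_mjpeg) {
    // Native path: the buffer is a JPEG bitstream; NvDecoder decodes it on
    // the GPU and the frame is copied device-to-device into the encoder
    // input (matching CreateNvEncoder(..., use_native = true)).
    cuda.CopyNative(nv_encoder, data, size, width, height);
  } else {
    // I420 path: the raw frame is copied host-to-device into the IYUV
    // encoder input (use_native = false).
    cuda.Copy(nv_encoder, data, width, height);
  }

  // NvEncoder emits zero or more encoded access units per submitted frame.
  std::vector<std::vector<uint8_t>> packets;
  nv_encoder->EncodeFrame(packets);
}
```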
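Finally, the new `sora_is_h264_supported()` export lets the caller decide up front whether to offer H.264. A minimal native-side check; the `main` wrapper is illustrative only, and in practice the C# layer would P/Invoke the same symbol:

```cpp
#include <cstdio>

// Exported with C linkage from the plugin; declared in src/unity.h.
extern "C" bool sora_is_h264_supported();

int main() {
  // H.264 is offered only when hardware encoding is available:
  // NVENC on Windows (and Linux builds), VideoToolbox on macOS.
  const char* video_codec = sora_is_h264_supported() ? "H264" : "VP9";
  std::printf("video_codec=%s\n", video_codec);
  return 0;
}
```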