diff --git a/.gitmodules b/.gitmodules index a69e4ee4..33a1a4ed 100644 --- a/.gitmodules +++ b/.gitmodules @@ -40,7 +40,7 @@ [submodule "third-party/nv-codec-headers"] path = third-party/nv-codec-headers url = https://github.com/FFmpeg/nv-codec-headers.git - branch = sdk/12.0 + branch = master [submodule "third-party/nvapi"] path = third-party/nvapi url = https://github.com/NVIDIA/nvapi.git diff --git a/docs/configuration.md b/docs/configuration.md index c661e955..ec4e56dd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2352,6 +2352,46 @@ editing the `conf` file in a text editor. Use the examples as reference. +### nvenc_split_encode + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Description + Split the encoding of each video frame over multiple NVENC hardware units. + Significantly reduces encoding latency with a marginal compression efficiency penalty. + This option is ignored if your GPU has only one NVENC unit. + @note{This option only applies when using NVENC [encoder](#encoder) with HEVC or AV1.} + @note{Applies to Windows only.} +
Default@code{} + driver_decides + @endcode
Example@code{} + nvenc_split_encode = driver_decides + @endcode
ChoicesdisabledDisabled
driver_decidesThe NVIDIA driver will automatically enable split frame encoding when the following conditions are met: 2+ NVENC units, resolution is at least 4K, and the preset is P1-P4.
enabledEnabled
+ ### nvenc_latency_over_power diff --git a/packaging/linux/flatpak/modules/ffmpeg.json b/packaging/linux/flatpak/modules/ffmpeg.json index daca27e4..f4f15a34 100644 --- a/packaging/linux/flatpak/modules/ffmpeg.json +++ b/packaging/linux/flatpak/modules/ffmpeg.json @@ -8,8 +8,8 @@ "sources": [ { "type": "file", - "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.221.143859/Linux-x86_64-ffmpeg.tar.gz", - "sha256": "cebf7a069bf144808896befe8d0d9d2d1e1d9eb1c9ac44e6906b72c6150a216a", + "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.323.141148/Linux-x86_64-ffmpeg.tar.gz", + "sha256": "66319706a94d1607492e6ebc51060918fce51197d589cac313de8c532143a184", "dest-filename": "ffmpeg.tar.gz", "only-arches": [ "x86_64" @@ -23,8 +23,8 @@ }, { "type": "file", - "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.221.143859/Linux-aarch64-ffmpeg.tar.gz", - "sha256": "6ba08d00f70d913f57ff0df8decaca6c3787b798e163a1cb2f086cb86ff7986d", + "url": "https://github.com/LizardByte/build-deps/releases/download/v2026.323.141148/Linux-aarch64-ffmpeg.tar.gz", + "sha256": "c955e6dba2cf62b4b3c954e0da378db47233fa7bef09ab9c86b4656d2c08378c", "dest-filename": "ffmpeg.tar.gz", "only-arches": [ "aarch64" diff --git a/src/config.cpp b/src/config.cpp index c45f55ba..9faac6f9 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -64,6 +64,21 @@ namespace config { return nvenc::nvenc_two_pass::quarter_resolution; } + nvenc::nvenc_split_frame_encoding split_encode_from_view(const std::string_view &preset) { + using enum nvenc::nvenc_split_frame_encoding; + if (preset == "disabled") { + return disabled; + } + if (preset == "driver_decides") { + return driver_decides; + } + if (preset == "enabled") { + return force_enabled; + } + BOOST_LOG(warning) << "config: unknown nvenc_split_encode value: " << preset; + return driver_decides; + } + } // namespace nv namespace amd { @@ -1097,6 +1112,7 @@ namespace config { bool_f(vars, "nvenc_spatial_aq", 
video.nv.adaptive_quantization); generic_f(vars, "nvenc_twopass", video.nv.two_pass, nv::twopass_from_view); bool_f(vars, "nvenc_h264_cavlc", video.nv.h264_cavlc); + generic_f(vars, "nvenc_split_encode", video.nv.split_frame_encoding, nv::split_encode_from_view); bool_f(vars, "nvenc_realtime_hags", video.nv_realtime_hags); bool_f(vars, "nvenc_opengl_vulkan_on_dxgi", video.nv_opengl_vulkan_on_dxgi); bool_f(vars, "nvenc_latency_over_power", video.nv_sunshine_high_power_mode); diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp index 59c9781a..c63dfa90 100644 --- a/src/nvenc/nvenc_base.cpp +++ b/src/nvenc/nvenc_base.cpp @@ -20,7 +20,7 @@ // - NV_ENC_*_VER definitions where the value inside NVENCAPI_STRUCT_VERSION() was increased // - Incompatible struct changes in nvEncodeAPI.h (fields removed, semantics changed, etc.) // - Test both old and new drivers with all supported codecs -#if NVENCAPI_VERSION != MAKE_NVENC_VER(12U, 0U) +#if NVENCAPI_VERSION != MAKE_NVENC_VER(13U, 0U) #error Check and update NVENC code for backwards compatibility! #endif @@ -98,10 +98,6 @@ namespace nvenc { } bool nvenc_base::create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace, NV_ENC_BUFFER_FORMAT buffer_format) { - // Pick the minimum NvEncode API version required to support the specified codec - // to maximize driver compatibility. AV1 was introduced in SDK v12.0. - minimum_api_version = (client_config.videoFormat <= 1) ? 
MAKE_NVENC_VER(11U, 0U) : MAKE_NVENC_VER(12U, 0U); - if (!nvenc && !init_library()) { return false; } @@ -118,10 +114,10 @@ namespace nvenc { encoder_params.buffer_format = buffer_format; encoder_params.rfi = true; - NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = {min_struct_version(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER)}; + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = {NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER}; session_params.device = device; session_params.deviceType = device_type; - session_params.apiVersion = minimum_api_version; + session_params.apiVersion = NVENCAPI_VERSION; if (nvenc_failed(nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder))) { BOOST_LOG(error) << "NvEnc: NvEncOpenEncodeSessionEx() failed: " << last_nvenc_error_string; return false; @@ -139,7 +135,7 @@ namespace nvenc { return false; } - NV_ENC_INITIALIZE_PARAMS init_params = {min_struct_version(NV_ENC_INITIALIZE_PARAMS_VER)}; + NV_ENC_INITIALIZE_PARAMS init_params = {NV_ENC_INITIALIZE_PARAMS_VER}; switch (client_config.videoFormat) { case 0: @@ -173,10 +169,13 @@ namespace nvenc { } auto get_encoder_cap = [&](NV_ENC_CAPS cap) { - NV_ENC_CAPS_PARAM param = {min_struct_version(NV_ENC_CAPS_PARAM_VER), cap}; + NV_ENC_CAPS_PARAM param = {NV_ENC_CAPS_PARAM_VER}; + param.capsToQuery = cap; int value = 0; - nvenc->nvEncGetEncodeCaps(encoder, init_params.encodeGUID, ¶m, &value); - return value; + if (int ret = nvenc->nvEncGetEncodeCaps(encoder, init_params.encodeGUID, ¶m, &value); ret == NV_ENC_SUCCESS) { + return value; + } + return 0; }; auto buffer_is_10bit = [&]() { @@ -231,7 +230,24 @@ namespace nvenc { init_params.frameRateDen = fps.den; } - NV_ENC_PRESET_CONFIG preset_config = {min_struct_version(NV_ENC_PRESET_CONFIG_VER), {min_struct_version(NV_ENC_CONFIG_VER, 7, 8)}}; + if (client_config.videoFormat > 0 && get_encoder_cap(NV_ENC_CAPS_NUM_ENCODER_ENGINES) > 1) { + // SFE supports HEVC/AV1 if you have more than 1 nvenc block + using enum nvenc_split_frame_encoding; 
+ NV_ENC_SPLIT_ENCODE_MODE split_mode; + if (config.split_frame_encoding == disabled) { + split_mode = NV_ENC_SPLIT_DISABLE_MODE; + } else if (config.split_frame_encoding == force_enabled) { + split_mode = NV_ENC_SPLIT_AUTO_FORCED_MODE; + } else { + split_mode = NV_ENC_SPLIT_AUTO_MODE; + } + init_params.splitEncodeMode = split_mode; + } + + NV_ENC_PRESET_CONFIG preset_config = { + .version = NV_ENC_PRESET_CONFIG_VER, + .presetCfg = {.version = NV_ENC_CONFIG_VER}, + }; if (nvenc_failed(nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config))) { BOOST_LOG(error) << "NvEnc: NvEncGetEncodePresetConfigEx() failed: " << last_nvenc_error_string; return false; @@ -333,7 +349,8 @@ namespace nvenc { auto &format_config = enc_config.encodeCodecConfig.hevcConfig; set_h264_hevc_common_format_config(format_config); if (buffer_is_10bit()) { - format_config.pixelBitDepthMinus8 = 2; + format_config.inputBitDepth = NV_ENC_BIT_DEPTH_10; + format_config.outputBitDepth = NV_ENC_BIT_DEPTH_10; } set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numRefL0, 5); set_minqp_if_enabled(config.min_qp_hevc); @@ -366,8 +383,8 @@ namespace nvenc { } format_config.enableBitstreamPadding = config.insert_filler_data; if (buffer_is_10bit()) { - format_config.inputPixelBitDepthMinus8 = 2; - format_config.pixelBitDepthMinus8 = 2; + format_config.inputBitDepth = NV_ENC_BIT_DEPTH_10; + format_config.outputBitDepth = NV_ENC_BIT_DEPTH_10; } format_config.colorPrimaries = colorspace.primaries; format_config.transferCharacteristics = colorspace.tranfer_function; @@ -395,7 +412,7 @@ namespace nvenc { } if (async_event_handle) { - NV_ENC_EVENT_PARAMS event_params = {min_struct_version(NV_ENC_EVENT_PARAMS_VER)}; + NV_ENC_EVENT_PARAMS event_params = {NV_ENC_EVENT_PARAMS_VER}; event_params.completionEvent = async_event_handle; if (nvenc_failed(nvenc->nvEncRegisterAsyncEvent(encoder, &event_params))) { BOOST_LOG(error) << 
"NvEnc: NvEncRegisterAsyncEvent() failed: " << last_nvenc_error_string; @@ -403,7 +420,7 @@ namespace nvenc { } } - NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = {min_struct_version(NV_ENC_CREATE_BITSTREAM_BUFFER_VER)}; + NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = {NV_ENC_CREATE_BITSTREAM_BUFFER_VER}; if (nvenc_failed(nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer))) { BOOST_LOG(error) << "NvEnc: NvEncCreateBitstreamBuffer() failed: " << last_nvenc_error_string; return false; @@ -455,6 +472,13 @@ namespace nvenc { if (config.insert_filler_data) { extra += " filler-data"; } + if (client_config.videoFormat > 0 && get_encoder_cap(NV_ENC_CAPS_NUM_ENCODER_ENGINES) > 1) { + if (init_params.splitEncodeMode == NV_ENC_SPLIT_AUTO_MODE) { + extra += " sfe-auto"; + } else if (init_params.splitEncodeMode == NV_ENC_SPLIT_AUTO_FORCED_MODE) { + extra += " sfe"; + } + } BOOST_LOG(info) << "NvEnc: created encoder " << video_format_string << quality_preset_string_from_guid(init_params.presetGUID) << extra; } @@ -472,7 +496,7 @@ namespace nvenc { output_bitstream = nullptr; } if (encoder && async_event_handle) { - NV_ENC_EVENT_PARAMS event_params = {min_struct_version(NV_ENC_EVENT_PARAMS_VER)}; + NV_ENC_EVENT_PARAMS event_params = {NV_ENC_EVENT_PARAMS_VER}; event_params.completionEvent = async_event_handle; if (nvenc_failed(nvenc->nvEncUnregisterAsyncEvent(encoder, &event_params))) { BOOST_LOG(error) << "NvEnc: NvEncUnregisterAsyncEvent() failed: " << last_nvenc_error_string; @@ -508,7 +532,7 @@ namespace nvenc { return {}; } - NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = {min_struct_version(NV_ENC_MAP_INPUT_RESOURCE_VER)}; + NV_ENC_MAP_INPUT_RESOURCE mapped_input_buffer = {NV_ENC_MAP_INPUT_RESOURCE_VER}; mapped_input_buffer.registeredResource = registered_input_buffer; if (nvenc_failed(nvenc->nvEncMapInputResource(encoder, &mapped_input_buffer))) { @@ -521,7 +545,7 @@ namespace nvenc { } }); - NV_ENC_PIC_PARAMS pic_params = 
{min_struct_version(NV_ENC_PIC_PARAMS_VER, 4, 6)}; + NV_ENC_PIC_PARAMS pic_params = {NV_ENC_PIC_PARAMS_VER}; pic_params.inputWidth = encoder_params.width; pic_params.inputHeight = encoder_params.height; pic_params.encodePicFlags = force_idr ? NV_ENC_PIC_FLAG_FORCEIDR : 0; @@ -537,7 +561,7 @@ namespace nvenc { return {}; } - NV_ENC_LOCK_BITSTREAM lock_bitstream = {min_struct_version(NV_ENC_LOCK_BITSTREAM_VER, 1, 2)}; + NV_ENC_LOCK_BITSTREAM lock_bitstream = {NV_ENC_LOCK_BITSTREAM_VER}; lock_bitstream.outputBitstream = output_bitstream; lock_bitstream.doNotWait = async_event_handle ? 1 : 0; @@ -584,8 +608,7 @@ namespace nvenc { return false; } - if (first_frame >= encoder_state.last_rfi_range.first && - last_frame <= encoder_state.last_rfi_range.second) { + if (first_frame >= encoder_state.last_rfi_range.first && last_frame <= encoder_state.last_rfi_range.second) { BOOST_LOG(debug) << "NvEnc: rfi request " << first_frame << "-" << last_frame << " already done"; return true; } @@ -671,19 +694,4 @@ namespace nvenc { return false; } - uint32_t nvenc_base::min_struct_version(uint32_t version, uint32_t v11_struct_version, uint32_t v12_struct_version) { - assert(minimum_api_version); - - // Mask off and replace the original NVENCAPI_VERSION - version &= ~NVENCAPI_VERSION; - version |= minimum_api_version; - - // If there's a struct version override, apply that too - if (v11_struct_version || v12_struct_version) { - version &= ~(0xFFu << 16); - version |= (((minimum_api_version & 0xFF) >= 12) ? v12_struct_version : v11_struct_version) << 16; - } - - return version; - } } // namespace nvenc diff --git a/src/nvenc/nvenc_base.h b/src/nvenc/nvenc_base.h index a4615a84..bb2cc3f1 100644 --- a/src/nvenc/nvenc_base.h +++ b/src/nvenc/nvenc_base.h @@ -106,16 +106,6 @@ namespace nvenc { bool nvenc_failed(NVENCSTATUS status); - /** - * @brief This function returns the corresponding struct version for the minimum API required by the codec. 
- * @details Reducing the struct versions maximizes driver compatibility by avoiding needless API breaks. - * @param version The raw structure version from `NVENCAPI_STRUCT_VERSION()`. - * @param v11_struct_version Optionally specifies the struct version to use with v11 SDK major versions. - * @param v12_struct_version Optionally specifies the struct version to use with v12 SDK major versions. - * @return A suitable struct version for the active codec. - */ - uint32_t min_struct_version(uint32_t version, uint32_t v11_struct_version = 0, uint32_t v12_struct_version = 0); - const NV_ENC_DEVICE_TYPE device_type; void *encoder = nullptr; @@ -142,7 +132,6 @@ namespace nvenc { private: NV_ENC_OUTPUT_PTR output_bitstream = nullptr; - uint32_t minimum_api_version = 0; struct { uint64_t last_encoded_frame_index = 0; diff --git a/src/nvenc/nvenc_config.h b/src/nvenc/nvenc_config.h index 824397e8..b2143456 100644 --- a/src/nvenc/nvenc_config.h +++ b/src/nvenc/nvenc_config.h @@ -12,6 +12,12 @@ namespace nvenc { full_resolution, ///< Better overall statistics, slower and uses more extra vram }; + enum class nvenc_split_frame_encoding { + disabled, ///< Disable + driver_decides, ///< Let driver decide + force_enabled, ///< Force-enable + }; + /** * @brief NVENC encoder configuration. 
*/ @@ -48,6 +54,9 @@ namespace nvenc { // Add filler data to encoded frames to stay at target bitrate, mainly for testing bool insert_filler_data = false; + + // Enable split-frame encoding if the gpu has multiple NVENC hardware clusters + nvenc_split_frame_encoding split_frame_encoding = nvenc_split_frame_encoding::driver_decides; }; } // namespace nvenc diff --git a/src/nvenc/nvenc_d3d11.cpp b/src/nvenc/nvenc_d3d11.cpp index 1b749f92..f3c7af17 100644 --- a/src/nvenc/nvenc_d3d11.cpp +++ b/src/nvenc/nvenc_d3d11.cpp @@ -39,7 +39,7 @@ namespace nvenc { if ((dll = LoadLibraryEx(dll_name, nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32))) { if (auto create_instance = (decltype(NvEncodeAPICreateInstance) *) GetProcAddress(dll, "NvEncodeAPICreateInstance")) { auto new_nvenc = std::make_unique(); - new_nvenc->version = min_struct_version(NV_ENCODE_API_FUNCTION_LIST_VER); + new_nvenc->version = NV_ENCODE_API_FUNCTION_LIST_VER; if (nvenc_failed(create_instance(new_nvenc.get()))) { BOOST_LOG(error) << "NvEnc: NvEncodeAPICreateInstance() failed: " << last_nvenc_error_string; } else { diff --git a/src/nvenc/nvenc_d3d11_native.cpp b/src/nvenc/nvenc_d3d11_native.cpp index afc66578..02d1b364 100644 --- a/src/nvenc/nvenc_d3d11_native.cpp +++ b/src/nvenc/nvenc_d3d11_native.cpp @@ -51,7 +51,7 @@ namespace nvenc { } if (!registered_input_buffer) { - NV_ENC_REGISTER_RESOURCE register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)}; + NV_ENC_REGISTER_RESOURCE register_resource = {NV_ENC_REGISTER_RESOURCE_VER}; register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX; register_resource.width = encoder_params.width; register_resource.height = encoder_params.height; diff --git a/src/nvenc/nvenc_d3d11_on_cuda.cpp b/src/nvenc/nvenc_d3d11_on_cuda.cpp index b915b329..44123ed9 100644 --- a/src/nvenc/nvenc_d3d11_on_cuda.cpp +++ b/src/nvenc/nvenc_d3d11_on_cuda.cpp @@ -169,7 +169,7 @@ namespace nvenc { } if (!registered_input_buffer) { - NV_ENC_REGISTER_RESOURCE 
register_resource = {min_struct_version(NV_ENC_REGISTER_RESOURCE_VER, 3, 4)}; + NV_ENC_REGISTER_RESOURCE register_resource = {NV_ENC_REGISTER_RESOURCE_VER}; register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR; register_resource.width = encoder_params.width; register_resource.height = encoder_params.height; diff --git a/src_assets/common/assets/web/config.html b/src_assets/common/assets/web/config.html index b90a6214..979b0ac7 100644 --- a/src_assets/common/assets/web/config.html +++ b/src_assets/common/assets/web/config.html @@ -289,6 +289,7 @@ "nvenc_spatial_aq": "disabled", "nvenc_vbv_increase": 0, "nvenc_realtime_hags": "enabled", + "nvenc_split_encode": "driver_decides", "nvenc_latency_over_power": "enabled", "nvenc_opengl_vulkan_on_dxgi": "enabled", "nvenc_h264_cavlc": "disabled", diff --git a/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue b/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue index a6bd2a00..37873c1d 100644 --- a/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue +++ b/src_assets/common/assets/web/configs/tabs/encoders/NvidiaNvencEncoder.vue @@ -27,6 +27,17 @@ const config = ref(props.config)
{{ $t('config.nvenc_preset_desc') }}
+ +
+ + +
{{ $t('config.nvenc_split_encode_desc') }}
+
+
diff --git a/src_assets/common/assets/web/public/assets/locale/en.json b/src_assets/common/assets/web/public/assets/locale/en.json index 53463b78..9d32b735 100644 --- a/src_assets/common/assets/web/public/assets/locale/en.json +++ b/src_assets/common/assets/web/public/assets/locale/en.json @@ -301,6 +301,9 @@ "nvenc_realtime_hags_desc": "Currently NVIDIA drivers may freeze in encoder when HAGS is enabled, realtime priority is used and VRAM utilization is close to maximum. Disabling this option lowers the priority to high, sidestepping the freeze at the cost of reduced capture performance when the GPU is heavily loaded.", "nvenc_spatial_aq": "Spatial AQ", "nvenc_spatial_aq_desc": "Assign higher QP values to flat regions of the video. Recommended to enable when streaming at lower bitrates.", + "nvenc_split_encode": "Split frame encoding", + "nvenc_split_encode_desc": "Split the encoding of each video frame over multiple NVENC hardware units. Significantly reduces host processing latency with a marginal compression efficiency penalty. The default option enables split frame encoding (SFE) if the following conditions are met: there are 2+ NVENC units, the stream is 4K resolution or higher, and the preset is P1-P4. Set this to Enabled to use SFE at lower resolutions or higher presets.", + "nvenc_split_encode_driver_decides_def": "Driver decides (default)", "nvenc_twopass": "Two-pass mode", "nvenc_twopass_desc": "Adds preliminary encoding pass. This allows to detect more motion vectors, better distribute bitrate across the frame and more strictly adhere to bitrate limits. 
Disabling it is not recommended since this can lead to occasional bitrate overshoot and subsequent packet loss.", "nvenc_twopass_disabled": "Disabled (fastest, not recommended)", diff --git a/third-party/build-deps b/third-party/build-deps index 91ac60f3..c08f69db 160000 --- a/third-party/build-deps +++ b/third-party/build-deps @@ -1 +1 @@ -Subproject commit 91ac60f3a051819d7d9975b8757730a9471ee8de +Subproject commit c08f69db10450bd06cf79045e79b9179c99bae70 diff --git a/third-party/nv-codec-headers b/third-party/nv-codec-headers index fe32761e..e844e5b2 160000 --- a/third-party/nv-codec-headers +++ b/third-party/nv-codec-headers @@ -1 +1 @@ -Subproject commit fe32761e7a8bc79fcf560f356bf3898271bf4d56 +Subproject commit e844e5b26f46bb77479f063029595293aa8f812d