diff --git a/USAGE_desktop_D3D12.md b/USAGE_desktop_D3D12.md index 52fc4031a4..a81a5956d5 100644 --- a/USAGE_desktop_D3D12.md +++ b/USAGE_desktop_D3D12.md @@ -209,6 +209,7 @@ Usage: [-m | --memory-translation ] [--fw | --force-windowed ] [--log-level ] [--log-file ] [--log-debugview] + [--batching-memory-usage <pct>] [--api ] Required arguments: @@ -327,6 +328,15 @@ D3D12-only: --dx12-override-object-names Generates unique names for all ID3D12Objects and assigns each object the generated name. This is intended to assist replay debugging. + --batching-memory-usage <pct> + Limits the max amount of additional memory that can be used to batch + resource data uploads during trim state load. Batching resource data + uploads may reduce the number of GPU submissions required to load the + trim state. <pct> is applied to the total available physical system memory + and to the application's GPU memory budget. This only limits memory use + for batching and does not guarantee overall max memory usage. + Acceptable values range from 0 to 100 (default: 80). 0 means no batching, + 100 means use all available system and GPU memory. ``` diff --git a/framework/decode/dx12_replay_consumer_base.cpp b/framework/decode/dx12_replay_consumer_base.cpp index 0e81ad91ed..a1b58f8aa6 100644 --- a/framework/decode/dx12_replay_consumer_base.cpp +++ b/framework/decode/dx12_replay_consumer_base.cpp @@ -418,9 +418,9 @@ void Dx12ReplayConsumerBase::ProcessInitSubresourceCommand(const format::InitSub // If no entry exists in resource_init_infos_, this is the first subresource of a new resource. 
GFXRECON_ASSERT(command_header.subresource == 0); - const double max_cpu_mem_usage = 15.0 / 16.0; + const double max_mem_usage = static_cast(options_.memory_usage) / 100.0; if (!graphics::dx12::IsMemoryAvailable( - total_size_in_bytes, extra_device_info->adapter3, max_cpu_mem_usage, extra_device_info->is_uma)) + total_size_in_bytes, extra_device_info->adapter3, max_mem_usage, extra_device_info->is_uma)) { // If neither system memory or GPU memory are able to accommodate next resource, // execute the Copy() calls and release temp buffer to free memory diff --git a/framework/decode/dx_replay_options.h b/framework/decode/dx_replay_options.h index d3fc306081..a2676f1e87 100644 --- a/framework/decode/dx_replay_options.h +++ b/framework/decode/dx_replay_options.h @@ -36,6 +36,8 @@ GFXRECON_BEGIN_NAMESPACE(gfxrecon) GFXRECON_BEGIN_NAMESPACE(decode) +static constexpr uint32_t kDefaultBatchingMemoryUsage = 80; + struct DxReplayOptions : public ReplayOptions { bool enable_d3d12{ true }; @@ -50,6 +52,7 @@ struct DxReplayOptions : public ReplayOptions std::string screenshot_dir; std::string screenshot_file_prefix{ kDefaultScreenshotFilePrefix }; std::string replace_dir; + int32_t memory_usage{ kDefaultBatchingMemoryUsage }; }; GFXRECON_END_NAMESPACE(decode) diff --git a/framework/encode/dx12_state_writer.cpp b/framework/encode/dx12_state_writer.cpp index ea11c405ae..e67889b4f3 100644 --- a/framework/encode/dx12_state_writer.cpp +++ b/framework/encode/dx12_state_writer.cpp @@ -779,11 +779,11 @@ void Dx12StateWriter::WriteResourceSnapshots( uint64_t size_in_bytes = resource_info.get()->size_in_bytes; auto device_info = device_wrapper->GetObjectInfo(); - const double max_cpu_mem_usage = 7.0 / 8.0; + const double max_mem_usage = 7.0 / 8.0; const bool is_uma = device_wrapper->GetObjectInfo()->is_uma; if (!graphics::dx12::IsMemoryAvailable( - size_in_bytes, device_info.get()->adapter3, max_cpu_mem_usage, is_uma)) + size_in_bytes, device_info.get()->adapter3, max_mem_usage, is_uma)) 
{ // If neither system memory or GPU memory are able to accommodate next resource, // execute the existing Copy() calls and release temp buffer to free memory diff --git a/framework/graphics/dx12_util.cpp b/framework/graphics/dx12_util.cpp index d2aef19b8f..86025cdc96 100644 --- a/framework/graphics/dx12_util.cpp +++ b/framework/graphics/dx12_util.cpp @@ -978,10 +978,10 @@ bool IsUma(ID3D12Device* device) return isUma; } -uint64_t GetAvailableGpuAdapterMemory(IDXGIAdapter3* adapter, const bool is_uma) +uint64_t GetAvailableGpuAdapterMemory(IDXGIAdapter3* adapter, double memory_usage, const bool is_uma) { + GFXRECON_ASSERT(memory_usage > 0.0 && memory_usage <= 1.0); uint64_t available_mem = 0; - if (adapter != nullptr) { DXGI_QUERY_VIDEO_MEMORY_INFO video_memory_info = {}; @@ -992,13 +992,10 @@ uint64_t GetAvailableGpuAdapterMemory(IDXGIAdapter3* adapter, const bool is_uma) } if (SUCCEEDED(adapter->QueryVideoMemoryInfo(0, memory_segment, &video_memory_info))) { - if (video_memory_info.Budget > video_memory_info.CurrentUsage) - { - available_mem = video_memory_info.Budget - video_memory_info.CurrentUsage; - } - else + uint64_t total_memory = static_cast(video_memory_info.Budget * memory_usage); + if (total_memory > video_memory_info.CurrentUsage) { - GFXRECON_LOG_ERROR("Detected adapter memory oversubscription"); + available_mem = total_memory - video_memory_info.CurrentUsage; } } else @@ -1016,6 +1013,7 @@ uint64_t GetAvailableGpuAdapterMemory(IDXGIAdapter3* adapter, const bool is_uma) uint64_t GetAvailableCpuMemory(double max_usage) { + GFXRECON_ASSERT(max_usage > 0.0 && max_usage <= 1.0); MEMORYSTATUSEX mem_info = {}; mem_info.dwLength = sizeof(MEMORYSTATUSEX); if (GlobalMemoryStatusEx(&mem_info) == FALSE) @@ -1030,27 +1028,38 @@ uint64_t GetAvailableCpuMemory(double max_usage) double reserved_phys = mem_info.ullTotalPhys * (1.0 - max_usage); avail_phys = static_cast(std::max(0.0, mem_info.ullAvailPhys - reserved_phys)); } - // Always limit by available virtual 
memory. return std::min(avail_phys, mem_info.ullAvailVirtual); } -bool IsMemoryAvailable(uint64_t required_memory, IDXGIAdapter3* adapter, double max_cpu_mem_usage, const bool is_uma) +bool IsMemoryAvailable(uint64_t required_memory, IDXGIAdapter3* adapter, double max_mem_usage, const bool is_uma) { bool available = false; + if (max_mem_usage == 0.0) + { + // 0.0 means no batching, skip memory checking + return available; + } #ifdef _WIN64 // For 32bit, only upload one buffer at one time, to save memory usage. - if (adapter != nullptr) + if (max_mem_usage > 0.0 && max_mem_usage <= 1.0) { - uint64_t total_available_gpu_adapter_memory = GetAvailableGpuAdapterMemory(adapter, is_uma); - uint64_t total_available_cpu_memory = GetAvailableCpuMemory(max_cpu_mem_usage); - uint64_t total_required_memory = static_cast(required_memory * kMemoryTolerance); - if ((total_required_memory < total_available_gpu_adapter_memory) && - (total_required_memory < total_available_cpu_memory)) + if (adapter != nullptr) { - available = true; + uint64_t total_available_gpu_adapter_memory = GetAvailableGpuAdapterMemory(adapter, max_mem_usage, is_uma); + uint64_t total_available_cpu_memory = GetAvailableCpuMemory(max_mem_usage); + uint64_t total_required_memory = static_cast(required_memory * kMemoryTolerance); + if ((total_required_memory < total_available_gpu_adapter_memory) && + (total_required_memory < total_available_cpu_memory)) + { + available = true; + } } } + else + { + GFXRECON_LOG_ERROR("Memory usage setting out of range"); + } #endif return available; } diff --git a/framework/graphics/dx12_util.h b/framework/graphics/dx12_util.h index acc1502092..426a944a3d 100644 --- a/framework/graphics/dx12_util.h +++ b/framework/graphics/dx12_util.h @@ -223,14 +223,14 @@ bool IsUma(ID3D12Device* device); // This function is used to get available GPU virtual memory. // The input is current adapter which created current device. 
-uint64_t GetAvailableGpuAdapterMemory(IDXGIAdapter3* adapter, bool is_uma); +uint64_t GetAvailableGpuAdapterMemory(IDXGIAdapter3* adapter, double max_usage, bool is_uma); // This function is used to get available CPU memory. uint64_t GetAvailableCpuMemory(double max_usage); // Give require memory size to check if there are enough CPU&GPU memory to allocate the resource. If max_cpu_mem_usage // > 1.0, the result is not limited by available physical memory. -bool IsMemoryAvailable(uint64_t requried_memory, IDXGIAdapter3* adapter, double max_cpu_mem_usage, bool is_uma); +bool IsMemoryAvailable(uint64_t requried_memory, IDXGIAdapter3* adapter, double max_mem_usage, bool is_uma); // Get GPU memory usage by resource desc uint64_t GetResourceSizeInBytes(ID3D12Device* device, const D3D12_RESOURCE_DESC* desc); diff --git a/tools/replay/replay_settings.h b/tools/replay/replay_settings.h index d591f88ac8..df8a5ae95c 100644 --- a/tools/replay/replay_settings.h +++ b/tools/replay/replay_settings.h @@ -36,7 +36,7 @@ const char kArguments[] = "--log-level,--log-file,--gpu,--gpu-group,--pause-frame,--wsi,--surface-index,-m|--memory-translation," "--replace-shaders,--screenshots,--denied-messages,--allowed-messages,--screenshot-format,--" "screenshot-dir,--screenshot-prefix,--screenshot-size,--screenshot-scale,--mfr|--measurement-frame-range,--fw|--" - "force-windowed"; + "force-windowed,--batching-memory-usage"; static void PrintUsage(const char* exe_name) { @@ -66,6 +66,7 @@ static void PrintUsage(const char* exe_name) GFXRECON_WRITE_CONSOLE("\t\t\t[--fw | --force-windowed ]"); #if defined(WIN32) GFXRECON_WRITE_CONSOLE("\t\t\t[--log-level ] [--log-file ] [--log-debugview]"); + GFXRECON_WRITE_CONSOLE("\t\t\t[--batching-memory-usage ]"); #if defined(_DEBUG) GFXRECON_WRITE_CONSOLE("\t\t\t[--api ] [--no-debug-popup] \n"); #else @@ -225,6 +226,11 @@ static void PrintUsage(const char* exe_name) GFXRECON_WRITE_CONSOLE(" --dx12-override-object-names Generates unique names for all 
ID3D12Objects and"); GFXRECON_WRITE_CONSOLE(" assigns each object the generated name."); GFXRECON_WRITE_CONSOLE(" This is intended to assist replay debugging."); + GFXRECON_WRITE_CONSOLE(" --batching-memory-usage "); + GFXRECON_WRITE_CONSOLE(" \t\tMax amount of memory consumption while loading a trimmed capture file."); + GFXRECON_WRITE_CONSOLE(" \t\tAcceptable values range from 0 to 100 (default: 80)"); + GFXRECON_WRITE_CONSOLE(" \t\t0 means no batching at all"); + GFXRECON_WRITE_CONSOLE(" \t\t100 means use all available system and GPU memory"); #endif diff --git a/tools/tool_settings.h b/tools/tool_settings.h index 9e59473232..f965d89f04 100644 --- a/tools/tool_settings.h +++ b/tools/tool_settings.h @@ -108,9 +108,10 @@ const char kIncludeBinariesOption[] = "--include-binaries"; const char kExpandFlagsOption[] = "--expand-flags"; const char kFilePerFrameOption[] = "--file-per-frame"; #if defined(WIN32) -const char kApiFamilyOption[] = "--api"; -const char kDxTwoPassReplay[] = "--dx12-two-pass-replay"; -const char kDxOverrideObjectNames[] = "--dx12-override-object-names"; +const char kApiFamilyOption[] = "--api"; +const char kDxTwoPassReplay[] = "--dx12-two-pass-replay"; +const char kDxOverrideObjectNames[] = "--dx12-override-object-names"; +const char kBatchingMemoryUsageArgument[] = "--batching-memory-usage"; #endif enum class WsiPlatform @@ -880,6 +881,21 @@ static gfxrecon::decode::DxReplayOptions GetDxReplayOptions(const gfxrecon::util replay_options.override_object_names = true; } + const std::string& memory_usage = arg_parser.GetArgumentValue(kBatchingMemoryUsageArgument); + if (!memory_usage.empty()) + { + int memory_usage_int = std::stoi(memory_usage); + if (memory_usage_int >= 0 && memory_usage_int <= 100) + { + replay_options.memory_usage = static_cast(memory_usage_int); + } + else + { + GFXRECON_LOG_WARNING( + "The parameter to --batching-memory-usage is out of range [0, 100], will use 80 as default value."); + } + } + replay_options.screenshot_ranges 
= GetScreenshotRanges(arg_parser); replay_options.screenshot_format = GetScreenshotFormat(arg_parser); replay_options.screenshot_dir = GetScreenshotDir(arg_parser);