diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index 55e3602c33..d7fe659dfa 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -205,7 +205,8 @@ impl super::Adapter {
             | wgt::Features::WRITE_TIMESTAMP_INSIDE_PASSES
             | wgt::Features::TEXTURE_COMPRESSION_BC
             | wgt::Features::CLEAR_TEXTURE
-            | wgt::Features::TEXTURE_FORMAT_16BIT_NORM;
+            | wgt::Features::TEXTURE_FORMAT_16BIT_NORM
+            | wgt::Features::PUSH_CONSTANTS;
         //TODO: in order to expose this, we need to run a compute shader
         // that extract the necessary statistics out of the D3D12 result.
         // Alternatively, we could allocate a buffer for the query set,
@@ -270,7 +271,25 @@ impl super::Adapter {
                         .min(crate::MAX_VERTEX_BUFFERS as u32),
                     max_vertex_attributes: d3d12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
                     max_vertex_buffer_array_stride: d3d12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
-                    max_push_constant_size: 0,
+                    // The push constants are part of the root signature which
+                    // has a limit of 64 DWORDS (256 bytes), but other resources
+                    // also share the root signature:
+                    //
+                    // - push constants consume a `DWORD` for each `4 bytes` of data
+                    // - If a bind group has buffers it will consume a `DWORD`
+                    //   for the descriptor table
+                    // - If a bind group has samplers it will consume a `DWORD`
+                    //   for the descriptor table
+                    // - Each dynamic buffer will consume `2 DWORDs` for the
+                    //   root descriptor
+                    // - The special constants buffer counts as constants
+                    //
+                    // Since we can't know beforehand all root signatures that
+                    // will be created, the max size to be used for push
+                    // constants needs to be set to a reasonable number instead.
+                    //
+                    // Source: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits#memory-limits-and-costs
+                    max_push_constant_size: 128,
                     min_uniform_buffer_offset_alignment:
                         d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
                     min_storage_buffer_offset_alignment: 4,
diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs
index ca2f036430..daeaa96e11 100644
--- a/wgpu-hal/src/dx12/command.rs
+++ b/wgpu-hal/src/dx12/command.rs
@@ -149,6 +149,19 @@ impl super::CommandEncoder {
 
             match self.pass.root_elements[index as usize] {
                 super::RootElement::Empty => log::error!("Root index {} is not bound", index),
+                super::RootElement::Constant => {
+                    let info = self.pass.layout.root_constant_info.as_ref().unwrap();
+
+                    // Upload every 32-bit word of the push constant range as a root constant.
+                    for offset in info.range.clone() {
+                        let val = self.pass.constant_data[offset as usize];
+                        match self.pass.kind {
+                            Pk::Render => list.set_graphics_root_constant(index, val, offset),
+                            Pk::Compute => list.set_compute_root_constant(index, val, offset),
+                            Pk::Transfer => (),
+                        }
+                    }
+                }
                 super::RootElement::SpecialConstantBuffer {
                     base_vertex,
                     base_instance,
@@ -784,11 +797,26 @@ impl crate::CommandEncoder for super::CommandEncoder {
     }
     unsafe fn set_push_constants(
         &mut self,
-        _layout: &super::PipelineLayout,
+        layout: &super::PipelineLayout,
         _stages: wgt::ShaderStages,
-        _offset: u32,
-        _data: &[u32],
+        offset: u32,
+        data: &[u32],
     ) {
+        let info = layout.shared.root_constant_info.as_ref().unwrap();
+
+        self.pass.root_elements[info.root_index as usize] = super::RootElement::Constant;
+
+        // `offset` is expressed in bytes while `constant_data` is indexed in
+        // 32-bit words, so convert it before copying the new values in.
+        let first_word = offset as usize / 4;
+        self.pass.constant_data[first_word..first_word + data.len()].copy_from_slice(data);
+
+        if self.pass.layout.signature == layout.shared.signature {
+            self.pass.dirty_root_elements |= 1 << info.root_index;
+        } else {
+            // D3D12 requires full reset on signature change
+            self.reset_signature(&layout.shared);
+        };
     }
 
     unsafe fn insert_debug_marker(&mut self, label: &str) {
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index bdba9a6182..42bb343fa4 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -799,6 +799,40 @@ impl crate::Device for super::Device {
             hlsl::BindTarget::default(),
         );
         let mut parameters = Vec::new();
+        let mut push_constants_target = None;
+        let mut root_constant_info = None;
+
+        let mut pc_start = u32::MAX;
+        let mut pc_end = u32::MIN;
+
+        for pc in desc.push_constant_ranges.iter() {
+            pc_start = pc_start.min(pc.range.start);
+            pc_end = pc_end.max(pc.range.end);
+        }
+
+        if pc_start != u32::MAX && pc_end != u32::MIN {
+            let parameter_index = parameters.len();
+            let size = (pc_end - pc_start) / 4;
+            log::debug!(
+                "\tParam[{}] = push constant (count = {})",
+                parameter_index,
+                size,
+            );
+            parameters.push(native::RootParameter::constants(
+                native::ShaderVisibility::All,
+                native_binding(&bind_cbv),
+                size,
+            ));
+            let binding = bind_cbv.clone();
+            bind_cbv.register += 1;
+            root_constant_info = Some(super::RootConstantInfo {
+                root_index: parameter_index as u32,
+                range: (pc_start / 4)..(pc_end / 4),
+            });
+            push_constants_target = Some(binding);
+
+            bind_cbv.space += 1;
+        }
 
         // Collect the whole number of bindings we will create upfront.
         // It allows us to preallocate enough storage to avoid reallocation,
@@ -1054,6 +1088,7 @@ impl crate::Device for super::Device {
                 signature: raw,
                 total_root_elements: parameters.len() as super::RootIndex,
                 special_constants_root_index,
+                root_constant_info,
             },
             bind_group_infos,
             naga_options: hlsl::Options {
@@ -1061,7 +1096,7 @@ impl crate::Device for super::Device {
                 binding_map,
                 fake_missing_bindings: false,
                 special_constants_binding,
-                push_constants_target: None,
+                push_constants_target,
             },
         })
     }
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index 61d2ad9576..009a01d42d 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -268,6 +268,7 @@ struct PassResolve {
 #[derive(Clone, Copy)]
 enum RootElement {
     Empty,
+    Constant,
     SpecialConstantBuffer {
         base_vertex: i32,
         base_instance: u32,
@@ -294,6 +295,7 @@ struct PassState {
     resolves: ArrayVec,
     layout: PipelineLayoutShared,
     root_elements: [RootElement; MAX_ROOT_ELEMENTS],
+    constant_data: [u32; MAX_ROOT_ELEMENTS],
     dirty_root_elements: u64,
     vertex_buffers: [d3d12::D3D12_VERTEX_BUFFER_VIEW; crate::MAX_VERTEX_BUFFERS],
     dirty_vertex_buffers: usize,
@@ -314,8 +316,10 @@ impl PassState {
                 signature: native::RootSignature::null(),
                 total_root_elements: 0,
                 special_constants_root_index: None,
+                root_constant_info: None,
             },
             root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS],
+            constant_data: [0; MAX_ROOT_ELEMENTS],
             dirty_root_elements: 0,
             vertex_buffers: [unsafe { mem::zeroed() }; crate::MAX_VERTEX_BUFFERS],
             dirty_vertex_buffers: 0,
@@ -482,11 +486,18 @@ struct BindGroupInfo {
     dynamic_buffers: Vec,
 }
 
+#[derive(Clone)]
+struct RootConstantInfo {
+    root_index: RootIndex,
+    range: std::ops::Range<u32>,
+}
+
 #[derive(Clone)]
 struct PipelineLayoutShared {
     signature: native::RootSignature,
     total_root_elements: RootIndex,
     special_constants_root_index: Option,
+    root_constant_info: Option<RootConstantInfo>,
 }
 
 unsafe impl Send for PipelineLayoutShared {}