Merge branch 'trunk' into mesh-shading/wgpu-hal
SupaMaggie70Incorporated authored Feb 14, 2025
2 parents d34935e + 723abcb commit bd9fa21
Showing 128 changed files with 4,534 additions and 2,938 deletions.
77 changes: 74 additions & 3 deletions CHANGELOG.md
@@ -40,6 +40,77 @@ Bottom level categories:

## Unreleased

### Major Features

#### Hashmaps Removed from APIs

Both `PipelineCompilationOptions::constants` and `ShaderSource::Glsl::defines` now take
slices of key-value pairs instead of `HashMap`s. This prepares for `no_std` support and
keeps details such as the choice of `HashMap` hasher as implementation details. It also
makes it easier to create these structures inline.
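
For example, a call site that previously built a `HashMap` of override constants might now
pass a slice of pairs directly. The sketch below is illustrative only: `MY_CONSTANT` is a
placeholder name, and the exact element types of the slice may differ from what is shown here.

```diff
 let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
     // ...
     compilation_options: wgpu::PipelineCompilationOptions {
-        constants: &HashMap::from([("MY_CONSTANT".to_string(), 1.0)]),
+        constants: &[("MY_CONSTANT", 1.0)],
         ..Default::default()
     },
     // ...
 });
```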

By @cwfitzgerald in [#7133](https://github.com/gfx-rs/wgpu/pull/7133)

#### `device.poll` API Reworked

This release reworks the poll API significantly so that polling can return an error when it hits internal timeout limits.

`Maintain` has been renamed to `PollType`. Additionally, `poll` now returns a result containing information about what happened during the poll.

```diff
-pub fn wgpu::Device::poll(&self, maintain: wgpu::Maintain) -> wgpu::MaintainResult
+pub fn wgpu::Device::poll(&self, poll_type: wgpu::PollType) -> Result<wgpu::PollStatus, wgpu::PollError>

-device.poll(wgpu::Maintain::Poll);
+device.poll(wgpu::PollType::Poll).unwrap();
```

```rust
pub enum PollType<T> {
/// On wgpu-core based backends, block until the given submission has
/// completed execution, and any callbacks have been invoked.
///
/// On WebGPU, this has no effect. Callbacks are invoked from the
/// window event loop.
WaitForSubmissionIndex(T),
/// Same as WaitForSubmissionIndex but waits for the most recent submission.
Wait,
/// Check the device for a single time without blocking.
Poll,
}

pub enum PollStatus {
/// There are no active submissions in flight as of the beginning of the poll call.
/// Other submissions may have been queued on other threads during the call.
///
/// This implies that the given Wait was satisfied before the timeout.
QueueEmpty,

/// The requested Wait was satisfied before the timeout.
WaitSucceeded,

/// This was a poll.
Poll,
}

pub enum PollError {
/// The requested Wait timed out before the submission was completed.
Timeout,
}
```

> [!WARNING]
> As part of this change, WebGL's default behavior has changed. Previously, `device.poll(Wait)` appeared to function correctly; this was a quirk caused by the bug that these PRs fixed. Now it will always return `Timeout` if the submission has not already completed. Because many people rely on the old behavior on WebGL, there is a new option in `BackendOptions`. If you want the old behavior, set the following on instance creation:
>
> ```rust
> instance_desc.backend_options.gl.fence_behavior = wgpu::GlFenceBehavior::AutoFinish;
> ```
>
> You will lose the ability to know exactly when a submission has completed, but `device.poll(Wait)` will behave the same as it does on native.

By @cwfitzgerald in [#6942](https://github.com/gfx-rs/wgpu/pull/6942).
By @cwfitzgerald in [#7030](https://github.com/gfx-rs/wgpu/pull/7030).
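
For reference, handling the new return value might look like this. This is a minimal sketch based on the enums above; how you react to each case is up to your application.

```rust
match device.poll(wgpu::PollType::Wait) {
    // No submissions were in flight when the poll began.
    Ok(wgpu::PollStatus::QueueEmpty) => {}
    // The requested wait was satisfied before the internal timeout.
    Ok(wgpu::PollStatus::WaitSucceeded) => {}
    // A non-blocking check was performed (corresponds to `PollType::Poll`).
    Ok(wgpu::PollStatus::Poll) => {}
    // The wait timed out before the submission completed.
    Err(wgpu::PollError::Timeout) => panic!("GPU work timed out"),
}
```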

### New Features

#### General
@@ -53,6 +124,7 @@ Bottom level categories:
- Added `Buffer` methods corresponding to `BufferSlice` methods, so you can skip creating a `BufferSlice` when it offers no benefit, and `BufferSlice::slice()` for sub-slicing a slice. By @kpreid in [#7123](https://github.com/gfx-rs/wgpu/pull/7123).
- Add `util::StagingBelt::allocate()` so the staging belt can be used to write textures. By @kpreid in [#6900](https://github.com/gfx-rs/wgpu/pull/6900).
- Added `CommandEncoder::transition_resources()` for native API interop and to allow users to slightly optimize barriers. By @JMS55 in [#6678](https://github.com/gfx-rs/wgpu/pull/6678).
#### Naga
@@ -72,7 +144,7 @@ Bottom level categories:
##### Split up `Features` internally

Internally split up the `Features` struct and recombine it using a macro. There should be no breaking
changes from this. This also means there are namespaces (in addition to the old `Features::*`) for all wgpu-specific
features and WebGPU features (`FeaturesWGPU` and `FeaturesWebGPU` respectively), as well as `Features::from_internal_flags`, which
allows you to be explicit about whether the features you need are also available on the web.
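
As a rough illustration, building a feature set from the namespaced flags could look like the sketch below. The flag name and the argument order of `from_internal_flags` are assumptions based on the description above, not a confirmed signature.

```rust
// Hypothetical usage: be explicit about which features must also exist on the web.
let features = wgpu::Features::from_internal_flags(
    wgpu::FeaturesWebGPU::FLOAT32_FILTERABLE, // available on WebGPU as well
    wgpu::FeaturesWGPU::empty(),              // no native-only features required
);
assert!(features.contains(wgpu::Features::FLOAT32_FILTERABLE));
```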
@@ -108,7 +180,7 @@ By @brodycj in [#6924](https://github.com/gfx-rs/wgpu/pull/6924).
- Reduce downlevel `max_color_attachments` limit from 8 to 4 for better GLES compatibility. By @adrian17 in [#6994](https://github.com/gfx-rs/wgpu/pull/6994).
- Fix drop order in `Surface`. By @ed-2100 in [#6997](https://github.com/gfx-rs/wgpu/pull/6997)
- Fix a possible deadlock within `Queue::write_texture`. By @metamuffin in [#7004](https://github.com/gfx-rs/wgpu/pull/7004)
- Fix building a BLAS with a transform buffer by adding a flag to indicate usage of the transform buffer. By @Vecvec in
[#7062](https://github.com/gfx-rs/wgpu/pull/7062).
#### Vulkan
@@ -336,7 +408,6 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148]
- Image atomic support in shaders. By @atlv24 in [#6706](https://github.com/gfx-rs/wgpu/pull/6706)
- 64 bit image atomic support in shaders. By @atlv24 in [#5537](https://github.com/gfx-rs/wgpu/pull/5537)
- Add `no_std` support to `wgpu-types`. By @bushrat011899 in [#6892](https://github.com/gfx-rs/wgpu/pull/6892).
- Added `CommandEncoder::transition_resources()` for native API interop, and allowing users to slightly optimize barriers. By @JMS55 in [6678](https://github.com/gfx-rs/wgpu/pull/6678).

##### Vulkan

1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

6 changes: 5 additions & 1 deletion benches/benches/bind_groups.rs
@@ -152,7 +152,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();

drop(bind_group);
state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
18 changes: 15 additions & 3 deletions benches/benches/computepass.rs
@@ -486,7 +486,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();
}

state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
@@ -531,7 +535,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();

state.device_state.queue.submit(buffers);
state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
@@ -573,7 +581,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();

state.device_state.queue.submit([buffer]);
state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
18 changes: 15 additions & 3 deletions benches/benches/renderpass.rs
@@ -492,7 +492,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();
}

state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
@@ -535,7 +539,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();

state.device_state.queue.submit(buffers);
state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
@@ -571,7 +579,11 @@ fn run_bench(ctx: &mut Criterion) {
duration += start.elapsed();

state.device_state.queue.submit([buffer]);
state.device_state.device.poll(wgpu::Maintain::Wait);
state
.device_state
.device
.poll(wgpu::PollType::Wait)
.unwrap();
}

duration
2 changes: 1 addition & 1 deletion benches/benches/resource_creation.rs
@@ -61,7 +61,7 @@ fn run_bench(ctx: &mut Criterion) {
drop(buffers);

state.queue.submit([]);
state.device.poll(wgpu::Maintain::Wait);
state.device.poll(wgpu::PollType::Wait).unwrap();
}

duration
2 changes: 1 addition & 1 deletion deno_webgpu/buffer.rs
@@ -161,7 +161,7 @@ impl GPUBuffer {
while !*done.borrow() {
{
self.instance
.device_poll(self.device, wgpu_types::Maintain::wait())
.device_poll(self.device, wgpu_types::PollType::wait())
.unwrap();
}
tokio::time::sleep(Duration::from_millis(10)).await;
8 changes: 4 additions & 4 deletions deno_webgpu/device.rs
@@ -615,7 +615,7 @@ impl GPUDevice {
#[fast]
fn stop_capture(&self) {
self.instance
.device_poll(self.id, wgpu_types::Maintain::wait())
.device_poll(self.id, wgpu_types::PollType::wait())
.unwrap();
self.instance.device_stop_capture(self.id);
}
@@ -632,7 +632,7 @@ impl GPUDevice {
stage: ProgrammableStageDescriptor {
module: descriptor.compute.module.id,
entry_point: descriptor.compute.entry_point.map(Into::into),
constants: Cow::Owned(descriptor.compute.constants.into_iter().collect()),
constants: descriptor.compute.constants.into_iter().collect(),
zero_initialize_workgroup_memory: true,
},
cache: None,
@@ -660,7 +660,7 @@ impl GPUDevice {
stage: ProgrammableStageDescriptor {
module: descriptor.vertex.module.id,
entry_point: descriptor.vertex.entry_point.map(Into::into),
constants: Cow::Owned(descriptor.vertex.constants.into_iter().collect()),
constants: descriptor.vertex.constants.into_iter().collect(),
zero_initialize_workgroup_memory: true,
},
buffers: Cow::Owned(
@@ -753,7 +753,7 @@ impl GPUDevice {
stage: ProgrammableStageDescriptor {
module: fragment.module.id,
entry_point: fragment.entry_point.map(Into::into),
constants: Cow::Owned(fragment.constants.into_iter().collect()),
constants: fragment.constants.into_iter().collect(),
zero_initialize_workgroup_memory: true,
},
targets: Cow::Owned(
4 changes: 1 addition & 3 deletions examples/features/src/framework.rs
@@ -592,9 +592,7 @@ impl<E: Example + wgpu::WasmNotSendSync> From<ExampleTestParams<E>>

let dst_buffer_slice = dst_buffer.slice(..);
dst_buffer_slice.map_async(wgpu::MapMode::Read, |_| ());
ctx.async_poll(wgpu::Maintain::wait())
.await
.panic_on_timeout();
ctx.async_poll(wgpu::PollType::wait()).await.unwrap();
let bytes = dst_buffer_slice.get_mapped_range().to_vec();

wgpu_test::image::compare_image_output(
2 changes: 1 addition & 1 deletion examples/features/src/hello_synchronization/mod.rs
@@ -183,7 +183,7 @@ async fn get_data<T: bytemuck::Pod>(
let buffer_slice = staging_buffer.slice(..);
let (sender, receiver) = flume::bounded(1);
buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
device.poll(wgpu::Maintain::wait()).panic_on_timeout();
device.poll(wgpu::PollType::wait()).unwrap();
receiver.recv_async().await.unwrap().unwrap();
output.copy_from_slice(bytemuck::cast_slice(&buffer_slice.get_mapped_range()[..]));
staging_buffer.unmap();
2 changes: 1 addition & 1 deletion examples/features/src/hello_workgroups/mod.rs
@@ -172,7 +172,7 @@ async fn get_data<T: bytemuck::Pod>(
let buffer_slice = staging_buffer.slice(..);
let (sender, receiver) = flume::bounded(1);
buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
device.poll(wgpu::Maintain::wait()).panic_on_timeout();
device.poll(wgpu::PollType::wait()).unwrap();
receiver.recv_async().await.unwrap().unwrap();
output.copy_from_slice(bytemuck::cast_slice(&buffer_slice.get_mapped_range()[..]));
staging_buffer.unmap();
2 changes: 1 addition & 1 deletion examples/features/src/mipmap/mod.rs
@@ -410,7 +410,7 @@ impl crate::framework::Example for Example {
.slice(..)
.map_async(wgpu::MapMode::Read, |_| ());
// Wait for device to be done rendering mipmaps
device.poll(wgpu::Maintain::wait()).panic_on_timeout();
device.poll(wgpu::PollType::wait()).unwrap();
// This is guaranteed to be ready.
let timestamp_view = query_sets
.mapping_buffer
2 changes: 1 addition & 1 deletion examples/features/src/ray_shadows/mod.rs
@@ -355,7 +355,7 @@ impl crate::framework::Example for Example {
rpass.draw_indexed(0..12, 0, 0..1);
}
queue.submit(Some(encoder.finish()));
device.poll(wgpu::Maintain::Wait);
device.poll(wgpu::PollType::Wait).unwrap();
}
}

2 changes: 1 addition & 1 deletion examples/features/src/render_to_texture/mod.rs
@@ -132,7 +132,7 @@ async fn run(_path: Option<String>) {
let buffer_slice = output_staging_buffer.slice(..);
let (sender, receiver) = flume::bounded(1);
buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
device.poll(wgpu::Maintain::wait()).panic_on_timeout();
device.poll(wgpu::PollType::wait()).unwrap();
receiver.recv_async().await.unwrap().unwrap();
log::info!("Output buffer mapped.");
{
7 changes: 2 additions & 5 deletions examples/features/src/repeated_compute/mod.rs
@@ -106,11 +106,8 @@ async fn compute(local_buffer: &mut [u32], context: &WgpuContext) {
// In order for the mapping to be completed, one of three things must happen.
// One of those can be calling `Device::poll`. This isn't necessary on the web as devices
// are polled automatically but natively, we need to make sure this happens manually.
// `Maintain::Wait` will cause the thread to wait on native but not on WebGpu.
context
.device
.poll(wgpu::Maintain::wait())
.panic_on_timeout();
// `PollType::Wait` will cause the thread to wait on native but not on WebGpu.
context.device.poll(wgpu::PollType::wait()).unwrap();
log::info!("Device polled.");
// Now we await the receiving and panic if anything went wrong because we're lazy.
receiver.recv_async().await.unwrap().unwrap();
2 changes: 1 addition & 1 deletion examples/features/src/storage_texture/mod.rs
@@ -143,7 +143,7 @@ async fn run(_path: Option<String>) {
let buffer_slice = output_staging_buffer.slice(..);
let (sender, receiver) = flume::bounded(1);
buffer_slice.map_async(wgpu::MapMode::Read, move |r| sender.send(r).unwrap());
device.poll(wgpu::Maintain::wait()).panic_on_timeout();
device.poll(wgpu::PollType::wait()).unwrap();
receiver.recv_async().await.unwrap().unwrap();
log::info!("Output buffer mapped");
{
2 changes: 1 addition & 1 deletion examples/features/src/timestamp_queries/mod.rs
@@ -161,7 +161,7 @@ impl Queries {
self.destination_buffer
.slice(..)
.map_async(wgpu::MapMode::Read, |_| ());
device.poll(wgpu::Maintain::wait()).panic_on_timeout();
device.poll(wgpu::PollType::wait()).unwrap();

let timestamps = {
let timestamp_view = self
2 changes: 1 addition & 1 deletion examples/standalone/01_hello_compute/src/main.rs
@@ -243,7 +243,7 @@ fn main() {

// Wait for the GPU to finish working on the submitted work. This doesn't work on WebGPU, so we would need
// to rely on the callback to know when the buffer is mapped.
device.poll(wgpu::Maintain::Wait);
device.poll(wgpu::PollType::Wait).unwrap();

// We can now read the data from the buffer.
let data = buffer_slice.get_mapped_range();