From a49c923672b65dbc9f32baf4c4dc679f62b53af6 Mon Sep 17 00:00:00 2001
From: Jerome Humbert
Date: Sun, 12 Jan 2025 11:49:41 +0000
Subject: [PATCH] Invalidate more bind groups on changes (#414)

Invalidate the simulation bind groups containing the particle group
buffer when that buffer is reallocated, or when a group changes position
inside the buffer. Previously, stale bind groups kept referencing the
old buffer when the group(s) changed, causing artifacts in ribbon
rendering in particular.

Similarly, invalidate the bind groups referencing a property buffer when
that buffer is reallocated, for the same reasons. This fixes a crash
previously introduced by the new property buffer change.

---
 CHANGELOG.md                     |  10 +-
 src/lib.rs                       |  50 +-
 src/plugin.rs                    |  38 +-
 src/render/aligned_buffer_vec.rs |  17 +-
 src/render/buffer_table.rs       | 198 ++++++-
 src/render/effect_cache.rs       |  81 ++-
 src/render/mod.rs                | 924 ++++++++++++++++---------------
 src/render/property.rs           |  51 +-
 8 files changed, 832 insertions(+), 537 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 336725a..804013c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,14 +9,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 - Changed the `ParticleEffect` component to require the `CompiledParticleEffect` and `SyncToRenderWorld` components.
 - Renamed `PropertyLayout::size()` to `cpu_size()` to prevent confusion.
-  The GPU size is given by `min_binding_size()`, and can be greater.
+  The GPU size is given by `min_binding_size()`, and can be greater, because WGSL padding rules differ from Rust's.
 - `PropertyLayout::generate_code()` now returns an `Option<String>` for clarity,
   which is `None` if the layout is empty (as opposed to an empty string previously).
+- Removed effects are now deallocated from the render world before the extract schedule, instead of during it.
+  This should have no consequence, unless you were using a system inserted explicitly before Hanabi's extraction.
 
 ### Fixed
 
 - Fixed a bug with opaque effects not rendering.
 
+### Removed
+
+- Removed the `EffectSystems::GatherRemovedEffects` system set.
+  Removed effects are now processed via observers, which execute during the render world sync just before extraction.
+  Removed the `RemovedEffectsEvent` type as well.
+
 ## [0.14.0] 2024-12-09
 
 ### Added

diff --git a/src/lib.rs b/src/lib.rs
index 9ecd18c..8ad3df0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -172,11 +172,7 @@ use std::fmt::Write as _;
 
 #[cfg(feature = "2d")]
 use bevy::math::FloatOrd;
-use bevy::{
-    prelude::*,
-    render::sync_world::{MainEntity, RenderEntity, SyncToRenderWorld},
-    utils::HashSet,
-};
+use bevy::{prelude::*, render::sync_world::SyncToRenderWorld, utils::HashSet};
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 
@@ -1564,8 +1560,7 @@ fn compile_effects(
     {
         // If the ParticleEffect didn't change, and the compiled one is for the correct
         // asset, then there's nothing to do.
-        let need_rebuild =
-            effect.is_changed() || material.as_ref().map_or(false, |r| r.is_changed());
+        let need_rebuild = effect.is_changed() || material.as_ref().is_some_and(|r| r.is_changed());
         if !need_rebuild && (compiled_effect.asset == effect.handle) {
             continue;
         }
@@ -1633,47 +1628,6 @@ fn update_properties_from_asset(
     }
 }
 
-/// Event sent by [`gather_removed_effects()`] with the list of effects removed
-/// during this frame.
-///
-/// The event is consumed during the extract phase by the [`extract_effects()`]
-/// system, to clean-up unused GPU resources.
-/// -/// [`extract_effects()`]: crate::render::extract_effects -#[derive(Event)] -struct RemovedEffectsEvent { - entities: Vec<(MainEntity, Option)>, -} - -/// Gather all the removed [`ParticleEffect`] components to allow cleaning-up -/// unused GPU resources. -/// -/// This system executes inside the [`EffectSystems::GatherRemovedEffects`] -/// set of the [`PostUpdate`] schedule. -fn gather_removed_effects( - q_effects: Query, With>, - mut removed_effects: RemovedComponents, - mut removed_effects_event_writer: EventWriter, -) { - let entities: Vec = removed_effects.read().collect(); - if !entities.is_empty() { - let entities = entities - .iter() - .map(|main_entity| { - ( - MainEntity::from(*main_entity), - q_effects - .get(*main_entity) - .ok() - .flatten() - .map(RenderEntity::from), - ) - }) - .collect(); - removed_effects_event_writer.send(RemovedEffectsEvent { entities }); - } -} - #[cfg(test)] mod tests { use std::ops::DerefMut; diff --git a/src/plugin.rs b/src/plugin.rs index fe44afa..e824951 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -22,22 +22,23 @@ use bevy::{ use crate::asset::EffectAssetLoader; use crate::{ asset::EffectAsset, - compile_effects, gather_removed_effects, + compile_effects, properties::EffectProperties, render::{ - add_remove_effects, batch_effects, extract_effect_events, extract_effects, - prepare_bind_groups, prepare_effects, prepare_gpu_resources, queue_effects, - DispatchIndirectPipeline, DrawEffects, EffectAssetEvents, EffectBindGroups, EffectCache, - EffectsMeta, ExtractedEffects, GpuDispatchIndirect, GpuParticleGroup, - GpuRenderEffectMetadata, GpuRenderGroupIndirect, GpuSpawnerParams, ParticlesInitPipeline, - ParticlesRenderPipeline, ParticlesUpdatePipeline, PropertyCache, ShaderCache, SimParams, - StorageType as _, VfxSimulateDriverNode, VfxSimulateNode, + add_effects, batch_effects, extract_effect_events, extract_effects, + on_remove_cached_effect, on_remove_cached_properties, prepare_bind_groups, prepare_effects, + prepare_gpu_resources, queue_effects, DispatchIndirectPipeline, DrawEffects, + EffectAssetEvents, EffectBindGroups, EffectCache, EffectsMeta, ExtractedEffects, + GpuDispatchIndirect, GpuParticleGroup, GpuRenderEffectMetadata, GpuRenderGroupIndirect, + GpuSpawnerParams, ParticlesInitPipeline, ParticlesRenderPipeline, ParticlesUpdatePipeline, + PropertyCache, ShaderCache, SimParams, StorageType as _, VfxSimulateDriverNode, + VfxSimulateNode, }, spawn::{self, Random}, tick_initializers, time::effect_simulation_time_system, update_properties_from_asset, CompiledParticleEffect, EffectSimulation, ParticleEffect, - RemovedEffectsEvent, Spawner, + Spawner, }; /// Labels for the Hanabi systems. @@ -69,16 +70,6 @@ pub enum EffectSystems { /// be taken into account in the same frame. UpdatePropertiesFromAsset, - /// Gather all removed [`ParticleEffect`] components during the - /// [`PostUpdate`] set, to clean-up unused GPU resources. - /// - /// Systems deleting entities with a [`ParticleEffect`] component should run - /// before this set if they want the particle effect is cleaned-up during - /// the same frame. - /// - /// [`ParticleEffect`]: crate::ParticleEffect - GatherRemovedEffects, - /// Prepare effect assets for the extracted effects. /// /// Part of Bevy's own [`RenderSet::PrepareAssets`]. 
@@ -191,7 +182,6 @@ impl Plugin for HanabiPlugin { fn build(&self, app: &mut App) { // Register asset app.init_asset::() - .add_event::() .insert_resource(Random(spawn::new_rng())) .init_resource::() .init_resource::>() @@ -203,7 +193,6 @@ impl Plugin for HanabiPlugin { // ComputedVisibility was updated. .after(VisibilitySystems::VisibilityPropagate), EffectSystems::CompileEffects, - EffectSystems::GatherRemovedEffects, ), ) .configure_sets( @@ -222,7 +211,6 @@ impl Plugin for HanabiPlugin { tick_initializers.in_set(EffectSystems::TickSpawners), compile_effects.in_set(EffectSystems::CompileEffects), update_properties_from_asset.in_set(EffectSystems::UpdatePropertiesFromAsset), - gather_removed_effects.in_set(EffectSystems::GatherRemovedEffects), check_visibility:: .in_set(VisibilitySystems::CheckVisibility), ), @@ -314,7 +302,7 @@ impl Plugin for HanabiPlugin { .add_systems( Render, ( - (add_remove_effects, prepare_effects, batch_effects) + (add_effects, prepare_effects, batch_effects) .chain() .in_set(EffectSystems::PrepareEffectAssets) // Ensure we run after Bevy prepared the render Mesh @@ -331,6 +319,10 @@ impl Plugin for HanabiPlugin { .after(prepare_assets::), ), ); + render_app.world_mut().add_observer(on_remove_cached_effect); + render_app + .world_mut() + .add_observer(on_remove_cached_properties); // Register the draw function for drawing the particles. This will be called // during the main 2D/3D pass, at the Transparent2d/3d phase, after the diff --git a/src/render/aligned_buffer_vec.rs b/src/render/aligned_buffer_vec.rs index 50bf687..5a47b58 100644 --- a/src/render/aligned_buffer_vec.rs +++ b/src/render/aligned_buffer_vec.rs @@ -388,10 +388,15 @@ impl HybridAlignedBufferVec { self.capacity } - // #[inline] - // pub fn len(&self) -> usize { - // self.values.len() - // } + /// Current buffer size, in bytes. + /// + /// This represents the size of the CPU data uploaded to GPU. Pending a GPU + /// buffer re-allocation or re-upload, this size might differ from the + /// actual GPU buffer size. But they're eventually consistent. + #[inline] + pub fn len(&self) -> usize { + self.values.len() + } /// Alignment, in bytes, of all the elements. #[allow(dead_code)] @@ -508,7 +513,6 @@ impl HybridAlignedBufferVec { // Insert into existing space if let Some((_, index)) = best_slot { let row_range = self.free_rows.remove(index); - debug_assert_eq!(row_range.0.start as usize % self.item_align, 0); let offset = row_range.0.start as usize * self.item_align; let free_size = (row_range.0.end - row_range.0.start) as usize * self.item_align; let size = src.len(); @@ -535,7 +539,8 @@ impl HybridAlignedBufferVec { let size = src.len(); let new_capacity = offset + size; if new_capacity > self.values.capacity() { - self.values.reserve(new_capacity - self.values.len()) + let additional = new_capacity - self.values.len(); + self.values.reserve(additional) } // Insert padding if needed diff --git a/src/render/buffer_table.rs b/src/render/buffer_table.rs index a9d6c78..4adae5f 100644 --- a/src/render/buffer_table.rs +++ b/src/render/buffer_table.rs @@ -119,6 +119,10 @@ pub struct BufferTable { /// constraints. aligned_size: usize, /// Capacity of the buffer, in number of rows. + /// + /// This is the expected capacity, as requested by CPU side allocations and + /// deallocations. The GPU buffer might not have been resized yet to handle + /// it, so might be allocated with a different size. 
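+    ///
+    /// Illustrative flow (a sketch only; exact names follow this module's internal API):
+    ///
+    /// ```ignore
+    /// let id = table.insert(row);          // CPU-side capacity grows immediately
+    /// table.allocate_gpu(&device, &queue); // the GPU buffer is only (re)allocated here
+    /// ```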
     capacity: u32,
     /// Size of the "active" portion of the table, which includes allocated rows
     /// and any row in the free list. All other rows in the
@@ -344,9 +348,7 @@ impl BufferTable<T> {
             self.active_count += 1;
             index
         } else {
-            // Note: this is inefficient O(n) but we need to apply the same logic as the
-            // EffectCache because we rely on indices being in sync.
-            self.free_indices.remove(0)
+            self.free_indices.pop().unwrap()
         };
         let allocated_count = self
             .buffer
             .as_ref()
             .map(|ab| ab.allocated_count())
             .unwrap_or(0);
@@ -373,6 +375,98 @@ impl BufferTable<T> {
         BufferTableId(index)
     }
 
+    /// Insert several new contiguous rows into the table.
+    ///
+    /// For performance reasons, this buffers the row content on the CPU until
+    /// the next GPU update, to minimize the number of CPU to GPU transfers.
+    ///
+    /// # Returns
+    ///
+    /// Returns the index of the first entry. The other entries follow right
+    /// after it.
+    pub fn insert_contiguous(
+        &mut self,
+        values: impl ExactSizeIterator<Item = T>,
+    ) -> BufferTableId {
+        let count = values.len() as u32;
+        trace!(
+            "Inserting {} contiguous values into table buffer '{}' with {} free indices, capacity: {}, active_count: {}",
+            count,
+            self.safe_name(),
+            self.free_indices.len(),
+            self.capacity,
+            self.active_count
+        );
+        let first_index = if self.free_indices.is_empty() {
+            let index = self.active_count;
+            self.capacity = self.capacity.max(index + count);
+            debug_assert!(index < self.capacity);
+            self.active_count += count;
+            index
+        } else {
+            // Look for a run of 'count' contiguous free indices.
+            let mut s = 0;
+            let mut n = 1;
+            let mut i = 1;
+            while n < count && i < self.free_indices.len() {
+                debug_assert!(self.free_indices[i] > self.free_indices[i - 1]); // always sorted
+                if self.free_indices[i] == self.free_indices[i - 1] + 1 {
+                    // contiguous; grow the current run
+                    n += 1;
+                } else {
+                    // non-contiguous; restart a new run from this index
+                    s = i;
+                    n = 1;
+                }
+                i += 1;
+            }
+            if n == count {
+                // Found a run of 'count' consecutive free entries. Consume it.
+                let index = self.free_indices[s];
+                self.free_indices.splice(s..s + count as usize, []);
+                index
+            } else {
+                // No free range of 'count' consecutive entries. Allocate at the end instead.
+                let index = self.active_count;
+                self.capacity = self.capacity.max(index + count);
+                debug_assert!(index < self.capacity);
+                self.active_count += count;
+                index
+            }
+        };
+        let allocated_count = self
+            .buffer
+            .as_ref()
+            .map(|ab| ab.allocated_count())
+            .unwrap_or(0);
+        trace!(
+            "Allocated {} contiguous rows {}..{}, capacity: {}, active_count: {}, allocated_count: {}",
+            count,
+            first_index,
+            first_index + count,
+            self.capacity,
+            self.active_count,
+            allocated_count
+        );
+        for (i, value) in values.enumerate() {
+            let index = first_index + i as u32;
+            if index < allocated_count {
+                self.pending_values.alloc().init((index, value));
+            } else {
+                let extra_index = index - allocated_count;
+                if extra_index < self.extra_pending_values.len() as u32 {
+                    self.extra_pending_values[extra_index as usize] = value;
+                } else {
+                    self.extra_pending_values.alloc().init(value);
+                }
+            }
+        }
+        BufferTableId(first_index)
+    }
+
     /// Remove a row from the table.
     #[allow(dead_code)]
     pub fn remove(&mut self, id: BufferTableId) {
@@ -395,6 +489,54 @@ impl BufferTable<T> {
         }
     }
 
+    /// Remove a range of rows from the table.
+    #[allow(dead_code)]
+    pub fn remove_range(&mut self, first: BufferTableId, count: u32) {
+        let index = first.0;
+        assert!(index + count <= self.active_count);
+
+        // If this is the last item in the active zone, just shrink the active zone
+        // (implicit free list).
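+        // E.g. with active_count == 6, removing rows 4..6 merely truncates the
+        // active zone to 4 rows, while removing rows 2..4 instead records rows 2
+        // and 3 into the sorted free list for later reuse (illustrative values).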
+        if index == self.active_count - count {
+            self.active_count -= count;
+            self.capacity -= count;
+
+            // Also try to remove free indices
+            if self.free_indices.len() as u32 == self.active_count {
+                // Easy case: everything is free, clear everything
+                self.free_indices.clear();
+                self.active_count = 0;
+                self.capacity = 0;
+            } else {
+                // Some rows are still allocated. Keep shrinking the active zone
+                // while its last row is in the (sorted) free list.
+                while let Some(idx) = self.free_indices.pop() {
+                    if idx + 1 == self.active_count {
+                        self.active_count -= 1;
+                        self.capacity -= 1;
+                    } else {
+                        self.free_indices.push(idx);
+                        break;
+                    }
+                }
+            }
+        } else {
+            // This is very inefficient but we need to apply the same logic as the
+            // EffectCache because we rely on indices being in sync.
+            let pos = self
+                .free_indices
+                .binary_search(&index) // will fail
+                .unwrap_or_else(|e| e); // will get position of insertion
+            self.free_indices.splice(pos..pos, index..(index + count));
+        }
+
+        debug_assert!(
+            (self.free_indices.is_empty() && self.active_count == 0)
+                || (self.free_indices.len() as u32) < self.active_count
+        );
+    }
+
     /// Allocate any GPU buffer if needed, based on the most recent capacity
     /// requested.
     ///
@@ -608,7 +750,7 @@ mod tests {
     }
 
     #[test]
-    fn table_sizes() {
+    fn buffer_table_sizes() {
         // Rust
         assert_eq!(std::mem::size_of::<GpuDummy>(), 12);
         assert_eq!(std::mem::align_of::<GpuDummy>(), 4);
@@ -692,6 +834,54 @@ mod tests {
             assert_eq!(table.len(), 1);
         }
     }
+
+    #[test]
+    fn buffer_table_insert() {
+        let mut table =
+            BufferTable::<GpuDummy>::new(BufferUsages::STORAGE, NonZeroU64::new(32), None);
+
+        let id1 = table.insert(GpuDummy::default());
+        assert_eq!(id1.0, 0);
+        assert_eq!(table.active_count, 1);
+        assert!(table.free_indices.is_empty());
+
+        let id2 = table.insert(GpuDummy::default());
+        assert_eq!(id2.0, 1);
+        assert_eq!(table.active_count, 2);
+        assert!(table.free_indices.is_empty());
+
+        table.remove(id1);
+        assert_eq!(table.active_count, 2);
+        assert_eq!(table.free_indices.len(), 1);
+        assert_eq!(table.free_indices[0], 0);
+
+        let id3 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
+        assert_eq!(id3.0, 2); // at the end (doesn't fit in free slot #0)
+        assert_eq!(table.active_count, 4);
+        assert_eq!(table.free_indices.len(), 1);
+        assert_eq!(table.free_indices[0], 0);
+
+        table.remove(id2);
+        assert_eq!(table.active_count, 4);
+        assert_eq!(table.free_indices.len(), 2);
+        assert_eq!(table.free_indices[0], 0);
+        assert_eq!(table.free_indices[1], 1);
+
+        let id4 = table.insert_contiguous([GpuDummy::default(); 2].into_iter());
+        assert_eq!(id4.0, 0); // this time it fits into slots #0-#1
+        assert_eq!(table.active_count, 4);
+        assert!(table.free_indices.is_empty());
+
+        table.remove_range(id4, 2);
+        assert_eq!(table.active_count, 4);
+        assert_eq!(table.free_indices.len(), 2);
+        assert_eq!(table.free_indices[0], 0);
+        assert_eq!(table.free_indices[1], 1);
+
+        table.remove_range(id3, 2);
+        assert_eq!(table.active_count, 0);
+        assert!(table.free_indices.is_empty());
+    }
 }
 
 #[cfg(all(test, feature = "gpu_tests"))]

diff --git a/src/render/effect_cache.rs b/src/render/effect_cache.rs
index c9de428..ab1a4d5 100644
--- a/src/render/effect_cache.rs
+++ b/src/render/effect_cache.rs
@@ -53,6 +53,7 @@ impl PartialOrd for EffectSlices {
 
 /// Describes all particle groups' slices of particles in the particle buffer
 /// for a single effect, as well as the [`DispatchBufferIndices`].
+#[derive(Debug)]
 pub struct SlicesRef {
     pub ranges: Vec<u32>,
     /// Size of a single item in the slice. Currently equal to the unique size
@@ -117,6 +118,13 @@ impl SliceRef {
     }
 }
 
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
+struct SimulateBindGroupKey {
+    buffer: Option<BufferId>,
+    offset: u32,
+    size: u32,
+}
+
 /// Storage for a single kind of effects, sharing the same buffer(s).
 ///
 /// Currently only accepts a single unique item size (particle size), fixed at
@@ -158,12 +166,17 @@ pub struct EffectBuffer {
     /// Bind group for the per-buffer data (group @1) of the init and update
     /// passes.
     simulate_bind_group: Option<BindGroup>,
+    /// Key the `simulate_bind_group` was created from.
+    simulate_bind_group_key: SimulateBindGroupKey,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum BufferState {
     /// The buffer is in use, with allocated resources.
     Used,
+    /// Like `Used`, but the buffer was resized, so any bind group referencing
+    /// it is now invalid.
+    Resized,
     /// The buffer is free (its resources were deallocated).
     Free,
 }
@@ -368,6 +381,7 @@ impl EffectBuffer {
             free_slices: vec![],
             asset,
             simulate_bind_group: None,
+            simulate_bind_group_key: default(),
         }
     }
@@ -428,12 +442,26 @@ impl EffectBuffer {
         &mut self,
         buffer_index: u32,
         render_device: &RenderDevice,
-        group_binding: BufferBinding,
+        particle_group_buffer: &Buffer,
+        particle_group_offset: u64,
+        particle_group_size: NonZeroU64,
     ) {
-        if self.simulate_bind_group.is_some() {
+        let key = SimulateBindGroupKey {
+            buffer: Some(particle_group_buffer.id()),
+            offset: particle_group_offset as u32,
+            size: particle_group_size.get() as u32,
+        };
+
+        if self.simulate_bind_group.is_some() && self.simulate_bind_group_key == key {
             return;
         }
 
+        let group_binding = BufferBinding {
+            buffer: particle_group_buffer,
+            offset: particle_group_offset,
+            size: Some(particle_group_size),
+        };
+
         let layout = self.particle_layout_bind_group_sim();
         let label = format!("hanabi:bind_group_sim_batch{}", buffer_index);
         let bindings = [
@@ -457,19 +485,32 @@ impl EffectBuffer {
         );
         let bind_group = render_device.create_bind_group(Some(&label[..]), layout, &bindings);
         self.simulate_bind_group = Some(bind_group);
+        self.simulate_bind_group_key = key;
     }
 
-    /// Clear the bind group for the init and update passes.
-    #[allow(dead_code)]
-    pub fn clear_sim_bind_group(&mut self) {
+    /// Invalidate any existing simulate bind group.
+    ///
+    /// Invalidate any bind group previously created by
+    /// [`create_sim_bind_group()`], generally because a buffer was
+    /// re-allocated. This forces the bind group to be re-created the next
+    /// time [`create_sim_bind_group()`] is called.
+    ///
+    /// [`create_sim_bind_group()`]: self::EffectBuffer::create_sim_bind_group
+    fn invalidate_sim_bind_group(&mut self) {
         self.simulate_bind_group = None;
+        self.simulate_bind_group_key = default();
     }
 
     /// Return the cached bind group for the init and update passes.
     ///
     /// This is the per-buffer bind group at binding @1 which binds all
     /// per-buffer resources shared by all effect instances batched in a single
-    /// buffer.
+    /// buffer. The bind group is created by [`create_sim_bind_group()`], and
+    /// cached until a call to [`invalidate_sim_bind_group()`] clears the cached
+    /// reference.
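+    ///
+    /// Sketch of the intended call sequence (internal API, for illustration):
+    ///
+    /// ```ignore
+    /// buffer.create_sim_bind_group(index, &device, &group_buffer, offset, size);
+    /// let bg = buffer.sim_bind_group().unwrap(); // cached
+    /// buffer.invalidate_sim_bind_group();        // e.g. after a re-allocation
+    /// assert!(buffer.sim_bind_group().is_none());
+    /// ```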
+ /// + /// [`create_sim_bind_group()`]: self::EffectBuffer::create_sim_bind_group + /// [`invalidate_sim_bind_group()`]: self::EffectBuffer::invalidate_sim_bind_group pub fn sim_bind_group(&self) -> Option<&BindGroup> { self.simulate_bind_group.as_ref() } @@ -642,7 +683,7 @@ impl EffectCacheId { /// Stores various data, including the buffer index and slice boundaries within /// the buffer for all groups in a single effect. -#[derive(Component)] +#[derive(Debug, Component)] pub(crate) struct CachedEffect { /// The index of the [`EffectBuffer`]. pub(crate) buffer_index: u32, @@ -700,7 +741,7 @@ pub(crate) struct DispatchBufferIndices { /// There will be one such dispatch buffer for each particle group. pub(crate) first_update_group_dispatch_buffer_index: BufferTableId, /// The index of the render indirect metadata buffer. - pub(crate) render_effect_metadata_buffer_index: BufferTableId, + pub(crate) render_effect_metadata_buffer_table_id: BufferTableId, /// Render group dispatch indirect indices for all groups of the effect. pub(crate) render_group_dispatch_indices: RenderGroupDispatchIndices, } @@ -716,7 +757,7 @@ impl Default for DispatchBufferIndices { fn default() -> Self { DispatchBufferIndices { first_update_group_dispatch_buffer_index: BufferTableId::INVALID, - render_effect_metadata_buffer_index: BufferTableId::INVALID, + render_effect_metadata_buffer_table_id: BufferTableId::INVALID, render_group_dispatch_indices: default(), } } @@ -765,6 +806,16 @@ impl EffectCache { self.buffers.get_mut(buffer_index as usize)?.as_mut() } + /// Invalidate all the "simulation" bind groups for all buffers. + /// + /// This iterates over all valid buffers and calls + /// [`EffectBuffer::invalidate_sim_bind_group()`] on each one. + pub fn invalidate_sim_bind_groups(&mut self) { + for buffer in self.buffers.iter_mut().flatten() { + buffer.invalidate_sim_bind_group(); + } + } + pub fn insert( &mut self, asset: Handle, @@ -871,13 +922,21 @@ impl EffectCache { &mut self, buffer_index: u32, render_device: &RenderDevice, - group_binding: BufferBinding, + particle_group_buffer: &Buffer, + particle_group_offset: u64, + particle_group_size: NonZeroU64, ) -> Result<(), ()> { // Create the bind group let effect_buffer: &mut Option = self.buffers.get_mut(buffer_index as usize).ok_or(())?; let effect_buffer = effect_buffer.as_mut().ok_or(())?; - effect_buffer.create_sim_bind_group(buffer_index, render_device, group_binding); + effect_buffer.create_sim_bind_group( + buffer_index, + render_device, + particle_group_buffer, + particle_group_offset, + particle_group_size, + ); Ok(()) } diff --git a/src/render/mod.rs b/src/render/mod.rs index 421b8ce..d968241 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -1,7 +1,7 @@ use std::{ borrow::Cow, num::{NonZero, NonZeroU32, NonZeroU64}, - ops::{Deref, Range}, + ops::Range, }; use std::{iter, marker::PhantomData}; @@ -45,7 +45,7 @@ use bevy::{ }, Extract, }, - utils::HashMap, + utils::{Entry, HashMap}, }; use bitflags::bitflags; use bytemuck::{Pod, Zeroable}; @@ -66,8 +66,7 @@ use crate::{ }, spawn::{EffectCloner, EffectInitializer, EffectInitializers, Initializer}, AlphaMode, Attribute, CompiledParticleEffect, EffectProperties, EffectShader, EffectSimulation, - HanabiPlugin, ParticleLayout, PropertyLayout, RemovedEffectsEvent, SimulationCondition, - TextureLayout, ToWgslString, + HanabiPlugin, ParticleLayout, PropertyLayout, SimulationCondition, TextureLayout, ToWgslString, }; mod aligned_buffer_vec; @@ -1372,10 +1371,6 @@ pub struct AddedEffectGroup { pub(crate) 
struct ExtractedEffects { /// Extracted effects this frame. pub effects: Vec, - /// Entities which had their [`ParticleEffect`] component removed. - /// - /// [`ParticleEffect`]: crate::ParticleEffect - pub removed_effect_entities: Vec, /// Newly added effects without a GPU allocation yet. pub added_effects: Vec, } @@ -1435,7 +1430,6 @@ pub(crate) fn extract_effects( >, )>, >, - mut removed_effects_event_reader: Extract>, mut sim_params: ResMut, mut extracted_effects: ResMut, effects_meta: Res, @@ -1450,23 +1444,6 @@ pub(crate) fn extract_effects( sim_params.real_time = real_time.elapsed_secs_f64(); sim_params.real_delta_time = real_time.delta_secs(); - // Collect removed effects for later GPU data purge - extracted_effects.removed_effect_entities = - removed_effects_event_reader - .read() - .fold(vec![], |mut acc, ev| { - acc.extend( - ev.entities - .iter() - .filter_map(|(_, render_entity)| *render_entity), - ); - acc - }); - trace!( - "Found {} removed effect(s).", - extracted_effects.removed_effect_entities.len() - ); - // Collect added effects for later GPU data allocation extracted_effects.added_effects = query .p1() @@ -1831,8 +1808,7 @@ impl EffectsMeta { } } - /// Allocate internal resources for newly spawned effects, and deallocate - /// them for just-removed ones. + /// Allocate internal resources for newly spawned effects. /// /// After this system ran, all valid extracted effects from the main world /// have a corresponding entity with a [`CachedEffect`] component in the @@ -1840,118 +1816,16 @@ impl EffectsMeta { /// basic checks, like having a valid mesh. Note however that the main /// world's entity might still be missing its [`RenderEntity`] /// reference, since we cannot yet write into the main world. - pub fn add_remove_effects( + pub fn add_effects( &mut self, mut commands: Commands, - q_cached_effects: &Query<( - MainEntity, - &CachedEffect, - &DispatchBufferIndices, - Option<&CachedEffectProperties>, - )>, mut added_effects: Vec, - removed_effect_entities: Vec, render_device: &RenderDevice, render_queue: &RenderQueue, effect_bind_groups: &mut ResMut, effect_cache: &mut ResMut, property_cache: &mut ResMut, ) { - // Deallocate GPU data for destroyed effect instances. This will automatically - // drop any group where there is no more effect slice. - trace!( - "Removing {} despawned effects", - removed_effect_entities.len() - ); - for render_entity in &removed_effect_entities { - trace!( - "Removing ParticleEffect on render entity {:?}", - render_entity - ); - - // Schedule a despawn of the render entity for the end of this system - if let Some(cmd) = commands.get_entity(render_entity.id()) { - cmd.despawn_recursive(); - } else { - warn!("Unknown render entity {:?}, cannot despawn", render_entity); - continue; - } - - // Fecth the components of the effect being destroyed. Note that the despawn - // command above is not yet applied, so this query should always succeed. 
- let Ok((main_entity, cached_effect, dispatch_buffer_indices, cached_effect_properties)) = - q_cached_effects.get(render_entity.id()) - else { - error!( - "Render entity {:?} exists, but failed to query its components.", - render_entity - ); - continue; - }; - - // Deallocate properties if any - if let Some(cached_effect_properties) = cached_effect_properties { - match property_cache.remove_properties(cached_effect_properties) { - Err(err) => match err { - CachedPropertiesError::InvalidBufferIndex(buffer_index) - => error!("Failed to remove cached properties of render entity {render_entity:?} from buffer #{buffer_index}: the index is invalid."), - CachedPropertiesError::BufferDeallocated(buffer_index) - => error!("Failed to remove cached properties of render entity {render_entity:?} from buffer #{buffer_index}: the buffer is not allocated."), - } - Ok(buffer_state) => if buffer_state == BufferState::Free { - // The entire buffer was deallocated; destroy all bind groups referencing it - let key = cached_effect_properties.to_key(); - trace!("Destroying property bind group for key {key:?} due to property buffer deallocated."); - effect_bind_groups.property_bind_groups.remove(&key); - } - } - } - - // Deallocate the effect slice in the GPU effect buffer, and if this was the - // last slice, also deallocate the GPU buffer itself. - trace!( - "=> ParticleEffect on render entity {:?} associated with main entity {:?}, removing...", - render_entity, - main_entity, - ); - if let Ok(BufferState::Free) = effect_cache.remove(cached_effect) { - // Clear bind groups associated with the removed buffer - trace!( - "=> GPU buffer #{} gone, destroying its bind groups...", - cached_effect.buffer_index - ); - effect_bind_groups - .particle_buffers - .remove(&cached_effect.buffer_index); - - let group_count = cached_effect.slices.group_count(); - - let first_row = dispatch_buffer_indices - .first_update_group_dispatch_buffer_index - .0; - for table_id in first_row..(first_row + group_count) { - self.dispatch_indirect_buffer - .remove(BufferTableId(table_id)); - } - - self.render_effect_dispatch_buffer - .remove(dispatch_buffer_indices.render_effect_metadata_buffer_index); - - if let RenderGroupDispatchIndices::Allocated { - first_render_group_dispatch_buffer_index, - .. - } = &dispatch_buffer_indices.render_group_dispatch_indices - { - for row_index in (first_render_group_dispatch_buffer_index.0) - ..(first_render_group_dispatch_buffer_index.0 + group_count) - { - self.render_group_dispatch_buffer - .remove(BufferTableId(row_index)); - } - } - } - } - // FIXME - We delete a buffer above, and have a chance to immediatly re-create // it below. We should keep the GPU buffer around until the end of this method. // On the other hand, we should also be careful that allocated buffers need to @@ -1961,19 +1835,19 @@ impl EffectsMeta { trace!("Adding {} newly spawned effects", added_effects.len()); for added_effect in added_effects.drain(..) 
{
             // Allocate per-group update dispatch indirect
-            let first_update_group_dispatch_buffer_index = allocate_sequential_buffers(
-                &mut self.dispatch_indirect_buffer,
-                iter::repeat(GpuDispatchIndirect::default()).take(added_effect.groups.len()),
-            );
+            let first_update_group_dispatch_buffer_index =
+                self.dispatch_indirect_buffer.insert_contiguous(
+                    iter::repeat(GpuDispatchIndirect::default()).take(added_effect.groups.len()),
+                );
 
             // Allocate per-effect metadata
-            let render_effect_dispatch_buffer_id = self
+            let render_effect_dispatch_buffer_table_id = self
                 .render_effect_dispatch_buffer
                 .insert(GpuRenderEffectMetadata::default());
 
             let dispatch_buffer_indices = DispatchBufferIndices {
                 first_update_group_dispatch_buffer_index,
-                render_effect_metadata_buffer_index: render_effect_dispatch_buffer_id,
+                render_effect_metadata_buffer_table_id: render_effect_dispatch_buffer_table_id,
                 render_group_dispatch_indices: RenderGroupDispatchIndices::Pending {
                     groups: added_effect.groups.iter().map(Into::into).collect(),
                 },
             };
@@ -2008,23 +1882,8 @@ impl EffectsMeta {
                 added_effect.render_entity,
                 added_effect.entity,
                 first_update_group_dispatch_buffer_index.0,
-                render_effect_dispatch_buffer_id.0
+                render_effect_dispatch_buffer_table_id.0
             );
-
-            // Note: those effects are already in extracted_effects.effects
-            // because they were gathered by the same query as
-            // previously existing ones, during extraction.
-
-            // let index = self.effect_cache.buffer_index(cache_id).unwrap();
-            //
-            // let table_id = self
-            //     .dispatch_indirect_buffer
-            //     .insert(GpuDispatchIndirect::default());
-            // assert_eq!(
-            //     table_id.0, index,
-            //     "Broken table invariant: buffer={} row={}",
-            //     index, table_id.0
-            // );
         }
 
         // Once all changes are applied, immediately schedule any GPU buffer
@@ -2101,20 +1960,119 @@ impl Default for LayoutFlags {
     }
 }
 
-/// Spawn a new render entity with a [`CachedEffect`] component for any newly
-/// allocated effect, and conversely despawn any entity with such component for
-/// removed effects.
-///
-/// After this system ran, and its commands are applied, all valid extracted
-/// effects have a corresponding entity in the render world, with a
-/// [`CachedEffect`] component. From there, we operate on those exclusively.
-pub(crate) fn add_remove_effects(
-    q_cached_effects: Query<(
+/// Observer raised when the [`CachedEffect`] component is removed, which
+/// indicates that the effect instance was despawned.
+pub(crate) fn on_remove_cached_effect(
+    trigger: Trigger<OnRemove, CachedEffect>,
+    query: Query<(
+        Entity,
         MainEntity,
         &CachedEffect,
         &DispatchBufferIndices,
         Option<&CachedEffectProperties>,
     )>,
+    mut effect_cache: ResMut<EffectCache>,
+    mut effect_bind_groups: ResMut<EffectBindGroups>,
+    mut effects_meta: ResMut<EffectsMeta>,
+) {
+    // FIXME - review this Observer pattern; this triggers for each event one by
+    // one, which could kill performance if many effects are removed.
+
+    // Fetch the components of the effect being destroyed. The components are
+    // still present on the entity while this `OnRemove` observer runs, so this
+    // query should always succeed.
+    let Ok((render_entity, main_entity, cached_effect, dispatch_buffer_indices, _opt_props)) =
+        query.get(trigger.entity())
+    else {
+        return;
+    };
+
+    // Deallocate the effect slice in the GPU effect buffer, and if this was the
+    // last slice, also deallocate the GPU buffer itself.
+ trace!( + "=> ParticleEffect on render entity {:?} associated with main entity {:?}, removing...", + render_entity, + main_entity, + ); + let Ok(BufferState::Free) = effect_cache.remove(cached_effect) else { + // Buffer was not affected, so all bind groups are still valid. Nothing else to + // do. + return; + }; + + // Clear bind groups associated with the removed buffer + trace!( + "=> GPU buffer #{} gone, destroying its bind groups...", + cached_effect.buffer_index + ); + effect_bind_groups + .particle_buffers + .remove(&cached_effect.buffer_index); + + let group_count = cached_effect.slices.group_count(); + + let first_row = dispatch_buffer_indices + .first_update_group_dispatch_buffer_index + .0; + effects_meta + .dispatch_indirect_buffer + .remove_range(BufferTableId(first_row), group_count); + + effects_meta + .render_effect_dispatch_buffer + .remove(dispatch_buffer_indices.render_effect_metadata_buffer_table_id); + + if let RenderGroupDispatchIndices::Allocated { + first_render_group_dispatch_buffer_index, + .. + } = &dispatch_buffer_indices.render_group_dispatch_indices + { + effects_meta.render_group_dispatch_buffer.remove_range( + BufferTableId(first_render_group_dispatch_buffer_index.0), + group_count, + ); + } +} + +/// Observer raised when the [`CachedEffectProperties`] component is removed, +/// which indicates that the effect doesn't use properties anymore (including, +/// when the effect itself is despawned). +pub(crate) fn on_remove_cached_properties( + trigger: Trigger, + query: Query<(Entity, &CachedEffectProperties)>, + mut property_cache: ResMut, + mut effect_bind_groups: ResMut, +) { + // FIXME - review this Observer pattern; this triggers for each event one by + // one, which could kill performance if many effects are removed. + + let Ok((render_entity, cached_effect_properties)) = query.get(trigger.entity()) else { + return; + }; + + match property_cache.remove_properties(cached_effect_properties) { + Err(err) => match err { + CachedPropertiesError::InvalidBufferIndex(buffer_index) + => error!("Failed to remove cached properties of render entity {render_entity:?} from buffer #{buffer_index}: the index is invalid."), + CachedPropertiesError::BufferDeallocated(buffer_index) + => error!("Failed to remove cached properties of render entity {render_entity:?} from buffer #{buffer_index}: the buffer is not allocated."), + } + Ok(buffer_state) => if buffer_state != BufferState::Used { + // The entire buffer was deallocated, or it was resized; destroy all bind groups referencing it + let key = cached_effect_properties.to_key(); + trace!("Destroying property bind group for key {key:?} due to property buffer deallocated."); + effect_bind_groups + .property_bind_groups + .retain(|&k, _| k.buffer_index != key.buffer_index); + } + } +} + +/// Update the [`CachedEffect`] component for any newly allocated effect. +/// +/// After this system ran, and its commands are applied, all valid extracted +/// effects have a corresponding entity in the render world, with a +/// [`CachedEffect`] component. From there, we operate on those exclusively. 
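+///
+/// Deallocation of despawned effects is no longer handled here; it is performed
+/// by the [`on_remove_cached_effect`] and [`on_remove_cached_properties`]
+/// observers, which the plugin registers on the render world (sketch):
+///
+/// ```ignore
+/// render_app.world_mut().add_observer(on_remove_cached_effect);
+/// render_app.world_mut().add_observer(on_remove_cached_properties);
+/// ```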
+pub(crate) fn add_effects( render_device: Res, render_queue: Res, commands: Commands, @@ -2140,12 +2098,10 @@ pub(crate) fn add_remove_effects( .render_group_dispatch_buffer .clear_previous_frame_resizes(); - // Allocate new effects, deallocate removed ones - effects_meta.add_remove_effects( + // Allocate new effects + effects_meta.add_effects( commands, - &q_cached_effects, std::mem::take(&mut extracted_effects.added_effects), - std::mem::take(&mut extracted_effects.removed_effect_entities), &render_device, &render_queue, &mut effect_bind_groups, @@ -2159,9 +2115,17 @@ pub(crate) fn add_remove_effects( // Allocate all the property buffer(s) as needed, before we move to the next // step which will need those buffers to schedule data copies from CPU. - for buffer in property_cache.buffers_mut().iter_mut().flatten() { - let _changed = buffer.write_buffer(&render_device, &render_queue); - // FIXME - invalidate bind groups! + for (buffer_index, buffer_slot) in property_cache.buffers_mut().iter_mut().enumerate() { + let Some(property_buffer) = buffer_slot.as_mut() else { + continue; + }; + let changed = property_buffer.write_buffer(&render_device, &render_queue); + if changed { + trace!("Destroying all bind groups for property buffer #{buffer_index}"); + effect_bind_groups + .property_bind_groups + .retain(|&k, _| k.buffer_index != buffer_index as u32); + } } } @@ -2207,17 +2171,24 @@ pub(crate) struct CachedProperties { pub binding_size: u32, } +#[derive(SystemParam)] +pub struct PrepareEffectsReadOnlyParams<'w, 's> { + sim_params: Res<'w, SimParams>, + render_device: Res<'w, RenderDevice>, + render_queue: Res<'w, RenderQueue>, + pipeline_cache: Res<'w, PipelineCache>, + mesh_allocator: Res<'w, MeshAllocator>, + render_meshes: Res<'w, RenderAssets>, + marker: PhantomData<&'s usize>, +} + pub(crate) fn prepare_effects( mut commands: Commands, - sim_params: Res, - render_device: Res, - render_queue: Res, - pipeline_cache: Res, + read_params: PrepareEffectsReadOnlyParams, + property_cache: Res, mut init_pipeline: ResMut, mut update_pipeline: ResMut, - mesh_allocator: Res, - render_meshes: Res>, - mut property_cache: ResMut, + mut effect_cache: ResMut, mut specialized_init_pipelines: ResMut>, mut specialized_update_pipelines: ResMut>, mut effects_meta: ResMut, @@ -2232,6 +2203,14 @@ pub(crate) fn prepare_effects( ) { trace!("prepare_effects"); + // Workaround for too many params in system (TODO: refactor to split work?) + let sim_params = read_params.sim_params.into_inner(); + let render_device = read_params.render_device.into_inner(); + let render_queue = read_params.render_queue.into_inner(); + let pipeline_cache = read_params.pipeline_cache.into_inner(); + let mesh_allocator = read_params.mesh_allocator.into_inner(); + let render_meshes = read_params.render_meshes.into_inner(); + // // sort first by z and then by handle. 
this ensures that, when possible, // batches span multiple z layers // batches won't span z-layers if there is // another batch between them extracted_effects.effects.sort_by(|a, b| { @@ -2309,9 +2288,9 @@ pub(crate) fn prepare_effects( { trace!("Effect render_entity {:?}: allocating render group indirect dispatch entries for {} groups...", extracted_effect.render_entity, groups.len()); let mut current_base_instance = 0; - let first_render_group_dispatch_buffer_index = allocate_sequential_buffers( - &mut effects_meta.render_group_dispatch_buffer, - groups.iter().map(|group| { + let first_render_group_dispatch_buffer_index = effects_meta + .render_group_dispatch_buffer + .insert_contiguous(groups.iter().map(|group| { let indirect_dispatch = match &mesh_index_buffer_slice { // Indexed mesh rendering Some(mesh_index_buffer_slice) => { @@ -2349,8 +2328,7 @@ pub(crate) fn prepare_effects( }; current_base_instance += group.capacity as i32; indirect_dispatch - }), - ); + })); let mut trail_dispatch_buffer_indices = HashMap::new(); for (dest_group_index, group) in groups.iter().enumerate() { @@ -2444,7 +2422,7 @@ pub(crate) fn prepare_effects( init_pipeline.temp_spawner_buffer_layout = Some(spawner_buffer_layout.clone()); let init_pipeline_id: CachedComputePipelineId = specialized_init_pipelines .specialize( - &pipeline_cache, + pipeline_cache, &init_pipeline, ParticleInitPipelineKey { shader: shader.init.clone(), @@ -2458,7 +2436,7 @@ pub(crate) fn prepare_effects( // https://github.com/bevyengine/bevy/issues/17132 update_pipeline.temp_spawner_buffer_layout = Some(spawner_buffer_layout.clone()); let update_pipeline_id = specialized_update_pipelines.specialize( - &pipeline_cache, + pipeline_cache, &update_pipeline, ParticleUpdatePipelineKey { shader: shader.update.clone(), @@ -2564,7 +2542,7 @@ pub(crate) fn prepare_effects( effects_meta.particle_group_buffer.push(GpuParticleGroup { global_group_index: total_group_count, effect_index: dispatch_buffer_indices - .render_effect_metadata_buffer_index + .render_effect_metadata_buffer_table_id .0, group_index_in_effect: group_index as u32, indirect_index: range[0], @@ -2638,27 +2616,6 @@ pub(crate) fn prepare_effects( // Also remove the other one. FIXME - dedupe those two... cmd.remove::(); - match property_cache.remove_properties(cached_effect_properties) { - Err(err) => { - let render_entity = extracted_effect.render_entity.id(); - match err { - CachedPropertiesError::InvalidBufferIndex(buffer_index) - => error!("Failed to remove cached properties of render entity {render_entity:?} from buffer #{buffer_index}: the index is invalid."), - CachedPropertiesError::BufferDeallocated(buffer_index) - => error!("Failed to remove cached properties of render entity {render_entity:?} from buffer #{buffer_index}: the buffer is not allocated."), - } - } - Ok(buffer_state) => { - if buffer_state == BufferState::Free { - // The entire buffer was deallocated; destroy all bind groups - // referencing it - let key = cached_effect_properties.to_key(); - trace!("Destroying property bind group for key {key:?} due to property buffer deallocated."); - effect_bind_groups.property_bind_groups.remove(&key); - } - } - } - if extracted_effect.property_data.is_some() { warn!( "Effect on entity {:?} doesn't declare any property in its Module, but some property values were provided. 
Those values will be discarded.",
                     extracted_effect.main_entity
                 );
             }
@@ -2731,12 +2688,12 @@ pub(crate) fn prepare_effects(
 
     // Perform any GPU allocation if we (lazily) allocated some rows into the render
    // group dispatch indirect buffer.
-    effects_meta.allocate_gpu(&render_device, &render_queue, &mut effect_bind_groups);
+    effects_meta.allocate_gpu(render_device, render_queue, &mut effect_bind_groups);
 
     // Write the entire spawner buffer for this frame, for all effects combined
     if effects_meta
         .spawner_buffer
-        .write_buffer(&render_device, &render_queue)
+        .write_buffer(render_device, render_queue)
     {
         // All property bind groups use the spawner buffer, which was reallocated
         effect_bind_groups.property_bind_groups.clear();
@@ -2746,15 +2703,14 @@ pub(crate) fn prepare_effects(
     // Write the entire particle group buffer for this frame
     if effects_meta
         .particle_group_buffer
-        .write_buffer(&render_device, &render_queue)
+        .write_buffer(render_device, render_queue)
     {
         // The buffer changed; invalidate all bind groups for all effects.
+        effect_cache.invalidate_sim_bind_groups();
     }
 
     // Update simulation parameters
-    effects_meta
-        .sim_params_uniforms
-        .set(sim_params.deref().into());
+    effects_meta.sim_params_uniforms.set(sim_params.into());
     {
         let gpu_sim_params = effects_meta.sim_params_uniforms.get_mut();
         gpu_sim_params.num_groups = total_group_count;
@@ -2774,7 +2730,7 @@ pub(crate) fn prepare_effects(
     let prev_buffer_id = effects_meta.sim_params_uniforms.buffer().map(|b| b.id());
     effects_meta
         .sim_params_uniforms
-        .write_buffer(&render_device, &render_queue);
+        .write_buffer(render_device, render_queue);
     if prev_buffer_id != effects_meta.sim_params_uniforms.buffer().map(|b| b.id()) {
         // Buffer changed, invalidate bind groups
         effects_meta.sim_params_bind_group = None;
@@ -2954,6 +2910,33 @@ pub struct PropertyBindGroupKey {
     pub binding_size: u32,
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+struct InitRenderIndirectBindGroupKey {
+    pub buffer_index: u32,
+    pub render_effect_buffer: BufferId,
+    pub render_group_buffer: BufferId,
+    pub render_effect_offset: u32,
+    pub render_group_offset_dst: u32,
+    pub render_group_offset_src: u32,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+struct UpdateRenderIndirectBindGroupKey {
+    pub buffer_index: u32,
+    pub render_effect_buffer: BufferId,
+    pub render_group_buffer: BufferId,
+    pub render_effect_offset: u32,
+    pub render_group_offset: u32,
+}
+
+struct CachedBindGroup<K> {
+    /// Key the bind group was created from. Each time the key changes, the
+    /// bind group needs to be re-created.
+    key: K,
+    /// Bind group created from the key.
+    bind_group: BindGroup,
+}
+
 #[derive(Default, Resource)]
 pub struct EffectBindGroups {
     /// Map from buffer index to the bind groups shared among all effects that
@@ -2964,11 +2947,13 @@ pub struct EffectBindGroups {
     /// Map from buffer index to its init render indirect bind group (group
     /// 3), one per group in the effect.
     // FIXME - doesn't work with batching; this should be the instance ID
-    init_render_indirect_bind_groups: HashMap<u32, Vec<BindGroup>>,
+    init_render_indirect_bind_groups:
+        HashMap<u32, Vec<CachedBindGroup<InitRenderIndirectBindGroupKey>>>,
     /// Map from buffer index to its update render indirect bind group (group
     /// 3).
     // FIXME - doesn't work with batching; this should be the instance ID
-    update_render_indirect_bind_groups: HashMap<u32, BindGroup>,
+    update_render_indirect_bind_groups:
+        HashMap<u32, CachedBindGroup<UpdateRenderIndirectBindGroupKey>>,
     /// Map from an effect material to its bind group.
material_bind_groups: HashMap, /// Map from a [`PropertyBuffer`] index and a binding size to the @@ -2992,6 +2977,258 @@ impl EffectBindGroups { self.no_property_bind_group.as_ref() } } + + pub(self) fn get_or_create_init_render_indirect( + &mut self, + effect_batches: &EffectBatches, + gpu_limits: &GpuLimits, + render_device: &RenderDevice, + spawn_layout: &BindGroupLayout, + clone_layout: &BindGroupLayout, + render_effect_buffer: &Buffer, + render_group_buffer: &Buffer, + ) -> Result<(), ()> { + let DispatchBufferIndices { + render_effect_metadata_buffer_table_id: render_effect_dispatch_buffer_index, + render_group_dispatch_indices, + .. + } = &effect_batches.dispatch_buffer_indices; + let RenderGroupDispatchIndices::Allocated { + first_render_group_dispatch_buffer_index, + trail_dispatch_buffer_indices, + } = render_group_dispatch_indices + else { + return Err(()); + }; + + let make_key = |group_index: u32| { + let is_cloner = matches!( + effect_batches.initializers[group_index as usize], + EffectInitializer::Cloner(_) + ); + let dst_src_indices = if is_cloner { + let trail_indices = &trail_dispatch_buffer_indices[&group_index]; + [trail_indices.dest.0, trail_indices.src.0] + } else { + let idx = first_render_group_dispatch_buffer_index.0; + [idx, idx] + }; + + let key = InitRenderIndirectBindGroupKey { + buffer_index: effect_batches.buffer_index, + render_effect_buffer: render_effect_buffer.id(), + render_group_buffer: render_group_buffer.id(), + render_effect_offset: gpu_limits + .render_effect_indirect_offset(render_effect_dispatch_buffer_index.0) + as u32, + render_group_offset_dst: gpu_limits.render_group_indirect_offset(dst_src_indices[0]) + as u32, + render_group_offset_src: gpu_limits.render_group_indirect_offset(dst_src_indices[1]) + as u32, + }; + + (is_cloner, key) + }; + + let make_bind_group = |group_index: u32, + is_cloner: bool, + key: &InitRenderIndirectBindGroupKey| { + let entries = [ + // @group(3) @binding(0) var render_effect_indirect : + // RenderEffectMetadata; + BindGroupEntry { + binding: 0, + resource: BindingResource::Buffer(BufferBinding { + buffer: render_effect_buffer, + offset: key.render_effect_offset as u64, + size: Some(gpu_limits.render_effect_indirect_size()), + }), + }, + // @group(3) @binding(1) var dest_render_group_indirect: + // RenderGroupIndirect; + BindGroupEntry { + binding: 1, + resource: BindingResource::Buffer(BufferBinding { + buffer: render_group_buffer, + offset: key.render_group_offset_dst as u64, + size: Some(gpu_limits.render_group_indirect_size()), + }), + }, + // @group(3) @binding(2) var src_render_group_indirect: + // RenderGroupIndirect; + BindGroupEntry { + binding: 2, + resource: BindingResource::Buffer(BufferBinding { + buffer: render_group_buffer, + offset: key.render_group_offset_src as u64, + size: Some(gpu_limits.render_group_indirect_size()), + }), + }, + ]; + + let (layout, entries) = if is_cloner { + (clone_layout, &entries[..]) + } else { + (spawn_layout, &entries[0..2]) + }; + + let bind_group = render_device.create_bind_group( + "hanabi:bind_group_init_render_group_dispatch", + layout, + entries, + ); + + trace!( + "Created new init render indirect bind group for group #{} buffer index {}: render_effect={} dest_group={} src_group={}", + group_index, + effect_batches.buffer_index, + render_effect_dispatch_buffer_index.0, + key.render_group_offset_dst, + key.render_group_offset_src, + ); + + bind_group + }; + + let mut entry = self + .init_render_indirect_bind_groups + .entry(effect_batches.buffer_index); + let vec = 
{
+            // First decide, while only borrowing the entry, whether the cached
+            // vector can be updated in place or must be rebuilt from scratch.
+            let needs_rebuild = if let Entry::Occupied(occ) = &mut entry {
+                let vec = occ.get_mut();
+                if vec.len() != effect_batches.group_batches.len() {
+                    // Size differs; re-create all entries
+                    vec.clear();
+                    true
+                } else {
+                    // Same size; only re-create the stale entries, in place
+                    for (group_index, cached) in vec.iter_mut().enumerate() {
+                        let (is_cloner, key) = make_key(group_index as u32);
+                        if cached.key != key {
+                            cached.bind_group =
+                                make_bind_group(group_index as u32, is_cloner, &key);
+                            cached.key = key;
+                        }
+                    }
+                    false
+                }
+            } else {
+                true
+            };
+            // Consuming the entry here is fine: the borrow above has ended.
+            needs_rebuild.then(|| entry.or_default())
+        };
+        if let Some(vec) = vec {
+            // Re-create all entries
+            debug_assert!(vec.is_empty());
+            for group_index in 0..effect_batches.group_batches.len() {
+                let (is_cloner, key) = make_key(group_index as u32);
+                let bind_group = make_bind_group(group_index as u32, is_cloner, &key);
+                vec.push(CachedBindGroup { key, bind_group });
+            }
+        }
+
+        Ok(())
+    }
+
+    pub(self) fn get_or_create_update_render_indirect(
+        &mut self,
+        effect_batches: &EffectBatches,
+        gpu_limits: &GpuLimits,
+        render_device: &RenderDevice,
+        layout: &BindGroupLayout,
+        render_effect_buffer: &Buffer,
+        render_group_buffer: &Buffer,
+    ) -> Result<(), ()> {
+        let DispatchBufferIndices {
+            render_effect_metadata_buffer_table_id: render_effect_dispatch_buffer_index,
+            render_group_dispatch_indices,
+            ..
+        } = &effect_batches.dispatch_buffer_indices;
+        let RenderGroupDispatchIndices::Allocated {
+            first_render_group_dispatch_buffer_index,
+            ..
+        } = render_group_dispatch_indices
+        else {
+            return Err(());
+        };
+
+        let key = UpdateRenderIndirectBindGroupKey {
+            buffer_index: effect_batches.buffer_index,
+            render_effect_buffer: render_effect_buffer.id(),
+            render_group_buffer: render_group_buffer.id(),
+            render_effect_offset: gpu_limits
+                .render_effect_indirect_offset(render_effect_dispatch_buffer_index.0)
+                as u32,
+            render_group_offset: gpu_limits
+                .render_group_indirect_offset(first_render_group_dispatch_buffer_index.0)
+                as u32,
+        };
+
+        let make_entry = || {
+            let storage_alignment = gpu_limits.storage_buffer_align.get();
+            let render_effect_indirect_size =
+                GpuRenderEffectMetadata::aligned_size(storage_alignment);
+            let total_render_group_indirect_size = NonZeroU64::new(
+                GpuRenderGroupIndirect::aligned_size(storage_alignment).get()
+                    * effect_batches.group_batches.len() as u64,
+            )
+            .unwrap();
+
+            let bind_group = render_device.create_bind_group(
+                "hanabi:bind_group_update_render_group_dispatch",
+                layout,
+                &[
+                    // @group(3) @binding(0) var<storage, read_write> render_effect_indirect :
+                    // RenderEffectMetadata;
+                    BindGroupEntry {
+                        binding: 0,
+                        resource: BindingResource::Buffer(BufferBinding {
+                            buffer: render_effect_buffer,
+                            offset: key.render_effect_offset as u64,
+                            size: Some(render_effect_indirect_size),
+                        }),
+                    },
+                    // @group(3) @binding(1) var<storage, read_write> render_group_indirect :
+                    // array<RenderGroupIndirect>;
+                    BindGroupEntry {
+                        binding: 1,
+                        resource: BindingResource::Buffer(BufferBinding {
+                            buffer: render_group_buffer,
+                            offset: key.render_group_offset as u64,
+                            size: Some(total_render_group_indirect_size),
+                        }),
+                    },
+                ],
+            );
+
+            trace!(
+                "Created new update render indirect bind group for buffer index {}: \
+                render_effect={} \
+                render_group={} group_count={}",
+                effect_batches.buffer_index,
+                render_effect_dispatch_buffer_index.0,
+                first_render_group_dispatch_buffer_index.0,
+                effect_batches.group_batches.len()
+            );
+
+            bind_group
+        };
+
+        let entry = self
+            .update_render_indirect_bind_groups
+            .entry(effect_batches.buffer_index);
+        if let Entry::Occupied(occ) = &entry {
+            if occ.get().key != key {
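+                // The cached key differs, e.g. because one of the dispatch buffers
+                // was reallocated: re-create the bind group in place.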
entry.and_modify(|e| { + e.key = key; + e.bind_group = make_entry(); + }); + } + } else { + entry.or_insert(CachedBindGroup { + key, + bind_group: make_entry(), + }); + } + + Ok(()) + } } #[derive(SystemParam)] @@ -3876,11 +4113,11 @@ pub(crate) fn prepare_bind_groups( }; // This should always be non-zero if the property key is Some(). - let binding_size = NonZeroU64::new(property_key.binding_size as u64).unwrap(); - let Some(layout) = property_cache.bind_group_layout(Some(binding_size)) else { + let property_binding_size = NonZeroU64::new(property_key.binding_size as u64).unwrap(); + let Some(layout) = property_cache.bind_group_layout(Some(property_binding_size)) else { error!( "Missing property bind group layout for binding size {}, referenced by effect batch on render entity {:?}.", - binding_size.get(), entity + property_binding_size.get(), entity ); continue; }; @@ -3921,7 +4158,7 @@ pub(crate) fn prepare_bind_groups( resource: BindingResource::Buffer(BufferBinding { buffer: property_buffer, offset: 0, - size: None, + size: Some(property_binding_size), }), }, ], @@ -3939,16 +4176,16 @@ pub(crate) fn prepare_bind_groups( * effect_batches.group_batches.len() as u64, ) .unwrap(); - let group_binding = BufferBinding { - buffer: effects_meta.particle_group_buffer.buffer().unwrap(), - offset: first_effect_particle_group_buffer_offset, - size: Some(effect_particle_groups_buffer_size), - }; // Bind group for the init compute shader to simulate particles. - // TODO - move this creation in RenderSet::PrepareBindGroups if effect_cache - .create_sim_bind_group(effect_batches.buffer_index, &render_device, group_binding) + .create_sim_bind_group( + effect_batches.buffer_index, + &render_device, + effects_meta.particle_group_buffer.buffer().unwrap(), + first_effect_particle_group_buffer_offset, + effect_particle_groups_buffer_size, + ) .is_err() { error!("No particle buffer allocated for entity {:?}", entity); @@ -3958,186 +4195,34 @@ pub(crate) fn prepare_bind_groups( // Bind group #3 of init pass // FIXME - this is instance-dependent, not buffer-dependent if effect_bind_groups - .init_render_indirect_bind_groups - .get(&effect_batches.buffer_index) - .is_none() + .get_or_create_init_render_indirect( + effect_batches, + &effects_meta.gpu_limits, + &render_device, + &init_pipeline.render_indirect_spawn_layout, + &init_pipeline.render_indirect_clone_layout, + effects_meta.render_effect_dispatch_buffer.buffer().unwrap(), + effects_meta.render_group_dispatch_buffer.buffer().unwrap(), + ) + .is_err() { - let DispatchBufferIndices { - render_effect_metadata_buffer_index: render_effect_dispatch_buffer_index, - render_group_dispatch_indices, - .. - } = &effect_batches.dispatch_buffer_indices; - let RenderGroupDispatchIndices::Allocated { - first_render_group_dispatch_buffer_index, - trail_dispatch_buffer_indices, - } = render_group_dispatch_indices - else { - continue; - }; - - let render_effect_indirect_size = effects_meta.gpu_limits.render_effect_indirect_size(); - let render_group_indirect_size = effects_meta.gpu_limits.render_group_indirect_size(); - - // Create one bind group per effect group. We don't batch them anyway for now... 
- let bind_groups = (0..effect_batches.group_batches.len()) - .map(|group_index| { - let is_cloner = matches!(effect_batches.initializers[group_index], EffectInitializer::Cloner(_)); - let dst_src_indices = if is_cloner { - let trail_indices = &trail_dispatch_buffer_indices[&(group_index as u32)]; - [trail_indices.dest.0, trail_indices.src.0] - } else { - let idx = first_render_group_dispatch_buffer_index.0; - [idx, idx] - }; - let entries = [ - // @group(3) @binding(0) var render_effect_indirect : RenderEffectMetadata; - BindGroupEntry { - binding: 0, - resource: BindingResource::Buffer(BufferBinding { - buffer: effects_meta - .render_effect_dispatch_buffer - .buffer() - .unwrap(), - offset: effects_meta.gpu_limits.render_effect_indirect_offset( - render_effect_dispatch_buffer_index.0, - ), - size: Some(render_effect_indirect_size), - }), - }, - // @group(3) @binding(1) var dest_render_group_indirect: RenderGroupIndirect; - BindGroupEntry { - binding: 1, - resource: BindingResource::Buffer(BufferBinding { - buffer: effects_meta - .render_group_dispatch_buffer - .buffer() - .unwrap(), - offset: effects_meta.gpu_limits.render_group_indirect_offset( - dst_src_indices[0] - ), - size: Some(render_group_indirect_size), - }), - }, - // @group(3) @binding(2) var src_render_group_indirect: RenderGroupIndirect; - BindGroupEntry { - binding: 2, - resource: BindingResource::Buffer(BufferBinding { - buffer: effects_meta - .render_group_dispatch_buffer - .buffer() - .unwrap(), - offset: effects_meta.gpu_limits.render_group_indirect_offset( - dst_src_indices[1], - ), - size: Some(render_group_indirect_size), - }), - }, - ]; - - let (layout, entries) = if is_cloner { - (&init_pipeline.render_indirect_clone_layout, &entries[..]) - } else { - (&init_pipeline.render_indirect_spawn_layout, &entries[0..2]) - }; - - let bind_group = render_device.create_bind_group("hanabi:bind_group_init_render_group_dispatch", layout, entries); - - trace!( - "Created new init render indirect bind group for group #{} buffer index {}: render_effect={} dest_group={} src_group={}", - group_index, - effect_batches.buffer_index, - render_effect_dispatch_buffer_index.0, - dst_src_indices[0], - dst_src_indices[1], - ); - - bind_group - }) - .collect::>(); - - effect_bind_groups.init_render_indirect_bind_groups.insert( - // FIXME - this is instance-dependent, not buffer-dependent - effect_batches.buffer_index, - bind_groups, - ); + continue; } // Bind group #3 of update pass // FIXME - this is instance-dependent, not buffer-dependent if effect_bind_groups - .update_render_indirect_bind_groups - .get(&effect_batches.buffer_index) - .is_none() - { - let DispatchBufferIndices { - render_effect_metadata_buffer_index: render_effect_dispatch_buffer_index, - render_group_dispatch_indices, - .. - } = &effect_batches.dispatch_buffer_indices; - let RenderGroupDispatchIndices::Allocated { - first_render_group_dispatch_buffer_index, - .. 
-            } = render_group_dispatch_indices
-            else {
-                continue;
-            };
-
-            let storage_alignment = effects_meta.gpu_limits.storage_buffer_align.get();
-            let render_effect_indirect_size =
-                GpuRenderEffectMetadata::aligned_size(storage_alignment);
-            let total_render_group_indirect_size = NonZeroU64::new(
-                GpuRenderGroupIndirect::aligned_size(storage_alignment).get()
-                    * effect_batches.group_batches.len() as u64,
-            )
-            .unwrap();
-            let particles_buffer_layout_update_render_indirect = render_device.create_bind_group(
-                "hanabi:bind_group_update_render_group_dispatch",
+            .get_or_create_update_render_indirect(
+                effect_batches,
+                &effects_meta.gpu_limits,
+                &render_device,
                 &update_pipeline.render_indirect_layout,
-                &[
-                    // @group(3) @binding(0) var<storage, read_write> render_effect_indirect :
-                    // RenderEffectMetadata;
-                    BindGroupEntry {
-                        binding: 0,
-                        resource: BindingResource::Buffer(BufferBinding {
-                            buffer: effects_meta.render_effect_dispatch_buffer.buffer().unwrap(),
-                            offset: effects_meta.gpu_limits.render_effect_indirect_offset(
-                                render_effect_dispatch_buffer_index.0,
-                            ),
-                            size: Some(render_effect_indirect_size),
-                        }),
-                    },
-                    // @group(3) @binding(1) var<storage, read_write> render_group_indirect :
-                    // array<RenderGroupIndirect>;
-                    BindGroupEntry {
-                        binding: 1,
-                        resource: BindingResource::Buffer(BufferBinding {
-                            buffer: effects_meta.render_group_dispatch_buffer.buffer().unwrap(),
-                            offset: effects_meta.gpu_limits.render_group_indirect_offset(
-                                first_render_group_dispatch_buffer_index.0,
-                            ),
-                            size: Some(total_render_group_indirect_size),
-                        }),
-                    },
-                ],
-            );
-
-            trace!(
-                "Created new update render indirect bind group for buffer index {}: \
-                render_effect={} \
-                render_group={} group_count={}",
-                effect_batches.buffer_index,
-                render_effect_dispatch_buffer_index.0,
-                first_render_group_dispatch_buffer_index.0,
-                effect_batches.group_batches.len()
-            );
-
-            effect_bind_groups
-                .update_render_indirect_bind_groups
-                .insert(
-                    // FIXME - this is instance-dependent, not buffer-dependent
-                    effect_batches.buffer_index,
-                    particles_buffer_layout_update_render_indirect,
-                );
+                effects_meta.render_effect_dispatch_buffer.buffer().unwrap(),
+                effects_meta.render_group_dispatch_buffer.buffer().unwrap(),
+            )
+            .is_err()
+        {
+            continue;
         }
 
         // Ensure the particle texture(s) are available as GPU resources and that a bind
@@ -4799,7 +4884,8 @@ impl Node for VfxSimulateNode {
                         );
                         compute_pass.set_bind_group(
                             3,
-                            &render_indirect_bind_groups[dest_group_index as usize],
+                            &render_indirect_bind_groups[dest_group_index as usize]
+                                .bind_group,
                             &[],
                         );
                         compute_pass.dispatch_workgroups(workgroup_count, 1, 1);
@@ -4886,7 +4972,8 @@ impl Node for VfxSimulateNode {
                         );
                         compute_pass.set_bind_group(
                             3,
-                            &render_indirect_bind_groups[dest_group_index as usize],
+                            &render_indirect_bind_groups[dest_group_index as usize]
+                                .bind_group,
                             &[],
                         );
 
@@ -5081,7 +5168,11 @@ impl Node for VfxSimulateNode {
                             .unwrap(),
                         &offsets[..],
                     );
-                    compute_pass.set_bind_group(3, update_render_indirect_bind_group, &[]);
+                    compute_pass.set_bind_group(
+                        3,
+                        &update_render_indirect_bind_group.bind_group,
+                        &[],
+                    );
 
                     if let Some(buffer) = effects_meta.dispatch_indirect_buffer.buffer() {
                         trace!(
@@ -5105,37 +5196,6 @@ impl Node for VfxSimulateNode {
     }
 }
 
-// FIXME - Remove this, handle it properly with a BufferTable::insert_many() or
-// so...
-fn allocate_sequential_buffers<T, I>(
-    buffer_table: &mut BufferTable<T>,
-    iterator: I,
-) -> BufferTableId
-where
-    T: Pod + ShaderSize,
-    I: Iterator<Item = T>,
-{
-    let mut first_buffer = None;
-    for (object_index, object) in iterator.enumerate() {
-        let buffer = buffer_table.insert(object);
-        match first_buffer {
-            None => first_buffer = Some(buffer),
-            Some(ref first_buffer) => {
-                if first_buffer.0 + object_index as u32 != buffer.0 {
-                    error!(
-                        "Allocator didn't allocate sequential indices (expected {:?}, got {:?}). \
-                        Expect trouble!",
-                        first_buffer.0 + object_index as u32,
-                        buffer.0
-                    );
-                }
-            }
-        }
-    }
-
-    first_buffer.expect("No buffers allocated")
-}
-
 impl From<LayoutFlags> for ParticleRenderAlphaMaskPipelineKey {
     fn from(layout_flags: LayoutFlags) -> Self {
         if layout_flags.contains(LayoutFlags::USE_ALPHA_MASK) {
diff --git a/src/render/property.rs b/src/render/property.rs
index ecb03d3..9d45ebd 100644
--- a/src/render/property.rs
+++ b/src/render/property.rs
@@ -1,4 +1,7 @@
-use std::{num::NonZeroU64, ops::Range};
+use std::{
+    num::{NonZeroU32, NonZeroU64},
+    ops::Range,
+};
 
 use bevy::{
     log::trace,
@@ -78,8 +81,33 @@ impl PropertyBuffer {
     #[allow(dead_code)]
     #[inline]
     pub fn free(&mut self, range: Range<u32>) -> BufferState {
-        if self.buffer.remove(range) && self.buffer.is_empty() {
-            BufferState::Free
+        let id = self
+            .buffer
+            .buffer()
+            .map(|buf| {
+                let id: NonZeroU32 = buf.id().into();
+                id.get()
+            })
+            .unwrap_or(u32::MAX);
+        let size = self.buffer.len();
+        if self.buffer.remove(range) {
+            if self.buffer.is_empty() {
+                BufferState::Free
+            } else if self.buffer.len() != size
+                || self
+                    .buffer
+                    .buffer()
+                    .map(|buf| {
+                        let id: NonZeroU32 = buf.id().into();
+                        id.get()
+                    })
+                    .unwrap_or(u32::MAX)
+                    != id
+            {
+                BufferState::Resized
+            } else {
+                BufferState::Used
+            }
         } else {
             BufferState::Used
         }
@@ -215,6 +243,7 @@ impl PropertyCache {
                     // FIXME - Currently PropertyBuffer::allocate() always succeeds and
                     // grows indefinitely
                     let range = buffer.allocate(property_layout);
+                    trace!("Allocate new slice in property buffer #{buffer_index} for layout {property_layout:?}: range={range:?}");
                     Some(CachedEffectProperties {
                         buffer_index: buffer_index as u32,
                         range,
@@ -265,19 +294,17 @@ impl PropertyCache {
             "Removing cached properties {:?} from cache.",
             cached_effect_properties
         );
-        let Some(buffer) = self
+        let entry = self
             .buffers
             .get_mut(cached_effect_properties.buffer_index as usize)
-        else {
-            return Err(CachedPropertiesError::InvalidBufferIndex(
+            .ok_or(CachedPropertiesError::InvalidBufferIndex(
                 cached_effect_properties.buffer_index,
-            ));
-        };
-        let Some(buffer) = buffer.as_mut() else {
-            return Err(CachedPropertiesError::BufferDeallocated(
+            ))?;
+        let buffer = entry
+            .as_mut()
+            .ok_or(CachedPropertiesError::BufferDeallocated(
                 cached_effect_properties.buffer_index,
-            ));
-        };
+            ))?;
         Ok(buffer.free(cached_effect_properties.range.clone()))
     }
 }
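
The reworked `PropertyBuffer::free()` above is the crux of the property-buffer half of this fix: instead of only reporting `Free` vs. `Used`, it now also reports `Resized` when the removal changed the underlying `wgpu` buffer's identity or length, which is the signal callers use to drop cached bind groups. The following sketch illustrates the intended consumption of that tri-state. It is illustrative only, not part of the patch: `BindGroupCache` and its methods are hypothetical stand-ins for the real `EffectBindGroups` bookkeeping, and `BufferState` is re-declared locally so the snippet compiles on its own.

// Self-contained sketch (not part of the patch) of how a caller is expected
// to react to the BufferState returned by PropertyBuffer::free().
#[derive(Debug)]
enum BufferState {
    /// Same GPU buffer, still holding other allocations; cached bind groups
    /// referencing it remain valid.
    Used,
    /// The buffer no longer holds any allocation; it can be deallocated,
    /// together with every bind group referencing it.
    Free,
    /// The underlying GPU buffer changed identity or size; any cached bind
    /// group referencing it is stale and must be re-created.
    Resized,
}

/// Hypothetical stand-in for the EffectBindGroups bookkeeping.
struct BindGroupCache;

impl BindGroupCache {
    fn invalidate(&mut self, buffer_index: u32) {
        println!("invalidate bind groups for property buffer #{buffer_index}");
    }

    fn deallocate(&mut self, buffer_index: u32) {
        println!("deallocate property buffer #{buffer_index}");
    }
}

/// React to a slice being freed from the property buffer at `buffer_index`.
fn on_properties_freed(state: BufferState, buffer_index: u32, cache: &mut BindGroupCache) {
    match state {
        // Nothing changed from the bind groups' point of view.
        BufferState::Used => {}
        // Reallocated: old bind groups point at the old wgpu::Buffer.
        BufferState::Resized => cache.invalidate(buffer_index),
        // Empty: drop the buffer and everything referencing it.
        BufferState::Free => {
            cache.invalidate(buffer_index);
            cache.deallocate(buffer_index);
        }
    }
}

fn main() {
    let mut cache = BindGroupCache;
    on_properties_freed(BufferState::Used, 0, &mut cache);
    on_properties_freed(BufferState::Resized, 0, &mut cache);
    on_properties_freed(BufferState::Free, 0, &mut cache);
}

The `Resized` arm is what closes the stale-reference hole described in the commit message: before this patch a reallocation was indistinguishable from `Used`, so cached bind groups silently kept pointing at the old buffer.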