From 23d34a52959bb76c210a472a5afa87a547c2fda0 Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Thu, 16 May 2024 13:28:34 -0400 Subject: [PATCH 01/24] Add engine support for draw calls Recording and WgpuEngine can now record and execute draw commands with a render pipeline. --- vello/src/recording.rs | 35 +++++ vello/src/shaders.rs | 2 +- vello/src/wgpu_engine.rs | 300 +++++++++++++++++++++++++++++++-------- 3 files changed, 275 insertions(+), 62 deletions(-) diff --git a/vello/src/recording.rs b/vello/src/recording.rs index e3c984006..ca9799a42 100644 --- a/vello/src/recording.rs +++ b/vello/src/recording.rs @@ -52,6 +52,11 @@ pub struct ImageProxy { #[derive(Clone, Copy)] pub enum ResourceProxy { Buffer(BufferProxy), + BufferRange { + proxy: BufferProxy, + offset: u64, + size: u64, + }, Image(ImageProxy), } @@ -69,6 +74,7 @@ pub enum Command { // Alternative: provide bufs & images as separate sequences Dispatch(ShaderId, (u32, u32, u32), Vec), DispatchIndirect(ShaderId, BufferProxy, u64, Vec), + Draw(DrawParams), Download(BufferProxy), /// Commands to clear the buffer from an offset on for a length of the given size. /// If the size is [None], it clears until the end. @@ -95,6 +101,16 @@ pub enum BindType { // TODO: Uniform, Sampler, maybe others } +pub struct DrawParams { + pub shader_id: ShaderId, + pub instance_count: u32, + pub vertex_count: u32, + pub vertex_buffer: Option, + pub resources: Vec, + pub target: ImageProxy, + pub clear_color: Option<[f32; 4]>, +} + impl Recording { /// Appends a [`Command`] to the back of the [`Recording`]. pub fn push(&mut self, cmd: Command) { @@ -167,6 +183,11 @@ impl Recording { self.push(Command::DispatchIndirect(shader, buf, offset, r)); } + /// Issue a draw call + pub fn draw(&mut self, params: DrawParams) { + self.push(Command::Draw(params)); + } + /// Prepare a buffer for downloading. /// /// Currently this copies to a download buffer. 
The original buffer can be freed @@ -194,6 +215,11 @@ impl Recording { pub fn free_resource(&mut self, resource: ResourceProxy) { match resource { ResourceProxy::Buffer(buf) => self.free_buffer(buf), + ResourceProxy::BufferRange { + proxy, + offset: _, + size: _, + } => self.free_buffer(proxy), ResourceProxy::Image(image) => self.free_image(image), } } @@ -220,6 +246,15 @@ impl ImageFormat { Self::Bgra8 => wgpu::TextureFormat::Bgra8Unorm, } } + + #[cfg(feature = "wgpu")] + pub fn from_wgpu(format: wgpu::TextureFormat) -> Self { + match format { + wgpu::TextureFormat::Rgba8Unorm => Self::Rgba8, + wgpu::TextureFormat::Bgra8Unorm => Self::Bgra8, + _ => unimplemented!(), + } + } } impl ImageProxy { diff --git a/vello/src/shaders.rs b/vello/src/shaders.rs index 41e13468e..bf34bad46 100644 --- a/vello/src/shaders.rs +++ b/vello/src/shaders.rs @@ -77,7 +77,7 @@ pub(crate) fn full_shaders( .into(); #[cfg(not(feature = "hot_reload"))] let source = shaders.$name.wgsl.code; - engine.add_shader( + engine.add_compute_shader( device, $label, source, diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 67cd92f56..7bcb0d8e0 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -11,7 +11,7 @@ use vello_shaders::cpu::CpuBinding; use wgpu::{ BindGroup, BindGroupLayout, Buffer, BufferUsages, CommandEncoder, CommandEncoderDescriptor, - ComputePipeline, Device, PipelineCompilationOptions, Queue, Texture, TextureAspect, + ComputePipeline, Device, PipelineCompilationOptions, Queue, RenderPipeline, Texture, TextureAspect, TextureUsages, TextureView, TextureViewDimension, }; @@ -43,8 +43,13 @@ pub(crate) struct WgpuEngine { pub(crate) image_overrides: HashMap>>, } +enum PipelineState { + Compute(ComputePipeline), + Render(RenderPipeline), +} + struct WgpuShader { - pipeline: ComputePipeline, + pipeline: PipelineState, bind_group_layout: BindGroupLayout, } @@ -235,7 +240,7 @@ impl WgpuEngine { /// /// Maybe should do template instantiation here? 
But shader compilation pipeline feels maybe /// a bit separate. - pub fn add_shader( + pub fn add_compute_shader( &mut self, device: &Device, label: &'static str, @@ -271,54 +276,9 @@ impl WgpuEngine { } } - let entries = layout - .iter() - .enumerate() - .map(|(i, bind_type)| match bind_type { - BindType::Buffer | BindType::BufReadOnly => wgpu::BindGroupLayoutEntry { - binding: i as u32, - visibility: wgpu::ShaderStages::COMPUTE, - ty: wgpu::BindingType::Buffer { - ty: wgpu::BufferBindingType::Storage { - read_only: *bind_type == BindType::BufReadOnly, - }, - has_dynamic_offset: false, - min_binding_size: None, - }, - count: None, - }, - BindType::Uniform => wgpu::BindGroupLayoutEntry { - binding: i as u32, - visibility: wgpu::ShaderStages::COMPUTE, - ty: wgpu::BindingType::Buffer { - ty: wgpu::BufferBindingType::Uniform, - has_dynamic_offset: false, - min_binding_size: None, - }, - count: None, - }, - BindType::Image(format) | BindType::ImageRead(format) => { - wgpu::BindGroupLayoutEntry { - binding: i as u32, - visibility: wgpu::ShaderStages::COMPUTE, - ty: if *bind_type == BindType::ImageRead(*format) { - wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - } - } else { - wgpu::BindingType::StorageTexture { - access: wgpu::StorageTextureAccess::WriteOnly, - format: format.to_wgpu(), - view_dimension: wgpu::TextureViewDimension::D2, - } - }, - count: None, - } - } - }) - .collect::>(); + let entries = Self::create_bind_group_layout_entries( + layout.iter().map(|b| (*b, wgpu::ShaderStages::COMPUTE)), + ); #[cfg(not(target_arch = "wasm32"))] if let Some(uninit) = self.shaders_to_initialise.as_mut() { let id = add(Shader { @@ -342,6 +302,72 @@ impl WgpuEngine { }) } + #[allow(clippy::too_many_arguments)] + pub fn add_render_shader( + &mut self, + device: &Device, + label: &'static str, + module: &wgpu::ShaderModule, + vertex_main: &'static str, + 
fragment_main: &'static str, + topology: wgpu::PrimitiveTopology, + color_attachment: wgpu::ColorTargetState, + vertex_buffer: Option, + bind_layout: &[(BindType, wgpu::ShaderStages)], + ) -> ShaderId { + let entries = Self::create_bind_group_layout_entries(bind_layout.iter().copied()); + let bind_group_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &entries, + }); + let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: Some(label), + layout: Some(&pipeline_layout), + vertex: wgpu::VertexState { + module, + entry_point: vertex_main, + buffers: vertex_buffer + .as_ref() + .map(core::slice::from_ref) + .unwrap_or_default(), + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + fragment: Some(wgpu::FragmentState { + module, + entry_point: fragment_main, + targets: &[Some(color_attachment)], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + primitive: wgpu::PrimitiveState { + topology, + strip_index_format: None, + front_face: wgpu::FrontFace::Ccw, + cull_mode: Some(wgpu::Face::Back), + polygon_mode: wgpu::PolygonMode::Fill, + unclipped_depth: false, + conservative: false, + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + multiview: None, + }); + let id = self.shaders.len(); + self.shaders.push(Shader { + wgpu: Some(WgpuShader { + pipeline: PipelineState::Render(pipeline), + bind_group_layout, + }), + cpu: None, + label, + }); + ShaderId(id) + } + pub fn run_recording( &mut self, device: &Device, @@ -365,8 +391,11 @@ impl WgpuEngine { transient_map .bufs .insert(buf_proxy.id, TransientBuf::Cpu(bytes)); - let usage = - BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE; + // TODO: restrict VERTEX usage to 
"debug_layers" feature? + let usage = BufferUsages::COPY_SRC + | BufferUsages::COPY_DST + | BufferUsages::STORAGE + | BufferUsages::VERTEX; let buf = self .pool .get_buf(buf_proxy.size, buf_proxy.name, usage, device); @@ -523,7 +552,10 @@ impl WgpuEngine { let query = profiler .begin_query(shader.label, &mut cpass, device) .with_parent(Some(&query)); - cpass.set_pipeline(&wgpu_shader.pipeline); + let PipelineState::Compute(pipeline) = &wgpu_shader.pipeline else { + panic!("cannot issue a dispatch with a render pipeline"); + }; + cpass.set_pipeline(pipeline); cpass.set_bind_group(0, &bind_group, &[]); cpass.dispatch_workgroups(x, y, z); #[cfg(feature = "wgpu-profiler")] @@ -570,7 +602,10 @@ impl WgpuEngine { let query = profiler .begin_query(shader.label, &mut cpass, device) .with_parent(Some(&query)); - cpass.set_pipeline(&wgpu_shader.pipeline); + let PipelineState::Compute(pipeline) = &wgpu_shader.pipeline else { + panic!("cannot issue a dispatch with a render pipeline"); + }; + cpass.set_pipeline(pipeline); cpass.set_bind_group(0, &bind_group, &[]); let buf = self.bind_map.get_gpu_buf(proxy.id).ok_or( Error::UnavailableBufferUsed(proxy.name, "indirect dispatch"), @@ -581,6 +616,59 @@ impl WgpuEngine { } } } + Command::Draw(draw_params) => { + let shader = &self.shaders[draw_params.shader_id.0]; + let ShaderKind::Wgpu(shader) = shader.select() else { + panic!("a render pass does not have a CPU equivalent"); + }; + let bind_group = transient_map.create_bind_group( + &mut self.bind_map, + &mut self.pool, + device, + queue, + &mut encoder, + &shader.bind_group_layout, + &draw_params.resources, + ); + let render_target = + transient_map.materialize_external_image_for_render_pass(&draw_params.target); + let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: render_target, + resolve_target: None, + ops: wgpu::Operations { + load: match draw_params.clear_color { + 
Some(c) => wgpu::LoadOp::Clear(wgpu::Color { + r: c[0] as f64, + g: c[1] as f64, + b: c[2] as f64, + a: c[3] as f64, + }), + None => wgpu::LoadOp::Load, + }, + store: wgpu::StoreOp::Store, + }, + })], + depth_stencil_attachment: None, + occlusion_query_set: None, + timestamp_writes: None, + }); + let PipelineState::Render(pipeline) = &shader.pipeline else { + panic!("cannot issue a draw with a compute pipeline"); + }; + rpass.set_pipeline(pipeline); + if let Some(proxy) = draw_params.vertex_buffer { + // TODO: need a way to materialize a CPU initialized buffer. For now assume + // buffer exists? Also, need to materialize this buffer with vertex usage + let buf = self.bind_map.get_gpu_buf(proxy.id).ok_or( + Error::UnavailableBufferUsed(proxy.name, "draw"), + )?; + rpass.set_vertex_buffer(0, buf.slice(..)); + } + rpass.set_bind_group(0, &bind_group, &[]); + rpass.draw(0..draw_params.vertex_count, 0..draw_params.instance_count); + } Command::Download(proxy) => { let src_buf = self .bind_map @@ -649,6 +737,58 @@ impl WgpuEngine { self.downloads.remove(&buf.id); } + fn create_bind_group_layout_entries( + layout: impl Iterator, + ) -> Vec { + layout + .enumerate() + .map(|(i, (bind_type, visibility))| match bind_type { + BindType::Buffer | BindType::BufReadOnly => wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { + read_only: bind_type == BindType::BufReadOnly, + }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + BindType::Uniform => wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Uniform, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }, + BindType::Image(format) | BindType::ImageRead(format) => { + wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility, + ty: if bind_type == BindType::ImageRead(format) { + 
wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + } + } else { + wgpu::BindingType::StorageTexture { + access: wgpu::StorageTextureAccess::WriteOnly, + format: format.to_wgpu(), + view_dimension: wgpu::TextureViewDimension::D2, + } + }, + count: None, + } + } + }) + .collect::>() + } + fn create_compute_pipeline( device: &Device, label: &str, @@ -682,7 +822,7 @@ impl WgpuEngine { cache: None, }); WgpuShader { - pipeline, + pipeline: PipelineState::Compute(pipeline), bind_group_layout, } } @@ -879,6 +1019,14 @@ impl<'a> TransientBindMap<'a> { } } + fn materialize_external_image_for_render_pass(&mut self, proxy: &ImageProxy) -> &TextureView { + // TODO: Maybe this should support instantiating a transient texture. Right now all render + // passes target a `SurfaceTexture`, so supporting external textures is sufficient. + self.images + .get(&proxy.id) + .expect("texture not materialized") + } + #[allow(clippy::too_many_arguments)] fn create_bind_group( &mut self, @@ -892,17 +1040,23 @@ impl<'a> TransientBindMap<'a> { ) -> BindGroup { for proxy in bindings { match proxy { - ResourceProxy::Buffer(proxy) => { + ResourceProxy::Buffer(proxy) + | ResourceProxy::BufferRange { + proxy, + offset: _, + size: _, + } => { if self.bufs.contains_key(&proxy.id) { continue; } match bind_map.buf_map.entry(proxy.id) { Entry::Vacant(v) => { - // TODO: only some buffers will need indirect, but does it hurt? + // TODO: only some buffers will need indirect & vertex, but does it hurt? 
let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE - | BufferUsages::INDIRECT; + | BufferUsages::INDIRECT + | BufferUsages::VERTEX; let buf = pool.get_buf(proxy.size, proxy.name, usage, device); if bind_map.pending_clears.remove(&proxy.id) { encoder.clear_buffer(&buf, 0, None); @@ -966,6 +1120,24 @@ impl<'a> TransientBindMap<'a> { resource: buf.as_entire_binding(), } } + ResourceProxy::BufferRange { + proxy, + offset, + size, + } => { + let buf = match self.bufs.get(&proxy.id) { + Some(TransientBuf::Gpu(b)) => b, + _ => bind_map.get_gpu_buf(proxy.id).unwrap(), + }; + wgpu::BindGroupEntry { + binding: i as u32, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: buf, + offset: *offset, + size: core::num::NonZeroU64::new(*size), + }), + } + } ResourceProxy::Image(proxy) => { let view = self .images @@ -995,10 +1167,15 @@ impl<'a> TransientBindMap<'a> { // First pass is mutable; create buffers as needed for resource in bindings { match resource { - ResourceProxy::Buffer(buf) => match self.bufs.get(&buf.id) { + ResourceProxy::Buffer(proxy) + | ResourceProxy::BufferRange { + proxy, + offset: _, + size: _, + } => match self.bufs.get(&proxy.id) { Some(TransientBuf::Cpu(_)) => (), Some(TransientBuf::Gpu(_)) => panic!("buffer was already materialized on GPU"), - _ => bind_map.materialize_cpu_buf(buf), + _ => bind_map.materialize_cpu_buf(proxy), }, ResourceProxy::Image(_) => todo!(), }; @@ -1011,6 +1188,7 @@ impl<'a> TransientBindMap<'a> { Some(TransientBuf::Cpu(b)) => CpuBinding::Buffer(b), _ => bind_map.get_cpu_buf(buf.id), }, + ResourceProxy::BufferRange { .. } => todo!(), ResourceProxy::Image(_) => todo!(), }) .collect() From 90c18f04220c32f3e872cd67f4ea81316dfb17c4 Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Sat, 9 Dec 2023 02:01:05 -0800 Subject: [PATCH 02/24] Use a Recording for the blit render pipeline The blit pipeline now uses the render pass feature in Recording instead of making wgpu calls directly. 
--- vello/src/lib.rs | 236 +++++++++++++++------------------------ vello/src/recording.rs | 15 +-- vello/src/wgpu_engine.rs | 11 +- 3 files changed, 109 insertions(+), 153 deletions(-) diff --git a/vello/src/lib.rs b/vello/src/lib.rs index a8e22f53d..2cef9c7cc 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -127,7 +127,7 @@ use wgpu_engine::{ExternalResource, WgpuEngine}; /// Temporary export, used in `with_winit` for stats pub use vello_encoding::BumpAllocators; #[cfg(feature = "wgpu")] -use wgpu::{Device, PipelineCompilationOptions, Queue, SurfaceTexture, TextureFormat, TextureView}; +use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView}; #[cfg(all(feature = "wgpu", feature = "wgpu-profiler"))] use wgpu_profiler::{GpuProfiler, GpuProfilerSettings}; @@ -311,7 +311,7 @@ impl Renderer { engine.build_shaders_if_needed(device, options.num_init_threads); let blit = options .surface_format - .map(|surface_format| BlitPipeline::new(device, surface_format)); + .map(|surface_format| BlitPipeline::new(device, surface_format, &mut engine)); Ok(Self { options, @@ -407,45 +407,39 @@ impl Renderer { .blit .as_ref() .expect("renderer should have configured surface_format to use on a surface"); - let mut encoder = - device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - { - let surface_view = surface - .texture - .create_view(&wgpu::TextureViewDescriptor::default()); - let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: None, - layout: &blit.bind_layout, - entries: &[wgpu::BindGroupEntry { - binding: 0, - resource: wgpu::BindingResource::TextureView(&target.view), - }], - }); - let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { - label: None, - color_attachments: &[Some(wgpu::RenderPassColorAttachment { - view: &surface_view, - resolve_target: None, - ops: wgpu::Operations { - load: wgpu::LoadOp::Clear(wgpu::Color::default()), - store: wgpu::StoreOp::Store, - }, - })], - 
depth_stencil_attachment: None, - occlusion_query_set: None, - timestamp_writes: None, - }); + let mut recording = Recording::default(); + let target_proxy = ImageProxy::new(width, height, ImageFormat::from_wgpu(target.format)); + let surface_proxy = ImageProxy::new( + width, + height, + ImageFormat::from_wgpu(surface.texture.format()), + ); + recording.draw(recording::DrawParams { + shader_id: blit.0, + instance_count: 1, + vertex_count: 6, + vertex_buffer: None, + resources: vec![ResourceProxy::Image(target_proxy)], + target: surface_proxy, + clear_color: Some([0., 0., 0., 0.]), + }); + + let surface_view = surface + .texture + .create_view(&wgpu::TextureViewDescriptor::default()); + let external_resources = [ + ExternalResource::Image(target_proxy, &target.view), + ExternalResource::Image(surface_proxy, &surface_view), + ]; + self.engine.run_recording( + device, + queue, + &recording, + &external_resources, + "blit (render_to_surface_async)", #[cfg(feature = "wgpu-profiler")] - let mut render_pass = self - .profiler - .scope("blit to surface", &mut render_pass, device); - render_pass.set_pipeline(&blit.pipeline); - render_pass.set_bind_group(0, &bind_group, &[]); - render_pass.draw(0..6, 0..1); - } - #[cfg(feature = "wgpu-profiler")] - self.profiler.resolve_queries(&mut encoder); - queue.submit(Some(encoder.finish())); + &mut self.profiler, + )?; self.target = Some(target); #[cfg(feature = "wgpu-profiler")] { @@ -573,45 +567,40 @@ impl Renderer { .blit .as_ref() .expect("renderer should have configured surface_format to use on a surface"); - let mut encoder = - device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); - { - let surface_view = surface - .texture - .create_view(&wgpu::TextureViewDescriptor::default()); - let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { - label: None, - layout: &blit.bind_layout, - entries: &[wgpu::BindGroupEntry { - binding: 0, - resource: 
wgpu::BindingResource::TextureView(&target.view), - }], - }); - let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { - label: None, - color_attachments: &[Some(wgpu::RenderPassColorAttachment { - view: &surface_view, - resolve_target: None, - ops: wgpu::Operations { - load: wgpu::LoadOp::Clear(wgpu::Color::default()), - store: wgpu::StoreOp::Store, - }, - })], - depth_stencil_attachment: None, - timestamp_writes: None, - occlusion_query_set: None, - }); + let mut recording = Recording::default(); + let target_proxy = ImageProxy::new(width, height, ImageFormat::from_wgpu(target.format)); + let surface_proxy = ImageProxy::new( + width, + height, + ImageFormat::from_wgpu(surface.texture.format()), + ); + recording.draw(recording::DrawParams { + shader_id: blit.0, + instance_count: 1, + vertex_count: 6, + vertex_buffer: None, + resources: vec![ResourceProxy::Image(target_proxy)], + target: surface_proxy, + clear_color: Some([0., 0., 0., 0.]), + }); + + let surface_view = surface + .texture + .create_view(&wgpu::TextureViewDescriptor::default()); + let external_resources = [ + ExternalResource::Image(target_proxy, &target.view), + ExternalResource::Image(surface_proxy, &surface_view), + ]; + self.engine.run_recording( + device, + queue, + &recording, + &external_resources, + "blit (render_to_surface_async)", #[cfg(feature = "wgpu-profiler")] - let mut render_pass = self - .profiler - .scope("blit to surface", &mut render_pass, device); - render_pass.set_pipeline(&blit.pipeline); - render_pass.set_bind_group(0, &bind_group, &[]); - render_pass.draw(0..6, 0..1); - } - #[cfg(feature = "wgpu-profiler")] - self.profiler.resolve_queries(&mut encoder); - queue.submit(Some(encoder.finish())); + &mut self.profiler, + )?; + self.target = Some(target); #[cfg(feature = "wgpu-profiler")] { @@ -632,11 +621,13 @@ struct TargetTexture { view: TextureView, width: u32, height: u32, + format: wgpu::TextureFormat, } #[cfg(feature = "wgpu")] impl TargetTexture { fn 
new(device: &Device, width: u32, height: u32) -> Self { + let format = wgpu::TextureFormat::Rgba8Unorm; let texture = device.create_texture(&wgpu::TextureDescriptor { label: None, size: wgpu::Extent3d { @@ -648,7 +639,7 @@ impl TargetTexture { sample_count: 1, dimension: wgpu::TextureDimension::D2, usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING, - format: wgpu::TextureFormat::Rgba8Unorm, + format, view_formats: &[], }); let view = texture.create_view(&wgpu::TextureViewDescriptor::default()); @@ -656,19 +647,17 @@ impl TargetTexture { view, width, height, + format, } } } #[cfg(feature = "wgpu")] -struct BlitPipeline { - bind_layout: wgpu::BindGroupLayout, - pipeline: wgpu::RenderPipeline, -} +struct BlitPipeline(ShaderId); #[cfg(feature = "wgpu")] impl BlitPipeline { - fn new(device: &Device, format: TextureFormat) -> Self { + fn new(device: &Device, format: TextureFormat, engine: &mut WgpuEngine) -> Self { const SHADERS: &str = r#" @vertex fn vs_main(@builtin(vertex_index) ix: u32) -> @builtin(position) vec4 { @@ -698,69 +687,28 @@ impl BlitPipeline { return vec4(rgba_sep.rgb * rgba_sep.a, rgba_sep.a); } "#; - - let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + let module = device.create_shader_module(wgpu::ShaderModuleDescriptor { label: Some("blit shaders"), source: wgpu::ShaderSource::Wgsl(SHADERS.into()), }); - let bind_layout = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { - label: None, - entries: &[wgpu::BindGroupLayoutEntry { - visibility: wgpu::ShaderStages::FRAGMENT, - binding: 0, - ty: wgpu::BindingType::Texture { - sample_type: wgpu::TextureSampleType::Float { filterable: true }, - view_dimension: wgpu::TextureViewDimension::D2, - multisampled: false, - }, - count: None, - }], - }); - let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { - label: None, - bind_group_layouts: &[&bind_layout], - push_constant_ranges: &[], - }); - let pipeline = 
device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { - label: None, - layout: Some(&pipeline_layout), - vertex: wgpu::VertexState { - module: &shader, - entry_point: "vs_main", - compilation_options: PipelineCompilationOptions::default(), - buffers: &[], - }, - fragment: Some(wgpu::FragmentState { - module: &shader, - entry_point: "fs_main", - compilation_options: PipelineCompilationOptions::default(), - targets: &[Some(wgpu::ColorTargetState { - format, - blend: None, - write_mask: wgpu::ColorWrites::ALL, - })], - }), - primitive: wgpu::PrimitiveState { - topology: wgpu::PrimitiveTopology::TriangleList, - strip_index_format: None, - front_face: wgpu::FrontFace::Ccw, - cull_mode: Some(wgpu::Face::Back), - polygon_mode: wgpu::PolygonMode::Fill, - unclipped_depth: false, - conservative: false, - }, - depth_stencil: None, - multisample: wgpu::MultisampleState { - count: 1, - mask: !0, - alpha_to_coverage_enabled: false, + let shader_id = engine.add_render_shader( + device, + "blit", + &module, + "vs_main", + "fs_main", + wgpu::PrimitiveTopology::TriangleList, + wgpu::ColorTargetState { + format, + blend: None, + write_mask: wgpu::ColorWrites::ALL, }, - multiview: None, - cache: None, - }); - Self { - bind_layout, - pipeline, - } + None, + &[( + BindType::ImageRead(ImageFormat::from_wgpu(format)), + wgpu::ShaderStages::FRAGMENT, + )], + ); + Self(shader_id) } } diff --git a/vello/src/recording.rs b/vello/src/recording.rs index ca9799a42..6ca9a2f01 100644 --- a/vello/src/recording.rs +++ b/vello/src/recording.rs @@ -69,12 +69,6 @@ pub enum Command { /// Commands the data to be uploaded to the given image. UploadImage(ImageProxy, Vec), WriteImage(ImageProxy, [u32; 2], Image), - // Discussion question: third argument is vec of resources? - // Maybe use tricks to make more ergonomic? 
- // Alternative: provide bufs & images as separate sequences - Dispatch(ShaderId, (u32, u32, u32), Vec), - DispatchIndirect(ShaderId, BufferProxy, u64, Vec), - Draw(DrawParams), Download(BufferProxy), /// Commands to clear the buffer from an offset on for a length of the given size. /// If the size is [None], it clears until the end. @@ -83,8 +77,15 @@ pub enum Command { FreeBuffer(BufferProxy), /// Commands to free the image. FreeImage(ImageProxy), + // Discussion question: third argument is vec of resources? + // Maybe use tricks to make more ergonomic? + // Alternative: provide bufs & images as separate sequences + Dispatch(ShaderId, (u32, u32, u32), Vec), + DispatchIndirect(ShaderId, BufferProxy, u64, Vec), + Draw(DrawParams), } +#[cfg(feature = "wgpu")] /// The type of resource that will be bound to a slot in a shader. #[derive(Clone, Copy, PartialEq, Eq)] pub enum BindType { @@ -184,7 +185,7 @@ impl Recording { } /// Issue a draw call - pub fn draw(&mut self, params: DrawParams) { + pub fn draw(&mut self, params: DrawParams) { self.push(Command::Draw(params)); } diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 7bcb0d8e0..76134f481 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -335,13 +335,13 @@ impl WgpuEngine { .as_ref() .map(core::slice::from_ref) .unwrap_or_default(), - compilation_options: wgpu::PipelineCompilationOptions::default(), + compilation_options: PipelineCompilationOptions::default(), }, fragment: Some(wgpu::FragmentState { module, entry_point: fragment_main, targets: &[Some(color_attachment)], - compilation_options: wgpu::PipelineCompilationOptions::default(), + compilation_options: PipelineCompilationOptions::default(), }), primitive: wgpu::PrimitiveState { topology, @@ -618,6 +618,7 @@ impl WgpuEngine { } Command::Draw(draw_params) => { let shader = &self.shaders[draw_params.shader_id.0]; + let label = shader.label; let ShaderKind::Wgpu(shader) = shader.select() else { panic!("a render pass 
does not have a CPU equivalent"); }; @@ -654,6 +655,10 @@ impl WgpuEngine { occlusion_query_set: None, timestamp_writes: None, }); + #[cfg(feature = "wgpu-profiler")] + let query = profiler + .begin_query(label, &mut rpass, device) + .with_parent(Some(&query)); let PipelineState::Render(pipeline) = &shader.pipeline else { panic!("cannot issue a draw with a compute pipeline"); }; @@ -668,6 +673,8 @@ impl WgpuEngine { } rpass.set_bind_group(0, &bind_group, &[]); rpass.draw(0..draw_params.vertex_count, 0..draw_params.instance_count); + #[cfg(feature = "wgpu-profiler")] + profiler.end_query(&mut rpass, query); } Command::Download(proxy) => { let src_buf = self From 15ca4cb4d7f86bf5cdbaa998c7abc9b4903c93bd Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Sat, 9 Dec 2023 02:11:10 -0800 Subject: [PATCH 03/24] debug_layers feature This introduces a new debug module and DebugLayers data structure that can render debug visualizations of GPU buffers that are internal to the vello pipeline. This currently supports a line-based visualization of the LineSoup and PathBbox buffers. The debug visualization depends on CPU read-back of the BumpAllocators buffer to issue a draw call. This could technically be avoided with an indirect draw but the visualizations will eventually include CPU-side processing and validation. The draws are recorded to the same Recording as the blit in `render_to_texture_async`. The buffers that are used by the draw commands are temporarily retained outside of the `Render` data structure. These buffers are currently released back to the engine explicitly and in various places in code since safe resource removal currently requires a Recording. 
--- examples/with_winit/Cargo.toml | 2 + vello/Cargo.toml | 1 + vello/src/debug.rs | 195 +++++++++++++++++++++++++++++++++ vello/src/lib.rs | 94 ++++++++++++++-- vello/src/render.rs | 65 ++++++++++- 5 files changed, 348 insertions(+), 9 deletions(-) create mode 100644 vello/src/debug.rs diff --git a/examples/with_winit/Cargo.toml b/examples/with_winit/Cargo.toml index 1d508d4e8..8bbf30fc7 100644 --- a/examples/with_winit/Cargo.toml +++ b/examples/with_winit/Cargo.toml @@ -16,6 +16,7 @@ default = ["wgpu-profiler"] # Enable the use of wgpu-profiler. This is an optional feature for times when we use a git dependency on # wgpu (which means the dependency used in wgpu-profiler would be incompatible) wgpu-profiler = ["dep:wgpu-profiler", "vello/wgpu-profiler"] +debug_layers = ["vello/debug_layers"] [lints] workspace = true @@ -25,6 +26,7 @@ workspace = true name = "with_winit_bin" path = "src/main.rs" + [dependencies] vello = { workspace = true, features = ["buffer_labels"] } scenes = { workspace = true } diff --git a/vello/Cargo.toml b/vello/Cargo.toml index 1fd55c6b3..83ba98d55 100644 --- a/vello/Cargo.toml +++ b/vello/Cargo.toml @@ -18,6 +18,7 @@ default = ["wgpu"] bump_estimate = ["vello_encoding/bump_estimate"] hot_reload = ["vello_shaders/compile"] buffer_labels = [] +debug_layers = [] wgpu = ["dep:wgpu"] wgpu-profiler = ["dep:wgpu-profiler"] diff --git a/vello/src/debug.rs b/vello/src/debug.rs new file mode 100644 index 000000000..44319596f --- /dev/null +++ b/vello/src/debug.rs @@ -0,0 +1,195 @@ +// Copyright 2023 The Vello authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::{ + recording::{BindType, DrawParams, ImageProxy, Recording, ResourceProxy, ShaderId}, + render::CapturedBuffers, + wgpu_engine::WgpuEngine, + RenderParams, +}; +use { + bytemuck::{offset_of, Pod, Zeroable}, + vello_encoding::{BumpAllocators, LineSoup, PathBbox}, +}; + +pub(crate) struct DebugLayers { + bboxes: ShaderId, + linesoup: ShaderId, +} + +impl DebugLayers { + pub fn 
new( + device: &wgpu::Device, + target_format: wgpu::TextureFormat, + engine: &mut WgpuEngine, + ) -> Self { + let module = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("debug layers"), + source: wgpu::ShaderSource::Wgsl(SHADERS.into()), + }); + let bboxes = engine.add_render_shader( + device, + "bbox-debug", + &module, + "bbox_vert", + "fs_main", + wgpu::PrimitiveTopology::LineStrip, + wgpu::ColorTargetState { + format: target_format, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the PathBbox structure. + Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[ + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Sint32x2, + offset: offset_of!(PathBbox, x0) as u64, + shader_location: 0, + }, + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Sint32x2, + offset: offset_of!(PathBbox, x1) as u64, + shader_location: 1, + }, + ], + }), + &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], + ); + let linesoup = engine.add_render_shader( + device, + "linesoup-debug", + &module, + "linesoup_vert", + "fs_main", + wgpu::PrimitiveTopology::LineList, + wgpu::ColorTargetState { + format: target_format, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the LineSoup structure. 
+ Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[ + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineSoup, p0) as u64, + shader_location: 0, + }, + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineSoup, p1) as u64, + shader_location: 1, + }, + ], + }), + &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], + ); + + Self { bboxes, linesoup } + } + + pub fn render( + &self, + recording: &mut Recording, + target: ImageProxy, + captured: &CapturedBuffers, + bump: Option<&BumpAllocators>, + params: &RenderParams, + ) { + let uniforms = Uniforms { + width: params.width, + height: params.height, + }; + let uniforms_buf = + ResourceProxy::Buffer(recording.upload_uniform("uniforms", bytemuck::bytes_of(&uniforms))); + recording.draw(DrawParams { + shader_id: self.bboxes, + instance_count: captured.sizes.path_bboxes.len(), + vertex_count: 5, + vertex_buffer: Some(captured.path_bboxes), + resources: vec![uniforms_buf], + target, + clear_color: None, + }); + recording.draw(DrawParams { + shader_id: self.linesoup, + instance_count: bump.unwrap().lines, + vertex_count: 2, + vertex_buffer: Some(captured.lines), + resources: vec![uniforms_buf], + target, + clear_color: None, + }); + + recording.free_resource(uniforms_buf); + } +} + +#[derive(Copy, Clone, Zeroable, Pod)] +#[repr(C)] +struct Uniforms { + width: u32, + height: u32, +} + +const SHADERS: &str = r#" + +alias QuadVertices = array; +var quad_vertices: QuadVertices = QuadVertices( + vec2(0., 1.), + vec2(0., 0.), + vec2(1., 0.), + vec2(1., 1.), +); + +struct Uniforms { + width: u32, + height: u32, +} +@binding(0) @group(0) var uniforms: Uniforms; + +struct VSOut { + @builtin(position) pos: vec4f, + @location(0) color: vec3f, +} + +struct BboxIn { + @location(0) p0: vec2i, + @location(1) p1: vec2i, +} + +@vertex +fn bbox_vert(@builtin(vertex_index) vid: 
u32, bbox: BboxIn) -> VSOut { + let ul = vec2f(f32(bbox.p0.x), f32(bbox.p0.y)); + let br = vec2f(f32(bbox.p1.x), f32(bbox.p1.y)); + let dim = br - ul; + let p = (ul + dim * quad_vertices[vid % 4u]) / vec2f(f32(uniforms.width), f32(uniforms.height)); + + // Map from y-down viewport coordinates to NDC: + return VSOut(vec4(vec2(1., -1.) * (2. * p - vec2f(1.f)), 0., 1.), vec3f(0., 1., 0.));; +} + +struct LinesoupIn { + @location(0) p0: vec2f, + @location(1) p1: vec2f, +} + +@vertex +fn linesoup_vert(@builtin(vertex_index) vid: u32, line: LinesoupIn) -> VSOut { + let p = select(line.p0, line.p1, vid == 1u) / vec2f(f32(uniforms.width), f32(uniforms.height)); + + // Map from y-down viewport coordinates to NDC: + return VSOut(vec4(vec2(1., -1.) * (2. * p - vec2f(1.f)), 0., 1.), vec3f(1., 1., 0.)); +} + +@fragment +fn fs_main(in: VSOut) -> @location(0) vec4f { + return vec4(in.color, 1.); +} + +"#; diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 2cef9c7cc..d72ce7519 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -81,6 +81,8 @@ //! //! See the [`examples/`](https://github.com/linebender/vello/tree/main/examples) folder to see how that code integrates with frameworks like winit. +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] +mod debug; mod recording; mod render; mod scene; @@ -241,6 +243,8 @@ pub struct Renderer { resolver: Resolver, shaders: FullShaders, blit: Option, + #[cfg(feature = "debug_layers")] + debug: Option, target: Option, #[cfg(feature = "wgpu-profiler")] pub profiler: GpuProfiler, @@ -296,6 +300,12 @@ pub struct RendererOptions { pub num_init_threads: Option, } +struct RenderResult { + bump: Option, + #[cfg(feature = "debug_layers")] + captured: Option, +} + #[cfg(feature = "wgpu")] impl Renderer { /// Creates a new renderer for the specified device. 
@@ -312,6 +322,10 @@ impl Renderer { let blit = options .surface_format .map(|surface_format| BlitPipeline::new(device, surface_format, &mut engine)); + #[cfg(feature = "debug_layers")] + let debug = options + .surface_format + .map(|surface_format| debug::DebugLayers::new(device, surface_format, &mut engine)); Ok(Self { options, @@ -319,6 +333,8 @@ impl Renderer { resolver: Resolver::new(), shaders, blit, + #[cfg(feature = "debug_layers")] + debug, target: None, // Use 3 pending frames #[cfg(feature = "wgpu-profiler")] @@ -489,10 +505,45 @@ impl Renderer { texture: &TextureView, params: &RenderParams, ) -> Result> { + let result = self + .render_to_texture_async_internal(device, queue, scene, texture, params) + .await?; + #[cfg(feature = "debug_layers")] + { + // TODO: it would be much better to have a way to safely destroy a buffer. + if let Some(captured) = result.captured { + let mut recording = Recording::default(); + // TODO: this sucks. better to release everything in a helper + self.engine.free_download(captured.lines); + captured.release_buffers(&mut recording); + self.engine.run_recording( + device, + queue, + &recording, + &[], + "free memory", + #[cfg(feature = "wgpu-profiler")] + &mut self.profiler, + )?; + } + } + Ok(result.bump) + } + + async fn render_to_texture_async_internal( + &mut self, + device: &Device, + queue: &Queue, + scene: &Scene, + texture: &TextureView, + params: &RenderParams, + ) -> Result { let mut render = Render::new(); let encoding = scene.encoding(); - // TODO: turn this on; the download feature interacts with CPU dispatch - let robust = false; + // TODO: turn this on; the download feature interacts with CPU dispatch. + // Currently this is always enabled when the `debug_layers` setting is enabled as the bump + // counts are used for debug visualiation. 
+ let robust = cfg!(feature = "debug_layers"); let recording = render.render_encoding_coarse( encoding, &mut self.resolver, @@ -502,6 +553,8 @@ impl Renderer { ); let target = render.out_image(); let bump_buf = render.bump_buf(); + #[cfg(feature = "debug_layers")] + let captured = render.take_captured_buffers(); self.engine.run_recording( device, queue, @@ -537,7 +590,11 @@ impl Renderer { #[cfg(feature = "wgpu-profiler")] &mut self.profiler, )?; - Ok(bump) + Ok(RenderResult { + bump, + #[cfg(feature = "debug_layers")] + captured, + }) } /// See [`Self::render_to_surface`] @@ -560,8 +617,8 @@ impl Renderer { if target.width != width || target.height != height { target = TargetTexture::new(device, width, height); } - let bump = self - .render_to_texture_async(device, queue, scene, &target.view, params) + let result = self + .render_to_texture_async_internal(device, queue, scene, &target.view, params) .await?; let blit = self .blit @@ -584,6 +641,28 @@ impl Renderer { clear_color: Some([0., 0., 0., 0.]), }); + #[cfg(feature = "debug_layers")] + { + if let Some(captured) = result.captured { + let debug = self + .debug + .as_ref() + .expect("renderer should have configured surface_format to use on a surface"); + debug.render( + &mut recording, + surface_proxy, + &captured, + result.bump.as_ref(), + &params, + ); + + // TODO: this sucks. better to release everything in a helper + // TODO: it would be much better to have a way to safely destroy a buffer. 
+ self.engine.free_download(captured.lines); + captured.release_buffers(&mut recording); + } + } + let surface_view = surface .texture .create_view(&wgpu::TextureViewDescriptor::default()); @@ -601,7 +680,6 @@ impl Renderer { &mut self.profiler, )?; - self.target = Some(target); #[cfg(feature = "wgpu-profiler")] { self.profiler.end_frame().unwrap(); @@ -612,7 +690,9 @@ impl Renderer { self.profile_result = Some(result); } } - Ok(bump) + + self.target = Some(target); + Ok(result.bump) } } diff --git a/vello/src/render.rs b/vello/src/render.rs index 8a81d9843..e65a7ec51 100644 --- a/vello/src/render.rs +++ b/vello/src/render.rs @@ -19,6 +19,9 @@ pub struct Render { fine_wg_count: Option, fine_resources: Option, mask_buf: Option, + + #[cfg(feature = "debug_layers")] + captured_buffers: Option, } /// Resources produced by pipeline, needed for fine rasterization. @@ -37,6 +40,31 @@ struct FineResources { out_image: ImageProxy, } +/// A collection of internal buffers that are used for debug visualization when the +/// `debug_layers` feature is enabled. The contents of these buffers remain GPU resident +/// and must be freed directly by the caller. +/// +/// Some of these buffers are also scheduled for a download to allow their contents to be +/// processed for CPU-side validation. These buffers are documented as such. 
+#[cfg(feature = "debug_layers")] +pub struct CapturedBuffers { + pub sizes: vello_encoding::BufferSizes, + + /// Buffers that remain GPU-only + pub path_bboxes: BufferProxy, + + /// Downloaded buffers for validation + pub lines: BufferProxy, +} + +#[cfg(feature = "debug_layers")] +impl CapturedBuffers { + pub fn release_buffers(self, recording: &mut Recording) { + recording.free_buffer(self.path_bboxes); + recording.free_buffer(self.lines); + } +} + #[cfg(feature = "wgpu")] pub(crate) fn render_full( scene: &Scene, @@ -77,6 +105,8 @@ impl Render { fine_wg_count: None, fine_resources: None, mask_buf: None, + #[cfg(feature = "debug_layers")] + captured_buffers: None, } } @@ -95,6 +125,15 @@ impl Render { use vello_encoding::RenderConfig; let mut recording = Recording::default(); let mut packed = vec![]; + + #[cfg(feature = "debug_layers")] + { + let captured = self.captured_buffers.take(); + if let Some(buffers) = captured { + buffers.release_buffers(&mut recording); + } + } + let (layout, ramps, images) = resolver.resolve(encoding, &mut packed); let gradient_image = if ramps.height == 0 { ResourceProxy::new_image(1, 1, ImageFormat::Rgba8) @@ -310,7 +349,6 @@ impl Render { ], ); recording.free_resource(draw_monoid_buf); - recording.free_resource(path_bbox_buf); recording.free_resource(clip_bbox_buf); // Note: this only needs to be rounded up because of the workaround to store the tile_offset // in storage rather than workgroup memory. 
@@ -396,7 +434,6 @@ impl Render { ); recording.free_buffer(indirect_count_buf); recording.free_resource(seg_counts_buf); - recording.free_resource(lines_buf); recording.free_resource(scene_buf); recording.free_resource(draw_monoid_buf); recording.free_resource(bin_header_buf); @@ -419,6 +456,25 @@ impl Render { recording.download(*bump_buf.as_buf().unwrap()); } recording.free_resource(bump_buf); + + #[cfg(feature = "debug_layers")] + { + let path_bboxes = *path_bbox_buf.as_buf().unwrap(); + let lines = *lines_buf.as_buf().unwrap(); + // TODO: recording.download(lines); + + self.captured_buffers = Some(CapturedBuffers { + sizes: cpu_config.buffer_sizes, + path_bboxes, + lines, + }); + } + #[cfg(not(feature = "debug_layers"))] + { + recording.free_resource(path_bbox_buf); + recording.free_resource(lines_buf); + } + recording } @@ -509,4 +565,9 @@ impl Render { .as_buf() .unwrap() } + + #[cfg(feature = "debug_layers")] + pub fn take_captured_buffers(&mut self) -> Option { + self.captured_buffers.take() + } } From ce5abc9064783b907f679933f41c2032d14e5c45 Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Tue, 12 Dec 2023 13:21:28 -0800 Subject: [PATCH 04/24] [vello][engine] Buffer offset binding Added the `ResourceProxy::BufferRange` type, which represents a buffer binding with an offset. 
--- vello/src/wgpu_engine.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 76134f481..e8f25b599 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -1145,6 +1145,24 @@ impl<'a> TransientBindMap<'a> { }), } } + ResourceProxy::BufRange { + proxy, + offset, + size, + } => { + let buf = match self.bufs.get(&proxy.id) { + Some(TransientBuf::Gpu(b)) => b, + _ => bind_map.get_gpu_buf(proxy.id).unwrap(), + }; + Ok(wgpu::BindGroupEntry { + binding: i as u32, + resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { + buffer: buf, + offset: *offset, + size: core::num::NonZeroU64::new(*size), + }), + }) + } ResourceProxy::Image(proxy) => { let view = self .images From 4f942093ba523c13c95d5e17d8fd52157d80a941 Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Tue, 12 Dec 2023 13:52:51 -0800 Subject: [PATCH 05/24] Fix hot_reload when debug layers are enabled Both blit and debug pipelines need to be recompiled when the engine gets recreated. 
--- vello/src/lib.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vello/src/lib.rs b/vello/src/lib.rs index d72ce7519..435b5df71 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -477,12 +477,26 @@ impl Renderer { let mut engine = WgpuEngine::new(self.options.use_cpu); // We choose not to initialise these shaders in parallel, to ensure the error scope works correctly let shaders = shaders::full_shaders(device, &mut engine, &self.options)?; + let blit = self + .options + .surface_format + .map(|surface_format| BlitPipeline::new(device, surface_format, &mut engine)); + #[cfg(feature = "debug_layers")] + let debug = self + .options + .surface_format + .map(|format| debug::DebugLayers::new(device, format, &mut engine)); let error = device.pop_error_scope().await; if let Some(error) = error { return Err(error.into()); } self.engine = engine; self.shaders = shaders; + self.blit = blit; + #[cfg(feature = "debug_layers")] + { + self.debug = debug; + } Ok(()) } From bfebc055540f5381d98837915160a30cd8160b1d Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Tue, 12 Dec 2023 13:59:53 -0800 Subject: [PATCH 06/24] [debug_layers] Validation, LineSoup visualization, and UI toggles - Introduce the VALIDATION layer which runs a watertightness test on LineSoup buffer contents. - Add debug visualization layers for LineSoup endpoints and validation test errors. - Add DebugLayers options to toggle individual layers. - Add with_winit key bindings to toggle individual layers. 
--- examples/with_winit/src/lib.rs | 26 +++ vello/src/debug.rs | 413 ++++++++++++++++++++++++++++++--- vello/src/debug/validate.rs | 62 +++++ vello/src/lib.rs | 52 ++++- vello/src/render.rs | 4 +- vello/src/wgpu_engine.rs | 18 -- 6 files changed, 521 insertions(+), 54 deletions(-) create mode 100644 vello/src/debug/validate.rs diff --git a/examples/with_winit/src/lib.rs b/examples/with_winit/src/lib.rs index 4c6bc97a3..2e3066f83 100644 --- a/examples/with_winit/src/lib.rs +++ b/examples/with_winit/src/lib.rs @@ -162,6 +162,9 @@ struct VelloApp<'s> { prev_scene_ix: i32, modifiers: ModifiersState, + + #[cfg(feature = "debug_layers")] + debug: vello::DebugLayers, } impl<'s> ApplicationHandler for VelloApp<'s> { @@ -329,6 +332,22 @@ impl<'s> ApplicationHandler for VelloApp<'s> { }, ); } + #[cfg(feature = "debug_layers")] + "1" => { + self.debug.toggle(vello::DebugLayers::BOUNDING_BOXES); + } + #[cfg(feature = "debug_layers")] + "2" => { + self.debug.toggle(vello::DebugLayers::LINESOUP_SEGMENTS); + } + #[cfg(feature = "debug_layers")] + "3" => { + self.debug.toggle(vello::DebugLayers::LINESOUP_POINTS); + } + #[cfg(feature = "debug_layers")] + "4" => { + self.debug.toggle(vello::DebugLayers::VALIDATION); + } _ => {} } } @@ -464,6 +483,8 @@ impl<'s> ApplicationHandler for VelloApp<'s> { width, height, antialiasing_method, + #[cfg(feature = "debug_layers")] + debug: self.debug, }; self.scene.reset(); let mut transform = self.transform; @@ -674,6 +695,9 @@ fn run( Some(render_state) }; + #[cfg(feature = "debug_layers")] + let debug = vello::DebugLayers::none(); + let mut app = VelloApp { context: render_cx, renderers, @@ -718,6 +742,8 @@ fn run( complexity: 0, prev_scene_ix: 0, modifiers: ModifiersState::default(), + #[cfg(feature = "debug_layers")] + debug, }; event_loop.run_app(&mut app).expect("run to completion"); diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 44319596f..d3327fb5f 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -1,23 +1,99 @@ // 
Copyright 2023 The Vello authors // SPDX-License-Identifier: Apache-2.0 OR MIT +mod validate; + use crate::{ + debug::validate::{validate_line_soup, LineEndpoint}, recording::{BindType, DrawParams, ImageProxy, Recording, ResourceProxy, ShaderId}, render::CapturedBuffers, wgpu_engine::WgpuEngine, - RenderParams, + DebugDownloads, RenderParams, }; use { bytemuck::{offset_of, Pod, Zeroable}, + peniko::Color, vello_encoding::{BumpAllocators, LineSoup, PathBbox}, }; -pub(crate) struct DebugLayers { +#[derive(Copy, Clone)] +pub struct DebugLayers(u8); + +// TODO: Currently all layers require read-back of the BumpAllocators buffer. This isn't strictly +// necessary for layers other than `VALIDATION`. The debug visualizations use the bump buffer only +// to obtain various instance counts for draws and these could instead get written out to an +// indirect draw buffer. OTOH `VALIDATION` should always require readback since we want to be able +// to run the same CPU-side tests for both CPU and GPU shaders. +impl DebugLayers { + /// Visualize the bounding box of every path. + pub const BOUNDING_BOXES: DebugLayers = DebugLayers(1 << 0); + + /// Visualize the post-flattening line segments using line primitives. + pub const LINESOUP_SEGMENTS: DebugLayers = DebugLayers(1 << 1); + + /// Visualize the post-flattening line endpoints. + pub const LINESOUP_POINTS: DebugLayers = DebugLayers(1 << 2); + + /// Enable validation of internal buffer contents and visualize errors. Validation tests are + /// run on the CPU and require buffer contents to be read-back. + /// + /// Supported validation tests: + /// + /// - Watertightness: validate that every line segment within a path is connected without + /// any gaps. Line endpoints that don't precisely overlap another endpoint get visualized + /// as red circles and logged to stderr. 
+ /// + pub const VALIDATION: DebugLayers = DebugLayers(1 << 3); + + pub const fn from_bits(bits: u8) -> Self { + Self(bits) + } + + pub const fn none() -> Self { + Self(0) + } + + pub const fn all() -> Self { + Self( + Self::BOUNDING_BOXES.0 + | Self::LINESOUP_SEGMENTS.0 + | Self::LINESOUP_POINTS.0 + | Self::VALIDATION.0, + ) + } + + pub fn is_empty(&self) -> bool { + self.0 == 0 + } + + pub fn check_bits(&self, mask: DebugLayers) -> bool { + self.0 & mask.0 == mask.0 + } + + pub fn toggle(&mut self, mask: DebugLayers) { + self.0 ^= mask.0 + } +} + +impl std::ops::BitOr for DebugLayers { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self { + Self(self.0 | rhs.0) + } +} + +pub(crate) struct DebugRenderer { + // `clear_tint` slightly darkens the output from the vello renderer to make the debug overlays + // more distinguishable. + clear_tint: ShaderId, bboxes: ShaderId, linesoup: ShaderId, + linesoup_points: ShaderId, + unpaired_points: ShaderId, } -impl DebugLayers { +impl DebugRenderer { pub fn new( device: &wgpu::Device, target_format: wgpu::TextureFormat, @@ -27,12 +103,35 @@ impl DebugLayers { label: Some("debug layers"), source: wgpu::ShaderSource::Wgsl(SHADERS.into()), }); + + let clear_tint = engine.add_render_shader( + device, + "clear-tint", + &module, + "full_screen_quad_vert", + "solid_color_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: Some(wgpu::BlendState { + color: wgpu::BlendComponent { + src_factor: wgpu::BlendFactor::SrcAlpha, + dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, + operation: wgpu::BlendOperation::Add, + }, + alpha: wgpu::BlendComponent::OVER, + }), + write_mask: wgpu::ColorWrites::ALL, + }, + None, + &[], + ); let bboxes = engine.add_render_shader( device, "bbox-debug", &module, "bbox_vert", - "fs_main", + "solid_color_frag", wgpu::PrimitiveTopology::LineStrip, wgpu::ColorTargetState { format: target_format, @@ -63,7 +162,7 @@ impl DebugLayers { 
"linesoup-debug", &module, "linesoup_vert", - "fs_main", + "solid_color_frag", wgpu::PrimitiveTopology::LineList, wgpu::ColorTargetState { format: target_format, @@ -89,8 +188,91 @@ impl DebugLayers { }), &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], ); + let linesoup_points = engine.add_render_shader( + device, + "linepoints-debug", + &module, + "linepoints_vert", + "sdf_circle_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: Some(wgpu::BlendState { + color: wgpu::BlendComponent { + src_factor: wgpu::BlendFactor::SrcAlpha, + dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, + operation: wgpu::BlendOperation::Add, + }, + alpha: wgpu::BlendComponent::OVER, + }), + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the LineSoup structure. The pipeline only processes the + // first point of each line. Since all points should be paired, this is enough to + // render all points. All unpaired points alone get drawn by the `unpaired_points` + // pipeline, so no point should get missed. 
+ Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineSoup, p0) as u64, + shader_location: 0, + }], + }), + &[ + (BindType::Uniform, wgpu::ShaderStages::VERTEX), + ( + BindType::Uniform, + wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ), + ], + ); + let unpaired_points = engine.add_render_shader( + device, + "linepoints-debug", + &module, + "linepoints_vert", + "sdf_circle_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: Some(wgpu::BlendState { + color: wgpu::BlendComponent { + src_factor: wgpu::BlendFactor::SrcAlpha, + dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, + operation: wgpu::BlendOperation::Add, + }, + alpha: wgpu::BlendComponent::OVER, + }), + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the LineSoup structure. 
+ Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineEndpoint, x) as u64, + shader_location: 0, + }], + }), + &[ + (BindType::Uniform, wgpu::ShaderStages::VERTEX), + ( + BindType::Uniform, + wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ), + ], + ); - Self { bboxes, linesoup } + Self { + clear_tint, + bboxes, + linesoup, + linesoup_points, + unpaired_points, + } } pub fn render( @@ -98,35 +280,121 @@ impl DebugLayers { recording: &mut Recording, target: ImageProxy, captured: &CapturedBuffers, - bump: Option<&BumpAllocators>, + bump: &BumpAllocators, params: &RenderParams, + downloads: &DebugDownloads, ) { + if params.debug.is_empty() { + return; + } + + let (unpaired_pts_len, unpaired_pts_buf) = + if params.debug.check_bits(DebugLayers::VALIDATION) { + // TODO: have this write directly to a GPU buffer? 
+ let unpaired_pts: Vec = + validate_line_soup(bytemuck::cast_slice(&downloads.lines.get_mapped_range())); + if unpaired_pts.is_empty() { + (0, None) + } else { + ( + unpaired_pts.len(), + Some( + recording + .upload("unpaired points", bytemuck::cast_slice(&unpaired_pts[..])), + ), + ) + } + } else { + (0, None) + }; + let uniforms = Uniforms { width: params.width, height: params.height, }; let uniforms_buf = ResourceProxy::Buffer(recording.upload_uniform("uniforms", bytemuck::bytes_of(&uniforms))); + + let linepoints_uniforms = [ + LinepointsUniforms::new(Color::CYAN, 10.), + LinepointsUniforms::new(Color::RED, 80.), + ]; + let linepoints_uniforms_buf = recording.upload_uniform( + "linepoints uniforms", + bytemuck::bytes_of(&linepoints_uniforms), + ); + recording.draw(DrawParams { - shader_id: self.bboxes, - instance_count: captured.sizes.path_bboxes.len(), - vertex_count: 5, - vertex_buffer: Some(captured.path_bboxes), - resources: vec![uniforms_buf], - target, - clear_color: None, - }); - recording.draw(DrawParams { - shader_id: self.linesoup, - instance_count: bump.unwrap().lines, - vertex_count: 2, - vertex_buffer: Some(captured.lines), - resources: vec![uniforms_buf], + shader_id: self.clear_tint, + instance_count: 1, + vertex_count: 4, + vertex_buffer: None, + resources: vec![], target, clear_color: None, }); + if params.debug.check_bits(DebugLayers::BOUNDING_BOXES) { + recording.draw(DrawParams { + shader_id: self.bboxes, + instance_count: captured.sizes.path_bboxes.len(), + vertex_count: 5, + vertex_buffer: Some(captured.path_bboxes), + resources: vec![uniforms_buf], + target, + clear_color: None, + }); + } + if params.debug.check_bits(DebugLayers::LINESOUP_SEGMENTS) { + recording.draw(DrawParams { + shader_id: self.linesoup, + instance_count: bump.lines, + vertex_count: 2, + vertex_buffer: Some(captured.lines), + resources: vec![uniforms_buf], + target, + clear_color: None, + }); + } + if params.debug.check_bits(DebugLayers::LINESOUP_POINTS) { + 
recording.draw(DrawParams { + shader_id: self.linesoup_points, + instance_count: bump.lines, + vertex_count: 4, + vertex_buffer: Some(captured.lines), + resources: vec![ + uniforms_buf, + ResourceProxy::BufferRange { + proxy: linepoints_uniforms_buf, + offset: 0, + size: std::mem::size_of::() as u64, + }, + ], + target, + clear_color: None, + }); + } + if let Some(unpaired_pts_buf) = unpaired_pts_buf { + recording.draw(DrawParams { + shader_id: self.unpaired_points, + instance_count: unpaired_pts_len.try_into().unwrap(), + vertex_count: 4, + vertex_buffer: Some(unpaired_pts_buf), + resources: vec![ + uniforms_buf, + ResourceProxy::BufferRange { + proxy: linepoints_uniforms_buf, + offset: std::mem::size_of::() as u64, + size: std::mem::size_of::() as u64, + }, + ], + target, + clear_color: None, + }); + recording.free_buffer(unpaired_pts_buf); + } recording.free_resource(uniforms_buf); + recording.free_buffer(linepoints_uniforms_buf); } } @@ -137,8 +405,40 @@ struct Uniforms { height: u32, } +#[derive(Copy, Clone, Zeroable, Pod)] +#[repr(C)] +struct LinepointsUniforms { + point_color: [f32; 3], + point_size: f32, + // Uniform parameters for individual SDF point draws are stored in a single buffer. + // This 240 byte padding is here to bring the element ffset alignment of 256 bytes. + // (see https://www.w3.org/TR/webgpu/#dom-supported-limits-minuniformbufferoffsetalignment) + _pad0: [u32; 30], + _pad1: [u32; 30], +} + +impl LinepointsUniforms { + fn new(color: Color, point_size: f32) -> Self { + Self { + point_color: [ + color.r as f32 / 255., + color.g as f32 / 255., + color.b as f32 / 255., + ], + point_size, + _pad0: [0; 30], + _pad1: [0; 30], + } + } +} + const SHADERS: &str = r#" +// Map from y-down normalized coordinates to NDC: +fn map_to_ndc(p: vec2f) -> vec4f { + return vec4(vec2(1., -1.) * (2. 
* p - vec2(1.)), 0., 1.); +} + alias QuadVertices = array; var quad_vertices: QuadVertices = QuadVertices( vec2(0., 1.), @@ -147,6 +447,8 @@ var quad_vertices: QuadVertices = QuadVertices( vec2(1., 1.), ); +var quad_fill_indices: array = array(0u, 3u, 1u, 2u); + struct Uniforms { width: u32, height: u32, @@ -155,9 +457,19 @@ struct Uniforms { struct VSOut { @builtin(position) pos: vec4f, - @location(0) color: vec3f, + @location(0) color: vec4f, } +//////////// + +@vertex +fn full_screen_quad_vert(@builtin(vertex_index) vid: u32) -> VSOut { + let p = quad_vertices[quad_fill_indices[vid]]; + return VSOut(map_to_ndc(p), vec4(0., 0., 0., 0.5)); +} + +//////////// + struct BboxIn { @location(0) p0: vec2i, @location(1) p1: vec2i, @@ -169,11 +481,11 @@ fn bbox_vert(@builtin(vertex_index) vid: u32, bbox: BboxIn) -> VSOut { let br = vec2f(f32(bbox.p1.x), f32(bbox.p1.y)); let dim = br - ul; let p = (ul + dim * quad_vertices[vid % 4u]) / vec2f(f32(uniforms.width), f32(uniforms.height)); - - // Map from y-down viewport coordinates to NDC: - return VSOut(vec4(vec2(1., -1.) * (2. * p - vec2f(1.f)), 0., 1.), vec3f(0., 1., 0.));; + return VSOut(map_to_ndc(p), vec4(0., 1., 0., 1.)); } +//////////// + struct LinesoupIn { @location(0) p0: vec2f, @location(1) p1: vec2f, @@ -182,14 +494,57 @@ struct LinesoupIn { @vertex fn linesoup_vert(@builtin(vertex_index) vid: u32, line: LinesoupIn) -> VSOut { let p = select(line.p0, line.p1, vid == 1u) / vec2f(f32(uniforms.width), f32(uniforms.height)); + return VSOut(map_to_ndc(p), vec4(0.7, 0.5, 0., 1.)); +} + +//////////// - // Map from y-down viewport coordinates to NDC: - return VSOut(vec4(vec2(1., -1.) * (2. * p - vec2f(1.f)), 0., 1.), vec3f(1., 1., 0.)); +struct LinepointsUniforms { + point_color: vec3f, + point_size: f32, +} +@binding(1) @group(0) var linepoints_uniforms: LinepointsUniforms; + +struct SDFCircleOut { + @builtin(position) pos: vec4f, + + // Unpremultiplied color of the circle. 
+ @location(0) color: vec3f, + + // The 2D position of the pixel fragment relative to the center of the quad. The quad edges + // are at coordinates (±1, 0) and (0, ±1). + @location(1) quad_relative: vec2f, +} + +@vertex +fn linepoints_vert(@builtin(vertex_index) vid: u32, @location(0) point: vec2f) -> SDFCircleOut { + let quad_corner = quad_vertices[quad_fill_indices[vid]] - vec2(0.5); + let rect_dim = vec2(linepoints_uniforms.point_size); + let p = (point + rect_dim * quad_corner) / vec2(f32(uniforms.width), f32(uniforms.height)); + + return SDFCircleOut( + map_to_ndc(p), + linepoints_uniforms.point_color, + // Normalize the corners of the quad such that they form a vector of length √2. This should + // align the edge fragments to ±1. The post-interpolation values of `quad_relative` will + // then form a distance field that can represent a circle of radius 1 within the quad + // (where the distance is relative to the center of the circle). + normalize(quad_corner) * sqrt(2.), + ); } @fragment -fn fs_main(in: VSOut) -> @location(0) vec4f { - return vec4(in.color, 1.); +fn solid_color_frag(in: VSOut) -> @location(0) vec4f { + return in.color; } +@fragment +fn sdf_circle_frag(in: SDFCircleOut) -> @location(0) vec4f { + // Draw an antialiased circle with a fading margin as a visual effect. `THRESHOLD` is the + // distance from the center of the circle to the edge where the fade begins. + let THRESHOLD = 0.6; + let d = saturate(length(in.quad_relative)); + let alpha = select(1., 1. 
- smoothstep(THRESHOLD, 1., d), d > THRESHOLD); + return vec4(in.color.rgb, alpha); +} "#; diff --git a/vello/src/debug/validate.rs b/vello/src/debug/validate.rs new file mode 100644 index 000000000..78adf7529 --- /dev/null +++ b/vello/src/debug/validate.rs @@ -0,0 +1,62 @@ +// Copyright 2023 The Vello authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use { + bytemuck::{Pod, Zeroable}, + std::{collections::BTreeSet, fmt}, + vello_encoding::LineSoup, +}; + +#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Pod, Zeroable)] +#[repr(C)] +pub struct LineEndpoint { + pub path_ix: u32, + + // Coordinates in IEEE-754 32-bit float representation + pub x: u32, + pub y: u32, +} + +impl LineEndpoint { + pub fn new(line: &LineSoup, start_or_end: bool) -> Self { + let (x, y) = if start_or_end { + (line.p0[0], line.p0[1]) + } else { + (line.p1[0], line.p1[1]) + }; + Self { + path_ix: line.path_ix, + x: x.to_bits(), + y: y.to_bits(), + } + } +} + +impl fmt::Debug for LineEndpoint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Point") + .field("path_ix", &self.path_ix) + .field("x", &f32::from_bits(self.x)) + .field("y", &f32::from_bits(self.y)) + .finish() + } +} + +pub(crate) fn validate_line_soup(lines: &[LineSoup]) -> Vec { + let mut points = BTreeSet::new(); + for line in lines { + let pts = [ + LineEndpoint::new(line, true), + LineEndpoint::new(line, false), + ]; + for p in pts { + if !points.remove(&p) { + points.insert(p); + } + } + } + if !points.is_empty() { + eprintln!("Unpaired points are present: {:#?}", points); + } + points.into_iter().collect() +} diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 435b5df71..a94fde183 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -126,6 +126,8 @@ use vello_encoding::Resolver; #[cfg(feature = "wgpu")] use wgpu_engine::{ExternalResource, WgpuEngine}; +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] +pub use debug::DebugLayers; /// Temporary export, used in 
`with_winit` for stats pub use vello_encoding::BumpAllocators; #[cfg(feature = "wgpu")] @@ -214,6 +216,11 @@ pub enum Error { #[cfg(feature = "wgpu")] #[error("Failed to async map a buffer")] BufferAsyncError(#[from] wgpu::BufferAsyncError), + /// Failed to download an internal buffer for debug visualization. + #[cfg(feature = "wgpu")] + #[cfg(feature = "debug_layers")] + #[error("Failed to download internal buffer for visualization")] + DownloadError(&'static str), #[cfg(feature = "wgpu")] #[error("wgpu Error from scope")] @@ -244,7 +251,7 @@ pub struct Renderer { shaders: FullShaders, blit: Option, #[cfg(feature = "debug_layers")] - debug: Option, + debug: Option, target: Option, #[cfg(feature = "wgpu-profiler")] pub profiler: GpuProfiler, @@ -272,6 +279,10 @@ pub struct RenderParams { /// The anti-aliasing algorithm. The selected algorithm must have been initialized while /// constructing the `Renderer`. pub antialiasing_method: AaConfig, + + #[cfg(feature = "debug_layers")] + /// Options for debug layer rendering. + pub debug: DebugLayers, } #[cfg(feature = "wgpu")] @@ -325,7 +336,7 @@ impl Renderer { #[cfg(feature = "debug_layers")] let debug = options .surface_format - .map(|surface_format| debug::DebugLayers::new(device, surface_format, &mut engine)); + .map(|surface_format| debug::DebugRenderer::new(device, surface_format, &mut engine)); Ok(Self { options, @@ -485,7 +496,7 @@ impl Renderer { let debug = self .options .surface_format - .map(|format| debug::DebugLayers::new(device, format, &mut engine)); + .map(|format| debug::DebugRenderer::new(device, format, &mut engine)); let error = device.pop_error_scope().await; if let Some(error) = error { return Err(error.into()); @@ -524,7 +535,8 @@ impl Renderer { .await?; #[cfg(feature = "debug_layers")] { - // TODO: it would be much better to have a way to safely destroy a buffer. 
+ // TODO: it would be better to improve buffer ownership tracking so that it's not + // necessary to submit a whole new Recording to free the captured buffers. if let Some(captured) = result.captured { let mut recording = Recording::default(); // TODO: this sucks. better to release everything in a helper @@ -662,12 +674,16 @@ impl Renderer { .debug .as_ref() .expect("renderer should have configured surface_format to use on a surface"); + let bump = result.bump.as_ref().unwrap(); + // TODO: We could avoid this download if `DebugLayers::VALIDATION` is unset. + let downloads = DebugDownloads::map(&mut self.engine, &captured, bump).await?; debug.render( &mut recording, surface_proxy, &captured, - result.bump.as_ref(), + bump, ¶ms, + &downloads, ); // TODO: this sucks. better to release everything in a helper @@ -806,3 +822,29 @@ impl BlitPipeline { Self(shader_id) } } + +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] +pub(crate) struct DebugDownloads<'a> { + pub lines: wgpu::BufferSlice<'a>, +} + +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] +impl<'a> DebugDownloads<'a> { + pub async fn map( + engine: &'a WgpuEngine, + captured: &render::CapturedBuffers, + bump: &BumpAllocators, + ) -> Result> { + use vello_encoding::LineSoup; + + let Some(lines_buf) = engine.get_download(captured.lines) else { + return Err(Error::DownloadError("could not download LineSoup buffer")); + }; + + let lines = lines_buf.slice(..bump.lines as u64 * std::mem::size_of::() as u64); + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + lines.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap()); + receiver.receive().await.expect("channel was closed")?; + Ok(Self { lines }) + } +} diff --git a/vello/src/render.rs b/vello/src/render.rs index e65a7ec51..c16b5f805 100644 --- a/vello/src/render.rs +++ b/vello/src/render.rs @@ -53,7 +53,7 @@ pub struct CapturedBuffers { /// Buffers that remain GPU-only pub path_bboxes: BufferProxy, - /// 
Downloaded buffers for validation + /// Buffers scheduled for download pub lines: BufferProxy, } @@ -461,7 +461,7 @@ impl Render { { let path_bboxes = *path_bbox_buf.as_buf().unwrap(); let lines = *lines_buf.as_buf().unwrap(); - // TODO: recording.download(lines); + recording.download(lines); self.captured_buffers = Some(CapturedBuffers { sizes: cpu_config.buffer_sizes, diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index e8f25b599..76134f481 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -1145,24 +1145,6 @@ impl<'a> TransientBindMap<'a> { }), } } - ResourceProxy::BufRange { - proxy, - offset, - size, - } => { - let buf = match self.bufs.get(&proxy.id) { - Some(TransientBuf::Gpu(b)) => b, - _ => bind_map.get_gpu_buf(proxy.id).unwrap(), - }; - Ok(wgpu::BindGroupEntry { - binding: i as u32, - resource: wgpu::BindingResource::Buffer(wgpu::BufferBinding { - buffer: buf, - offset: *offset, - size: core::num::NonZeroU64::new(*size), - }), - }) - } ResourceProxy::Image(proxy) => { let view = self .images From ed3e01702f72dce57283e569ca17bf7a46e79b32 Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Tue, 4 Jun 2024 21:31:58 -0700 Subject: [PATCH 07/24] cargo fmt --- vello/src/debug.rs | 5 +++-- vello/src/lib.rs | 2 +- vello/src/wgpu_engine.rs | 27 ++++++++++++++------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index d3327fb5f..d7640d57c 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -312,8 +312,9 @@ impl DebugRenderer { width: params.width, height: params.height, }; - let uniforms_buf = - ResourceProxy::Buffer(recording.upload_uniform("uniforms", bytemuck::bytes_of(&uniforms))); + let uniforms_buf = ResourceProxy::Buffer( + recording.upload_uniform("uniforms", bytemuck::bytes_of(&uniforms)), + ); let linepoints_uniforms = [ LinepointsUniforms::new(Color::CYAN, 10.), diff --git a/vello/src/lib.rs b/vello/src/lib.rs index a94fde183..158e87976 100644 --- 
a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -220,7 +220,7 @@ pub enum Error { #[cfg(feature = "wgpu")] #[cfg(feature = "debug_layers")] #[error("Failed to download internal buffer for visualization")] - DownloadError(&'static str), + DownloadError(&'static str), #[cfg(feature = "wgpu")] #[error("wgpu Error from scope")] diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 76134f481..0ba0ab1cc 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -11,8 +11,8 @@ use vello_shaders::cpu::CpuBinding; use wgpu::{ BindGroup, BindGroupLayout, Buffer, BufferUsages, CommandEncoder, CommandEncoderDescriptor, - ComputePipeline, Device, PipelineCompilationOptions, Queue, RenderPipeline, Texture, TextureAspect, - TextureUsages, TextureView, TextureViewDimension, + ComputePipeline, Device, PipelineCompilationOptions, Queue, RenderPipeline, Texture, + TextureAspect, TextureUsages, TextureView, TextureViewDimension, }; use crate::{ @@ -631,8 +631,8 @@ impl WgpuEngine { &shader.bind_group_layout, &draw_params.resources, ); - let render_target = - transient_map.materialize_external_image_for_render_pass(&draw_params.target); + let render_target = transient_map + .materialize_external_image_for_render_pass(&draw_params.target); let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { label: None, color_attachments: &[Some(wgpu::RenderPassColorAttachment { @@ -666,9 +666,10 @@ impl WgpuEngine { if let Some(proxy) = draw_params.vertex_buffer { // TODO: need a way to materialize a CPU initialized buffer. For now assume // buffer exists? 
Also, need to materialize this buffer with vertex usage - let buf = self.bind_map.get_gpu_buf(proxy.id).ok_or( - Error::UnavailableBufferUsed(proxy.name, "draw"), - )?; + let buf = self + .bind_map + .get_gpu_buf(proxy.id) + .ok_or(Error::UnavailableBufferUsed(proxy.name, "draw"))?; rpass.set_vertex_buffer(0, buf.slice(..)); } rpass.set_bind_group(0, &bind_group, &[]); @@ -1175,11 +1176,11 @@ impl<'a> TransientBindMap<'a> { for resource in bindings { match resource { ResourceProxy::Buffer(proxy) - | ResourceProxy::BufferRange { - proxy, - offset: _, - size: _, - } => match self.bufs.get(&proxy.id) { + | ResourceProxy::BufferRange { + proxy, + offset: _, + size: _, + } => match self.bufs.get(&proxy.id) { Some(TransientBuf::Cpu(_)) => (), Some(TransientBuf::Gpu(_)) => panic!("buffer was already materialized on GPU"), _ => bind_map.materialize_cpu_buf(proxy), @@ -1195,7 +1196,7 @@ impl<'a> TransientBindMap<'a> { Some(TransientBuf::Cpu(b)) => CpuBinding::Buffer(b), _ => bind_map.get_cpu_buf(buf.id), }, - ResourceProxy::BufferRange { .. } => todo!(), + ResourceProxy::BufferRange { .. 
} => todo!(), ResourceProxy::Image(_) => todo!(), }) .collect() From 53b9c81991417744bcdc757f7bb9e9afd0528857 Mon Sep 17 00:00:00 2001 From: Arman Uguray Date: Fri, 7 Jun 2024 18:24:13 -0700 Subject: [PATCH 08/24] [debug] Thicken lines and show winding --- vello/src/debug.rs | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index d7640d57c..7fd8d5211 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -163,7 +163,7 @@ impl DebugRenderer { &module, "linesoup_vert", "solid_color_frag", - wgpu::PrimitiveTopology::LineList, + wgpu::PrimitiveTopology::TriangleStrip, wgpu::ColorTargetState { format: target_format, blend: None, @@ -317,7 +317,7 @@ impl DebugRenderer { ); let linepoints_uniforms = [ - LinepointsUniforms::new(Color::CYAN, 10.), + LinepointsUniforms::new(Color::DARK_CYAN, 10.), LinepointsUniforms::new(Color::RED, 80.), ]; let linepoints_uniforms_buf = recording.upload_uniform( @@ -325,6 +325,7 @@ impl DebugRenderer { bytemuck::bytes_of(&linepoints_uniforms), ); + /* recording.draw(DrawParams { shader_id: self.clear_tint, instance_count: 1, @@ -333,7 +334,7 @@ impl DebugRenderer { resources: vec![], target, clear_color: None, - }); + });*/ if params.debug.check_bits(DebugLayers::BOUNDING_BOXES) { recording.draw(DrawParams { shader_id: self.bboxes, @@ -349,7 +350,7 @@ impl DebugRenderer { recording.draw(DrawParams { shader_id: self.linesoup, instance_count: bump.lines, - vertex_count: 2, + vertex_count: 4, vertex_buffer: Some(captured.lines), resources: vec![uniforms_buf], target, @@ -492,10 +493,22 @@ struct LinesoupIn { @location(1) p1: vec2f, } +const LINE_THICKNESS: f32 = 4.; +const WIND_DOWN_COLOR: vec3f = vec3(0., 1., 0.); +const WIND_UP_COLOR: vec3f = vec3(1., 0., 0.); + @vertex fn linesoup_vert(@builtin(vertex_index) vid: u32, line: LinesoupIn) -> VSOut { - let p = select(line.p0, line.p1, vid == 1u) / vec2f(f32(uniforms.width), f32(uniforms.height)); - return 
VSOut(map_to_ndc(p), vec4(0.7, 0.5, 0., 1.)); + let quad_corner = quad_vertices[quad_fill_indices[vid]] - vec2(0.5); + let v = line.p1 - line.p0; + let m = mix(line.p0, line.p1, 0.5); + let s = vec2(LINE_THICKNESS, length(v)); + let vn = normalize(v); + let r = mat2x2(vn.y, -vn.x, vn.x, vn.y); + let p = (m + r * (s * quad_corner)) / vec2f(f32(uniforms.width), f32(uniforms.height)); + //let color = vec4(0.7, 0.5, 0., 1.); + let color = vec4(select(WIND_UP_COLOR, WIND_DOWN_COLOR, v.y >= 0.), 1.); + return VSOut(map_to_ndc(p), color); } //////////// From eefc505b5457b1be2b9298abfd14b4f210e4e1a8 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:35:56 +0100 Subject: [PATCH 09/24] Address review feedback --- vello/src/lib.rs | 2 +- vello/src/wgpu_engine.rs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 158e87976..2298b0d2a 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -463,7 +463,7 @@ impl Renderer { queue, &recording, &external_resources, - "blit (render_to_surface_async)", + "blit (render_to_surface)", #[cfg(feature = "wgpu-profiler")] &mut self.profiler, )?; diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 0ba0ab1cc..179ba425c 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -713,6 +713,8 @@ impl WgpuEngine { } #[cfg(feature = "wgpu-profiler")] profiler.end_query(&mut encoder, query); + // TODO: This only actually needs to happen once per frame, but run_recording happens two or three times + profiler.resolve_queries(&mut encoder); queue.submit(Some(encoder.finish())); for id in free_bufs { if let Some(buf) = self.bind_map.buf_map.remove(&id) { From 6f5d11a1ad4e4306c736a9b3755f87a643a1b716 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:41:55 +0100 Subject: [PATCH 10/24] Fix handling of debug layers memory management --- 
vello/src/debug.rs | 5 ++--- vello/src/lib.rs | 4 ++-- vello/src/render.rs | 38 ++++++++++++++++++++++---------------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 7fd8d5211..74329431a 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -71,7 +71,7 @@ impl DebugLayers { } pub fn toggle(&mut self, mask: DebugLayers) { - self.0 ^= mask.0 + self.0 ^= mask.0; } } @@ -325,7 +325,6 @@ impl DebugRenderer { bytemuck::bytes_of(&linepoints_uniforms), ); - /* recording.draw(DrawParams { shader_id: self.clear_tint, instance_count: 1, @@ -334,7 +333,7 @@ impl DebugRenderer { resources: vec![], target, clear_color: None, - });*/ + }); if params.debug.check_bits(DebugLayers::BOUNDING_BOXES) { recording.draw(DrawParams { shader_id: self.bboxes, diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 2298b0d2a..06136b4ba 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -676,13 +676,13 @@ impl Renderer { .expect("renderer should have configured surface_format to use on a surface"); let bump = result.bump.as_ref().unwrap(); // TODO: We could avoid this download if `DebugLayers::VALIDATION` is unset. - let downloads = DebugDownloads::map(&mut self.engine, &captured, bump).await?; + let downloads = DebugDownloads::map(&self.engine, &captured, bump).await?; debug.render( &mut recording, surface_proxy, &captured, bump, - ¶ms, + params, &downloads, ); diff --git a/vello/src/render.rs b/vello/src/render.rs index c16b5f805..11bada553 100644 --- a/vello/src/render.rs +++ b/vello/src/render.rs @@ -24,6 +24,15 @@ pub struct Render { captured_buffers: Option, } +#[cfg(feature = "debug_layers")] +impl Drop for Render { + fn drop(&mut self) { + if self.captured_buffers.is_some() { + unreachable!("Render captured buffers without freeing them"); + } + } +} + /// Resources produced by pipeline, needed for fine rasterization. 
struct FineResources { aa_config: AaConfig, @@ -126,14 +135,6 @@ impl Render { let mut recording = Recording::default(); let mut packed = vec![]; - #[cfg(feature = "debug_layers")] - { - let captured = self.captured_buffers.take(); - if let Some(buffers) = captured { - buffers.release_buffers(&mut recording); - } - } - let (layout, ramps, images) = resolver.resolve(encoding, &mut packed); let gradient_image = if ramps.height == 0 { ResourceProxy::new_image(1, 1, ImageFormat::Rgba8) @@ -459,15 +460,20 @@ impl Render { #[cfg(feature = "debug_layers")] { - let path_bboxes = *path_bbox_buf.as_buf().unwrap(); - let lines = *lines_buf.as_buf().unwrap(); - recording.download(lines); + if robust { + let path_bboxes = *path_bbox_buf.as_buf().unwrap(); + let lines = *lines_buf.as_buf().unwrap(); + recording.download(lines); - self.captured_buffers = Some(CapturedBuffers { - sizes: cpu_config.buffer_sizes, - path_bboxes, - lines, - }); + self.captured_buffers = Some(CapturedBuffers { + sizes: cpu_config.buffer_sizes, + path_bboxes, + lines, + }); + } else { + recording.free_resource(path_bbox_buf); + recording.free_resource(lines_buf); + } } #[cfg(not(feature = "debug_layers"))] { From 9c3afa2d6124002f262f5f1fa29ad22dcda8ed0d Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:42:30 +0100 Subject: [PATCH 11/24] Reduce opacity of the `clear_tint` layer --- vello/src/debug.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 74329431a..a542bcbab 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -466,7 +466,7 @@ struct VSOut { @vertex fn full_screen_quad_vert(@builtin(vertex_index) vid: u32) -> VSOut { let p = quad_vertices[quad_fill_indices[vid]]; - return VSOut(map_to_ndc(p), vec4(0., 0., 0., 0.5)); + return VSOut(map_to_ndc(p), vec4(0., 0., 0., 0.2)); } //////////// From 02e7fc0dce52f1eb70a7b5a569dba9d32f50a000 Mon Sep 17 00:00:00 2001 From: 
Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:44:51 +0100 Subject: [PATCH 12/24] Add comment about alpha factor --- vello/src/debug.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index a542bcbab..434b07c8c 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -466,6 +466,8 @@ struct VSOut { @vertex fn full_screen_quad_vert(@builtin(vertex_index) vid: u32) -> VSOut { let p = quad_vertices[quad_fill_indices[vid]]; + // TODO: Make the alpha configurable here. + // The clear tint is a full-screen layer above the entire image with this color. return VSOut(map_to_ndc(p), vec4(0., 0., 0., 0.2)); } From 1c9fc785f9a216cc31dba80d8ac90ed8fe8c47b2 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Mon, 29 Jul 2024 16:50:48 +0100 Subject: [PATCH 13/24] Fix clippy issues --- vello/src/debug.rs | 2 +- vello/src/debug/validate.rs | 2 +- vello/src/lib.rs | 3 ++- vello/src/recording.rs | 1 - vello/src/wgpu_engine.rs | 2 ++ 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 434b07c8c..762fdde4a 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -1,4 +1,4 @@ -// Copyright 2023 The Vello authors +// Copyright 2023 the Vello Authors // SPDX-License-Identifier: Apache-2.0 OR MIT mod validate; diff --git a/vello/src/debug/validate.rs b/vello/src/debug/validate.rs index 78adf7529..8c3381264 100644 --- a/vello/src/debug/validate.rs +++ b/vello/src/debug/validate.rs @@ -1,4 +1,4 @@ -// Copyright 2023 The Vello authors +// Copyright 2023 the Vello Authors // SPDX-License-Identifier: Apache-2.0 OR MIT use { diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 06136b4ba..596238d9b 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -280,7 +280,7 @@ pub struct RenderParams { /// constructing the `Renderer`. 
pub antialiasing_method: AaConfig, - #[cfg(feature = "debug_layers")] + #[cfg(all(feature = "debug_layers", feature = "wgpu"))] /// Options for debug layer rendering. pub debug: DebugLayers, } @@ -311,6 +311,7 @@ pub struct RendererOptions { pub num_init_threads: Option, } +#[cfg(feature = "wgpu")] struct RenderResult { bump: Option, #[cfg(feature = "debug_layers")] diff --git a/vello/src/recording.rs b/vello/src/recording.rs index 6ca9a2f01..503d65cdb 100644 --- a/vello/src/recording.rs +++ b/vello/src/recording.rs @@ -85,7 +85,6 @@ pub enum Command { Draw(DrawParams), } -#[cfg(feature = "wgpu")] /// The type of resource that will be bound to a slot in a shader. #[derive(Clone, Copy, PartialEq, Eq)] pub enum BindType { diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 179ba425c..1c13e7f33 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -618,6 +618,7 @@ impl WgpuEngine { } Command::Draw(draw_params) => { let shader = &self.shaders[draw_params.shader_id.0]; + #[cfg(feature = "wgpu-profiler")] let label = shader.label; let ShaderKind::Wgpu(shader) = shader.select() else { panic!("a render pass does not have a CPU equivalent"); @@ -714,6 +715,7 @@ impl WgpuEngine { #[cfg(feature = "wgpu-profiler")] profiler.end_query(&mut encoder, query); // TODO: This only actually needs to happen once per frame, but run_recording happens two or three times + #[cfg(feature = "wgpu-profiler")] profiler.resolve_queries(&mut encoder); queue.submit(Some(encoder.finish())); for id in free_bufs { From f44d2179ddd83d860cca95a6cc7138d3a85e8753 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:53:01 +0100 Subject: [PATCH 14/24] Always expose control over `DebugLayers` --- examples/headless/src/main.rs | 1 + examples/simple/src/main.rs | 3 +- examples/with_winit/src/lib.rs | 4 - vello/Cargo.toml | 2 +- vello/src/debug.rs | 505 +-------------------------------- 
vello/src/debug/renderer.rs | 494 ++++++++++++++++++++++++++++++++ vello/src/lib.rs | 7 +- vello_tests/src/lib.rs | 1 + 8 files changed, 515 insertions(+), 502 deletions(-) create mode 100644 vello/src/debug/renderer.rs diff --git a/examples/headless/src/main.rs b/examples/headless/src/main.rs index c5cb8b06b..98d00c3f8 100644 --- a/examples/headless/src/main.rs +++ b/examples/headless/src/main.rs @@ -139,6 +139,7 @@ async fn render(mut scenes: SceneSet, index: usize, args: &Args) -> Result<()> { width, height, antialiasing_method: vello::AaConfig::Area, + debug: vello::DebugLayers::none(), }; let mut scene = Scene::new(); scene.append(&fragment, Some(transform)); diff --git a/examples/simple/src/main.rs b/examples/simple/src/main.rs index 5e5c581ad..bbf721645 100644 --- a/examples/simple/src/main.rs +++ b/examples/simple/src/main.rs @@ -7,7 +7,7 @@ use std::sync::Arc; use vello::kurbo::{Affine, Circle, Ellipse, Line, RoundedRect, Stroke}; use vello::peniko::Color; use vello::util::{RenderContext, RenderSurface}; -use vello::{AaConfig, Renderer, RendererOptions, Scene}; +use vello::{AaConfig, DebugLayers, Renderer, RendererOptions, Scene}; use winit::application::ApplicationHandler; use winit::dpi::LogicalSize; use winit::event::*; @@ -151,6 +151,7 @@ impl<'s> ApplicationHandler for SimpleVelloApp<'s> { width, height, antialiasing_method: AaConfig::Msaa16, + debug: DebugLayers::none(), }, ) .expect("failed to render to surface"); diff --git a/examples/with_winit/src/lib.rs b/examples/with_winit/src/lib.rs index 2e3066f83..47ef5239d 100644 --- a/examples/with_winit/src/lib.rs +++ b/examples/with_winit/src/lib.rs @@ -163,7 +163,6 @@ struct VelloApp<'s> { prev_scene_ix: i32, modifiers: ModifiersState, - #[cfg(feature = "debug_layers")] debug: vello::DebugLayers, } @@ -483,7 +482,6 @@ impl<'s> ApplicationHandler for VelloApp<'s> { width, height, antialiasing_method, - #[cfg(feature = "debug_layers")] debug: self.debug, }; self.scene.reset(); @@ -695,7 +693,6 @@ fn run( 
Some(render_state) }; - #[cfg(feature = "debug_layers")] let debug = vello::DebugLayers::none(); let mut app = VelloApp { @@ -742,7 +739,6 @@ fn run( complexity: 0, prev_scene_ix: 0, modifiers: ModifiersState::default(), - #[cfg(feature = "debug_layers")] debug, }; diff --git a/vello/Cargo.toml b/vello/Cargo.toml index 83ba98d55..15365beb1 100644 --- a/vello/Cargo.toml +++ b/vello/Cargo.toml @@ -10,7 +10,7 @@ license.workspace = true repository.workspace = true [features] -default = ["wgpu"] +default = ["wgpu", "debug_layers"] # Enables GPU memory usage estimation. This performs additional computations # in order to estimate the minimum required allocations for buffers backing # bump-allocated GPU memory. diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 762fdde4a..421b79210 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -1,21 +1,17 @@ // Copyright 2023 the Vello Authors // SPDX-License-Identifier: Apache-2.0 OR MIT +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] +mod renderer; +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] mod validate; -use crate::{ - debug::validate::{validate_line_soup, LineEndpoint}, - recording::{BindType, DrawParams, ImageProxy, Recording, ResourceProxy, ShaderId}, - render::CapturedBuffers, - wgpu_engine::WgpuEngine, - DebugDownloads, RenderParams, -}; -use { - bytemuck::{offset_of, Pod, Zeroable}, - peniko::Color, - vello_encoding::{BumpAllocators, LineSoup, PathBbox}, -}; +#[cfg(all(feature = "debug_layers", feature = "wgpu"))] +pub(crate) use renderer::*; +/// Bitflags for enabled debug operations. +/// +/// Currently, all layers additionally require the `debug_layers` feature. #[derive(Copy, Clone)] pub struct DebugLayers(u8); @@ -26,12 +22,15 @@ pub struct DebugLayers(u8); // to run the same CPU-side tests for both CPU and GPU shaders. impl DebugLayers { /// Visualize the bounding box of every path. + /// Requires the `debug_layers` feature. 
pub const BOUNDING_BOXES: DebugLayers = DebugLayers(1 << 0); /// Visualize the post-flattening line segments using line primitives. + /// Requires the `debug_layers` feature. pub const LINESOUP_SEGMENTS: DebugLayers = DebugLayers(1 << 1); /// Visualize the post-flattening line endpoints. + /// Requires the `debug_layers` feature. pub const LINESOUP_POINTS: DebugLayers = DebugLayers(1 << 2); /// Enable validation of internal buffer contents and visualize errors. Validation tests are @@ -43,6 +42,7 @@ impl DebugLayers { /// any gaps. Line endpoints that don't precisely overlap another endpoint get visualized /// as red circles and logged to stderr. /// + /// Requires the `debug_layers` feature. pub const VALIDATION: DebugLayers = DebugLayers(1 << 3); pub const fn from_bits(bits: u8) -> Self { @@ -82,484 +82,3 @@ impl std::ops::BitOr for DebugLayers { Self(self.0 | rhs.0) } } - -pub(crate) struct DebugRenderer { - // `clear_tint` slightly darkens the output from the vello renderer to make the debug overlays - // more distinguishable. 
- clear_tint: ShaderId, - bboxes: ShaderId, - linesoup: ShaderId, - linesoup_points: ShaderId, - unpaired_points: ShaderId, -} - -impl DebugRenderer { - pub fn new( - device: &wgpu::Device, - target_format: wgpu::TextureFormat, - engine: &mut WgpuEngine, - ) -> Self { - let module = device.create_shader_module(wgpu::ShaderModuleDescriptor { - label: Some("debug layers"), - source: wgpu::ShaderSource::Wgsl(SHADERS.into()), - }); - - let clear_tint = engine.add_render_shader( - device, - "clear-tint", - &module, - "full_screen_quad_vert", - "solid_color_frag", - wgpu::PrimitiveTopology::TriangleStrip, - wgpu::ColorTargetState { - format: target_format, - blend: Some(wgpu::BlendState { - color: wgpu::BlendComponent { - src_factor: wgpu::BlendFactor::SrcAlpha, - dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, - operation: wgpu::BlendOperation::Add, - }, - alpha: wgpu::BlendComponent::OVER, - }), - write_mask: wgpu::ColorWrites::ALL, - }, - None, - &[], - ); - let bboxes = engine.add_render_shader( - device, - "bbox-debug", - &module, - "bbox_vert", - "solid_color_frag", - wgpu::PrimitiveTopology::LineStrip, - wgpu::ColorTargetState { - format: target_format, - blend: None, - write_mask: wgpu::ColorWrites::ALL, - }, - // This mirrors the layout of the PathBbox structure. 
- Some(wgpu::VertexBufferLayout { - array_stride: std::mem::size_of::() as u64, - step_mode: wgpu::VertexStepMode::Instance, - attributes: &[ - wgpu::VertexAttribute { - format: wgpu::VertexFormat::Sint32x2, - offset: offset_of!(PathBbox, x0) as u64, - shader_location: 0, - }, - wgpu::VertexAttribute { - format: wgpu::VertexFormat::Sint32x2, - offset: offset_of!(PathBbox, x1) as u64, - shader_location: 1, - }, - ], - }), - &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], - ); - let linesoup = engine.add_render_shader( - device, - "linesoup-debug", - &module, - "linesoup_vert", - "solid_color_frag", - wgpu::PrimitiveTopology::TriangleStrip, - wgpu::ColorTargetState { - format: target_format, - blend: None, - write_mask: wgpu::ColorWrites::ALL, - }, - // This mirrors the layout of the LineSoup structure. - Some(wgpu::VertexBufferLayout { - array_stride: std::mem::size_of::() as u64, - step_mode: wgpu::VertexStepMode::Instance, - attributes: &[ - wgpu::VertexAttribute { - format: wgpu::VertexFormat::Float32x2, - offset: offset_of!(LineSoup, p0) as u64, - shader_location: 0, - }, - wgpu::VertexAttribute { - format: wgpu::VertexFormat::Float32x2, - offset: offset_of!(LineSoup, p1) as u64, - shader_location: 1, - }, - ], - }), - &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], - ); - let linesoup_points = engine.add_render_shader( - device, - "linepoints-debug", - &module, - "linepoints_vert", - "sdf_circle_frag", - wgpu::PrimitiveTopology::TriangleStrip, - wgpu::ColorTargetState { - format: target_format, - blend: Some(wgpu::BlendState { - color: wgpu::BlendComponent { - src_factor: wgpu::BlendFactor::SrcAlpha, - dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, - operation: wgpu::BlendOperation::Add, - }, - alpha: wgpu::BlendComponent::OVER, - }), - write_mask: wgpu::ColorWrites::ALL, - }, - // This mirrors the layout of the LineSoup structure. The pipeline only processes the - // first point of each line. 
Since all points should be paired, this is enough to - // render all points. All unpaired points alone get drawn by the `unpaired_points` - // pipeline, so no point should get missed. - Some(wgpu::VertexBufferLayout { - array_stride: std::mem::size_of::() as u64, - step_mode: wgpu::VertexStepMode::Instance, - attributes: &[wgpu::VertexAttribute { - format: wgpu::VertexFormat::Float32x2, - offset: offset_of!(LineSoup, p0) as u64, - shader_location: 0, - }], - }), - &[ - (BindType::Uniform, wgpu::ShaderStages::VERTEX), - ( - BindType::Uniform, - wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, - ), - ], - ); - let unpaired_points = engine.add_render_shader( - device, - "linepoints-debug", - &module, - "linepoints_vert", - "sdf_circle_frag", - wgpu::PrimitiveTopology::TriangleStrip, - wgpu::ColorTargetState { - format: target_format, - blend: Some(wgpu::BlendState { - color: wgpu::BlendComponent { - src_factor: wgpu::BlendFactor::SrcAlpha, - dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, - operation: wgpu::BlendOperation::Add, - }, - alpha: wgpu::BlendComponent::OVER, - }), - write_mask: wgpu::ColorWrites::ALL, - }, - // This mirrors the layout of the LineSoup structure. 
- Some(wgpu::VertexBufferLayout { - array_stride: std::mem::size_of::() as u64, - step_mode: wgpu::VertexStepMode::Instance, - attributes: &[wgpu::VertexAttribute { - format: wgpu::VertexFormat::Float32x2, - offset: offset_of!(LineEndpoint, x) as u64, - shader_location: 0, - }], - }), - &[ - (BindType::Uniform, wgpu::ShaderStages::VERTEX), - ( - BindType::Uniform, - wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, - ), - ], - ); - - Self { - clear_tint, - bboxes, - linesoup, - linesoup_points, - unpaired_points, - } - } - - pub fn render( - &self, - recording: &mut Recording, - target: ImageProxy, - captured: &CapturedBuffers, - bump: &BumpAllocators, - params: &RenderParams, - downloads: &DebugDownloads, - ) { - if params.debug.is_empty() { - return; - } - - let (unpaired_pts_len, unpaired_pts_buf) = - if params.debug.check_bits(DebugLayers::VALIDATION) { - // TODO: have this write directly to a GPU buffer? - let unpaired_pts: Vec = - validate_line_soup(bytemuck::cast_slice(&downloads.lines.get_mapped_range())); - if unpaired_pts.is_empty() { - (0, None) - } else { - ( - unpaired_pts.len(), - Some( - recording - .upload("unpaired points", bytemuck::cast_slice(&unpaired_pts[..])), - ), - ) - } - } else { - (0, None) - }; - - let uniforms = Uniforms { - width: params.width, - height: params.height, - }; - let uniforms_buf = ResourceProxy::Buffer( - recording.upload_uniform("uniforms", bytemuck::bytes_of(&uniforms)), - ); - - let linepoints_uniforms = [ - LinepointsUniforms::new(Color::DARK_CYAN, 10.), - LinepointsUniforms::new(Color::RED, 80.), - ]; - let linepoints_uniforms_buf = recording.upload_uniform( - "linepoints uniforms", - bytemuck::bytes_of(&linepoints_uniforms), - ); - - recording.draw(DrawParams { - shader_id: self.clear_tint, - instance_count: 1, - vertex_count: 4, - vertex_buffer: None, - resources: vec![], - target, - clear_color: None, - }); - if params.debug.check_bits(DebugLayers::BOUNDING_BOXES) { - recording.draw(DrawParams { - 
shader_id: self.bboxes, - instance_count: captured.sizes.path_bboxes.len(), - vertex_count: 5, - vertex_buffer: Some(captured.path_bboxes), - resources: vec![uniforms_buf], - target, - clear_color: None, - }); - } - if params.debug.check_bits(DebugLayers::LINESOUP_SEGMENTS) { - recording.draw(DrawParams { - shader_id: self.linesoup, - instance_count: bump.lines, - vertex_count: 4, - vertex_buffer: Some(captured.lines), - resources: vec![uniforms_buf], - target, - clear_color: None, - }); - } - if params.debug.check_bits(DebugLayers::LINESOUP_POINTS) { - recording.draw(DrawParams { - shader_id: self.linesoup_points, - instance_count: bump.lines, - vertex_count: 4, - vertex_buffer: Some(captured.lines), - resources: vec![ - uniforms_buf, - ResourceProxy::BufferRange { - proxy: linepoints_uniforms_buf, - offset: 0, - size: std::mem::size_of::() as u64, - }, - ], - target, - clear_color: None, - }); - } - if let Some(unpaired_pts_buf) = unpaired_pts_buf { - recording.draw(DrawParams { - shader_id: self.unpaired_points, - instance_count: unpaired_pts_len.try_into().unwrap(), - vertex_count: 4, - vertex_buffer: Some(unpaired_pts_buf), - resources: vec![ - uniforms_buf, - ResourceProxy::BufferRange { - proxy: linepoints_uniforms_buf, - offset: std::mem::size_of::() as u64, - size: std::mem::size_of::() as u64, - }, - ], - target, - clear_color: None, - }); - recording.free_buffer(unpaired_pts_buf); - } - - recording.free_resource(uniforms_buf); - recording.free_buffer(linepoints_uniforms_buf); - } -} - -#[derive(Copy, Clone, Zeroable, Pod)] -#[repr(C)] -struct Uniforms { - width: u32, - height: u32, -} - -#[derive(Copy, Clone, Zeroable, Pod)] -#[repr(C)] -struct LinepointsUniforms { - point_color: [f32; 3], - point_size: f32, - // Uniform parameters for individual SDF point draws are stored in a single buffer. - // This 240 byte padding is here to bring the element ffset alignment of 256 bytes. 
- // (see https://www.w3.org/TR/webgpu/#dom-supported-limits-minuniformbufferoffsetalignment) - _pad0: [u32; 30], - _pad1: [u32; 30], -} - -impl LinepointsUniforms { - fn new(color: Color, point_size: f32) -> Self { - Self { - point_color: [ - color.r as f32 / 255., - color.g as f32 / 255., - color.b as f32 / 255., - ], - point_size, - _pad0: [0; 30], - _pad1: [0; 30], - } - } -} - -const SHADERS: &str = r#" - -// Map from y-down normalized coordinates to NDC: -fn map_to_ndc(p: vec2f) -> vec4f { - return vec4(vec2(1., -1.) * (2. * p - vec2(1.)), 0., 1.); -} - -alias QuadVertices = array; -var quad_vertices: QuadVertices = QuadVertices( - vec2(0., 1.), - vec2(0., 0.), - vec2(1., 0.), - vec2(1., 1.), -); - -var quad_fill_indices: array = array(0u, 3u, 1u, 2u); - -struct Uniforms { - width: u32, - height: u32, -} -@binding(0) @group(0) var uniforms: Uniforms; - -struct VSOut { - @builtin(position) pos: vec4f, - @location(0) color: vec4f, -} - -//////////// - -@vertex -fn full_screen_quad_vert(@builtin(vertex_index) vid: u32) -> VSOut { - let p = quad_vertices[quad_fill_indices[vid]]; - // TODO: Make the alpha configurable here. - // The clear tint is a full-screen layer above the entire image with this color. 
- return VSOut(map_to_ndc(p), vec4(0., 0., 0., 0.2)); -} - -//////////// - -struct BboxIn { - @location(0) p0: vec2i, - @location(1) p1: vec2i, -} - -@vertex -fn bbox_vert(@builtin(vertex_index) vid: u32, bbox: BboxIn) -> VSOut { - let ul = vec2f(f32(bbox.p0.x), f32(bbox.p0.y)); - let br = vec2f(f32(bbox.p1.x), f32(bbox.p1.y)); - let dim = br - ul; - let p = (ul + dim * quad_vertices[vid % 4u]) / vec2f(f32(uniforms.width), f32(uniforms.height)); - return VSOut(map_to_ndc(p), vec4(0., 1., 0., 1.)); -} - -//////////// - -struct LinesoupIn { - @location(0) p0: vec2f, - @location(1) p1: vec2f, -} - -const LINE_THICKNESS: f32 = 4.; -const WIND_DOWN_COLOR: vec3f = vec3(0., 1., 0.); -const WIND_UP_COLOR: vec3f = vec3(1., 0., 0.); - -@vertex -fn linesoup_vert(@builtin(vertex_index) vid: u32, line: LinesoupIn) -> VSOut { - let quad_corner = quad_vertices[quad_fill_indices[vid]] - vec2(0.5); - let v = line.p1 - line.p0; - let m = mix(line.p0, line.p1, 0.5); - let s = vec2(LINE_THICKNESS, length(v)); - let vn = normalize(v); - let r = mat2x2(vn.y, -vn.x, vn.x, vn.y); - let p = (m + r * (s * quad_corner)) / vec2f(f32(uniforms.width), f32(uniforms.height)); - //let color = vec4(0.7, 0.5, 0., 1.); - let color = vec4(select(WIND_UP_COLOR, WIND_DOWN_COLOR, v.y >= 0.), 1.); - return VSOut(map_to_ndc(p), color); -} - -//////////// - -struct LinepointsUniforms { - point_color: vec3f, - point_size: f32, -} -@binding(1) @group(0) var linepoints_uniforms: LinepointsUniforms; - -struct SDFCircleOut { - @builtin(position) pos: vec4f, - - // Unpremultiplied color of the circle. - @location(0) color: vec3f, - - // The 2D position of the pixel fragment relative to the center of the quad. The quad edges - // are at coordinates (±1, 0) and (0, ±1). 
- @location(1) quad_relative: vec2f, -} - -@vertex -fn linepoints_vert(@builtin(vertex_index) vid: u32, @location(0) point: vec2f) -> SDFCircleOut { - let quad_corner = quad_vertices[quad_fill_indices[vid]] - vec2(0.5); - let rect_dim = vec2(linepoints_uniforms.point_size); - let p = (point + rect_dim * quad_corner) / vec2(f32(uniforms.width), f32(uniforms.height)); - - return SDFCircleOut( - map_to_ndc(p), - linepoints_uniforms.point_color, - // Normalize the corners of the quad such that they form a vector of length √2. This should - // align the edge fragments to ±1. The post-interpolation values of `quad_relative` will - // then form a distance field that can represent a circle of radius 1 within the quad - // (where the distance is relative to the center of the circle). - normalize(quad_corner) * sqrt(2.), - ); -} - -@fragment -fn solid_color_frag(in: VSOut) -> @location(0) vec4f { - return in.color; -} - -@fragment -fn sdf_circle_frag(in: SDFCircleOut) -> @location(0) vec4f { - // Draw an antialiased circle with a fading margin as a visual effect. `THRESHOLD` is the - // distance from the center of the circle to the edge where the fade begins. - let THRESHOLD = 0.6; - let d = saturate(length(in.quad_relative)); - let alpha = select(1., 1. 
- smoothstep(THRESHOLD, 1., d), d > THRESHOLD); - return vec4(in.color.rgb, alpha); -} -"#; diff --git a/vello/src/debug/renderer.rs b/vello/src/debug/renderer.rs new file mode 100644 index 000000000..0b7af1fea --- /dev/null +++ b/vello/src/debug/renderer.rs @@ -0,0 +1,494 @@ +use super::DebugLayers; +use crate::{ + debug::validate::{validate_line_soup, LineEndpoint}, + recording::{BindType, DrawParams, ImageProxy, Recording, ResourceProxy, ShaderId}, + render::CapturedBuffers, + wgpu_engine::WgpuEngine, + DebugDownloads, RenderParams, +}; + +use { + bytemuck::{offset_of, Pod, Zeroable}, + peniko::Color, + vello_encoding::{BumpAllocators, LineSoup, PathBbox}, +}; +pub(crate) struct DebugRenderer { + // `clear_tint` slightly darkens the output from the vello renderer to make the debug overlays + // more distinguishable. + clear_tint: ShaderId, + bboxes: ShaderId, + linesoup: ShaderId, + linesoup_points: ShaderId, + unpaired_points: ShaderId, +} + +impl DebugRenderer { + pub fn new( + device: &wgpu::Device, + target_format: wgpu::TextureFormat, + engine: &mut WgpuEngine, + ) -> Self { + let module = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some("debug layers"), + source: wgpu::ShaderSource::Wgsl(SHADERS.into()), + }); + + let clear_tint = engine.add_render_shader( + device, + "clear-tint", + &module, + "full_screen_quad_vert", + "solid_color_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: Some(wgpu::BlendState { + color: wgpu::BlendComponent { + src_factor: wgpu::BlendFactor::SrcAlpha, + dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, + operation: wgpu::BlendOperation::Add, + }, + alpha: wgpu::BlendComponent::OVER, + }), + write_mask: wgpu::ColorWrites::ALL, + }, + None, + &[], + ); + let bboxes = engine.add_render_shader( + device, + "bbox-debug", + &module, + "bbox_vert", + "solid_color_frag", + wgpu::PrimitiveTopology::LineStrip, + wgpu::ColorTargetState { + format: 
target_format, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the PathBbox structure. + Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[ + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Sint32x2, + offset: offset_of!(PathBbox, x0) as u64, + shader_location: 0, + }, + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Sint32x2, + offset: offset_of!(PathBbox, x1) as u64, + shader_location: 1, + }, + ], + }), + &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], + ); + let linesoup = engine.add_render_shader( + device, + "linesoup-debug", + &module, + "linesoup_vert", + "solid_color_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the LineSoup structure. + Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[ + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineSoup, p0) as u64, + shader_location: 0, + }, + wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineSoup, p1) as u64, + shader_location: 1, + }, + ], + }), + &[(BindType::Uniform, wgpu::ShaderStages::VERTEX)], + ); + let linesoup_points = engine.add_render_shader( + device, + "linepoints-debug", + &module, + "linepoints_vert", + "sdf_circle_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: Some(wgpu::BlendState { + color: wgpu::BlendComponent { + src_factor: wgpu::BlendFactor::SrcAlpha, + dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, + operation: wgpu::BlendOperation::Add, + }, + alpha: wgpu::BlendComponent::OVER, + }), + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the LineSoup 
structure. The pipeline only processes the + // first point of each line. Since all points should be paired, this is enough to + // render all points. All unpaired points alone get drawn by the `unpaired_points` + // pipeline, so no point should get missed. + Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineSoup, p0) as u64, + shader_location: 0, + }], + }), + &[ + (BindType::Uniform, wgpu::ShaderStages::VERTEX), + ( + BindType::Uniform, + wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ), + ], + ); + let unpaired_points = engine.add_render_shader( + device, + "linepoints-debug", + &module, + "linepoints_vert", + "sdf_circle_frag", + wgpu::PrimitiveTopology::TriangleStrip, + wgpu::ColorTargetState { + format: target_format, + blend: Some(wgpu::BlendState { + color: wgpu::BlendComponent { + src_factor: wgpu::BlendFactor::SrcAlpha, + dst_factor: wgpu::BlendFactor::OneMinusSrcAlpha, + operation: wgpu::BlendOperation::Add, + }, + alpha: wgpu::BlendComponent::OVER, + }), + write_mask: wgpu::ColorWrites::ALL, + }, + // This mirrors the layout of the LineSoup structure. 
+ Some(wgpu::VertexBufferLayout { + array_stride: std::mem::size_of::() as u64, + step_mode: wgpu::VertexStepMode::Instance, + attributes: &[wgpu::VertexAttribute { + format: wgpu::VertexFormat::Float32x2, + offset: offset_of!(LineEndpoint, x) as u64, + shader_location: 0, + }], + }), + &[ + (BindType::Uniform, wgpu::ShaderStages::VERTEX), + ( + BindType::Uniform, + wgpu::ShaderStages::VERTEX | wgpu::ShaderStages::FRAGMENT, + ), + ], + ); + + Self { + clear_tint, + bboxes, + linesoup, + linesoup_points, + unpaired_points, + } + } + + pub fn render( + &self, + recording: &mut Recording, + target: ImageProxy, + captured: &CapturedBuffers, + bump: &BumpAllocators, + params: &RenderParams, + downloads: &DebugDownloads, + ) { + if params.debug.is_empty() { + return; + } + + let (unpaired_pts_len, unpaired_pts_buf) = + if params.debug.check_bits(DebugLayers::VALIDATION) { + // TODO: have this write directly to a GPU buffer? + let unpaired_pts: Vec = + validate_line_soup(bytemuck::cast_slice(&downloads.lines.get_mapped_range())); + if unpaired_pts.is_empty() { + (0, None) + } else { + ( + unpaired_pts.len(), + Some( + recording + .upload("unpaired points", bytemuck::cast_slice(&unpaired_pts[..])), + ), + ) + } + } else { + (0, None) + }; + + let uniforms = Uniforms { + width: params.width, + height: params.height, + }; + let uniforms_buf = ResourceProxy::Buffer( + recording.upload_uniform("uniforms", bytemuck::bytes_of(&uniforms)), + ); + + let linepoints_uniforms = [ + LinepointsUniforms::new(Color::DARK_CYAN, 10.), + LinepointsUniforms::new(Color::RED, 80.), + ]; + let linepoints_uniforms_buf = recording.upload_uniform( + "linepoints uniforms", + bytemuck::bytes_of(&linepoints_uniforms), + ); + + recording.draw(DrawParams { + shader_id: self.clear_tint, + instance_count: 1, + vertex_count: 4, + vertex_buffer: None, + resources: vec![], + target, + clear_color: None, + }); + if params.debug.check_bits(DebugLayers::BOUNDING_BOXES) { + recording.draw(DrawParams { + 
shader_id: self.bboxes, + instance_count: captured.sizes.path_bboxes.len(), + vertex_count: 5, + vertex_buffer: Some(captured.path_bboxes), + resources: vec![uniforms_buf], + target, + clear_color: None, + }); + } + if params.debug.check_bits(DebugLayers::LINESOUP_SEGMENTS) { + recording.draw(DrawParams { + shader_id: self.linesoup, + instance_count: bump.lines, + vertex_count: 4, + vertex_buffer: Some(captured.lines), + resources: vec![uniforms_buf], + target, + clear_color: None, + }); + } + if params.debug.check_bits(DebugLayers::LINESOUP_POINTS) { + recording.draw(DrawParams { + shader_id: self.linesoup_points, + instance_count: bump.lines, + vertex_count: 4, + vertex_buffer: Some(captured.lines), + resources: vec![ + uniforms_buf, + ResourceProxy::BufferRange { + proxy: linepoints_uniforms_buf, + offset: 0, + size: std::mem::size_of::() as u64, + }, + ], + target, + clear_color: None, + }); + } + if let Some(unpaired_pts_buf) = unpaired_pts_buf { + recording.draw(DrawParams { + shader_id: self.unpaired_points, + instance_count: unpaired_pts_len.try_into().unwrap(), + vertex_count: 4, + vertex_buffer: Some(unpaired_pts_buf), + resources: vec![ + uniforms_buf, + ResourceProxy::BufferRange { + proxy: linepoints_uniforms_buf, + offset: std::mem::size_of::() as u64, + size: std::mem::size_of::() as u64, + }, + ], + target, + clear_color: None, + }); + recording.free_buffer(unpaired_pts_buf); + } + + recording.free_resource(uniforms_buf); + recording.free_buffer(linepoints_uniforms_buf); + } +} + +#[derive(Copy, Clone, Zeroable, Pod)] +#[repr(C)] +struct Uniforms { + width: u32, + height: u32, +} + +#[derive(Copy, Clone, Zeroable, Pod)] +#[repr(C)] +struct LinepointsUniforms { + point_color: [f32; 3], + point_size: f32, + // Uniform parameters for individual SDF point draws are stored in a single buffer. + // This 240 byte padding is here to bring the element ffset alignment of 256 bytes. 
+ // (see https://www.w3.org/TR/webgpu/#dom-supported-limits-minuniformbufferoffsetalignment) + _pad0: [u32; 30], + _pad1: [u32; 30], +} + +impl LinepointsUniforms { + fn new(color: Color, point_size: f32) -> Self { + Self { + point_color: [ + color.r as f32 / 255., + color.g as f32 / 255., + color.b as f32 / 255., + ], + point_size, + _pad0: [0; 30], + _pad1: [0; 30], + } + } +} + +const SHADERS: &str = r#" + +// Map from y-down normalized coordinates to NDC: +fn map_to_ndc(p: vec2f) -> vec4f { + return vec4(vec2(1., -1.) * (2. * p - vec2(1.)), 0., 1.); +} + +alias QuadVertices = array; +var quad_vertices: QuadVertices = QuadVertices( + vec2(0., 1.), + vec2(0., 0.), + vec2(1., 0.), + vec2(1., 1.), +); + +var quad_fill_indices: array = array(0u, 3u, 1u, 2u); + +struct Uniforms { + width: u32, + height: u32, +} +@binding(0) @group(0) var uniforms: Uniforms; + +struct VSOut { + @builtin(position) pos: vec4f, + @location(0) color: vec4f, +} + +//////////// + +@vertex +fn full_screen_quad_vert(@builtin(vertex_index) vid: u32) -> VSOut { + let p = quad_vertices[quad_fill_indices[vid]]; + // TODO: Make the alpha configurable here. + // The clear tint is a full-screen layer above the entire image with this color. 
+ return VSOut(map_to_ndc(p), vec4(0., 0., 0., 0.2)); +} + +//////////// + +struct BboxIn { + @location(0) p0: vec2i, + @location(1) p1: vec2i, +} + +@vertex +fn bbox_vert(@builtin(vertex_index) vid: u32, bbox: BboxIn) -> VSOut { + let ul = vec2f(f32(bbox.p0.x), f32(bbox.p0.y)); + let br = vec2f(f32(bbox.p1.x), f32(bbox.p1.y)); + let dim = br - ul; + let p = (ul + dim * quad_vertices[vid % 4u]) / vec2f(f32(uniforms.width), f32(uniforms.height)); + return VSOut(map_to_ndc(p), vec4(0., 1., 0., 1.)); +} + +//////////// + +struct LinesoupIn { + @location(0) p0: vec2f, + @location(1) p1: vec2f, +} + +const LINE_THICKNESS: f32 = 4.; +const WIND_DOWN_COLOR: vec3f = vec3(0., 1., 0.); +const WIND_UP_COLOR: vec3f = vec3(1., 0., 0.); + +@vertex +fn linesoup_vert(@builtin(vertex_index) vid: u32, line: LinesoupIn) -> VSOut { + let quad_corner = quad_vertices[quad_fill_indices[vid]] - vec2(0.5); + let v = line.p1 - line.p0; + let m = mix(line.p0, line.p1, 0.5); + let s = vec2(LINE_THICKNESS, length(v)); + let vn = normalize(v); + let r = mat2x2(vn.y, -vn.x, vn.x, vn.y); + let p = (m + r * (s * quad_corner)) / vec2f(f32(uniforms.width), f32(uniforms.height)); + //let color = vec4(0.7, 0.5, 0., 1.); + let color = vec4(select(WIND_UP_COLOR, WIND_DOWN_COLOR, v.y >= 0.), 1.); + return VSOut(map_to_ndc(p), color); +} + +//////////// + +struct LinepointsUniforms { + point_color: vec3f, + point_size: f32, +} +@binding(1) @group(0) var linepoints_uniforms: LinepointsUniforms; + +struct SDFCircleOut { + @builtin(position) pos: vec4f, + + // Unpremultiplied color of the circle. + @location(0) color: vec3f, + + // The 2D position of the pixel fragment relative to the center of the quad. The quad edges + // are at coordinates (±1, 0) and (0, ±1). 
+ @location(1) quad_relative: vec2f, +} + +@vertex +fn linepoints_vert(@builtin(vertex_index) vid: u32, @location(0) point: vec2f) -> SDFCircleOut { + let quad_corner = quad_vertices[quad_fill_indices[vid]] - vec2(0.5); + let rect_dim = vec2(linepoints_uniforms.point_size); + let p = (point + rect_dim * quad_corner) / vec2(f32(uniforms.width), f32(uniforms.height)); + + return SDFCircleOut( + map_to_ndc(p), + linepoints_uniforms.point_color, + // Normalize the corners of the quad such that they form a vector of length √2. This should + // align the edge fragments to ±1. The post-interpolation values of `quad_relative` will + // then form a distance field that can represent a circle of radius 1 within the quad + // (where the distance is relative to the center of the circle). + normalize(quad_corner) * sqrt(2.), + ); +} + +@fragment +fn solid_color_frag(in: VSOut) -> @location(0) vec4f { + return in.color; +} + +@fragment +fn sdf_circle_frag(in: SDFCircleOut) -> @location(0) vec4f { + // Draw an antialiased circle with a fading margin as a visual effect. `THRESHOLD` is the + // distance from the center of the circle to the edge where the fade begins. + let THRESHOLD = 0.6; + let d = saturate(length(in.quad_relative)); + let alpha = select(1., 1. - smoothstep(THRESHOLD, 1., d), d > THRESHOLD); + return vec4(in.color.rgb, alpha); +} +"#; diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 596238d9b..f0cecf341 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -81,7 +81,6 @@ //! //! See the [`examples/`](https://github.com/linebender/vello/tree/main/examples) folder to see how that code integrates with frameworks like winit. 
-#[cfg(all(feature = "debug_layers", feature = "wgpu"))] mod debug; mod recording; mod render; @@ -126,7 +125,6 @@ use vello_encoding::Resolver; #[cfg(feature = "wgpu")] use wgpu_engine::{ExternalResource, WgpuEngine}; -#[cfg(all(feature = "debug_layers", feature = "wgpu"))] pub use debug::DebugLayers; /// Temporary export, used in `with_winit` for stats pub use vello_encoding::BumpAllocators; @@ -280,8 +278,11 @@ pub struct RenderParams { /// constructing the `Renderer`. pub antialiasing_method: AaConfig, - #[cfg(all(feature = "debug_layers", feature = "wgpu"))] /// Options for debug layer rendering. + /// + /// This only has an effect when the `debug_layers` feature is enabled. + // This is exposed publicly as a least-effort to avoid changing the API when features change. + // We expect the API to change here in the near future. pub debug: DebugLayers, } diff --git a/vello_tests/src/lib.rs b/vello_tests/src/lib.rs index e2b6c7898..e97910593 100644 --- a/vello_tests/src/lib.rs +++ b/vello_tests/src/lib.rs @@ -98,6 +98,7 @@ pub async fn get_scene_image(params: &TestParams, scene: &Scene) -> Result Date: Tue, 30 Jul 2024 10:28:11 +0100 Subject: [PATCH 15/24] Add missing copyright header --- vello/Cargo.toml | 2 +- vello/src/debug/renderer.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/vello/Cargo.toml b/vello/Cargo.toml index 15365beb1..83ba98d55 100644 --- a/vello/Cargo.toml +++ b/vello/Cargo.toml @@ -10,7 +10,7 @@ license.workspace = true repository.workspace = true [features] -default = ["wgpu", "debug_layers"] +default = ["wgpu"] # Enables GPU memory usage estimation. This performs additional computations # in order to estimate the minimum required allocations for buffers backing # bump-allocated GPU memory. 
diff --git a/vello/src/debug/renderer.rs b/vello/src/debug/renderer.rs index 0b7af1fea..4937ddce7 100644 --- a/vello/src/debug/renderer.rs +++ b/vello/src/debug/renderer.rs @@ -1,3 +1,6 @@ +// Copyright 2023 the Vello Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + use super::DebugLayers; use crate::{ debug::validate::{validate_line_soup, LineEndpoint}, From aeb196850f0ec41967dfb890e879d0405c234b4b Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:02:25 +0100 Subject: [PATCH 16/24] Document `DebugLayers` --- vello/src/debug.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 421b79210..ab988e55d 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -45,15 +45,24 @@ impl DebugLayers { /// Requires the `debug_layers` feature. pub const VALIDATION: DebugLayers = DebugLayers(1 << 3); + /// Construct a `DebugLayers` from the raw bits. pub const fn from_bits(bits: u8) -> Self { Self(bits) } + /// Get the raw representation of this value. + pub const fn bits(self) -> u8 { + self.0 + } + + /// A `DebugLayers` with no layers enabled. pub const fn none() -> Self { Self(0) } + /// A `DebugLayers` with all layers enabled. pub const fn all() -> Self { + // Custom BitOr is not const, so need to manipulate the inner value here Self( Self::BOUNDING_BOXES.0 | Self::LINESOUP_SEGMENTS.0 @@ -62,19 +71,23 @@ impl DebugLayers { ) } - pub fn is_empty(&self) -> bool { + /// True if this `DebugLayers` has no layers enabled. + pub const fn is_empty(self) -> bool { self.0 == 0 } - pub fn check_bits(&self, mask: DebugLayers) -> bool { + /// Determine whether `self` is a superset of `mask`. + pub const fn check_bits(self, mask: DebugLayers) -> bool { self.0 & mask.0 == mask.0 } + /// Toggle the value of the layers specified in mask. 
pub fn toggle(&mut self, mask: DebugLayers) { self.0 ^= mask.0; } } +/// Returns the union of the two input `DebugLayers`. impl std::ops::BitOr for DebugLayers { type Output = Self; From fe4b67be37db0836e4f31179396475d984e20c0a Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:03:52 +0100 Subject: [PATCH 17/24] Rename `check_bits` to `contains` --- vello/src/debug.rs | 2 +- vello/src/debug/renderer.rs | 42 ++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index ab988e55d..1fe8b09ca 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -77,7 +77,7 @@ impl DebugLayers { } /// Determine whether `self` is a superset of `mask`. - pub const fn check_bits(self, mask: DebugLayers) -> bool { + pub const fn contains(self, mask: DebugLayers) -> bool { self.0 & mask.0 == mask.0 } diff --git a/vello/src/debug/renderer.rs b/vello/src/debug/renderer.rs index 4937ddce7..99020bc85 100644 --- a/vello/src/debug/renderer.rs +++ b/vello/src/debug/renderer.rs @@ -220,25 +220,25 @@ impl DebugRenderer { return; } - let (unpaired_pts_len, unpaired_pts_buf) = - if params.debug.check_bits(DebugLayers::VALIDATION) { - // TODO: have this write directly to a GPU buffer? - let unpaired_pts: Vec = - validate_line_soup(bytemuck::cast_slice(&downloads.lines.get_mapped_range())); - if unpaired_pts.is_empty() { - (0, None) - } else { - ( - unpaired_pts.len(), - Some( - recording - .upload("unpaired points", bytemuck::cast_slice(&unpaired_pts[..])), - ), - ) - } - } else { + let (unpaired_pts_len, unpaired_pts_buf) = if params.debug.contains(DebugLayers::VALIDATION) + { + // TODO: have this write directly to a GPU buffer? 
+ let unpaired_pts: Vec = + validate_line_soup(bytemuck::cast_slice(&downloads.lines.get_mapped_range())); + if unpaired_pts.is_empty() { (0, None) - }; + } else { + ( + unpaired_pts.len(), + Some( + recording + .upload("unpaired points", bytemuck::cast_slice(&unpaired_pts[..])), + ), + ) + } + } else { + (0, None) + }; let uniforms = Uniforms { width: params.width, @@ -266,7 +266,7 @@ impl DebugRenderer { target, clear_color: None, }); - if params.debug.check_bits(DebugLayers::BOUNDING_BOXES) { + if params.debug.contains(DebugLayers::BOUNDING_BOXES) { recording.draw(DrawParams { shader_id: self.bboxes, instance_count: captured.sizes.path_bboxes.len(), @@ -277,7 +277,7 @@ impl DebugRenderer { clear_color: None, }); } - if params.debug.check_bits(DebugLayers::LINESOUP_SEGMENTS) { + if params.debug.contains(DebugLayers::LINESOUP_SEGMENTS) { recording.draw(DrawParams { shader_id: self.linesoup, instance_count: bump.lines, @@ -288,7 +288,7 @@ impl DebugRenderer { clear_color: None, }); } - if params.debug.check_bits(DebugLayers::LINESOUP_POINTS) { + if params.debug.contains(DebugLayers::LINESOUP_POINTS) { recording.draw(DrawParams { shader_id: self.linesoup_points, instance_count: bump.lines, From 9944518379e78e96fe537579de5120cd0ce84738 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:06:04 +0100 Subject: [PATCH 18/24] Add `{0}` in download error --- vello/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vello/src/lib.rs b/vello/src/lib.rs index f0cecf341..4aa072381 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -217,7 +217,7 @@ pub enum Error { /// Failed to download an internal buffer for debug visualization. 
#[cfg(feature = "wgpu")] #[cfg(feature = "debug_layers")] - #[error("Failed to download internal buffer for visualization")] + #[error("Failed to download internal buffer '{0}' for visualization")] DownloadError(&'static str), #[cfg(feature = "wgpu")] @@ -840,7 +840,7 @@ impl<'a> DebugDownloads<'a> { use vello_encoding::LineSoup; let Some(lines_buf) = engine.get_download(captured.lines) else { - return Err(Error::DownloadError("could not download LineSoup buffer")); + return Err(Error::DownloadError("linesoup")); }; let lines = lines_buf.slice(..bump.lines as u64 * std::mem::size_of::() as u64); From 9327ba2b9e02231b905a0c6bf083fe7ae0eec57b Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:11:51 +0100 Subject: [PATCH 19/24] Add a warning message if using debug_layers without the async pipeline --- examples/with_winit/src/lib.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/with_winit/src/lib.rs b/examples/with_winit/src/lib.rs index 47ef5239d..e40aa7f49 100644 --- a/examples/with_winit/src/lib.rs +++ b/examples/with_winit/src/lib.rs @@ -694,6 +694,9 @@ fn run( }; let debug = vello::DebugLayers::none(); + if cfg!(feature = "debug_layers") && !args.async_pipeline { + log::warn!("Debug Layers won't work without using `--async-pipeline`."); + } let mut app = VelloApp { context: render_cx, @@ -808,6 +811,7 @@ pub fn main() -> anyhow::Result<()> { #[cfg(not(target_arch = "wasm32"))] env_logger::builder() .format_timestamp(Some(env_logger::TimestampPrecision::Millis)) + .filter_level(log::LevelFilter::Warn) .init(); let args = parse_arguments(); let scenes = args.args.select_scene_set()?; From d6d436c987a55a3b14fa62eab0eb0dd2266ba04a Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Tue, 30 Jul 2024 13:55:11 +0100 Subject: [PATCH 20/24] Address review feedback Co-Authored-By: Bruce Mitchener --- vello/src/debug.rs | 22 ++++++++++++++++++++++ 
vello/src/debug/renderer.rs | 2 +- vello/src/debug/validate.rs | 2 ++ vello/src/render.rs | 10 ++++++++++ 4 files changed, 35 insertions(+), 1 deletion(-) diff --git a/vello/src/debug.rs b/vello/src/debug.rs index 1fe8b09ca..5ddf89e1d 100644 --- a/vello/src/debug.rs +++ b/vello/src/debug.rs @@ -6,6 +6,8 @@ mod renderer; #[cfg(all(feature = "debug_layers", feature = "wgpu"))] mod validate; +use std::fmt::Debug; + #[cfg(all(feature = "debug_layers", feature = "wgpu"))] pub(crate) use renderer::*; @@ -15,6 +17,26 @@ pub(crate) use renderer::*; #[derive(Copy, Clone)] pub struct DebugLayers(u8); +impl Debug for DebugLayers { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut tuple = f.debug_tuple("DebugLayers"); + if self.contains(Self::BOUNDING_BOXES) { + tuple.field(&"BOUNDING_BOXES"); + } + if self.contains(Self::LINESOUP_SEGMENTS) { + tuple.field(&"LINESOUP_SEGMENTS"); + } + if self.contains(Self::LINESOUP_POINTS) { + tuple.field(&"LINESOUP_POINTS"); + } + if self.contains(Self::VALIDATION) { + tuple.field(&"VALIDATION"); + } + + tuple.finish() + } +} + // TODO: Currently all layers require read-back of the BumpAllocators buffer. This isn't strictly // necessary for layers other than `VALIDATION`. The debug visualizations use the bump buffer only // to obtain various instance counts for draws and these could instead get written out to an diff --git a/vello/src/debug/renderer.rs b/vello/src/debug/renderer.rs index 99020bc85..534607894 100644 --- a/vello/src/debug/renderer.rs +++ b/vello/src/debug/renderer.rs @@ -344,7 +344,7 @@ struct LinepointsUniforms { point_color: [f32; 3], point_size: f32, // Uniform parameters for individual SDF point draws are stored in a single buffer. - // This 240 byte padding is here to bring the element ffset alignment of 256 bytes. + // This 240 byte padding is here to bring the element offset alignment of 256 bytes. 
// (see https://www.w3.org/TR/webgpu/#dom-supported-limits-minuniformbufferoffsetalignment) _pad0: [u32; 30], _pad1: [u32; 30], diff --git a/vello/src/debug/validate.rs b/vello/src/debug/validate.rs index 8c3381264..d3bd6e5d3 100644 --- a/vello/src/debug/validate.rs +++ b/vello/src/debug/validate.rs @@ -13,6 +13,8 @@ pub struct LineEndpoint { pub path_ix: u32, // Coordinates in IEEE-754 32-bit float representation + // We use u32 here because we are comparing bit patterns rather than proximity, to evaluate exact watertightness. + // To accelerate this, we use a BTreeSet, which doesn't support f32 values directly. pub x: u32, pub y: u32, } diff --git a/vello/src/render.rs b/vello/src/render.rs index 11bada553..bbd1c8c57 100644 --- a/vello/src/render.rs +++ b/vello/src/render.rs @@ -4,6 +4,7 @@ //! Take an encoded scene and create a graph to render it use std::mem::size_of; +use std::sync::atomic::AtomicBool; use crate::recording::{BufferProxy, ImageFormat, ImageProxy, Recording, ResourceProxy}; use crate::shaders::FullShaders; @@ -147,6 +148,15 @@ impl Render { data, )) }; + if cfg!(not(feature = "debug_layers")) && !params.debug.is_empty() { + static HAS_WARNED: AtomicBool = AtomicBool::new(false); + if !HAS_WARNED.swap(true, std::sync::atomic::Ordering::Release) { + log::warn!( + "Requested debug layers {debug:?} but `debug_layers` feature is not enabled.", + debug = params.debug + ); + } + } let image_atlas = if images.images.is_empty() { ImageProxy::new(1, 1, ImageFormat::Rgba8) } else { From 7bf41f1d4121df10900a56f3a9b465492243ff1e Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:50:38 +0100 Subject: [PATCH 21/24] Always enable debug_layers in `with_winit` --- examples/with_winit/Cargo.toml | 3 +-- examples/with_winit/src/lib.rs | 38 ++++++++++++++++++---------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/examples/with_winit/Cargo.toml b/examples/with_winit/Cargo.toml index
8bbf30fc7..783e233d2 100644 --- a/examples/with_winit/Cargo.toml +++ b/examples/with_winit/Cargo.toml @@ -16,7 +16,6 @@ default = ["wgpu-profiler"] # Enable the use of wgpu-profiler. This is an optional feature for times when we use a git dependency on # wgpu (which means the dependency used in wgpu-profiler would be incompatible) wgpu-profiler = ["dep:wgpu-profiler", "vello/wgpu-profiler"] -debug_layers = ["vello/debug_layers"] [lints] workspace = true @@ -28,7 +27,7 @@ path = "src/main.rs" [dependencies] -vello = { workspace = true, features = ["buffer_labels"] } +vello = { workspace = true, features = ["buffer_labels", "debug_layers"] } scenes = { workspace = true } anyhow = { workspace = true } diff --git a/examples/with_winit/src/lib.rs b/examples/with_winit/src/lib.rs index e40aa7f49..0e8678149 100644 --- a/examples/with_winit/src/lib.rs +++ b/examples/with_winit/src/lib.rs @@ -331,21 +331,26 @@ impl<'s> ApplicationHandler for VelloApp<'s> { }, ); } - #[cfg(feature = "debug_layers")] - "1" => { - self.debug.toggle(vello::DebugLayers::BOUNDING_BOXES); - } - #[cfg(feature = "debug_layers")] - "2" => { - self.debug.toggle(vello::DebugLayers::LINESOUP_SEGMENTS); - } - #[cfg(feature = "debug_layers")] - "3" => { - self.debug.toggle(vello::DebugLayers::LINESOUP_POINTS); - } - #[cfg(feature = "debug_layers")] - "4" => { - self.debug.toggle(vello::DebugLayers::VALIDATION); + debug_layer @ ("1" | "2" | "3" | "4") => { + match debug_layer { + "1" => { + self.debug.toggle(vello::DebugLayers::BOUNDING_BOXES); + } + "2" => { + self.debug + .toggle(vello::DebugLayers::LINESOUP_SEGMENTS); + } + "3" => { + self.debug.toggle(vello::DebugLayers::LINESOUP_POINTS); + } + "4" => { + self.debug.toggle(vello::DebugLayers::VALIDATION); + } + _ => unreachable!(), + } + if !self.debug.is_empty() && !self.async_pipeline { + log::warn!("Debug Layers won't work without using `--async-pipeline`. 
Requested {:?}", self.debug); + } } _ => {} } @@ -694,9 +699,6 @@ fn run( }; let debug = vello::DebugLayers::none(); - if cfg!(feature = "debug_layers") && !args.async_pipeline { - log::warn!("Debug Layers won't work without using `--async-pipeline`."); - } let mut app = VelloApp { context: render_cx, From c09d27fd1a76406fbe195e3ff3eb8496d98a663a Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:52:51 +0100 Subject: [PATCH 22/24] Double check that CI still validates the without debug_layers scenario --- vello/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/vello/src/lib.rs b/vello/src/lib.rs index 4aa072381..ddc2c4a12 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -335,7 +335,6 @@ impl Renderer { let blit = options .surface_format .map(|surface_format| BlitPipeline::new(device, surface_format, &mut engine)); - #[cfg(feature = "debug_layers")] let debug = options .surface_format .map(|surface_format| debug::DebugRenderer::new(device, surface_format, &mut engine)); From 58c9b38d05ce27f258362cf2b72badd9c839fd2e Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:57:06 +0100 Subject: [PATCH 23/24] Fix rebase change --- vello/src/wgpu_engine.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/vello/src/wgpu_engine.rs b/vello/src/wgpu_engine.rs index 1c13e7f33..42ab919db 100644 --- a/vello/src/wgpu_engine.rs +++ b/vello/src/wgpu_engine.rs @@ -355,6 +355,7 @@ impl WgpuEngine { depth_stencil: None, multisample: wgpu::MultisampleState::default(), multiview: None, + cache: None, }); let id = self.shaders.len(); self.shaders.push(Shader { From f6abadfa2116736d3c8032f95af072992ae36c83 Mon Sep 17 00:00:00 2001 From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com> Date: Fri, 2 Aug 2024 10:58:07 +0100 Subject: [PATCH 24/24] Revert c09d27fd1a76406fbe195e3ff3eb8496d98a663a --- vello/src/lib.rs | 1 + 1 file changed, 1 insertion(+) 
diff --git a/vello/src/lib.rs b/vello/src/lib.rs index ddc2c4a12..4aa072381 100644 --- a/vello/src/lib.rs +++ b/vello/src/lib.rs @@ -335,6 +335,7 @@ impl Renderer { let blit = options .surface_format .map(|surface_format| BlitPipeline::new(device, surface_format, &mut engine)); + #[cfg(feature = "debug_layers")] let debug = options .surface_format .map(|surface_format| debug::DebugRenderer::new(device, surface_format, &mut engine));