From 9085b4c215a3331a2ad15348a4eec6235d948507 Mon Sep 17 00:00:00 2001 From: Marcell Kiss Date: Sun, 11 Feb 2024 21:13:00 +0000 Subject: [PATCH] add support for breaking cross-stream deps; fix usage bug in ex9 --- examples/09_persistent_descriptorset.cpp | 1 + src/ExecutableRenderGraph.cpp | 48 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/examples/09_persistent_descriptorset.cpp b/examples/09_persistent_descriptorset.cpp index 15c209c3..c77656c7 100644 --- a/examples/09_persistent_descriptorset.cpp +++ b/examples/09_persistent_descriptorset.cpp @@ -70,6 +70,7 @@ namespace { // Similarly to buffers, we allocate the image and enqueue the upload texture_of_doge = vuk::ImageAttachment::from_preset( vuk::ImageAttachment::Preset::eMap2D, vuk::Format::eR8G8B8A8Srgb, vuk::Extent3D{ (unsigned)x, (unsigned)y, 1u }, vuk::Samples::e1); + texture_of_doge.usage |= vuk::ImageUsageFlagBits::eTransferSrc; texture_of_doge.level_count = 1; auto [image, view, doge_src] = vuk::create_image_and_view_with_data(allocator, vuk::DomainFlagBits::eTransferOnTransfer, texture_of_doge, doge_image); image_of_doge = std::move(image); diff --git a/src/ExecutableRenderGraph.cpp b/src/ExecutableRenderGraph.cpp index 2c766668..c5684789 100644 --- a/src/ExecutableRenderGraph.cpp +++ b/src/ExecutableRenderGraph.cpp @@ -1072,6 +1072,7 @@ namespace vuk { std::shared_ptr cg_module; std::unordered_map> streams; + std::unordered_map pending_syncs; // start recording if needed // all dependant domains flushed @@ -1108,6 +1109,7 @@ namespace vuk { bool has_both = has_src && has_dst; bool cross = has_both && (src_stream != dst_stream); bool only_src = has_src && !has_dst; + bool only_dst = !has_src && has_dst; if (cross) { dst_stream->add_dependency(src_stream); @@ -1116,17 +1118,41 @@ namespace vuk { if (base_ty == cg_module->builtin_image) { auto& img_att = *reinterpret_cast(value); if (has_dst) { + if (only_dst) { + auto it = pending_syncs.find(value); + if (it != pending_syncs.end()) { + if (it->second != dst_stream) { + it->second->synch_image(img_att, src_use, dst_use, value); + } + pending_syncs.erase(it); + } + } dst_stream->synch_image(img_att, src_use, dst_use, value); } if (only_src || cross) { src_stream->synch_image(img_att, src_use, dst_use, value); + if (only_src) { + pending_syncs.emplace(value, src_stream); + } } } else if (base_ty == cg_module->builtin_buffer) { // buffer needs no cross if (has_dst) { + if (only_dst) { + auto it = pending_syncs.find(value); + if (it != pending_syncs.end()) { + if (it->second != dst_stream) { + it->second->synch_memory(src_use, dst_use, value); + } + pending_syncs.erase(it); + } + } dst_stream->synch_memory(src_use, dst_use, value); } else if (has_src) { src_stream->synch_memory(src_use, dst_use, value); + if (only_src) { + pending_syncs.emplace(value, src_stream); + } } } else if (base_ty->kind == Type::ARRAY_TY) { auto elem_ty = base_ty->array.T; @@ -1135,10 +1161,22 @@ namespace vuk { auto img_atts = reinterpret_cast(value); for (int i = 0; i < size; i++) { if (has_dst) { + if (only_dst) { + auto it = pending_syncs.find(value); + if (it != pending_syncs.end()) { + if (it->second != dst_stream) { + it->second->synch_image(img_atts[i], src_use, dst_use, value); + } + pending_syncs.erase(it); + } + } dst_stream->synch_image(img_atts[i], src_use, dst_use, &img_atts[i]); } if (only_src || cross) { src_stream->synch_image(img_atts[i], src_use, dst_use, &img_atts[i]); + if (only_src) { + pending_syncs.emplace(&img_atts[i], src_stream); + } } } } else if (elem_ty == cg_module->builtin_buffer) { @@ -1146,9 +1184,19 @@ namespace vuk { // buffer needs no cross auto bufs = reinterpret_cast(value); if (has_dst) { + auto it = pending_syncs.find(value); + if (it != pending_syncs.end()) { + if (it->second != dst_stream) { + it->second->synch_memory(src_use, dst_use, &bufs[i]); + } + pending_syncs.erase(it); + } dst_stream->synch_memory(src_use, dst_use, &bufs[i]); } else if (has_src) { src_stream->synch_memory(src_use, dst_use, &bufs[i]); + if (only_src) { + pending_syncs.emplace(&bufs[i], src_stream); + } } } } else {