diff --git a/CMakeSettings.json b/CMakeSettings.json index f616c989..6965d2e1 100644 --- a/CMakeSettings.json +++ b/CMakeSettings.json @@ -65,6 +65,11 @@ "name": "VUK_USE_DXC", "value": "False", "type": "BOOL" + }, + { + "name": "VUK_BUILD_TESTS", + "value": "True", + "type": "BOOL" } ] }, diff --git a/include/vuk/Future.hpp b/include/vuk/Future.hpp index ad08914b..d22e8666 100644 --- a/include/vuk/Future.hpp +++ b/include/vuk/Future.hpp @@ -38,21 +38,24 @@ namespace vuk { class UntypedFuture { public: - UntypedFuture(std::shared_ptr rg, Ref ref, Ref def) { + UntypedFuture(std::shared_ptr rg, Ref ref, Ref def, std::vector> dependent_blocks) { this->control = std::make_shared(); this->head = { rg->make_release(ref, &this->control->acqrel, Access::eNone, DomainFlagBits::eAny), 0 }; this->control->rg = std::move(rg); this->def = def; + this->dependent_blocks = std::move(dependent_blocks); } UntypedFuture(const UntypedFuture& o) noexcept : control{ std::make_shared(*o.control) }, def{ o.def } { head = { control->rg->make_release(o.get_head(), &this->control->acqrel, Access::eNone, DomainFlagBits::eAny), 0 }; + dependent_blocks = o.dependent_blocks; } UntypedFuture(UntypedFuture&& o) noexcept : control{ std::exchange(o.control, nullptr) }, + dependent_blocks{ std::exchange(o.dependent_blocks, {}) }, def{ std::exchange(o.def, {}) }, head{ std::exchange(o.head, {}) } {} @@ -61,12 +64,14 @@ namespace vuk { def = { o.def }; head = { control->rg->make_release(o.get_head(), &this->control->acqrel, Access::eNone, DomainFlagBits::eAny), 0 }; + dependent_blocks = o.dependent_blocks; return *this; } UntypedFuture& operator=(UntypedFuture&& o) noexcept { std::swap(o.control, control); + std::swap(o.dependent_blocks, dependent_blocks); std::swap(o.def, def); std::swap(o.head, head); @@ -126,6 +131,7 @@ namespace vuk { // TODO: remove this from public API std::shared_ptr control; + std::vector> dependent_blocks; protected: Ref def; @@ -258,7 +264,7 @@ namespace vuk { Future get_size() requires std::is_same_v { - return { get_render_graph(), def.node->valloc.args[1], {} }; + return { get_render_graph(), def.node->valloc.args[1], {}, {control} }; } void set_size(Future arg) @@ -275,7 +281,7 @@ namespace vuk { Ref item = control->rg->make_array_indexing(def.type()->array.T, get_head(), control->rg->make_constant(index)); assert(def.node->kind == Node::AALLOC); assert(def.type()->kind == Type::ARRAY_TY); - return Future()[0])>>(get_render_graph(), item, item_def); + return Future()[0])>>(get_render_graph(), item, item_def, {control}); } }; diff --git a/include/vuk/IR.hpp b/include/vuk/IR.hpp index 97a5f67a..be08ad21 100644 --- a/include/vuk/IR.hpp +++ b/include/vuk/IR.hpp @@ -106,7 +106,10 @@ namespace vuk { }; struct SchedulingInfo { - DomainFlags required_domain; + SchedulingInfo(DomainFlags required_domains) : required_domains(required_domains) {} + SchedulingInfo(DomainFlagBits required_domain) : required_domains(required_domain) {} + + DomainFlags required_domains; }; struct NodeDebugInfo { diff --git a/include/vuk/RenderGraph.hpp b/include/vuk/RenderGraph.hpp index 75c8f729..75acd1bb 100644 --- a/include/vuk/RenderGraph.hpp +++ b/include/vuk/RenderGraph.hpp @@ -516,10 +516,10 @@ private: struct is_tuple> : std::true_type {}; template - static auto make_ret(std::shared_ptr rg, Node* node, const std::tuple& us) { + static auto make_ret(std::shared_ptr rg, Node* node, const std::tuple& us, std::vector> dependent_blocks) { if constexpr (sizeof...(T) > 0) { size_t i = 0; - return std::tuple{ Future{ rg, { node, sizeof...(T) - (++i) }, std::get(us).def }... }; + return std::tuple{ Future{ rg, { node, sizeof...(T) - (++i) }, std::get(us).def, dependent_blocks }... }; } } @@ -541,7 +541,7 @@ private: using ret_tuple = std::tuple...>; template - static auto make_lam(Name name, F&& body) { + static auto make_lam(Name name, F&& body, SchedulingInfo scheduling_info) { auto callback = [typed_cb = std::move(body)](CommandBuffer& cb, std::span args, std::span meta, std::span rets) { // we do type recovery here -> convert untyped args to typed ones alignas(alignof(std::tuple)) char storage[sizeof(std::tuple)]; @@ -556,14 +556,19 @@ private: }; // when this function is called, we weave in this call into the IR - return [untyped_cb = std::move(callback), name](Future... args) mutable { + return [untyped_cb = std::move(callback), name, scheduling_info](Future... args) mutable { auto& first = [](auto& first, auto&...) -> auto& { return first; }(args...); auto& rgp = first.get_render_graph(); RG& rg = *rgp.get(); - [](auto& first, auto&... rest) { + + // TODO: write a test that this doesn't pass... + std::vector> dependent_blocks; + [&dependent_blocks](auto& first, auto&... rest) { (first.get_render_graph()->subgraphs.push_back(rest.get_render_graph()), ...); + dependent_blocks.push_back(first.control); + (dependent_blocks.push_back(rest.control), ...); }(args...); std::vector arg_types; @@ -582,35 +587,36 @@ private: opaque_fn_ty->debug_info = new TypeDebugInfo{ .name = name.c_str() }; auto opaque_fn = rg.make_declare_fn(opaque_fn_ty); Node* node = rg.make_call(opaque_fn, args.get_head()...); + node->scheduling_info = new SchedulingInfo(scheduling_info); if constexpr (is_tuple::value) { auto [idxs, ret_tuple] = intersect_tuples, Ret>(arg_tuple_as_a); - return make_ret(rgp, node, ret_tuple); + return make_ret(rgp, node, ret_tuple, std::move(dependent_blocks)); } else if constexpr (!std::is_same_v) { auto [idxs, ret_tuple] = intersect_tuples, std::tuple>(arg_tuple_as_a); - return std::get<0>(make_ret(rgp, node, ret_tuple)); + return std::get<0>(make_ret(rgp, node, ret_tuple, std::move(dependent_blocks))); } }; } }; template - [[nodiscard]] auto make_pass(Name name, F&& body) { + [[nodiscard]] auto make_pass(Name name, F&& body, SchedulingInfo scheduling_info = SchedulingInfo(DomainFlagBits::eAny)) { using traits = closure_traits; - return TupleMap>::template make_lam(name, std::forward(body)); + return TupleMap>::template make_lam(name, std::forward(body), scheduling_info); } [[nodiscard]] inline Future declare_ia(Name name, ImageAttachment ia = {}) { std::shared_ptr rg = std::make_shared(); Ref ref = rg->make_declare_image(ia); rg->name_outputs(ref.node, { name.c_str() }); - return { rg, ref, ref }; + return { rg, ref, ref, {} }; } [[nodiscard]] inline Future declare_buf(Name name, Buffer buf = {}) { std::shared_ptr rg = std::make_shared(); Ref ref = rg->make_declare_buffer(buf); rg->name_outputs(ref.node, { name.c_str() }); - return { rg, ref, ref }; + return { rg, ref, ref, {} }; } template @@ -623,7 +629,7 @@ private: (args.abandon(), ...); Ref ref = rg->make_declare_array(Type::stripped(refs[0].type()), refs, defs); rg->name_outputs(ref.node, { name.c_str() }); - return { rg, ref, ref }; + return { rg, ref, ref, {} }; } template @@ -639,13 +645,13 @@ private: } Ref ref = rg->make_declare_array(Type::stripped(refs[0].type()), refs, defs); rg->name_outputs(ref.node, { name.c_str() }); - return { rg, ref, ref }; + return { rg, ref, ref, {} }; } [[nodiscard]] inline Future declare_swapchain(Swapchain bundle) { std::shared_ptr rg = std::make_shared(); Ref ref = rg->make_declare_swapchain(bundle); - return { rg, ref, ref }; + return { rg, ref, ref, {} }; } [[nodiscard]] inline Future acquire_next_image(Name name, Future in) { diff --git a/src/ExecutableRenderGraph.cpp b/src/ExecutableRenderGraph.cpp index 7065731d..8f0cf686 100644 --- a/src/ExecutableRenderGraph.cpp +++ b/src/ExecutableRenderGraph.cpp @@ -1045,7 +1045,11 @@ namespace vuk { } Stream* stream_for_domain(DomainFlagBits domain) { - return streams.at(domain).get(); + auto it = streams.find(domain); + if (it != streams.end()) { + return it->second.get(); + } + return nullptr; } Stream* stream_for_executor(Executor* executor) { @@ -1158,6 +1162,14 @@ namespace vuk { recorder.streams.emplace(DomainFlagBits::eGraphicsQueue, std::make_unique(alloc, static_cast(exe), &impl->callbacks)); } + if (auto exe = ctx.get_executor(DomainFlagBits::eComputeQueue)) { + recorder.streams.emplace(DomainFlagBits::eComputeQueue, + std::make_unique(alloc, static_cast(exe), &impl->callbacks)); + } + if (auto exe = ctx.get_executor(DomainFlagBits::eTransferQueue)) { + recorder.streams.emplace(DomainFlagBits::eTransferQueue, + std::make_unique(alloc, static_cast(exe), &impl->callbacks)); + } auto host_stream = recorder.streams.at(DomainFlagBits::eHost).get(); std::deque pe_streams; diff --git a/src/RenderGraph.cpp b/src/RenderGraph.cpp index 97182291..54bf246a 100644 --- a/src/RenderGraph.cpp +++ b/src/RenderGraph.cpp @@ -410,7 +410,7 @@ namespace vuk { auto& execable = schedule_items[pop_idx]; ScheduledItem item{ .execable = execable, .scheduled_domain = - execable->scheduling_info ? pick_first_domain(execable->scheduling_info->required_domain) : vuk::DomainFlagBits::eAny }; + execable->scheduling_info ? pick_first_domain(execable->scheduling_info->required_domains) : vuk::DomainFlagBits::eAny }; if (execable->kind != Node::VALLOC) { // we use def nodes for deps, but we don't want to schedule them later as their ordering doesn't matter scheduled_execables.push_back(item); } @@ -640,7 +640,7 @@ namespace vuk { if (!node->scheduling_info) { return; } - auto& domain = node->scheduling_info->required_domain; + auto& domain = node->scheduling_info->required_domains; if (domain != last_domain && domain != DomainFlagBits::eDevice && domain != DomainFlagBits::eAny) { last_domain = domain; } diff --git a/src/tests/commands.cpp b/src/tests/commands.cpp index 62a15bab..2acfce87 100644 --- a/src/tests/commands.cpp +++ b/src/tests/commands.cpp @@ -184,13 +184,13 @@ TEST_CASE("multi return pass") { TEST_CASE("scheduling single-queue") { { std::string execution; - + auto buf0 = allocate_buffer(*test_context.allocator, { .mem_usage = MemoryUsage::eGPUonly, .size = sizeof(uint32_t) * 4 }); - + auto write = make_pass("write", [&](CommandBuffer& cbuf, VUK_BA(Access::eTransferWrite) dst) { execution += "w"; - return dst; - }); + return dst; + }); auto read = make_pass("read", [&](CommandBuffer& cbuf, VUK_BA(Access::eTransferRead) dst) { execution += "r"; return dst; @@ -254,11 +254,78 @@ TEST_CASE("scheduling with submitted") { { auto written = write(declare_buf("src0", **buf0)); written.wait(*test_context.allocator, test_context.compiler); - write(std::move(written)).wait(*test_context.allocator, test_context.compiler); + auto res = write(std::move(written)); + res.wait(*test_context.allocator, test_context.compiler); CHECK(execution == "ww"); execution = ""; } } } +TEST_CASE("multi-queue buffers") { + { + std::string execution; + + auto buf0 = allocate_buffer(*test_context.allocator, { .mem_usage = MemoryUsage::eGPUonly, .size = sizeof(uint32_t) * 4 }); + + auto write = make_pass( + "write_A", + [&](CommandBuffer& cbuf, VUK_BA(Access::eTransferWrite) dst) { + cbuf.fill_buffer(dst, 0xf); + execution += "w"; + return dst; + }, + DomainFlagBits::eTransferQueue); + auto read = make_pass( + "read_B", + [&](CommandBuffer& cbuf, VUK_BA(Access::eTransferRead) dst) { + auto dummy = allocate_buffer(*test_context.allocator, { .mem_usage = MemoryUsage::eGPUonly, .size = sizeof(uint32_t) * 4 }); + cbuf.copy_buffer(**dummy, dst); + execution += "r"; + return dst; + }, + DomainFlagBits::eGraphicsQueue); + + { + auto written = write(declare_buf("src0", **buf0)); + written.wait(*test_context.allocator, test_context.compiler); + read(written).wait(*test_context.allocator, test_context.compiler); + CHECK(execution == "wr"); + execution = ""; + } + { + auto written = write(declare_buf("src0", **buf0)); + written.wait(*test_context.allocator, test_context.compiler); + read(std::move(written)).wait(*test_context.allocator, test_context.compiler); + CHECK(execution == "wr"); + execution = ""; + } + /* { + auto written = write(declare_buf("src0", **buf0)); + written.wait(*test_context.allocator, test_context.compiler); + write(read(std::move(written))).wait(*test_context.allocator, test_context.compiler); + CHECK(execution == "wrw"); + execution = ""; +}*/ + { + auto written = write(declare_buf("src0", **buf0)); + read(written).wait(*test_context.allocator, test_context.compiler); + CHECK(execution == "wr"); + execution = ""; + } + { + auto written = write(declare_buf("src0", **buf0)); + read(std::move(written)).wait(*test_context.allocator, test_context.compiler); + CHECK(execution == "wr"); + execution = ""; + } + { + auto written = write(declare_buf("src0", **buf0)); + write(read(std::move(written))).wait(*test_context.allocator, test_context.compiler); + CHECK(execution == "wrw"); + execution = ""; + } + } +} + // TEST TODOS: image2image copy, resolve \ No newline at end of file