From e22f52af846649223ee799c41848f64b63a6d075 Mon Sep 17 00:00:00 2001 From: Marcell Kiss Date: Sat, 23 Dec 2023 00:10:44 +0000 Subject: [PATCH] PE + Executors --- CMakeLists.txt | 6 +- examples/01_triangle.cpp | 6 +- examples/CMakeLists.txt | 2 +- examples/example_runner.hpp | 40 +- examples/example_runner_single.cpp | 15 +- examples/imgui.cpp | 227 +++-- examples/utils.hpp | 55 +- include/vuk/Context.hpp | 148 +--- include/vuk/Exception.hpp | 24 +- include/vuk/Executor.hpp | 29 + include/vuk/Future.hpp | 22 +- include/vuk/IR.hpp | 34 +- include/vuk/ImageAttachment.hpp | 1 + include/vuk/RenderGraph.hpp | 60 +- include/vuk/Swapchain.hpp | 21 +- include/vuk/Types.hpp | 19 +- include/vuk/VulkanPFNRequired.hpp | 1 + include/vuk/runtime/ThisThreadExecutor.hpp | 25 + .../vuk/runtime/vk/VulkanQueueExecutor.hpp | 39 + include/vuk/vuk_fwd.hpp | 1 - src/Context.cpp | 259 ++---- src/ContextImpl.hpp | 2 + src/DeviceVkResource.cpp | 10 +- src/ExecutableRenderGraph.cpp | 810 ++++++++---------- src/RenderGraph.cpp | 13 + src/RenderGraphImpl.hpp | 31 +- src/RenderGraphUtil.hpp | 6 + src/Util.cpp | 491 +---------- src/runtime/vk/VulkanQueueExecutor.cpp | 252 ++++++ src/tests/TestContext.hpp | 21 +- src/tests/arrays.cpp | 4 +- 31 files changed, 1190 insertions(+), 1484 deletions(-) create mode 100644 include/vuk/Executor.hpp create mode 100644 include/vuk/runtime/ThisThreadExecutor.hpp create mode 100644 include/vuk/runtime/vk/VulkanQueueExecutor.hpp create mode 100644 src/runtime/vk/VulkanQueueExecutor.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 75f0b572..03526b89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -108,7 +108,7 @@ target_sources(vuk PRIVATE src/DeviceVkResource.cpp src/BufferAllocator.cpp src/DeviceLinearResource.cpp -) + src/runtime/vk/VulkanQueueExecutor.cpp) target_include_directories(vuk PUBLIC ext/plf_colony) add_subdirectory(ext/robin-hood-hashing) @@ -195,8 +195,8 @@ if(VUK_BUILD_TESTS) include(doctest_force_link_static_lib_in_target) # until we 
can use cmake 3.24 add_executable(vuk-tests src/tests/Test.cpp - #src/tests/commands.cpp - #src/tests/renderpass.cpp + src/tests/commands.cpp + src/tests/renderpass.cpp src/tests/arrays.cpp ) #target_compile_features(vuk-tests PRIVATE cxx_std_17) diff --git a/examples/01_triangle.cpp b/examples/01_triangle.cpp index 14c259cd..ac794d18 100644 --- a/examples/01_triangle.cpp +++ b/examples/01_triangle.cpp @@ -27,13 +27,13 @@ namespace { }, // Code ran every frame .render = - [](vuk::ExampleRunner& runner, vuk::Allocator& frame_allocator, vuk::TypedFuture target) { + [](vuk::ExampleRunner& runner, vuk::Allocator& frame_allocator, vuk::TypedFuture target) { // The framework provides us with an image to render to in "target" // We attach this to the rendergraph named as "01_triangle" // The rendergraph is composed of passes (vuk::Pass) // Each pass declares which resources are used // And it provides a callback which is executed when this pass is being ran - auto pass = vuk::make_pass("01_triangle", [](vuk::CommandBuffer& command_buffer, vuk::IA color_rt) { + auto pass = vuk::make_pass("01_triangle", [](vuk::CommandBuffer& command_buffer, VUK_IA(vuk::eColorWrite) color_rt) { command_buffer.set_viewport(0, vuk::Rect2D::framebuffer()); // Set the scissor area to cover the entire framebuffer command_buffer.set_scissor(0, vuk::Rect2D::framebuffer()); @@ -42,7 +42,7 @@ namespace { .set_color_blend(color_rt, {}) // Set the default color blend state .bind_graphics_pipeline("triangle") // Recall pipeline for "triangle" and bind .draw(3, 1, 0, 0); // Draw 3 vertices - return std::make_tuple(color_rt); + return color_rt; }); auto drawn = pass(std::move(target)); diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 86422f24..c2b5187f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -8,7 +8,7 @@ endif() FetchContent_Declare( vk-bootstrap GIT_REPOSITORY https://github.com/charles-lunarg/vk-bootstrap - GIT_TAG 
8e61b2d81c3f5f84339735085ff5651f71bbe1e7 + GIT_TAG 3ad0388f1b68dbf99746a2fab8c3115c8fa887bb ) FetchContent_MakeAvailable(vk-bootstrap) diff --git a/examples/example_runner.hpp b/examples/example_runner.hpp index 0c0331cd..a77cc58f 100644 --- a/examples/example_runner.hpp +++ b/examples/example_runner.hpp @@ -10,6 +10,7 @@ #include "vuk/RenderGraph.hpp" #include "vuk/SampledImage.hpp" #include "vuk/resources/DeviceFrameResource.hpp" +#include "vuk/runtime/ThisThreadExecutor.hpp" #include #include #include @@ -48,7 +49,7 @@ namespace vuk { std::optional superframe_resource; std::optional superframe_allocator; bool suspend = false; - vuk::SwapchainRef swapchain; + std::optional swapchain; GLFWwindow* window; VkSurfaceKHR surface; vkb::Instance vkbinstance; @@ -74,7 +75,7 @@ namespace vuk { futures.emplace_back(std::move(fut)); } - plf::colony sampled_images; + std::vector> sampled_images; std::vector examples; ExampleRunner(); @@ -99,12 +100,9 @@ namespace vuk { if (width == 0 && height == 0) { runner.suspend = true; } else { - runner.superframe_allocator->deallocate(std::span{ &runner.swapchain->swapchain, 1 }); - runner.superframe_allocator->deallocate(runner.swapchain->image_views); - runner.context->remove_swapchain(runner.swapchain); - runner.swapchain = runner.context->add_swapchain(util::make_swapchain(runner.vkbdevice, runner.swapchain->swapchain)); - for (auto& iv : runner.swapchain->image_views) { - runner.context->set_name(iv.payload, "Swapchain ImageView"); + runner.swapchain = util::make_swapchain(*runner.superframe_allocator, runner.vkbdevice, runner.swapchain); + for (auto& iv : runner.swapchain->images) { + runner.context->set_name(iv.image_view.payload, "Swapchain ImageView"); } runner.suspend = false; } @@ -137,8 +135,8 @@ namespace vuk { tracy_cpool.reset(); present_ready.reset(); render_complete.reset(); - imgui_data.font_texture.view.reset(); - imgui_data.font_texture.image.reset(); + imgui_data.font_image.reset(); + 
imgui_data.font_image_view.reset(); superframe_resource.reset(); context.reset(); auto vkDestroySurfaceKHR = (PFN_vkDestroySurfaceKHR)vkbinstance.fp_vkGetInstanceProcAddr(vkbinstance.instance, "vkDestroySurfaceKHR"); @@ -245,23 +243,21 @@ namespace vuk { transfer_queue = vkbdevice.get_queue(vkb::QueueType::transfer).value(); auto transfer_queue_family_index = vkbdevice.get_queue_index(vkb::QueueType::transfer).value(); device = vkbdevice.device; - ContextCreateParameters::FunctionPointers fps; + vuk::rtvk::FunctionPointers fps; fps.vkGetInstanceProcAddr = vkbinstance.fp_vkGetInstanceProcAddr; fps.vkGetDeviceProcAddr = vkbinstance.fp_vkGetDeviceProcAddr; - context.emplace(ContextCreateParameters{ instance, - device, - physical_device, - graphics_queue, - graphics_queue_family_index, - VK_NULL_HANDLE, - VK_QUEUE_FAMILY_IGNORED, - transfer_queue, - transfer_queue_family_index, - fps }); + fps.load_pfns(instance, device, true); + std::vector> executors; + + executors.push_back(rtvk::create_vkqueue_executor(fps, device, graphics_queue, graphics_queue_family_index, DomainFlagBits::eGraphicsQueue)); + executors.push_back(rtvk::create_vkqueue_executor(fps, device, transfer_queue, transfer_queue_family_index, DomainFlagBits::eTransferQueue)); + executors.push_back(std::make_unique()); + + context.emplace(ContextCreateParameters{ instance, device, physical_device, std::move(executors), fps }); const unsigned num_inflight_frames = 3; superframe_resource.emplace(*context, num_inflight_frames); superframe_allocator.emplace(*superframe_resource); - swapchain = context->add_swapchain(util::make_swapchain(vkbdevice, {})); + swapchain = util::make_swapchain(*superframe_allocator, vkbdevice, {}); present_ready = vuk::Unique>(*superframe_allocator); render_complete = vuk::Unique>(*superframe_allocator); diff --git a/examples/example_runner_single.cpp b/examples/example_runner_single.cpp index 5cba2202..d3635fb8 100644 --- a/examples/example_runner_single.cpp +++ 
b/examples/example_runner_single.cpp @@ -1,7 +1,5 @@ #include "example_runner.hpp" -vuk::SwapchainRenderBundle bundle; - void vuk::ExampleRunner::render() { Compiler compiler; // the examples can all enqueue upload tasks via enqueue_setup. for simplicity, we submit and wait for all the upload tasks before moving on to the render @@ -25,16 +23,15 @@ void vuk::ExampleRunner::render() { // optional Allocator frame_allocator(frame_resource); // create a rendergraph we will use to prepare a swapchain image for the example to render into - auto imported_swapchain = import_swapchain(bundle); + auto imported_swapchain = declare_swapchain(*swapchain); // acquire an image on the swapchain - auto swapchain_image = acquire_next_image("swapchain image", imported_swapchain); + auto swapchain_image = acquire_next_image("swp_img", std::move(imported_swapchain)); // clear the swapchain image - TypedFuture cleared_image_to_render_into = clear_image(swapchain_image, vuk::ClearColor{ 0.3f, 0.5f, 0.3f, 1.0f }); + TypedFuture cleared_image_to_render_into = clear_image(std::move(swapchain_image), vuk::ClearColor{ 0.3f, 0.5f, 0.3f, 1.0f }); // invoke the render method of the example with the cleared image TypedFuture example_result = examples[0]->render(*this, frame_allocator, std::move(cleared_image_to_render_into)); - // set up some profiling callbacks for our example Tracy integration vuk::ProfilingCallbacks cbs; cbs.user_data = &get_runner(); @@ -72,8 +69,10 @@ void vuk::ExampleRunner::render() { // compile the RG that contains all the rendering of the example // submit and present the results to the swapchain we imported previously - present_one(example_result, { .callbacks = cbs }); - + auto entire_thing = enqueue_presentation(std::move(example_result)); + + entire_thing.wait(frame_allocator, compiler, { .callbacks = cbs }); + // update window title with FPS if (++num_frames == 16) { auto new_time = get_time(); diff --git a/examples/imgui.cpp b/examples/imgui.cpp index 
392ec07a..ccf2ad0b 100644 --- a/examples/imgui.cpp +++ b/examples/imgui.cpp @@ -10,8 +10,10 @@ #include "vuk/RenderGraph.hpp" #include "vuk/SampledImage.hpp" -util::ImGuiData util::ImGui_ImplVuk_Init(vuk::Allocator& allocator) { - vuk::Context& ctx = allocator.get_context(); +using namespace vuk; + +util::ImGuiData util::ImGui_ImplVuk_Init(Allocator& allocator) { + Context& ctx = allocator.get_context(); auto& io = ImGui::GetIO(); io.BackendRendererName = "imgui_impl_vuk"; io.BackendFlags |= ImGuiBackendFlags_RendererHasVtxOffset; // We can honor the ImDrawCmd::VtxOffset field, allowing for large meshes. @@ -21,23 +23,23 @@ util::ImGuiData util::ImGui_ImplVuk_Init(vuk::Allocator& allocator) { io.Fonts->GetTexDataAsRGBA32(&pixels, &width, &height); ImGuiData data; - auto font_ia = vuk::ImageAttachment::from_preset( - vuk::ImageAttachment::Preset::eMap2D, vuk::Format::eR8G8B8A8Srgb, vuk::Extent3D{ (unsigned)width, (unsigned)height, 1u }, vuk::Samples::e1); - auto [image, view, fut] = vuk::create_image_and_view_with_data(allocator, vuk::DomainFlagBits::eTransferOnTransfer, font_ia, pixels); + auto font_ia = + ImageAttachment::from_preset(ImageAttachment::Preset::eMap2D, Format::eR8G8B8A8Srgb, Extent3D{ (unsigned)width, (unsigned)height, 1u }, Samples::e1); + auto [image, view, fut] = create_image_and_view_with_data(allocator, DomainFlagBits::eTransferOnTransfer, font_ia, pixels); data.font_image = std::move(image); data.font_image_view = std::move(view); - vuk::Compiler comp; + Compiler comp; fut.wait(allocator, comp); - ctx.set_name(data.font_image_view, "ImGui/font"); - vuk::SamplerCreateInfo sci; - sci.minFilter = sci.magFilter = vuk::Filter::eLinear; - sci.mipmapMode = vuk::SamplerMipmapMode::eLinear; - sci.addressModeU = sci.addressModeV = sci.addressModeW = vuk::SamplerAddressMode::eRepeat; + ctx.set_name(data.font_image_view->payload, "ImGui/font"); + SamplerCreateInfo sci; + sci.minFilter = sci.magFilter = Filter::eLinear; + sci.mipmapMode = 
SamplerMipmapMode::eLinear; + sci.addressModeU = sci.addressModeV = sci.addressModeW = SamplerAddressMode::eRepeat; data.font_sci = sci; - data.font_si = std::make_unique(vuk::SampledImage::Global{ *data.font_image_view, sci, vuk::ImageLayout::eReadOnlyOptimalKHR }); + data.font_si = std::make_unique(SampledImage::Global{ *data.font_image_view, sci, ImageLayout::eReadOnlyOptimalKHR }); io.Fonts->TexID = (ImTextureID)data.font_si.get(); { - vuk::PipelineBaseCreateInfo pci; + PipelineBaseCreateInfo pci; // glslangValidator.exe -V imgui.vert --vn imgui_vert -o examples/imgui_vert.hpp pci.add_static_spirv(imgui_vert, sizeof(imgui_vert) / 4, "imgui.vert"); // glslangValidator.exe -V imgui.frag --vn imgui_frag -o examples/imgui_frag.hpp @@ -47,19 +49,19 @@ util::ImGuiData util::ImGui_ImplVuk_Init(vuk::Allocator& allocator) { return data; } -vuk::TypedFuture util::ImGui_ImplVuk_Render(vuk::Allocator& allocator, - vuk::TypedFuture target, - util::ImGuiData& data, - ImDrawData* draw_data, - const plf::colony& sampled_images) { - auto reset_render_state = [](const util::ImGuiData& data, vuk::CommandBuffer& command_buffer, ImDrawData* draw_data, vuk::Buffer vertex, vuk::Buffer index) { +TypedFuture util::ImGui_ImplVuk_Render(Allocator& allocator, + TypedFuture target, + util::ImGuiData& data, + ImDrawData* draw_data, + const std::vector>& sampled_images) { + auto reset_render_state = [](const util::ImGuiData& data, CommandBuffer& command_buffer, ImDrawData* draw_data, Buffer vertex, Buffer index) { command_buffer.bind_image(0, 0, *data.font_image_view).bind_sampler(0, 0, data.font_sci); if (index.size > 0) { - command_buffer.bind_index_buffer(index, sizeof(ImDrawIdx) == 2 ? vuk::IndexType::eUint16 : vuk::IndexType::eUint32); + command_buffer.bind_index_buffer(index, sizeof(ImDrawIdx) == 2 ? 
IndexType::eUint16 : IndexType::eUint32); } - command_buffer.bind_vertex_buffer(0, vertex, 0, vuk::Packed{ vuk::Format::eR32G32Sfloat, vuk::Format::eR32G32Sfloat, vuk::Format::eR8G8B8A8Unorm }); + command_buffer.bind_vertex_buffer(0, vertex, 0, Packed{ Format::eR32G32Sfloat, Format::eR32G32Sfloat, Format::eR8G8B8A8Unorm }); command_buffer.bind_graphics_pipeline("imgui"); - command_buffer.set_viewport(0, vuk::Rect2D::framebuffer()); + command_buffer.set_viewport(0, Rect2D::framebuffer()); struct PC { float scale[2]; float translate[2]; @@ -68,123 +70,102 @@ vuk::TypedFuture util::ImGui_ImplVuk_Render(vuk::Allocator pc.scale[1] = 2.0f / draw_data->DisplaySize.y; pc.translate[0] = -1.0f - draw_data->DisplayPos.x * pc.scale[0]; pc.translate[1] = -1.0f - draw_data->DisplayPos.y * pc.scale[1]; - command_buffer.push_constants(vuk::ShaderStageFlagBits::eVertex, 0, pc); + command_buffer.push_constants(ShaderStageFlagBits::eVertex, 0, pc); }; size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert); size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx); - auto imvert = *allocate_buffer(allocator, { vuk::MemoryUsage::eCPUtoGPU, vertex_size, 1 }); - auto imind = *allocate_buffer(allocator, { vuk::MemoryUsage::eCPUtoGPU, index_size, 1 }); + auto imvert = *allocate_buffer(allocator, { MemoryUsage::eCPUtoGPU, vertex_size, 1 }); + auto imind = *allocate_buffer(allocator, { MemoryUsage::eCPUtoGPU, index_size, 1 }); size_t vtx_dst = 0, idx_dst = 0; - vuk::Compiler comp; + Compiler comp; for (int n = 0; n < draw_data->CmdListsCount; n++) { const ImDrawList* cmd_list = draw_data->CmdLists[n]; auto imverto = imvert->add_offset(vtx_dst * sizeof(ImDrawVert)); auto imindo = imind->add_offset(idx_dst * sizeof(ImDrawIdx)); // TODO: - vuk::host_data_to_buffer(allocator, vuk::DomainFlagBits{}, imverto, std::span(cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size)).wait(allocator, comp); - vuk::host_data_to_buffer(allocator, vuk::DomainFlagBits{}, imindo, 
std::span(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size)).wait(allocator, comp); + host_data_to_buffer(allocator, DomainFlagBits{}, imverto, std::span(cmd_list->VtxBuffer.Data, cmd_list->VtxBuffer.Size)).wait(allocator, comp); + host_data_to_buffer(allocator, DomainFlagBits{}, imindo, std::span(cmd_list->IdxBuffer.Data, cmd_list->IdxBuffer.Size)).wait(allocator, comp); vtx_dst += cmd_list->VtxBuffer.Size; idx_dst += cmd_list->IdxBuffer.Size; } // add rendergraph dependencies to be transitioned // make all rendergraph sampled images available - std::vector resources; - resources.emplace_back(vuk::Resource{ "target", vuk::Resource::Type::eImage, vuk::eColorRW, "target+" }); - for (auto& si : sampled_images) { - if (!si.is_global) { - resources.emplace_back( - vuk::Resource{ si.rg_attachment.reference.rg, si.rg_attachment.reference.name, vuk::Resource::Type::eImage, vuk::Access::eFragmentSampled }); - } - } - auto pass = vuk::make_pass( - "imgui", - [&data, &allocator, verts = imvert.get(), inds = imind.get(), draw_data, reset_render_state]( - vuk::CommandBuffer& command_buffer, VUK_IA(vuk::Access::eColorWrite) dst, VUK_IA(vuk::Access::eFragmentSampled) sis) { - command_buffer.set_dynamic_state(vuk::DynamicStateFlagBits::eViewport | vuk::DynamicStateFlagBits::eScissor); - command_buffer.set_rasterization(vuk::PipelineRasterizationStateCreateInfo{}); - command_buffer.set_color_blend(dst, vuk::BlendPreset::eAlphaBlend); - reset_render_state(data, command_buffer, draw_data, verts, inds); - // Will project scissor/clipping rectangles into framebuffer space - ImVec2 clip_off = draw_data->DisplayPos; // (0,0) unless using multi-viewports - ImVec2 clip_scale = draw_data->FramebufferScale; // (1,1) unless using retina display which are often (2,2) - - // Render command lists - // (Because we merged all buffers into a single one, we maintain our own offset into them) - int global_vtx_offset = 0; - int global_idx_offset = 0; - for (int n = 0; n < draw_data->CmdListsCount; 
n++) { - const ImDrawList* cmd_list = draw_data->CmdLists[n]; - for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) { - const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; - if (pcmd->UserCallback != nullptr) { - // User callback, registered via ImDrawList::AddCallback() - // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) - if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) - reset_render_state(data, command_buffer, draw_data, verts, inds); - else - pcmd->UserCallback(cmd_list, pcmd); - } else { - // Project scissor/clipping rectangles into framebuffer space - ImVec4 clip_rect; - clip_rect.x = (pcmd->ClipRect.x - clip_off.x) * clip_scale.x; - clip_rect.y = (pcmd->ClipRect.y - clip_off.y) * clip_scale.y; - clip_rect.z = (pcmd->ClipRect.z - clip_off.x) * clip_scale.x; - clip_rect.w = (pcmd->ClipRect.w - clip_off.y) * clip_scale.y; - - auto fb_width = command_buffer.get_ongoing_render_pass().extent.width; - auto fb_height = command_buffer.get_ongoing_render_pass().extent.height; - if (clip_rect.x < fb_width && clip_rect.y < fb_height && clip_rect.z >= 0.0f && clip_rect.w >= 0.0f) { - // Negative offsets are illegal for vkCmdSetScissor - if (clip_rect.x < 0.0f) - clip_rect.x = 0.0f; - if (clip_rect.y < 0.0f) - clip_rect.y = 0.0f; - - // Apply scissor/clipping rectangle - vuk::Rect2D scissor; - scissor.offset.x = (int32_t)(clip_rect.x); - scissor.offset.y = (int32_t)(clip_rect.y); - scissor.extent.width = (uint32_t)(clip_rect.z - clip_rect.x); - scissor.extent.height = (uint32_t)(clip_rect.w - clip_rect.y); - command_buffer.set_scissor(0, scissor); - - // Bind texture - if (pcmd->TextureId) { - auto& si = *reinterpret_cast(pcmd->TextureId); - if (si.is_global) { - command_buffer.bind_image(0, 0, si.global.iv).bind_sampler(0, 0, si.global.sci); - } else { - if (si.rg_attachment.ivci) { - auto ivci = *si.rg_attachment.ivci; - // it is possible that we end up binding multiple 
images here with the same name - - // the rendergraph sorts this out, but we need to refer to the correct one here - // so we use a NameReference to make sure that we include the source rendergraph for identification - // this is useful for generic name binding, but not really needed for usual passes - auto res_img = command_buffer.get_resource_image_attachment(si.rg_attachment.reference)->image; - ivci.image = res_img.image; - auto iv = vuk::allocate_image_view(allocator, ivci); - command_buffer.bind_image(0, 0, **iv).bind_sampler(0, 0, si.rg_attachment.sci); - } else { - command_buffer - .bind_image(0, 0, *command_buffer.get_resource_image_attachment(si.rg_attachment.reference), vuk::ImageLayout::eShaderReadOnlyOptimal) - .bind_sampler(0, 0, si.rg_attachment.sci); - } - } - } - // Draw - command_buffer.draw_indexed(pcmd->ElemCount, 1, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset, 0); - } - } - } - global_idx_offset += cmd_list->IdxBuffer.Size; - global_vtx_offset += cmd_list->VtxBuffer.Size; - } - return dst; - }); - - return pass(target); + auto sampled_images_array = declare_array("imgui_sampled", std::span(sampled_images)); + + auto pass = make_pass("imgui", + [&data, &allocator, verts = imvert.get(), inds = imind.get(), draw_data, reset_render_state]( + CommandBuffer& command_buffer, VUK_IA(Access::eColorWrite) dst, VUK_ARG(ImageAttachment[], Access::eFragmentSampled) sis) { + command_buffer.set_dynamic_state(DynamicStateFlagBits::eViewport | DynamicStateFlagBits::eScissor); + command_buffer.set_rasterization(PipelineRasterizationStateCreateInfo{}); + command_buffer.set_color_blend(dst, BlendPreset::eAlphaBlend); + reset_render_state(data, command_buffer, draw_data, verts, inds); + // Will project scissor/clipping rectangles into framebuffer space + ImVec2 clip_off = draw_data->DisplayPos; // (0,0) unless using multi-viewports + ImVec2 clip_scale = draw_data->FramebufferScale; // (1,1) unless using retina display which are often 
(2,2) + + // Render command lists + // (Because we merged all buffers into a single one, we maintain our own offset into them) + int global_vtx_offset = 0; + int global_idx_offset = 0; + for (int n = 0; n < draw_data->CmdListsCount; n++) { + const ImDrawList* cmd_list = draw_data->CmdLists[n]; + for (int cmd_i = 0; cmd_i < cmd_list->CmdBuffer.Size; cmd_i++) { + const ImDrawCmd* pcmd = &cmd_list->CmdBuffer[cmd_i]; + if (pcmd->UserCallback != nullptr) { + // User callback, registered via ImDrawList::AddCallback() + // (ImDrawCallback_ResetRenderState is a special callback value used by the user to request the renderer to reset render state.) + if (pcmd->UserCallback == ImDrawCallback_ResetRenderState) + reset_render_state(data, command_buffer, draw_data, verts, inds); + else + pcmd->UserCallback(cmd_list, pcmd); + } else { + // Project scissor/clipping rectangles into framebuffer space + ImVec4 clip_rect; + clip_rect.x = (pcmd->ClipRect.x - clip_off.x) * clip_scale.x; + clip_rect.y = (pcmd->ClipRect.y - clip_off.y) * clip_scale.y; + clip_rect.z = (pcmd->ClipRect.z - clip_off.x) * clip_scale.x; + clip_rect.w = (pcmd->ClipRect.w - clip_off.y) * clip_scale.y; + + auto fb_width = command_buffer.get_ongoing_render_pass().extent.width; + auto fb_height = command_buffer.get_ongoing_render_pass().extent.height; + if (clip_rect.x < fb_width && clip_rect.y < fb_height && clip_rect.z >= 0.0f && clip_rect.w >= 0.0f) { + // Negative offsets are illegal for vkCmdSetScissor + if (clip_rect.x < 0.0f) + clip_rect.x = 0.0f; + if (clip_rect.y < 0.0f) + clip_rect.y = 0.0f; + + // Apply scissor/clipping rectangle + Rect2D scissor; + scissor.offset.x = (int32_t)(clip_rect.x); + scissor.offset.y = (int32_t)(clip_rect.y); + scissor.extent.width = (uint32_t)(clip_rect.z - clip_rect.x); + scissor.extent.height = (uint32_t)(clip_rect.w - clip_rect.y); + command_buffer.set_scissor(0, scissor); + + // Bind texture + if (pcmd->TextureId) { + auto si_index = reinterpret_cast(pcmd->TextureId); + + 
command_buffer.bind_image(0, 0, sis[si_index]).bind_sampler(0, 0, {}); + + // TODO: SampledImage + //.bind_sampler(0, 0, sis[si_index]); + } + // Draw + command_buffer.draw_indexed(pcmd->ElemCount, 1, pcmd->IdxOffset + global_idx_offset, pcmd->VtxOffset + global_vtx_offset, 0); + } + } + } + global_idx_offset += cmd_list->IdxBuffer.Size; + global_vtx_offset += cmd_list->VtxBuffer.Size; + } + return dst; + }); + + return pass(target, sampled_images_array); } diff --git a/examples/utils.hpp b/examples/utils.hpp index 4d318601..60497b93 100644 --- a/examples/utils.hpp +++ b/examples/utils.hpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -58,34 +57,52 @@ namespace util { // clang-format on } - inline vuk::Swapchain make_swapchain(vkb::Device vkbdevice, std::optional old_swapchain) { + inline vuk::Swapchain make_swapchain(vuk::Allocator allocator, vkb::Device vkbdevice, std::optional old_swapchain) { vkb::SwapchainBuilder swb(vkbdevice); swb.set_desired_format(vuk::SurfaceFormatKHR{ vuk::Format::eR8G8B8A8Srgb, vuk::ColorSpaceKHR::eSrgbNonlinear }); swb.add_fallback_format(vuk::SurfaceFormatKHR{ vuk::Format::eB8G8R8A8Srgb, vuk::ColorSpaceKHR::eSrgbNonlinear }); swb.set_desired_present_mode((VkPresentModeKHR)vuk::PresentModeKHR::eImmediate); swb.set_image_usage_flags(VkImageUsageFlagBits::VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VkImageUsageFlagBits::VK_IMAGE_USAGE_TRANSFER_DST_BIT); - if (old_swapchain) { - swb.set_old_swapchain(*old_swapchain); + + bool is_recycle = false; + vkb::Result vkswapchain = { vkb::Swapchain{} }; + if (!old_swapchain) { + vkswapchain = swb.build(); + old_swapchain.emplace(allocator, vkswapchain->image_count); + } else { + is_recycle = true; + swb.set_old_swapchain(old_swapchain->swapchain); + vkswapchain = swb.build(); + } + + if (is_recycle) { + allocator.deallocate(std::span{ &old_swapchain->swapchain, 1 }); + for (auto& iv : old_swapchain->images) { + allocator.deallocate(std::span{ &iv.image_view, 1 }); + 
} } - auto vkswapchain = swb.build(); - vuk::Swapchain sw{}; auto images = *vkswapchain->get_images(); auto views = *vkswapchain->get_image_views(); - for (auto& i : images) { - sw.images.push_back(vuk::Image{ i, nullptr }); - } - for (auto& i : views) { - sw.image_views.emplace_back(); - sw.image_views.back().payload = i; - sw.image_views.back().id = 0; + old_swapchain->images.clear(); + + for (auto i = 0; i < images.size(); i++) { + vuk::ImageAttachment ia; + ia.extent = vuk::Dimension3D::absolute( vkswapchain->extent.width, vkswapchain->extent.height, 1 ); + ia.format = (vuk::Format)vkswapchain->image_format; + ia.image = vuk::Image{ images[i], nullptr }; + ia.image_view = vuk::ImageView{ 0, views[i] }; + ia.view_type = vuk::ImageViewType::e2D; + ia.sample_count = vuk::Samples::e1; + ia.base_level = ia.base_layer = 0; + ia.level_count = ia.layer_count = 1; + old_swapchain->images.push_back(ia); } - sw.extent = vuk::Extent2D{ vkswapchain->extent.width, vkswapchain->extent.height }; - sw.format = vuk::Format(vkswapchain->image_format); - sw.surface = vkbdevice.surface; - sw.swapchain = vkswapchain->swapchain; - return sw; + + old_swapchain->swapchain = vkswapchain->swapchain; + old_swapchain->surface = vkbdevice.surface; + return *old_swapchain; } struct ImGuiData { @@ -99,7 +116,7 @@ namespace util { vuk::TypedFuture target, ImGuiData& data, ImDrawData* draw_data, - const plf::colony& sampled_images); + const std::vector>& sampled_images); inline std::string read_entire_file(const std::string& path) { std::ostringstream buf; diff --git a/include/vuk/Context.hpp b/include/vuk/Context.hpp index dcae582e..c2095ba3 100644 --- a/include/vuk/Context.hpp +++ b/include/vuk/Context.hpp @@ -10,6 +10,7 @@ #include "vuk/Buffer.hpp" #include "vuk/Image.hpp" #include "vuk/Swapchain.hpp" +#include "vuk/runtime/vk/VulkanQueueExecutor.hpp" #include "vuk_fwd.hpp" #include "vuk/SourceLocation.hpp" @@ -20,65 +21,46 @@ namespace std { } // namespace std namespace vuk { - /// @brief 
Parameters used for creating a Context - struct ContextCreateParameters { - /// @brief Vulkan instance - VkInstance instance; - /// @brief Vulkan device - VkDevice device; - /// @brief Vulkan physical device - VkPhysicalDevice physical_device; - /// @brief Optional graphics queue - VkQueue graphics_queue = VK_NULL_HANDLE; - /// @brief Optional graphics queue family index - uint32_t graphics_queue_family_index = VK_QUEUE_FAMILY_IGNORED; - /// @brief Optional compute queue - VkQueue compute_queue = VK_NULL_HANDLE; - /// @brief Optional compute queue family index - uint32_t compute_queue_family_index = VK_QUEUE_FAMILY_IGNORED; - /// @brief Optional transfer queue - VkQueue transfer_queue = VK_NULL_HANDLE; - /// @brief Optional transfer queue family index - uint32_t transfer_queue_family_index = VK_QUEUE_FAMILY_IGNORED; - + namespace rtvk { #define VUK_X(name) PFN_##name name = nullptr; #define VUK_Y(name) PFN_##name name = nullptr; - /// @brief User provided function pointers. If you want dynamic loading, you must set vkGetInstanceProcAddr & vkGetDeviceProcAddr + struct FunctionPointers { PFN_vkGetInstanceProcAddr vkGetInstanceProcAddr = nullptr; PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr = nullptr; -#include "vuk/VulkanPFNRequired.hpp" #include "vuk/VulkanPFNOptional.hpp" - } pointers; +#include "vuk/VulkanPFNRequired.hpp" + + /// @brief Check if all required function pointers are available (if providing them externally) + bool check_pfns(); + /// @brief Load function pointers that the runtime needs + /// @param instance Vulkan instance + /// @param device Vulkan device + /// @param allow_dynamic_loading_of_vk_function_pointers If true, then this function will attempt dynamic loading of the fn pointers + /// If this is false, then you must fill in all required function pointers + vuk::Result load_pfns(VkInstance instance, VkDevice device, bool allow_dynamic_loading_of_vk_function_pointers); + }; #undef VUK_X #undef VUK_Y - /// @brief Allow vuk to load missing 
required and optional function pointers dynamically - /// If this is false, then you must fill in all required function pointers - bool allow_dynamic_loading_of_vk_function_pointers = true; - }; - - /// @brief Abstraction of a device queue in Vulkan - struct Queue { - Queue(PFN_vkQueueSubmit fn1, PFN_vkQueueSubmit2KHR fn2, VkQueue queue, uint32_t queue_family_index, TimelineSemaphore ts); - ~Queue(); + std::unique_ptr create_vkqueue_executor(const FunctionPointers& fps, VkDevice device, VkQueue queue, uint32_t queue_family_index, DomainFlagBits domain); + } // namespace rtvk - Queue(const Queue&) = delete; - Queue& operator=(const Queue&) = delete; - - Queue(Queue&&) noexcept; - Queue& operator=(Queue&&) noexcept; - - TimelineSemaphore& get_submit_sync(); - std::recursive_mutex& get_queue_lock(); - - Result submit(std::span submit_infos, VkFence fence); - Result submit(std::span submit_infos, VkFence fence); - - struct QueueImpl* impl; + /// @brief Parameters used for creating a Context + struct ContextCreateParameters { + /// @brief Vulkan instance + VkInstance instance; + /// @brief Vulkan device + VkDevice device; + /// @brief Vulkan physical device + VkPhysicalDevice physical_device; + /// @brief Executors available to the runtime for scheduling + std::vector> executors; + /// @brief User provided function pointers. 
If you want dynamic loading, you must set vkGetInstanceProcAddr & vkGetDeviceProcAddr + rtvk::FunctionPointers pointers; }; - class Context : public ContextCreateParameters::FunctionPointers { + class Context : public rtvk::FunctionPointers { public: /// @brief Create a new Context /// @param params Vulkan parameters initialized beforehand @@ -91,22 +73,11 @@ namespace vuk { Context(Context&&) noexcept; Context& operator=(Context&&) noexcept; - // Vulkan instance, device and queues + // Vulkan instance and device VkInstance instance; VkDevice device; VkPhysicalDevice physical_device; - uint32_t graphics_queue_family_index; - uint32_t compute_queue_family_index; - uint32_t transfer_queue_family_index; - - std::optional dedicated_graphics_queue; - std::optional dedicated_compute_queue; - std::optional dedicated_transfer_queue; - - Queue* graphics_queue = nullptr; - Queue* compute_queue = nullptr; - Queue* transfer_queue = nullptr; // Vulkan properties @@ -115,9 +86,16 @@ namespace vuk { VkPhysicalDeviceAccelerationStructurePropertiesKHR as_properties{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR }; size_t min_buffer_alignment; + // Executors + std::vector all_queue_families; + // retrieve a specific executor from the runtime + Executor* get_executor(Executor::Tag tag); + // retrieve an executor for the given domain from the runtime + Executor* get_executor(DomainFlagBits domain); + // Debug functions - - /// @brief If debug utils is available and debug names & markers are supported + + /// @brief If debug utils is available and debug names & markers are supported bool debug_enabled() const; /// @brief Set debug name for object @@ -132,7 +110,7 @@ namespace vuk { void end_region(const VkCommandBuffer&); // Pipeline management - + /// Internal pipeline cache to use VkPipelineCache vk_pipeline_cache = VK_NULL_HANDLE; @@ -155,7 +133,7 @@ namespace vuk { /// @brief Load a Vulkan pipeline cache bool load_pipeline_cache(std::span data); - /// 
@brief Retrieve the current Vulkan pipeline cache + /// @brief Retrieve the current Vulkan pipeline cache std::vector save_pipeline_cache(); // Allocator support @@ -164,19 +142,9 @@ namespace vuk { /// @return The resource DeviceVkResource& get_vk_resource(); - // Swapchain management - - /// @brief Add a swapchain to be managed by the Context - /// @return Reference to the new swapchain that can be used during presentation - SwapchainRef add_swapchain(Swapchain); - - /// @brief Remove a swapchain that is managed by the Context - /// the swapchain is not destroyed - void remove_swapchain(SwapchainRef); - // Frame management - /// @brief Retrieve the current frame count + /// @brief Retrieve the current frame count uint64_t get_frame_count() const; /// @brief Advance internal counter used for caching and garbage collect caches @@ -185,11 +153,6 @@ namespace vuk { /// @brief Wait for the device to become idle. Useful for only a few synchronisation events, like resizing or shutting down. Result wait_idle(); - Result submit_graphics(std::span, VkFence); - Result submit_transfer(std::span, VkFence); - Result submit_graphics(std::span); - Result submit_transfer(std::span); - Result wait_for_domains(std::span sync_points); // Query functionality @@ -245,10 +208,6 @@ namespace vuk { template Handle wrap(T payload); - Queue& domain_to_queue(DomainFlags) const; - uint32_t domain_to_queue_index(DomainFlags) const; - uint32_t domain_to_queue_family_index(DomainFlags) const; - private: struct ContextImpl* impl; friend struct ContextImpl; @@ -308,27 +267,16 @@ namespace vuk { /// @param allocator Allocator to use for submission resources /// @param rendergraphs `RenderGraph`s for compilation /// @param option Compilation options - Result link_execute_submit(Allocator& allocator, - Compiler& compiler, - std::span> rendergraphs, - RenderGraphCompileOptions options = {}); + Result + link_execute_submit(Allocator& allocator, Compiler& compiler, std::span> rendergraphs, 
RenderGraphCompileOptions options = {}); /// @brief Execute given `ExecutableRenderGraph`s into API VkCommandBuffers, then submit them to queues /// @param allocator Allocator to use for submission resources /// @param executable_rendergraphs `ExecutableRenderGraph`s for execution /// @param swapchains_with_indexes Swapchains references by the rendergraphs /// @param present_rdy Semaphore used to gate device-side execution /// @param render_complete Semaphore used to gate presentation - Result execute_submit(Allocator& allocator, - std::span> executable_rendergraphs, - std::vector> swapchains_with_indexes, - VkSemaphore present_rdy, - VkSemaphore render_complete); + Result execute_submit(Allocator& allocator, std::span> executable_rendergraphs); - /// @brief Execute given `ExecutableRenderGraph` into API VkCommandBuffers, then submit them to queues, presenting to a single swapchain - /// @param allocator Allocator to use for submission resources - /// @param executable_rendergraph `ExecutableRenderGraph`s for execution - /// @param swapchain Swapchain referenced by the rendergraph - Result execute_submit_and_present_to_one(Allocator& allocator, ExecutableRenderGraph&& executable_rendergraph, SwapchainRef swapchain); /// @brief Execute given `ExecutableRenderGraph` into API VkCommandBuffers, then submit them to queues, then blocking-wait for the submission to complete /// @param allocator Allocator to use for submission resources /// @param executable_rendergraph `ExecutableRenderGraph`s for execution @@ -336,12 +284,6 @@ namespace vuk { struct RenderGraphCompileOptions; - Result acquire_one(Allocator& allocator, SwapchainRef swapchain); - Result acquire_one(Context& ctx, SwapchainRef swapchain, VkSemaphore present_ready, VkSemaphore render_complete); - Result execute_submit(Allocator& allocator, ExecutableRenderGraph&& rg, SwapchainRenderBundle&& bundle); - Result present_to_one(Context& ctx, SwapchainRenderBundle&& bundle); - Result present(Allocator& allocator, 
Compiler& compiler, SwapchainRef swapchain, FutureBase&& future, RenderGraphCompileOptions = {}); - struct SampledImage make_sampled_image(ImageView iv, SamplerCreateInfo sci); struct SampledImage make_sampled_image(struct NameReference n, SamplerCreateInfo sci); diff --git a/include/vuk/Exception.hpp b/include/vuk/Exception.hpp index cc26cb99..4997f3db 100644 --- a/include/vuk/Exception.hpp +++ b/include/vuk/Exception.hpp @@ -1,9 +1,9 @@ #pragma once +#include "vuk/Config.hpp" +#include #include #include -#include -#include "vuk/Config.hpp" namespace vuk { struct Exception : std::exception { @@ -35,11 +35,19 @@ namespace vuk { } }; + struct RequiredPFNMissingException : Exception { + using Exception::Exception; + + void throw_this() override { + throw *this; + } + }; + struct VkException : Exception { VkResult error_code; - + using Exception::Exception; - + VkException(VkResult res) { error_code = res; switch (res) { @@ -104,8 +112,10 @@ namespace vuk { break; } } - - VkResult code() const { return error_code; } + + VkResult code() const { + return error_code; + } void throw_this() override { throw *this; @@ -115,7 +125,7 @@ namespace vuk { struct AllocateException : VkException { AllocateException(VkResult res) { error_code = res; - + switch (res) { case VK_ERROR_OUT_OF_HOST_MEMORY: { error_message = "Out of host memory."; diff --git a/include/vuk/Executor.hpp b/include/vuk/Executor.hpp new file mode 100644 index 00000000..b1879a0a --- /dev/null +++ b/include/vuk/Executor.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include "vuk/Types.hpp" + +namespace vuk { + /// @brief Base class for high level execution + struct Executor { + enum class Type { eVulkanDeviceQueue, eThisThread } type; + struct Tag { + DomainFlagBits domain; + size_t executor_id; + + std::strong_ordering operator<=>(const Tag&) const = default; + } tag; + + Executor(Type type, DomainFlagBits domain, size_t executor_id) : type(type), tag{domain, executor_id} {} + virtual ~Executor() {} + Executor(const 
Executor&) = delete; + Executor& operator=(const Executor&) = delete; + + // lock this executor + virtual void lock() = 0; + // unlock this executor + virtual void unlock() = 0; + + virtual Result wait_idle() = 0; + }; + +} // namespace vuk \ No newline at end of file diff --git a/include/vuk/Future.hpp b/include/vuk/Future.hpp index b30d4945..25a987dc 100644 --- a/include/vuk/Future.hpp +++ b/include/vuk/Future.hpp @@ -38,22 +38,24 @@ namespace vuk { public: TypedFuture(std::shared_ptr rg, Ref ref, Ref def) { this->control = std::make_shared(); - this->head = { rg->make_release(ref, &this->control->acqrel), 0 }; + + this->head = { rg->make_release(ref, &this->control->acqrel, Access::eHostRW, DomainFlagBits::eHost), 0 }; + this->control->rg = std::move(rg); this->def = def; } - TypedFuture(const TypedFuture& o) noexcept : - control{ std::make_shared(*o.control) }, - def{ o.def }, - head{ control->rg->make_release(o.get_head(), &this->control->acqrel), 0 } {} + TypedFuture(const TypedFuture& o) noexcept : control{ std::make_shared(*o.control) }, def{ o.def } { + head = { control->rg->make_release(o.get_head(), &this->control->acqrel, Access::eHostRW, DomainFlagBits::eHost), 0 }; + } TypedFuture(TypedFuture&& o) noexcept : control{ std::exchange(o.control, nullptr) }, def{ std::exchange(o.def, {}) }, head{ std::exchange(o.head, {}) } {} TypedFuture& operator=(const TypedFuture& o) noexcept { control = { std::make_shared(*o.control) }; def = { o.def }; - head = { control->rg->make_release(o.get_head(), &this->control->acqrel), 0 }; + + head = { control->rg->make_release(o.get_head(), &this->control->acqrel, Access::eHostRW, DomainFlagBits::eHost), 0 }; return *this; } @@ -98,6 +100,14 @@ namespace vuk { return *reinterpret_cast*>(this); // TODO: not cool } + template + TypedFuture release_to(Ref ref, Access access, DomainFlagBits domain) noexcept { + head.node->release.src = ref; + head.node->release.dst_access = access; + head.node->release.dst_domain = domain; + 
return std::move(*reinterpret_cast*>(this)); // TODO: not cool + } + T* operator->() noexcept { return reinterpret_cast(def.node->valloc.args[0].node->constant.value); } diff --git a/include/vuk/IR.hpp b/include/vuk/IR.hpp index 05e9e710..08e4caa0 100644 --- a/include/vuk/IR.hpp +++ b/include/vuk/IR.hpp @@ -12,8 +12,8 @@ namespace vuk { struct SyncPoint { - DomainFlagBits domain = DomainFlagBits::eNone; // domain of the point - uint64_t visibility; // results are available if waiting for {domain, visibility} + Executor* executor; + uint64_t visibility; // results are available if waiting for {executor, visibility} }; /// @brief Encapsulates a SyncPoint that can be synchronized against in the future @@ -131,6 +131,7 @@ namespace vuk { ACQUIRE, RELEASE, ACQUIRE_NEXT_IMAGE, + PRESENT, INDEXING, CAST } kind; @@ -189,6 +190,8 @@ namespace vuk { struct { Ref src; AcquireRelease* release; + Access dst_access; + DomainFlagBits dst_domain; } release; struct { Ref swapchain; @@ -200,6 +203,9 @@ namespace vuk { struct { Ref src; } cast; + struct { + Ref src; + } present; }; std::string_view kind_to_sv() { @@ -212,6 +218,8 @@ namespace vuk { return "call"; case INDEXING: return "indexing"; + case PRESENT: + return "present"; } assert(0); return ""; @@ -242,6 +250,7 @@ namespace vuk { RG() { builtin_image = &types.emplace_back(Type{ .kind = Type::IMAGE_TY }); builtin_buffer = &types.emplace_back(Type{ .kind = Type::BUFFER_TY }); + builtin_swapchain = &types.emplace_back(Type{ .kind = Type::SWAPCHAIN_TY }); } std::deque op_arena; @@ -362,6 +371,14 @@ namespace vuk { .aalloc = { .args = std::span(args_ptr, args.size() + 1), .defs = std::span(defs_ptr, defs.size()) } })); } + Ref make_declare_swapchain(Swapchain& bundle) { + auto buf_ptr = new Swapchain(bundle); + auto args_ptr = new Ref[1]; + auto mem_ty = new Type*(emplace_type(Type{ .kind = Type::MEMORY_TY })); + args_ptr[0] = first(emplace_op(Node{ .kind = Node::CONSTANT, .type = std::span{ mem_ty, 1 }, .constant = { .value = 
buf_ptr } })); + return first(emplace_op(Node{ .kind = Node::VALLOC, .type = std::span{ &builtin_swapchain, 1 }, .valloc = { .args = std::span(args_ptr, 1) } })); + } + Ref make_array_indexing(Type* type, Ref array, Ref index) { auto ty = new Type*(type); return first(emplace_op(Node{ .kind = Node::INDEXING, .type = std::span{ ty, 1 }, .indexing = { .array = array, .index = index } })); @@ -372,11 +389,6 @@ namespace vuk { return first(emplace_op(Node{ .kind = Node::CAST, .type = std::span{ ty, 1 }, .cast = { .src = src } })); } - Ref make_import_swapchain(SwapchainRenderBundle& bundle) { - return first( - emplace_op(Node{ .kind = Node::IMPORT, .type = std::span{ &builtin_swapchain, 1 }, .import = { .value = new SwapchainRenderBundle(bundle) } })); - } - Ref make_acquire_next_image(Ref swapchain) { return first( emplace_op(Node{ .kind = Node::ACQUIRE_NEXT_IMAGE, .type = std::span{ &builtin_image, 1 }, .acquire_next_image = { .swapchain = swapchain } })); @@ -418,8 +430,12 @@ namespace vuk { return emplace_op(n); } - Node* make_release(Ref src, AcquireRelease* acq_rel) { - return emplace_op(Node{ .kind = Node::RELEASE, .release = { .src = src, .release = acq_rel } }); + Node* make_release(Ref src, AcquireRelease* acq_rel, Access dst_access, DomainFlagBits dst_domain) { + return emplace_op(Node{ .kind = Node::RELEASE, .release = { .src = src, .release = acq_rel, .dst_access = dst_access, .dst_domain = dst_domain } }); + } + + Node* make_present(Ref src) { + return emplace_op(Node{ .kind = Node::PRESENT, .type = std::span{ &builtin_image, 1 }, .present = { .src = src } }); } }; } // namespace vuk \ No newline at end of file diff --git a/include/vuk/ImageAttachment.hpp b/include/vuk/ImageAttachment.hpp index 62a5b0db..d47c57f5 100644 --- a/include/vuk/ImageAttachment.hpp +++ b/include/vuk/ImageAttachment.hpp @@ -150,6 +150,7 @@ namespace vuk { AccessFlags access; ImageLayout layout; // ignored for buffers DomainFlags domain = DomainFlagBits::eAny; + uint32_t 
queue_family_index; }; union Subrange { diff --git a/include/vuk/RenderGraph.hpp b/include/vuk/RenderGraph.hpp index 526699b3..fa42435e 100644 --- a/include/vuk/RenderGraph.hpp +++ b/include/vuk/RenderGraph.hpp @@ -632,9 +632,7 @@ private: template [[nodiscard]] inline TypedFuture declare_array(Name name, const TypedFuture& arg, Args... args) { auto rg = arg.get_render_graph(); - [&rg](auto&... rest) { - (rg->subgraphs.push_back(rest.get_render_graph()), ...); - }(args...); + (rg->subgraphs.push_back(args.get_render_graph()), ...); std::array refs = { arg.get_head(), args.get_head()... }; std::array defs = { arg.get_def(), args.get_def()... }; Ref ref = rg->make_declare_array(Type::stripped(refs[0].type()), refs, defs); @@ -642,22 +640,43 @@ private: return { rg, ref, ref }; } + template + [[nodiscard]] inline TypedFuture declare_array(Name name, std::span> args) { + assert(args.size() > 0); + auto rg = args[0].get_render_graph(); + std::vector refs; + std::vector defs; + for (auto& arg : args) { + rg->subgraphs.push_back(arg.get_render_graph()); + refs.push_back(arg.get_head()); + defs.push_back(arg.get_def()); + } + Ref ref = rg->make_declare_array(Type::stripped(refs[0].type()), refs, defs); + rg->name_outputs(ref.node, { name.c_str() }); + return { rg, ref, ref }; + } + [[nodiscard]] inline TypedFuture clear(TypedFuture in, Clear clear_value) { auto& rg = in.get_render_graph(); - return in.transmute(rg->make_clear_image(in.get_head(), clear_value)); + return std::move(std::move(in).transmute(rg->make_clear_image(in.get_head(), clear_value))); } - [[nodiscard]] inline TypedFuture import_swapchain(SwapchainRenderBundle bundle) { + [[nodiscard]] inline TypedFuture declare_swapchain(Swapchain bundle) { std::shared_ptr rg = std::make_shared(); - Ref ref = rg->make_import_swapchain(bundle); + Ref ref = rg->make_declare_swapchain(bundle); return { rg, ref, ref }; } - [[nodiscard]] inline TypedFuture acquire_next_image(Name name, TypedFuture in) { + [[nodiscard]] 
inline TypedFuture acquire_next_image(Name name, TypedFuture in) { auto& rg = in.get_render_graph(); Ref ref = rg->make_acquire_next_image(in.get_head()); rg->name_outputs(ref.node, { name.c_str() }); - return in.transmute(ref); + return std::move(std::move(in).transmute(ref)); + } + + [[nodiscard]] inline TypedFuture enqueue_presentation(TypedFuture in) { + auto& rg = in.get_render_graph(); + return std::move(std::move(in).release_to(in.get_head(), Access::ePresent, DomainFlagBits::ePE)); } struct InferenceContext { @@ -715,8 +734,6 @@ private: ImageUsageFlags compute_usage(const struct ChainLink* chain); /// @brief Get the image attachment heading this use chain const struct AttachmentInfo& get_chain_attachment(const struct ChainLink* chain); - /// @brief Get the last name that references this chain (may not exist) - std::optional get_last_use_name(const struct ChainLink* chain); /// @brief Dump the pass dependency graph in graphviz format std::string dump_graph(); @@ -734,20 +751,12 @@ private: }; struct SubmitInfo { - std::vector> relative_waits; - std::vector> absolute_waits; std::vector command_buffers; - std::vector future_signals; - std::vector used_swapchains; - }; - - struct SubmitBatch { - DomainFlagBits domain; - std::vector submits; - }; - - struct SubmitBundle { - std::vector batches; + std::vector> relative_waits; + std::vector waits; + std::vector signals; + std::vector pres_wait; + std::vector pres_signal; }; struct ExecutableRenderGraph { @@ -760,15 +769,12 @@ private: ExecutableRenderGraph(ExecutableRenderGraph&&) noexcept; ExecutableRenderGraph& operator=(ExecutableRenderGraph&&) noexcept; - Result execute(Allocator&, std::vector> swp_with_index); - - Result is_resource_image_in_general_layout(const NameReference&, struct PassInfo* pass_info); + Result execute(Allocator& allocator); private: struct RGCImpl* impl; void fill_render_pass_info(struct RenderPassInfo& rpass, const size_t& i, class CommandBuffer& cobuf); - Result 
record_single_submit(Allocator&, std::span passes, DomainFlagBits domain); friend struct InferenceContext; }; diff --git a/include/vuk/Swapchain.hpp b/include/vuk/Swapchain.hpp index 88e9ed22..b2d6dcc8 100644 --- a/include/vuk/Swapchain.hpp +++ b/include/vuk/Swapchain.hpp @@ -2,6 +2,7 @@ #include "vuk/Config.hpp" #include "vuk/Types.hpp" +#include "vuk/vuk_fwd.hpp" #include @@ -58,23 +59,19 @@ namespace vuk { eSharedContinuousRefresh = VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR }; + struct ImageAttachment; + struct Swapchain { + Swapchain(Allocator allocator, size_t image_count); + + Allocator allocator; VkSwapchainKHR swapchain; VkSurfaceKHR surface; - vuk::Format format; - vuk::Extent2D extent = { 0, 0 }; - std::vector images; - std::vector image_views; - }; - - using SwapchainRef = Swapchain*; - - struct SwapchainRenderBundle { - SwapchainRef swapchain; + std::vector images; + uint32_t linear_index = 0; uint32_t image_index; - VkSemaphore present_ready; - VkSemaphore render_complete; + std::vector semaphores; /* present_rdy_0 render_complete_0 present_rdy_1 render_complete_1 ... 
*/ VkResult acquire_result; }; } // namespace vuk \ No newline at end of file diff --git a/include/vuk/Types.hpp b/include/vuk/Types.hpp index 079308a3..ec208624 100644 --- a/include/vuk/Types.hpp +++ b/include/vuk/Types.hpp @@ -1033,14 +1033,15 @@ namespace vuk { enum class DomainFlagBits { eNone = 0, eHost = 1 << 0, - eGraphicsQueue = 1 << 1, - eComputeQueue = 1 << 2, - eTransferQueue = 1 << 3, - eGraphicsOperation = 1 << 4, - eComputeOperation = 1 << 5, - eTransferOperation = 1 << 6, - eQueueMask = 0b1110, - eOpMask = 0b1110000, + ePE = 1 << 1, + eGraphicsQueue = 1 << 2, + eComputeQueue = 1 << 3, + eTransferQueue = 1 << 4, + eGraphicsOperation = 1 << 5, + eComputeOperation = 1 << 6, + eTransferOperation = 1 << 7, + eQueueMask = 0b11100, + eOpMask = 0b11100000, eGraphicsOnGraphics = eGraphicsQueue | eGraphicsOperation, eComputeOnGraphics = eGraphicsQueue | eComputeOperation, eTransferOnGraphics = eGraphicsQueue | eTransferOperation, @@ -1048,7 +1049,7 @@ namespace vuk { eTransferOnCompute = eComputeQueue | eComputeOperation, eTransferOnTransfer = eTransferQueue | eTransferOperation, eDevice = eGraphicsQueue | eComputeQueue | eTransferQueue, - eAny = eDevice | eHost + eAny = eDevice | eHost | ePE }; using DomainFlags = Flags; diff --git a/include/vuk/VulkanPFNRequired.hpp b/include/vuk/VulkanPFNRequired.hpp index c9970de2..40c424f0 100644 --- a/include/vuk/VulkanPFNRequired.hpp +++ b/include/vuk/VulkanPFNRequired.hpp @@ -92,6 +92,7 @@ VUK_X(vkWaitSemaphores) VUK_X(vkDestroySemaphore) VUK_X(vkQueueSubmit) +VUK_X(vkQueueWaitIdle) VUK_X(vkDeviceWaitIdle) VUK_Y(vkGetPhysicalDeviceMemoryProperties) diff --git a/include/vuk/runtime/ThisThreadExecutor.hpp b/include/vuk/runtime/ThisThreadExecutor.hpp new file mode 100644 index 00000000..f8a659f4 --- /dev/null +++ b/include/vuk/runtime/ThisThreadExecutor.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include "vuk/Config.hpp" +#include "vuk/Executor.hpp" +#include "vuk/Result.hpp" + +#include +#include + +namespace vuk { + /// 
@brief Abstraction of execution on the current thread + struct ThisThreadExecutor : Executor { + ThisThreadExecutor() : Executor(Executor::Type::eThisThread, DomainFlagBits::eHost, 0) {} + + ThisThreadExecutor(ThisThreadExecutor&&) = default; + ThisThreadExecutor& operator=(ThisThreadExecutor&&) = default; + + // scheduling on the current thread is lock-free + void lock() override {} + void unlock() override {} + Result wait_idle() { + return { expected_value }; + } + }; +} // namespace vuk \ No newline at end of file diff --git a/include/vuk/runtime/vk/VulkanQueueExecutor.hpp b/include/vuk/runtime/vk/VulkanQueueExecutor.hpp new file mode 100644 index 00000000..740a76a3 --- /dev/null +++ b/include/vuk/runtime/vk/VulkanQueueExecutor.hpp @@ -0,0 +1,39 @@ +#pragma once + +#include "vuk/Config.hpp" +#include "vuk/Executor.hpp" + +#include +#include + +namespace vuk { + struct SubmitInfo; + struct TimelineSemaphore; +} + +namespace vuk::rtvk { + /// @brief Abstraction of a device queue in Vulkan + struct QueueExecutor : Executor { + QueueExecutor(VkDevice device, DomainFlagBits domain, const struct FunctionPointers& fps, VkQueue queue, uint32_t queue_family_index, TimelineSemaphore ts); + ~QueueExecutor(); + + QueueExecutor(QueueExecutor&&) noexcept; + QueueExecutor& operator=(QueueExecutor&&) noexcept; + + Result submit_batch(std::vector batch); + uint64_t get_sync_value(); + VkSemaphore get_semaphore(); + uint32_t get_queue_family_index(); + + void lock() override; + void unlock() override; + Result wait_idle() override; + + Result submit(std::span submit_infos, VkFence fence); + Result submit(std::span submit_infos, VkFence fence); + + Result queue_present(VkPresentInfoKHR pi); + + struct QueueImpl* impl; + }; +} // namespace vuk::rtvk \ No newline at end of file diff --git a/include/vuk/vuk_fwd.hpp b/include/vuk/vuk_fwd.hpp index 1471eddd..9dcdb381 100644 --- a/include/vuk/vuk_fwd.hpp +++ b/include/vuk/vuk_fwd.hpp @@ -9,7 +9,6 @@ namespace vuk { class CommandBuffer; 
struct Swapchain; - using SwapchainRef = Swapchain*; class LegacyGPUAllocator; diff --git a/src/Context.cpp b/src/Context.cpp index 407f0947..4533e3b7 100644 --- a/src/Context.cpp +++ b/src/Context.cpp @@ -43,7 +43,7 @@ namespace { #undef VUK_Y }*/ - void load_pfns_dynamic(VkInstance instance, VkDevice device, vuk::ContextCreateParameters::FunctionPointers& pfns) { + void load_pfns_dynamic(VkInstance instance, VkDevice device, vuk::rtvk::FunctionPointers& pfns) { #define VUK_X(name) \ if (pfns.name == nullptr) { \ pfns.name = (PFN_##name)pfns.vkGetDeviceProcAddr(device, #name); \ @@ -57,93 +57,53 @@ namespace { #undef VUK_X #undef VUK_Y } +} // namespace - bool check_pfns(vuk::ContextCreateParameters::FunctionPointers& pfns) { - bool valid = true; -#define VUK_X(name) valid = valid && pfns.name; -#define VUK_Y(name) valid = valid && pfns.name; +bool vuk::rtvk::FunctionPointers::check_pfns() { + bool valid = true; +#define VUK_X(name) valid = valid && name; +#define VUK_Y(name) valid = valid && name; #include "vuk/VulkanPFNRequired.hpp" #undef VUK_X #undef VUK_Y - return valid; - } + return valid; +} - bool load_pfns(vuk::ContextCreateParameters params, vuk::ContextCreateParameters::FunctionPointers& pfns) { - // PFN loading - // if the user passes in PFNs, those will be used, always - if (check_pfns(pfns)) { - return true; - } - // we don't have all the PFNs, so we will load them if this is allowed - if (pfns.vkGetInstanceProcAddr && pfns.vkGetDeviceProcAddr && params.allow_dynamic_loading_of_vk_function_pointers) { - load_pfns_dynamic(params.instance, params.device, pfns); - return check_pfns(pfns); - } else { - return false; +vuk::Result vuk::rtvk::FunctionPointers::load_pfns(VkInstance instance, VkDevice device, bool allow_dynamic_loading_of_vk_function_pointers) { + // PFN loading + // if the user passes in PFNs, those will be used, always + if (check_pfns()) { + return { vuk::expected_value }; + } + // we don't have all the PFNs, so we will load them if this 
is allowed + if (vkGetInstanceProcAddr && vkGetDeviceProcAddr && allow_dynamic_loading_of_vk_function_pointers) { + load_pfns_dynamic(instance, device, *this); + if (!check_pfns()) { + return { vuk::expected_error, + vuk::RequiredPFNMissingException{ "A Vulkan PFN is required, but was not provided and dynamic loading could not load it." } }; } + } else { + return { vuk::expected_error, vuk::RequiredPFNMissingException{ "A Vulkan PFN is required, but was not provided and dynamic loading was not allowed." } }; } -} // namespace + + return { vuk::expected_value }; +} namespace vuk { Context::Context(ContextCreateParameters params) : - ContextCreateParameters::FunctionPointers(params.pointers), + rtvk::FunctionPointers(params.pointers), instance(params.instance), device(params.device), - physical_device(params.physical_device), - graphics_queue_family_index(params.graphics_queue_family_index), - compute_queue_family_index(params.compute_queue_family_index), - transfer_queue_family_index(params.transfer_queue_family_index) { - // TODO: conversion to static factory fn - bool pfn_load_success = load_pfns(params, *this); - assert(pfn_load_success); - - [[maybe_unused]] bool dedicated_graphics_queue_ = false; - bool dedicated_compute_queue_ = false; - bool dedicated_transfer_queue_ = false; - - if (params.graphics_queue != VK_NULL_HANDLE && params.graphics_queue_family_index != VK_QUEUE_FAMILY_IGNORED) { - dedicated_graphics_queue_ = true; - } + physical_device(params.physical_device) { + assert(check_pfns()); - if (params.compute_queue != VK_NULL_HANDLE && params.compute_queue_family_index != VK_QUEUE_FAMILY_IGNORED) { - dedicated_compute_queue_ = true; - } else { - compute_queue_family_index = params.graphics_queue_family_index; - } - - if (params.transfer_queue != VK_NULL_HANDLE && params.transfer_queue_family_index != VK_QUEUE_FAMILY_IGNORED) { - dedicated_transfer_queue_ = true; - } else { - transfer_queue_family_index = compute_queue ? 
params.compute_queue_family_index : params.graphics_queue_family_index; - } impl = new ContextImpl(*this); - - { - TimelineSemaphore ts; - impl->device_vk_resource->allocate_timeline_semaphores(std::span{ &ts, 1 }, {}); - dedicated_graphics_queue.emplace(this->vkQueueSubmit, this->vkQueueSubmit2KHR, params.graphics_queue, params.graphics_queue_family_index, ts); - set_name(params.graphics_queue, "Graphics Queue"); - graphics_queue = &dedicated_graphics_queue.value(); - } - if (dedicated_compute_queue_) { - TimelineSemaphore ts; - impl->device_vk_resource->allocate_timeline_semaphores(std::span{ &ts, 1 }, {}); - dedicated_compute_queue.emplace(this->vkQueueSubmit, this->vkQueueSubmit2KHR, params.compute_queue, params.compute_queue_family_index, ts); - set_name(params.compute_queue, "Compute Queue"); - compute_queue = &dedicated_compute_queue.value(); - } else { - compute_queue = graphics_queue; - } - if (dedicated_transfer_queue_) { - TimelineSemaphore ts; - impl->device_vk_resource->allocate_timeline_semaphores(std::span{ &ts, 1 }, {}); - dedicated_transfer_queue.emplace(this->vkQueueSubmit, this->vkQueueSubmit2KHR, params.transfer_queue, params.transfer_queue_family_index, ts); - set_name(params.transfer_queue, "Transfer Queue"); - transfer_queue = &dedicated_transfer_queue.value(); - } else { - transfer_queue = compute_queue ? 
compute_queue : graphics_queue; + impl->executors = std::move(params.executors); + for (auto& exe : impl->executors) { + if (exe->type == Executor::Type::eVulkanDeviceQueue) { + all_queue_families.push_back(static_cast(exe.get())->get_queue_family_index()); + } } - this->vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties); min_buffer_alignment = std::max(physical_device_properties.limits.minUniformBufferOffsetAlignment, physical_device_properties.limits.minStorageBufferOffsetAlignment); @@ -159,23 +119,6 @@ namespace vuk { instance = o.instance; device = o.device; physical_device = o.physical_device; - graphics_queue_family_index = o.graphics_queue_family_index; - compute_queue_family_index = o.compute_queue_family_index; - transfer_queue_family_index = o.transfer_queue_family_index; - dedicated_graphics_queue = std::move(o.dedicated_graphics_queue); - graphics_queue = &dedicated_graphics_queue.value(); - dedicated_compute_queue = std::move(o.dedicated_compute_queue); - if (dedicated_compute_queue) { - compute_queue = &o.dedicated_compute_queue.value(); - } else { - compute_queue = graphics_queue; - } - dedicated_transfer_queue = std::move(o.dedicated_transfer_queue); - if (dedicated_transfer_queue) { - transfer_queue = &dedicated_transfer_queue.value(); - } else { - transfer_queue = compute_queue ? 
compute_queue : graphics_queue; - } rt_properties = o.rt_properties; impl->pipelinebase_cache.allocator = this; @@ -192,23 +135,6 @@ namespace vuk { instance = o.instance; device = o.device; physical_device = o.physical_device; - graphics_queue_family_index = o.graphics_queue_family_index; - compute_queue_family_index = o.compute_queue_family_index; - transfer_queue_family_index = o.transfer_queue_family_index; - dedicated_graphics_queue = std::move(o.dedicated_graphics_queue); - graphics_queue = &dedicated_graphics_queue.value(); - dedicated_compute_queue = std::move(o.dedicated_compute_queue); - if (dedicated_compute_queue) { - compute_queue = &o.dedicated_compute_queue.value(); - } else { - compute_queue = graphics_queue; - } - dedicated_transfer_queue = std::move(o.dedicated_transfer_queue); - if (dedicated_transfer_queue) { - transfer_queue = &dedicated_transfer_queue.value(); - } else { - transfer_queue = compute_queue ? compute_queue : graphics_queue; - } impl->pipelinebase_cache.allocator = this; impl->pool_cache.allocator = this; @@ -221,6 +147,24 @@ namespace vuk { return *this; } + Executor* Context::get_executor(Executor::Tag tag) { + auto it = std::find_if(impl->executors.begin(), impl->executors.end(), [=](auto& exe) { return exe->tag == tag; }); + if (it != impl->executors.end()) { + return it->get(); + } else { + return nullptr; + } + } + + Executor* Context::get_executor(DomainFlagBits domain) { + auto it = std::find_if(impl->executors.begin(), impl->executors.end(), [=](auto& exe) { return exe->tag.domain == domain; }); + if (it != impl->executors.end()) { + return it->get(); + } else { + return nullptr; + } + } + bool Context::debug_enabled() const { return this->vkSetDebugUtilsObjectNameEXT != nullptr; } @@ -240,22 +184,6 @@ namespace vuk { this->vkCmdEndDebugUtilsLabelEXT(cb); } - Result Context::submit_graphics(std::span sis, VkFence fence) { - return graphics_queue->submit(sis, fence); - } - - Result Context::submit_graphics(std::span sis) { 
- return graphics_queue->submit(sis, VK_NULL_HANDLE); - } - - Result Context::submit_transfer(std::span sis, VkFence fence) { - return transfer_queue->submit(sis, fence); - } - - Result Context::submit_transfer(std::span sis) { - return transfer_queue->submit(sis, VK_NULL_HANDLE); - } - void PersistentDescriptorSet::update_combined_image_sampler(unsigned binding, unsigned array_index, ImageView iv, Sampler sampler, ImageLayout layout) { assert(binding < descriptor_bindings.size()); assert(array_index < descriptor_bindings[binding].size()); @@ -564,40 +492,6 @@ namespace vuk { return data; } - Queue& Context::domain_to_queue(DomainFlags domain) const { - auto queue_only = (DomainFlagBits)(domain & DomainFlagBits::eQueueMask).m_mask; - switch (queue_only) { - case DomainFlagBits::eGraphicsQueue: - return *graphics_queue; - case DomainFlagBits::eComputeQueue: - return *compute_queue; - case DomainFlagBits::eTransferQueue: - return *transfer_queue; - default: - assert(0); - return *transfer_queue; - } - }; - - uint32_t Context::domain_to_queue_index(DomainFlags domain) const { - auto queue_only = (DomainFlagBits)(domain & DomainFlagBits::eQueueMask).m_mask; - switch (queue_only) { - case DomainFlagBits::eGraphicsQueue: - return graphics_queue_family_index; - case DomainFlagBits::eComputeQueue: - return compute_queue_family_index; - case DomainFlagBits::eTransferQueue: - return transfer_queue_family_index; - default: - assert(0); - return 0; - } - }; - - uint32_t Context::domain_to_queue_family_index(DomainFlags domain) const { - return domain_to_queue_index(domain); - } - Query Context::create_timestamp_query() { return { impl->query_id_counter++ }; } @@ -635,21 +529,6 @@ namespace vuk { return pl; } - SwapchainRef Context::add_swapchain(Swapchain sw) { - std::lock_guard _(impl->swapchains_lock); - return &*impl->swapchains.emplace(sw); - } - - void Context::remove_swapchain(SwapchainRef sw) { - std::lock_guard _(impl->swapchains_lock); - for (auto it = 
impl->swapchains.begin(); it != impl->swapchains.end(); it++) { - if (&*it == sw) { - impl->swapchains.erase(it); - return; - } - } - } - uint64_t Context::get_frame_count() const { return impl->frame_counter; } @@ -722,27 +601,8 @@ namespace vuk { if (impl) { this->vkDeviceWaitIdle(device); - for (auto& s : impl->swapchains) { - for (auto& swiv : s.image_views) { - this->vkDestroyImageView(device, swiv.payload, nullptr); - } - this->vkDestroySwapchainKHR(device, s.swapchain, nullptr); - } - this->vkDestroyPipelineCache(device, vk_pipeline_cache, nullptr); - if (dedicated_graphics_queue) { - impl->device_vk_resource->deallocate_timeline_semaphores(std::span{ &dedicated_graphics_queue->get_submit_sync(), 1 }); - } - - if (dedicated_compute_queue) { - impl->device_vk_resource->deallocate_timeline_semaphores(std::span{ &dedicated_compute_queue->get_submit_sync(), 1 }); - } - - if (dedicated_transfer_queue) { - impl->device_vk_resource->deallocate_timeline_semaphores(std::span{ &dedicated_transfer_queue->get_submit_sync(), 1 }); - } - delete impl; } } @@ -758,19 +618,14 @@ namespace vuk { Result Context::wait_idle() { std::unique_lock graphics_lock; - if (dedicated_graphics_queue) { - graphics_lock = std::unique_lock{ graphics_queue->get_queue_lock() }; - } - std::unique_lock compute_lock; - if (dedicated_compute_queue) { - compute_lock = std::unique_lock{ compute_queue->get_queue_lock() }; + for (auto& exe : impl->executors) { + exe->lock(); } - std::unique_lock transfer_lock; - if (dedicated_transfer_queue) { - transfer_lock = std::unique_lock{ transfer_queue->get_queue_lock() }; - } - VkResult result = this->vkDeviceWaitIdle(device); + + for (auto& exe : impl->executors) { + exe->unlock(); + } if (result < 0) { return { expected_error, VkException{ result } }; } diff --git a/src/ContextImpl.hpp b/src/ContextImpl.hpp index 6c575afc..ae502845 100644 --- a/src/ContextImpl.hpp +++ b/src/ContextImpl.hpp @@ -32,6 +32,8 @@ namespace vuk { std::unique_ptr 
device_vk_resource; Allocator direct_allocator; + std::vector> executors; + Cache pipelinebase_cache; Cache pool_cache; Cache sampler_cache; diff --git a/src/DeviceVkResource.cpp b/src/DeviceVkResource.cpp index 1b4c91fb..aa294f5f 100644 --- a/src/DeviceVkResource.cpp +++ b/src/DeviceVkResource.cpp @@ -83,15 +83,7 @@ namespace vuk { vmaCreateAllocator(&allocatorInfo, &impl->allocator); ctx.vkGetPhysicalDeviceProperties(ctx.physical_device, &impl->properties); - if (ctx.transfer_queue_family_index != ctx.graphics_queue_family_index && ctx.compute_queue_family_index != ctx.graphics_queue_family_index) { - impl->all_queue_families = { ctx.graphics_queue_family_index, ctx.compute_queue_family_index, ctx.transfer_queue_family_index }; - } else if (ctx.transfer_queue_family_index != ctx.graphics_queue_family_index) { - impl->all_queue_families = { ctx.graphics_queue_family_index, ctx.transfer_queue_family_index }; - } else if (ctx.compute_queue_family_index != ctx.graphics_queue_family_index) { - impl->all_queue_families = { ctx.graphics_queue_family_index, ctx.compute_queue_family_index }; - } else { - impl->all_queue_families = { ctx.graphics_queue_family_index }; - } + impl->all_queue_families = ctx.all_queue_families; impl->queue_family_count = (uint32_t)impl->all_queue_families.size(); } diff --git a/src/ExecutableRenderGraph.cpp b/src/ExecutableRenderGraph.cpp index a38133d6..394d96ba 100644 --- a/src/ExecutableRenderGraph.cpp +++ b/src/ExecutableRenderGraph.cpp @@ -7,8 +7,10 @@ #include "vuk/Hash.hpp" // for create #include "vuk/RenderGraph.hpp" #include "vuk/Util.hpp" +#include "vuk/runtime/vk/VulkanQueueExecutor.hpp" #include +#include #include #include #include @@ -33,7 +35,7 @@ namespace vuk { ctx.vkCmdBeginRenderPass(cbuf, &rbi, use_secondary_command_buffers ? 
VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS : VK_SUBPASS_CONTENTS_INLINE); } - + /* [[nodiscard]] bool resolve_image_barrier(const Context& ctx, VkImageMemoryBarrier2KHR& dep, const AttachmentInfo& bound, vuk::DomainFlagBits current_domain) { dep.image = bound.attachment.image.image; // turn base_{layer, level} into absolute values wrt the image @@ -70,22 +72,22 @@ namespace vuk { dep.subresourceRange.levelCount = bound.attachment.level_count; } - if (dep.srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED) { - assert(dep.dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED); - bool transition = dep.dstQueueFamilyIndex != dep.srcQueueFamilyIndex; - auto src_domain = static_cast(dep.srcQueueFamilyIndex); - auto dst_domain = static_cast(dep.dstQueueFamilyIndex); - dep.srcQueueFamilyIndex = ctx.domain_to_queue_family_index(static_cast(dep.srcQueueFamilyIndex)); - dep.dstQueueFamilyIndex = ctx.domain_to_queue_family_index(static_cast(dep.dstQueueFamilyIndex)); - if (dep.srcQueueFamilyIndex == dep.dstQueueFamilyIndex && transition) { - if (dst_domain != current_domain) { - return false; // discard release barriers if they map to the same queue - } - } - } + if (dep.srcQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED) { + assert(dep.dstQueueFamilyIndex != VK_QUEUE_FAMILY_IGNORED); + bool transition = dep.dstQueueFamilyIndex != dep.srcQueueFamilyIndex; + auto src_domain = static_cast(dep.srcQueueFamilyIndex); + auto dst_domain = static_cast(dep.dstQueueFamilyIndex); + dep.srcQueueFamilyIndex = ctx.domain_to_queue_family_index(static_cast(dep.srcQueueFamilyIndex)); + dep.dstQueueFamilyIndex = ctx.domain_to_queue_family_index(static_cast(dep.dstQueueFamilyIndex)); + if (dep.srcQueueFamilyIndex == dep.dstQueueFamilyIndex && transition) { + if (dst_domain != current_domain) { + return false; // discard release barriers if they map to the same queue + } + } + } - return true; - } + return true; + }*/ void ExecutableRenderGraph::fill_render_pass_info(vuk::RenderPassInfo& rpass, const size_t& 
i, vuk::CommandBuffer& cobuf) { if (rpass.handle == VK_NULL_HANDLE) { @@ -283,15 +285,21 @@ namespace vuk { return { expected_value, std::move(si) }; }*/ - struct QueueRecording { - SubmitBatch sb; + + struct VkQueueStream : public Stream { + Context& ctx; DomainFlagBits domain; + vuk::rtvk::QueueExecutor* executor; + + std::vector batch; + std::deque signals; SubmitInfo si; Unique cpool; Unique hl_cbuf; - VkCommandBuffer cbuf; + VkCommandBuffer cbuf = VK_NULL_HANDLE; + ProfilingCallbacks* callbacks; bool is_recording = false; - void* cbuf_profile_data; + void* cbuf_profile_data = nullptr; RenderPassInfo rp = {}; std::vector im_bars; @@ -299,7 +307,114 @@ namespace vuk { std::vector mem_bars; std::vector half_mem_bars; - void flush_barriers(Context& ctx) { + VkQueueStream(Allocator alloc, vuk::rtvk::QueueExecutor* qe, ProfilingCallbacks* callbacks) : + Stream(alloc, qe->tag.domain), + ctx(alloc.get_context()), + domain(qe->tag.domain), + executor(qe), + callbacks(callbacks) { + batch.resize(1); + } + + void add_dependency(Stream* dep) override { + if (is_recording) { + end_cbuf(); + } + batch.emplace_back(); + dependencies.push_back(dep); + } + + void sync_deps() override { + for (auto dep : dependencies) { + auto res = *dep->submit(); + if (res.signal) { + batch.front().waits.push_back(res.signal); + } + if (res.sema_wait != VK_NULL_HANDLE) { + batch.front().pres_wait.push_back(res.sema_wait); + } + } + if (!is_recording) { + begin_cbuf(); + } + flush_barriers(); + } + + Result submit(Signal* signal = nullptr) override { + sync_deps(); + end_cbuf(); + if (!signal) { + signal = &signals.emplace_back(); + } + batch.back().signals.emplace_back(signal); + executor->submit_batch(batch); + batch.clear(); + batch.resize(1); + return { expected_value, signal }; + } + + Result present(Swapchain& swp) { + sync_deps(); + end_cbuf(); + batch.back().pres_signal.emplace_back(swp.semaphores[swp.linear_index * 2 + 1]); + executor->submit_batch(batch); + batch.clear(); + 
batch.resize(1); + VkPresentInfoKHR pi{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR }; + pi.swapchainCount = 1; + pi.pSwapchains = &swp.swapchain; + pi.pImageIndices = &swp.image_index; + pi.waitSemaphoreCount = 1; + pi.pWaitSemaphores = &swp.semaphores[swp.linear_index * 2 + 1]; + auto res = executor->queue_present(pi); + if (res.value() && swp.acquire_result == VK_SUBOPTIMAL_KHR) { + return { expected_value, VK_SUBOPTIMAL_KHR }; + } + return res; + } + + Result begin_cbuf() { + assert(!is_recording); + is_recording = true; + domain = domain; + if (cpool->command_pool == VK_NULL_HANDLE) { + cpool = Unique(alloc); + VkCommandPoolCreateInfo cpci{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; + cpci.flags = VkCommandPoolCreateFlagBits::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; + cpci.queueFamilyIndex = executor->get_queue_family_index(); // currently queue family idx = queue idx + + VUK_DO_OR_RETURN(alloc.allocate_command_pools(std::span{ &*cpool, 1 }, std::span{ &cpci, 1 })); + } + hl_cbuf = Unique(alloc); + CommandBufferAllocationCreateInfo ci{ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .command_pool = *cpool }; + VUK_DO_OR_RETURN(alloc.allocate_command_buffers(std::span{ &*hl_cbuf, 1 }, std::span{ &ci, 1 })); + + si.command_buffers.emplace_back(*hl_cbuf); + + cbuf = hl_cbuf->command_buffer; + + VkCommandBufferBeginInfo cbi{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT }; + alloc.get_context().vkBeginCommandBuffer(cbuf, &cbi); + + cbuf_profile_data = nullptr; + if (callbacks->on_begin_command_buffer) + cbuf_profile_data = callbacks->on_begin_command_buffer(callbacks->user_data, cbuf); + } + + Result end_cbuf() { + flush_barriers(); + is_recording = false; + if (callbacks->on_end_command_buffer) + callbacks->on_end_command_buffer(callbacks->user_data, cbuf_profile_data); + if (auto result = ctx.vkEndCommandBuffer(hl_cbuf->command_buffer); result != VK_SUCCESS) { + return { expected_error, VkException{ result } 
}; + } + batch.back().command_buffers.push_back(hl_cbuf->command_buffer); + cbuf = VK_NULL_HANDLE; + return { expected_value }; + }; + + void flush_barriers() { VkDependencyInfoKHR dependency_info{ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR, .memoryBarrierCount = (uint32_t)mem_bars.size(), .pMemoryBarriers = mem_bars.data(), @@ -350,8 +465,18 @@ namespace vuk { dst_use.domain = src_use.domain; } - barrier.srcQueueFamilyIndex = static_cast((src_use.domain & DomainFlagBits::eQueueMask).m_mask); - barrier.dstQueueFamilyIndex = static_cast((dst_use.domain & DomainFlagBits::eQueueMask).m_mask); + barrier.srcQueueFamilyIndex = src_use.queue_family_index; + if (dst_use.domain & DomainFlagBits::eDevice) { + barrier.dstQueueFamilyIndex = dst_use.queue_family_index; + } else { + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + } + + if (src_use.queue_family_index == dst_use.queue_family_index) { + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + } if (src_use.stages == PipelineStageFlags{}) { barrier.srcAccessMask = {}; @@ -383,6 +508,10 @@ namespace vuk { im_bars.push_back(barrier); img_att.layout = (ImageLayout)barrier.newLayout; } + + if (is_framebuffer_attachment(dst_access)) { + prepare_render_pass_attachment(alloc, img_att); + } }; void synch_memory(QueueResourceUse src_use, QueueResourceUse dst_use, void* tag) { @@ -462,61 +591,109 @@ namespace vuk { rp.fbci.attachments.push_back(img_att.image_view); } - Result prepare_render_pass(Allocator alloc) { - if (rp.rpci.attachments.size() > 0) { - SubpassDescription sd; - size_t color_count = 0; - sd.colorAttachmentCount = (uint32_t)rp.rpci.color_refs.size(); - sd.pColorAttachments = rp.rpci.color_refs.data(); - - sd.pDepthStencilAttachment = rp.rpci.ds_ref ? 
&*rp.rpci.ds_ref : nullptr; - sd.flags = {}; - sd.inputAttachmentCount = 0; - sd.pInputAttachments = nullptr; - sd.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; - sd.preserveAttachmentCount = 0; - sd.pPreserveAttachments = nullptr; - - rp.rpci.subpass_descriptions.push_back(sd); - - rp.rpci.subpassCount = (uint32_t)rp.rpci.subpass_descriptions.size(); - rp.rpci.pSubpasses = rp.rpci.subpass_descriptions.data(); - - // we use barriers - rp.rpci.dependencyCount = 0; - rp.rpci.pDependencies = nullptr; - - rp.rpci.attachmentCount = (uint32_t)rp.rpci.attachments.size(); - rp.rpci.pAttachments = rp.rpci.attachments.data(); - - auto result = alloc.allocate_render_passes(std::span{ &rp.handle, 1 }, std::span{ &rp.rpci, 1 }); - - rp.fbci.renderPass = rp.handle; - rp.fbci.pAttachments = rp.framebuffer_ivs.data(); - rp.fbci.attachmentCount = (uint32_t)rp.framebuffer_ivs.size(); - - Unique fb(alloc); - VUK_DO_OR_RETURN(alloc.allocate_framebuffers(std::span{ &*fb, 1 }, std::span{ &rp.fbci, 1 })); - rp.framebuffer = *fb; // queue framebuffer for destruction - // drop render pass immediately - if (result) { - alloc.deallocate(std::span{ &rp.handle, 1 }); - } - begin_render_pass(alloc.get_context(), rp, cbuf, false); + Result prepare_render_pass() { + SubpassDescription sd; + size_t color_count = 0; + sd.colorAttachmentCount = (uint32_t)rp.rpci.color_refs.size(); + sd.pColorAttachments = rp.rpci.color_refs.data(); - return { expected_value }; + sd.pDepthStencilAttachment = rp.rpci.ds_ref ? 
&*rp.rpci.ds_ref : nullptr; + sd.flags = {}; + sd.inputAttachmentCount = 0; + sd.pInputAttachments = nullptr; + sd.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + sd.preserveAttachmentCount = 0; + sd.pPreserveAttachments = nullptr; + + rp.rpci.subpass_descriptions.push_back(sd); + + rp.rpci.subpassCount = (uint32_t)rp.rpci.subpass_descriptions.size(); + rp.rpci.pSubpasses = rp.rpci.subpass_descriptions.data(); + + // we use barriers + rp.rpci.dependencyCount = 0; + rp.rpci.pDependencies = nullptr; + + rp.rpci.attachmentCount = (uint32_t)rp.rpci.attachments.size(); + rp.rpci.pAttachments = rp.rpci.attachments.data(); + + auto result = alloc.allocate_render_passes(std::span{ &rp.handle, 1 }, std::span{ &rp.rpci, 1 }); + + rp.fbci.renderPass = rp.handle; + rp.fbci.pAttachments = rp.framebuffer_ivs.data(); + rp.fbci.attachmentCount = (uint32_t)rp.framebuffer_ivs.size(); + + Unique fb(alloc); + VUK_DO_OR_RETURN(alloc.allocate_framebuffers(std::span{ &*fb, 1 }, std::span{ &rp.fbci, 1 })); + rp.framebuffer = *fb; // queue framebuffer for destruction + // drop render pass immediately + if (result) { + alloc.deallocate(std::span{ &rp.handle, 1 }); } + begin_render_pass(alloc.get_context(), rp, cbuf, false); + + return { expected_value }; } - void end_render_pass(Allocator alloc) { + void end_render_pass() { alloc.get_context().vkCmdEndRenderPass(cbuf); rp = {}; } }; + struct HostStream : Stream { + HostStream(Allocator alloc) : Stream(alloc, DomainFlagBits::eHost) {} + + void add_dependency(Stream* dep) { + dependencies.push_back(dep); + } + void sync_deps() { + assert(false); + } + + void synch_image(ImageAttachment& img_att, QueueResourceUse src_use, QueueResourceUse dst_use, Access dst_access, void* tag) { + /* host -> host and host -> device not needed, device -> host inserts things on the device side */ + return; + } + void synch_memory(QueueResourceUse src_use, QueueResourceUse dst_use, void* tag) { + /* host -> host and host -> device not needed, device -> 
host inserts things on the device side */ + return; + } + + Result submit(Signal* signal = nullptr) { + return { expected_value, signal }; + } + }; + + struct VkPEStream : Stream { + VkPEStream(Allocator alloc, Swapchain& swp) : Stream(alloc, DomainFlagBits::ePE), swp(&swp) {} + Swapchain* swp; + + void add_dependency(Stream* dep) { + dependencies.push_back(dep); + } + void sync_deps() { + assert(false); + } + + void synch_image(ImageAttachment& img_att, QueueResourceUse src_use, QueueResourceUse dst_use, Access dst_access, void* tag) { + } + + void synch_memory(QueueResourceUse src_use, QueueResourceUse dst_use, void* tag) { /* PE doesn't do memory */ + assert(false); + } + + Result submit(Signal* signal = nullptr) { + assert(swp); + assert(signal == nullptr); + SubmitResult sr{ .sema_wait = swp->semaphores[2 * swp->linear_index] }; + return { expected_value, sr }; + } + }; + enum class RW { eRead, eWrite }; struct ExecutionInfo { - DomainFlagBits domain; + Stream* stream; size_t naming_index; }; @@ -592,19 +769,17 @@ namespace vuk { } } - void done(Node* node, DomainFlagBits dst_domain) { - executed.emplace(node, ExecutionInfo{ dst_domain, naming_index_counter++ }); + void done(Node* node, Stream* stream) { + executed.emplace(node, ExecutionInfo{ stream, naming_index_counter++ }); } template T& get_value(Ref parm) { auto& link = res_to_links[parm]; - if (link.urdef.node->kind == Node::VALLOC) { - return *reinterpret_cast(link.urdef.node->valloc.args[0].node->constant.value); - } else if (link.urdef.node->kind == Node::AALLOC) { + if (link.urdef.node->kind == Node::AALLOC) { return reinterpret_cast(link.urdef.node->aalloc.args[0].node->constant.value); } else { - assert(0); + return *reinterpret_cast(get_value(parm)); } }; @@ -614,6 +789,9 @@ namespace vuk { return link.urdef.node->valloc.args[0].node->constant.value; } else if (link.urdef.node->kind == Node::AALLOC) { return link.urdef.node->aalloc.args[0].node->constant.value; + } else if (link.urdef.node->kind 
== Node::ACQUIRE_NEXT_IMAGE) { + Swapchain* swp = reinterpret_cast(link.urdef.node->acquire_next_image.swapchain.node->valloc.args[0].node->constant.value); + return &swp->images[swp->image_index]; } else { assert(0); } @@ -638,96 +816,41 @@ namespace vuk { Context& ctx; Allocator alloc; ProfilingCallbacks* callbacks; - SubmitBundle sbundle; - - std::unordered_map streams; - - auto begin_cbuf(vuk::DomainFlagBits domain) -> Result { - auto& queue_rec = streams[domain]; - assert(!queue_rec.is_recording); - queue_rec.is_recording = true; - queue_rec.domain = domain; - if (queue_rec.cpool->command_pool == VK_NULL_HANDLE) { - queue_rec.cpool = Unique(alloc); - VkCommandPoolCreateInfo cpci{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO }; - cpci.flags = VkCommandPoolCreateFlagBits::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT; - cpci.queueFamilyIndex = ctx.domain_to_queue_family_index(domain); // currently queue family idx = queue idx - VUK_DO_OR_RETURN(alloc.allocate_command_pools(std::span{ &*queue_rec.cpool, 1 }, std::span{ &cpci, 1 })); - } - queue_rec.hl_cbuf = Unique(alloc); - CommandBufferAllocationCreateInfo ci{ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .command_pool = *queue_rec.cpool }; - VUK_DO_OR_RETURN(alloc.allocate_command_buffers(std::span{ &*queue_rec.hl_cbuf, 1 }, std::span{ &ci, 1 })); - - queue_rec.si.command_buffers.emplace_back(*queue_rec.hl_cbuf); - - VkCommandBuffer cbuf = queue_rec.hl_cbuf->command_buffer; - - VkCommandBufferBeginInfo cbi{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT }; - ctx.vkBeginCommandBuffer(cbuf, &cbi); - - queue_rec.cbuf = queue_rec.hl_cbuf->command_buffer; - queue_rec.cbuf_profile_data = nullptr; - if (callbacks->on_begin_command_buffer) - queue_rec.cbuf_profile_data = callbacks->on_begin_command_buffer(callbacks->user_data, cbuf); - return { expected_value }; - }; + std::unordered_map> streams; - auto end_cbuf(vuk::DomainFlagBits domain) -> Result { - auto& queue_rec = 
streams[domain]; - queue_rec.is_recording = false; - if (callbacks->on_end_command_buffer) - callbacks->on_end_command_buffer(callbacks->user_data, queue_rec.cbuf_profile_data); - if (auto result = ctx.vkEndCommandBuffer(queue_rec.hl_cbuf->command_buffer); result != VK_SUCCESS) { - return { expected_error, VkException{ result } }; - } - return { expected_value }; - }; - - QueueRecording* synchronize_domain(DomainFlagBits domain) { - auto& queue_rec = streams[domain]; - - if (!queue_rec.is_recording) { - begin_cbuf(domain); - } - - queue_rec.flush_barriers(alloc.get_context()); + // start recording if needed + // all dependant domains flushed + // all pending sync to be flushed + void synchronize_stream(Stream* stream) { + stream->sync_deps(); + } - return &queue_rec; + Stream* stream_for_domain(DomainFlagBits domain) { + return streams.at(domain).get(); } - auto flush_domain(vuk::DomainFlagBits domain) -> SubmitInfo* { + auto flush_domain(vuk::DomainFlagBits domain, Signal* signal) -> SubmitInfo* { if (domain == DomainFlagBits::eHost) { return nullptr; } - auto& queue_rec = streams[domain]; - - if (queue_rec.cpool.get().command_pool == VK_NULL_HANDLE) { - return nullptr; - } + auto& stream = streams.at(domain); - if (queue_rec.is_recording) { - end_cbuf(domain); - return &queue_rec.sb.submits.emplace_back(std::move(queue_rec.si)); - } else { - return &queue_rec.sb.submits.back(); - } + stream->submit(signal); }; - auto complete_domain(vuk::DomainFlagBits domain) { - auto& queue_rec = streams[domain]; - if (queue_rec.cpool.get().command_pool == VK_NULL_HANDLE) { - return; - } - queue_rec.sb.domain = domain; - sbundle.batches.emplace_back(std::move(queue_rec.sb)); - }; - - void add_sync(Ref parm, Type* arg_ty, void* value, DomainFlagBits src_domain, DomainFlagBits dst_domain) { + void add_sync(Ref parm, + Type* arg_ty, + void* value, + Stream* src_stream, + Stream* dst_stream, + Access src_access = Access::eNone, + Access dst_access = Access::eNone) { auto parm_ty = 
parm.type(); - Access src_access = Access::eNone; - Access dst_access = Access::eNone; + DomainFlagBits src_domain = src_stream ? src_stream->domain : DomainFlagBits::eNone; + DomainFlagBits dst_domain = dst_stream ? dst_stream->domain : DomainFlagBits::eNone; + Type* base_ty = Type::stripped(arg_ty); if (arg_ty->kind == Type::IMBUED_TY) { dst_access = arg_ty->imbued.access; @@ -743,7 +866,6 @@ namespace vuk { } else if (parm_ty->kind == Type::IMBUED_TY) { assert(0); } else { // there is no need to sync (eg. declare) - src_access = Access::eNone; } } else if (parm_ty->kind == Type::ALIASED_TY) { // this is coming from an output annotated, so we know the source access auto src_arg = parm.node->call.args[parm_ty->aliased.ref_idx]; @@ -752,7 +874,6 @@ namespace vuk { src_access = call_ty->imbued.access; } else { // TODO: handling unimbued aliased - src_access = Access::eNone; } } else { /* no dst access */ @@ -767,26 +888,24 @@ namespace vuk { bool cross = has_both && (src_domain != dst_domain); bool only_src = has_src && !has_dst; - auto src_rec = has_src ? &streams[src_domain] : nullptr; - auto dst_rec = has_dst ? 
&streams[dst_domain] : nullptr; + if (cross) { + dst_stream->add_dependency(src_stream); + } if (base_ty->is_image()) { // TODO: of course cross-queue barriers we need to issue twice auto& img_att = *reinterpret_cast(value); if (has_dst) { - dst_rec->synch_image(img_att, src_use, dst_use, dst_access, value); - if (is_framebuffer_attachment(dst_access)) { - dst_rec->prepare_render_pass_attachment(alloc, img_att); - } + dst_stream->synch_image(img_att, src_use, dst_use, dst_access, value); } if (only_src || cross) { - src_rec->synch_image(img_att, src_use, dst_use, dst_access, value); + src_stream->synch_image(img_att, src_use, dst_use, dst_access, value); } } else if (base_ty->is_buffer()) { // buffer needs no cross if (has_dst) { - dst_rec->synch_memory(src_use, dst_use, value); + dst_stream->synch_memory(src_use, dst_use, value); } else if (has_src) { - src_rec->synch_memory(src_use, dst_use, value); + src_stream->synch_memory(src_use, dst_use, value); } } else if (base_ty->kind == Type::ARRAY_TY) { auto elem_ty = base_ty->array.T; @@ -795,13 +914,10 @@ namespace vuk { auto img_atts = reinterpret_cast(value); for (int i = 0; i < size; i++) { if (has_dst) { - dst_rec->synch_image(*img_atts[i], src_use, dst_use, dst_access, img_atts[i]); - if (is_framebuffer_attachment(dst_access)) { - dst_rec->prepare_render_pass_attachment(alloc, *img_atts[i]); - } + dst_stream->synch_image(*img_atts[i], src_use, dst_use, dst_access, img_atts[i]); } if (only_src || cross) { - src_rec->synch_image(*img_atts[i], src_use, dst_use, dst_access, img_atts[i]); + src_stream->synch_image(*img_atts[i], src_use, dst_use, dst_access, img_atts[i]); } } } else if (elem_ty->is_buffer()) { @@ -809,9 +925,9 @@ namespace vuk { // buffer needs no cross auto bufs = reinterpret_cast(value); if (has_dst) { - dst_rec->synch_memory(src_use, dst_use, bufs[i]); + dst_stream->synch_memory(src_use, dst_use, bufs[i]); } else if (has_src) { - src_rec->synch_memory(src_use, dst_use, bufs[i]); + 
src_stream->synch_memory(src_use, dst_use, bufs[i]); } } } else { @@ -825,12 +941,24 @@ namespace vuk { #define VUK_DUMP_EXEC - Result ExecutableRenderGraph::execute(Allocator& alloc, std::vector> swp_with_index) { + Result ExecutableRenderGraph::execute(Allocator& alloc) { Context& ctx = alloc.get_context(); - Scheduler sched(alloc, impl->scheduled_execables, impl->res_to_links, impl->pass_reads); - Recorder recorder(alloc, &impl->callbacks); + recorder.streams.emplace(DomainFlagBits::eHost, std::make_unique(alloc)); + if (auto exe = ctx.get_executor(DomainFlagBits::eGraphicsQueue)) { + recorder.streams.emplace(DomainFlagBits::eGraphicsQueue, + std::make_unique(alloc, static_cast(exe), &impl->callbacks)); + } + auto host_stream = recorder.streams.at(DomainFlagBits::eHost).get(); + + std::deque pe_streams; + + for (auto& item : impl->scheduled_execables) { + item.scheduled_stream = recorder.stream_for_domain(item.scheduled_domain); + } + + Scheduler sched(alloc, impl->scheduled_execables, impl->res_to_links, impl->pass_reads); // DYNAMO // loop through scheduled items @@ -911,10 +1039,16 @@ namespace vuk { attachment.image = **img; // ctx.set_name(attachment.image.image, bound.name.name); } + } else if (node->type[0]->kind == Type::SWAPCHAIN_TY) { +#ifdef VUK_DUMP_EXEC + print_results(node); + fmt::print(" = declare\n"); +#endif + /* no-op */ } else { assert(0); } - sched.done(node, DomainFlagBits::eHost); // declarations execute on the host + sched.done(node, host_stream); // declarations execute on the host break; } case Node::AALLOC: { @@ -924,8 +1058,7 @@ namespace vuk { auto arg_ty = node->aalloc.args[i].type(); auto& parm = node->aalloc.args[i]; - DomainFlagBits src_domain = sched.executed.at(parm.node).domain; - recorder.add_sync(parm, arg_ty, sched.get_value(parm), src_domain, DomainFlagBits::eNone); + recorder.add_sync(parm, arg_ty, sched.get_value(parm), host_stream, nullptr); } #ifdef VUK_DUMP_EXEC @@ -958,7 +1091,7 @@ namespace vuk { fmt::print("\n"); 
#endif - sched.done(node, DomainFlagBits::eHost); // declarations execute on the host + sched.done(node, host_stream); // declarations execute on the host } else { for (auto& parm : node->valloc.args.subspan(1)) { sched.schedule_dependency(parm, RW::eWrite); @@ -967,8 +1100,8 @@ namespace vuk { break; } case Node::CALL: { - if (sched.process(item)) { // we have executed every dep, so execute ourselves too - DomainFlagBits dst_domain = item.scheduled_domain; // the domain this call will execute on + if (sched.process(item)) { // we have executed every dep, so execute ourselves too + Stream* dst_stream = item.scheduled_stream; // the domain this call will execute on // run all the barriers here! @@ -977,7 +1110,6 @@ namespace vuk { auto& parm = node->call.args[i]; auto& link = impl->res_to_links[parm]; - DomainFlagBits src_domain = sched.executed.at(parm.node).domain; if (arg_ty->kind == Type::IMBUED_TY) { auto dst_access = arg_ty->imbued.access; @@ -985,7 +1117,7 @@ namespace vuk { if (is_framebuffer_attachment(dst_access)) { auto urdef = link.urdef.node; auto allocator = urdef->valloc.allocator ? *urdef->valloc.allocator : alloc; - auto& img_att = *reinterpret_cast(link.urdef.node->valloc.args[0].node->constant.value); + auto& img_att = sched.get_value(parm); if (img_att.image_view.payload == VK_NULL_HANDLE) { auto iv = allocate_image_view(allocator, img_att); // TODO: dropping error img_att.image_view = **iv; @@ -996,19 +1128,19 @@ namespace vuk { } } auto value = sched.get_value(parm); - ImageAttachment& att = *reinterpret_cast(value); - recorder.add_sync(parm, arg_ty, value, src_domain, dst_domain); + recorder.add_sync(parm, arg_ty, value, sched.executed.at(parm.node).stream, dst_stream); } // make the renderpass if needed! - auto rec = recorder.synchronize_domain(dst_domain); - + recorder.synchronize_stream(dst_stream); + auto vk_rec = dynamic_cast(dst_stream); // TODO: change this into dynamic dispatch on the Stream + assert(vk_rec); // run the user cb! 
if (node->call.fn.type()->kind == Type::OPAQUE_FN_TY) { - CommandBuffer cobuf(*this, ctx, alloc, rec->cbuf); - if (rec->rp.handle) { - rec->prepare_render_pass(alloc); - fill_render_pass_info(rec->rp, 0, cobuf); + CommandBuffer cobuf(*this, ctx, alloc, vk_rec->cbuf); + if (vk_rec->rp.rpci.attachments.size() > 0) { + vk_rec->prepare_render_pass(); + fill_render_pass_info(vk_rec->rp, 0, cobuf); } std::vector opaque_args; @@ -1018,35 +1150,28 @@ namespace vuk { auto& parm = node->call.args[i]; auto& link = impl->res_to_links[parm]; assert(link.urdef); - if (link.urdef.node->kind == Node::VALLOC) { - opaque_args.push_back(link.urdef.node->valloc.args[0].node->constant.value); - opaque_meta.push_back(&link.urdef); - } else if (link.urdef.node->kind == Node::AALLOC) { - opaque_args.push_back(link.urdef.node->aalloc.args[0].node->constant.value); - opaque_meta.push_back(&link.urdef); - } else { - assert(0); - } + opaque_args.push_back(sched.get_value(parm)); + opaque_meta.push_back(&link.urdef); } opaque_rets.resize(node->call.fn.type()->opaque_fn.return_types.size()); (*node->call.fn.type()->opaque_fn.callback)(cobuf, opaque_args, opaque_meta, opaque_rets); - if (rec->rp.handle) { - rec->end_render_pass(alloc); + if (vk_rec->rp.handle) { + vk_rec->end_render_pass(); } } else { assert(0); } #ifdef VUK_DUMP_EXEC print_results(node); - fmt::print(" = call ${} ", static_cast(dst_domain)); + fmt::print(" = call ${} ", static_cast(dst_stream->domain)); if (node->call.fn.type()->debug_info) { fmt::print("<{}> ", node->call.fn.type()->debug_info->name); } print_args(node->call.args); fmt::print("\n"); #endif - sched.done(node, dst_domain); + sched.done(node, dst_stream); } else { // execute deps for (size_t i = 0; i < node->call.args.size(); i++) { auto& arg_ty = node->call.fn.type()->opaque_fn.args[i]; @@ -1069,18 +1194,31 @@ namespace vuk { if (sched.process(item)) { // release is to execute: we need to flush current queue -> end current batch and add signal auto parm = 
node->release.src; - DomainFlagBits src_domain = sched.executed.at(parm.node).domain; + auto src_stream = sched.executed.at(parm.node).stream; + DomainFlagBits src_domain = src_stream->domain; + Stream* dst_stream; + if (node->release.dst_domain == DomainFlagBits::ePE) { + auto& link = sched.res_to_links[node->release.src]; + auto& swp = sched.get_value(link.urdef.node->acquire_next_image.swapchain); + auto it = std::find_if(pe_streams.begin(), pe_streams.end(), [&](auto& pe_stream) { return pe_stream.swp == &swp; }); + assert(it != pe_streams.end()); + dst_stream = &*it; + } else { + dst_stream = recorder.stream_for_domain(node->release.dst_domain); + } + assert(dst_stream); + DomainFlagBits dst_domain = dst_stream->domain; + + Type* parm_ty = parm.type(); + recorder.add_sync(parm, parm_ty, sched.get_value(parm), src_stream, dst_stream, Access::eNone, node->release.dst_access); #ifdef VUK_DUMP_EXEC print_results(node); - fmt::print("release "); + fmt::print("release ${} ", static_cast(node->release.dst_domain)); print_args(std::span{ &node->release.src, 1 }); fmt::print("\n"); #endif auto& acqrel = node->release.release; Access src_access = Access::eNone; - Access dst_access = Access::eNone; - - Type* parm_ty = parm.type(); if (parm_ty->kind == Type::ALIASED_TY) { // this is coming from an output annotated, so we know the source access auto src_arg = parm.node->call.args[parm_ty->aliased.ref_idx]; @@ -1094,27 +1232,50 @@ namespace vuk { } else if (parm_ty->kind == Type::IMBUED_TY) { assert(0); } else { // there is no need to sync (eg. 
declare) - src_access = Access::eNone; } acqrel->last_use = src_access; if (src_domain == DomainFlagBits::eHost) { acqrel->status = Signal::Status::eHostAvailable; } - auto batch = recorder.flush_domain(src_domain); - if (!batch) { - continue; + if (dst_domain == DomainFlagBits::ePE) { + auto& link = sched.res_to_links[node->release.src]; + auto& swp = sched.get_value(link.urdef.node->acquire_next_image.swapchain); + assert(src_stream->domain & DomainFlagBits::eDevice); + auto result = dynamic_cast(src_stream)->present(swp); + // TODO: do something with the result here } - batch->future_signals.push_back(acqrel); + auto batch = recorder.flush_domain(src_domain, acqrel); fmt::print(""); - sched.done(node, DomainFlagBits::eNone); + sched.done(node, src_stream); } else { sched.schedule_dependency(node->release.src, RW::eWrite); } break; - case Node::ACQUIRE_NEXT_IMAGE: - // bundle = *acquire_one(*context, swapchain, (*present_ready)[context->get_frame_count() % 3], (*render_complete)[context->get_frame_count() % 3]); + case Node::ACQUIRE_NEXT_IMAGE: { + if (sched.process(item)) { + auto& swp = sched.get_value(node->acquire_next_image.swapchain); + swp.linear_index = (swp.linear_index + 1) % swp.images.size(); + swp.acquire_result = + ctx.vkAcquireNextImageKHR(ctx.device, swp.swapchain, UINT64_MAX, swp.semaphores[2 * swp.linear_index], VK_NULL_HANDLE, &swp.image_index); + // VK_SUBOPTIMAL_KHR shouldn't stop presentation; it is handled at the end + if (swp.acquire_result != VK_SUCCESS && swp.acquire_result != VK_SUBOPTIMAL_KHR) { + return { expected_error, VkException{ swp.acquire_result } }; + } + + auto pe_stream = &pe_streams.emplace_back(alloc, swp); +#ifdef VUK_DUMP_EXEC + print_results(node); + fmt::print(" = acquire_next_image "); + print_args(std::span{ &node->acquire_next_image.swapchain, 1 }); + fmt::print("\n"); +#endif + sched.done(node, pe_stream); + } else { + sched.schedule_dependency(node->acquire_next_image.swapchain, RW::eWrite); + } break; + } case 
Node::INDEXING: if (sched.process(item)) { // half sync @@ -1125,8 +1286,7 @@ namespace vuk { for (auto i = 0; i < size; i++) { bufs.push_back(&sched.get_value(link.urdef.node->aalloc.args[i + 1])); } - recorder.add_sync( - node->indexing.array, node->indexing.array.type(), bufs.data(), sched.executed.at(node->indexing.array.node).domain, DomainFlagBits::eNone); + recorder.add_sync(node->indexing.array, node->indexing.array.type(), bufs.data(), sched.executed.at(node->indexing.array.node).stream, nullptr); #ifdef VUK_DUMP_EXEC print_results(node); fmt::print(" = "); @@ -1134,7 +1294,7 @@ namespace vuk { fmt::print("[{}]", constant(node->indexing.index)); fmt::print("\n"); #endif - sched.done(node, DomainFlagBits::eNone); // indexing doesn't execute + sched.done(node, nullptr); // indexing doesn't execute } else { sched.schedule_dependency(node->indexing.array, RW::eWrite); sched.schedule_dependency(node->indexing.index, RW::eRead); @@ -1145,22 +1305,6 @@ namespace vuk { } } - recorder.complete_domain(DomainFlagBits::eGraphicsQueue); - // complete_domain(DomainFlagBits::eComputeQueue); - recorder.complete_domain(DomainFlagBits::eTransferQueue); - - // RESOLVE SWAPCHAIN DYNAMICITY - // bind swapchain attachment images & ivs - /*for (auto& bound : impl->bound_attachments) { - if (bound.type == AttachmentInfo::Type::eSwapchain && bound.parent_attachment == 0) { - auto it = std::find_if(swp_with_index.begin(), swp_with_index.end(), [boundb = &bound](auto& t) { return t.first == boundb->swapchain; }); - bound.attachment.image_view = it->first->image_views[it->second]; - bound.attachment.image = it->first->images[it->second]; - bound.attachment.extent = Dimension3D::absolute(it->first->extent); - bound.attachment.sample_count = vuk::Samples::e1; - } - }*/ - /* INFERENCE // pre-inference: which IAs are in which FBs? for (auto& rp : impl->rpis) { @@ -1173,8 +1317,8 @@ namespace vuk { ia.base_layer = ia.base_layer == VK_REMAINING_ARRAY_LAYERS ? 
0 : ia.base_layer; ia.layer_count = - ia.layer_count == VK_REMAINING_ARRAY_LAYERS ? 1 : ia.layer_count; // TODO: this prevents inference later on, so this means we are doing it too early - ia.base_level = ia.base_level == VK_REMAINING_MIP_LEVELS ? 0 : ia.base_level; + ia.layer_count == VK_REMAINING_ARRAY_LAYERS ? 1 : ia.layer_count; // TODO: this prevents inference later on, so this means we are doing it too + early ia.base_level = ia.base_level == VK_REMAINING_MIP_LEVELS ? 0 : ia.base_level; if (ia.view_type == ImageViewType::eInfer) { if (ia.layer_count > 1) { @@ -1394,8 +1538,8 @@ namespace vuk { infer_progress = true; // infer IA -> FB - if (ia.sample_count == Samples::eInfer && (ia.extent.extent.width == 0 && ia.extent.extent.height == 0)) { // this IA is not helpful for FB inference - continue; + if (ia.sample_count == Samples::eInfer && (ia.extent.extent.width == 0 && ia.extent.extent.height == 0)) { // this IA is not helpful for FB + inference continue; } for (auto* rpi : atti.rp_uses.to_span(impl->attachment_rp_references)) { auto& fbci = rpi->fbci; @@ -1523,199 +1667,7 @@ namespace vuk { return { expected_error, RenderGraphException{ msg.str() } }; } */ - // acquire the render passes - /* - for (auto& rp : impl->rpis) { - if (rp.attachments.size() == 0) { - continue; - } - - for (auto& attrpinfo : rp.attachments.to_span(impl->rp_infos)) { - attrpinfo.description.format = (VkFormat)attrpinfo.attachment_info->attachment.format; - attrpinfo.description.samples = (VkSampleCountFlagBits)attrpinfo.attachment_info->attachment.sample_count.count; - rp.rpci.attachments.push_back(attrpinfo.description); - } - - rp.rpci.attachmentCount = (uint32_t)rp.rpci.attachments.size(); - rp.rpci.pAttachments = rp.rpci.attachments.data(); - - auto result = alloc.allocate_render_passes(std::span{ &rp.handle, 1 }, std::span{ &rp.rpci, 1 }); - // drop render pass immediately - if (result) { - alloc.deallocate(std::span{ &rp.handle, 1 }); - } - } - - // create buffers - for (auto& 
bound : impl->bound_buffers) { - if (bound.buffer.buffer == VK_NULL_HANDLE) { - BufferCreateInfo bci{ .mem_usage = bound.buffer.memory_usage, .size = bound.buffer.size, .alignment = 1 }; // TODO: alignment? - auto allocator = bound.allocator ? *bound.allocator : alloc; - auto buf = allocate_buffer(allocator, bci); - if (!buf) { - return buf; - } - bound.buffer = **buf; - } - } - - // create non-attachment images - for (auto& bound : impl->bound_attachments) { - if (!bound.attachment.image && bound.parent_attachment == 0) { - auto allocator = bound.allocator ? *bound.allocator : alloc; - assert(bound.attachment.usage != ImageUsageFlags{}); - auto img = allocate_image(allocator, bound.attachment); - if (!img) { - return img; - } - bound.attachment.image = **img; - ctx.set_name(bound.attachment.image.image, bound.name.name); - } - } - - // create framebuffers, create & bind attachments - for (auto& rp : impl->rpis) { - if (rp.attachments.size() == 0) - continue; - - auto& ivs = rp.fbci.attachments; - std::vector vkivs; - - Extent2D fb_extent = Extent2D{ rp.fbci.width, rp.fbci.height }; - - // create internal attachments; bind attachments to fb - std::optional fb_layer_count; - for (auto& attrpinfo : rp.attachments.to_span(impl->rp_infos)) { - auto& bound = *attrpinfo.attachment_info; - uint32_t base_layer = bound.attachment.base_layer + bound.image_subrange.base_layer; - uint32_t layer_count = bound.image_subrange.layer_count == VK_REMAINING_ARRAY_LAYERS ? 
bound.attachment.layer_count : bound.image_subrange.layer_count; - assert(base_layer + layer_count <= bound.attachment.base_layer + bound.attachment.layer_count); - fb_layer_count = layer_count; - - auto& specific_attachment = bound.attachment; - if (bound.parent_attachment < 0) { - specific_attachment = impl->get_bound_attachment(bound.parent_attachment).attachment; - specific_attachment.image_view = {}; - } - if (specific_attachment.image_view == ImageView{}) { - specific_attachment.base_layer = base_layer; - if (specific_attachment.view_type == ImageViewType::eCube) { - if (layer_count > 1) { - specific_attachment.view_type = ImageViewType::e2DArray; - } else { - specific_attachment.view_type = ImageViewType::e2D; - } - } - specific_attachment.layer_count = layer_count; - assert(specific_attachment.level_count == 1); - - auto allocator = bound.allocator ? *bound.allocator : alloc; - auto iv = allocate_image_view(allocator, specific_attachment); - if (!iv) { - return iv; - } - specific_attachment.image_view = **iv; - auto name = std::string("ImageView: RenderTarget ") + std::string(bound.name.name.to_sv()); - ctx.set_name(specific_attachment.image_view.payload, Name(name)); - } - - ivs.push_back(specific_attachment.image_view); - vkivs.push_back(specific_attachment.image_view.payload); - } - - rp.fbci.renderPass = rp.handle; - rp.fbci.pAttachments = &vkivs[0]; - rp.fbci.width = fb_extent.width; - rp.fbci.height = fb_extent.height; - assert(fb_extent.width > 0); - assert(fb_extent.height > 0); - rp.fbci.attachmentCount = (uint32_t)vkivs.size(); - rp.fbci.layers = *fb_layer_count; - - Unique fb(alloc); - VUK_DO_OR_RETURN(alloc.allocate_framebuffers(std::span{ &*fb, 1 }, std::span{ &rp.fbci, 1 })); - rp.framebuffer = *fb; // queue framebuffer for destruction - } - - for (auto& attachment_info : impl->bound_attachments) { - if (attachment_info.attached_future && attachment_info.parent_attachment == 0) { - ImageAttachment att = attachment_info.attachment; - 
attachment_info.attached_future->result = att; - } - } - - for (auto& buffer_info : impl->bound_buffers) { - if (buffer_info.attached_future) { - Buffer buf = buffer_info.buffer; - buffer_info.attached_future->result = buf; - } - } - - SubmitBundle sbundle; - - auto record_batch = [&alloc, this](std::span passes, DomainFlagBits domain) -> Result { - SubmitBatch sbatch{ .domain = domain }; - auto partition_it = passes.begin(); - while (partition_it != passes.end()) { - auto batch_index = (*partition_it)->batch_index; - auto new_partition_it = std::partition_point(partition_it, passes.end(), [batch_index](ScheduledItem* rpi) { return rpi->batch_index == batch_index; }); - auto partition_span = std::span(partition_it, new_partition_it); - auto si = record_single_submit(alloc, partition_span, domain); - if (!si) { - return si; - } - sbatch.submits.emplace_back(*si); - partition_it = new_partition_it; - } - for (auto& rel : impl->final_releases) { - if (rel.dst_use.domain & domain) { - sbatch.submits.back().future_signals.push_back(rel.signal); - } - } - - std::erase_if(impl->final_releases, [=](auto& rel) { return rel.dst_use.domain & domain; }); - return { expected_value, sbatch }; - }; - - // record cbufs - // assume that rpis are partitioned wrt batch_index - - if (impl->graphics_passes.size() > 0) { - auto batch = record_batch(impl->graphics_passes, DomainFlagBits::eGraphicsQueue); - if (!batch) { - return batch; - } - sbundle.batches.emplace_back(std::move(*batch)); - } - - if (impl->compute_passes.size() > 0) { - auto batch = record_batch(impl->compute_passes, DomainFlagBits::eComputeQueue); - if (!batch) { - return batch; - } - sbundle.batches.emplace_back(std::move(*batch)); - } - - if (impl->transfer_passes.size() > 0) { - auto batch = record_batch(impl->transfer_passes, DomainFlagBits::eTransferQueue); - if (!batch) { - return batch; - } - sbundle.batches.emplace_back(std::move(*batch)); - }*/ - - return { expected_value, std::move(recorder.sbundle) }; - } - 
- Result ExecutableRenderGraph::is_resource_image_in_general_layout(const NameReference& name_ref, PassInfo* pass_info) { - return { expected_error, RenderGraphException{} }; - - /* for (auto& r : pass_info->resources.to_span(impl->resources)) { - if (r.type == Resource::Type::eImage && r.original_name == name_ref.name.name && r.foreign == name_ref.rg) { - return { expected_value, r.promoted_to_general }; - } - } - return { expected_error, errors::make_cbuf_references_undeclared_resource(*pass_info, Resource::Type::eImage, name_ref.name.name) };*/ + return { expected_value }; } } // namespace vuk diff --git a/src/RenderGraph.cpp b/src/RenderGraph.cpp index d5d7f989..420d08b5 100644 --- a/src/RenderGraph.cpp +++ b/src/RenderGraph.cpp @@ -126,6 +126,19 @@ namespace vuk { res_to_links[first(&node)].def = first(&node); res_to_links[first(&node)].type = first(&node).type()->array.T; break; + + case Node::ACQUIRE_NEXT_IMAGE: + res_to_links[first(&node)].def = first(&node); + res_to_links[first(&node)].type = first(&node).type(); + break; + + case Node::PRESENT: + res_to_links[node.present.src].undef = &node; + res_to_links[{ &node, 0 }].def = { &node, 0 }; + res_to_links[node.present.src].next = &res_to_links[{ &node, 0 }]; + res_to_links[{ &node, 0 }].prev = &res_to_links[node.present.src]; + break; + default: assert(0); } diff --git a/src/RenderGraphImpl.hpp b/src/RenderGraphImpl.hpp index a4d1be1f..e60ca977 100644 --- a/src/RenderGraphImpl.hpp +++ b/src/RenderGraphImpl.hpp @@ -52,9 +52,31 @@ namespace vuk { using DefUseMap = std::unordered_map; + struct Stream { + Stream(Allocator alloc, DomainFlagBits domain) : alloc(alloc), domain(domain) {} + virtual ~Stream() {} + Allocator alloc; + std::vector dependencies; + DomainFlagBits domain; + + virtual void add_dependency(Stream* dep) = 0; + virtual void sync_deps() = 0; + + virtual void synch_image(ImageAttachment& img_att, QueueResourceUse src_use, QueueResourceUse dst_use, Access dst_access, void* tag) = 0; + virtual 
void synch_memory(QueueResourceUse src_use, QueueResourceUse dst_use, void* tag) = 0; + + struct SubmitResult { + Signal* signal = nullptr; + VkSemaphore sema_wait; + }; + + virtual Result submit(Signal* signal = nullptr) = 0; + }; + struct ScheduledItem { Node* execable; DomainFlagBits scheduled_domain; + Stream* scheduled_stream; int32_t is_waited_on = 0; size_t batch_index; @@ -106,8 +128,6 @@ namespace vuk { std::vector> rpis; std::span transfer_passes, compute_passes, graphics_passes; - void merge_diverge_passes(std::vector>& passes); - Result diagnose_unheaded_chains(); Result schedule_intra_queue(std::span> rgs, const RenderGraphCompileOptions& compile_options); Result perform_inference(std::span> rgs, const RenderGraphCompileOptions& compile_options); @@ -117,8 +137,11 @@ namespace vuk { Result relink_subchains(); Result fix_subchains(); - static VkImageMemoryBarrier2KHR - emit_image_barrier(QueueResourceUse last_use, QueueResourceUse current_use, const Subrange::Image& subrange, ImageAspectFlags aspect, bool is_release = false); + static VkImageMemoryBarrier2KHR emit_image_barrier(QueueResourceUse last_use, + QueueResourceUse current_use, + const Subrange::Image& subrange, + ImageAspectFlags aspect, + bool is_release = false); static VkMemoryBarrier2KHR emit_memory_barrier(QueueResourceUse last_use, QueueResourceUse current_use); // opt passes diff --git a/src/RenderGraphUtil.hpp b/src/RenderGraphUtil.hpp index f82cbeac..5620be8a 100644 --- a/src/RenderGraphUtil.hpp +++ b/src/RenderGraphUtil.hpp @@ -161,6 +161,12 @@ namespace vuk { qr.layout = combine_layout(qr.layout, ImageLayout::eTransferDstOptimal); } + if (ia & ePresent) { + qr.stages = PipelineStageFlagBits::eNone; + qr.access = {}; + qr.layout = ImageLayout::ePresentSrcKHR; + } + qr.domain = domain; return qr; } diff --git a/src/Util.cpp b/src/Util.cpp index c9652507..b6d06774 100644 --- a/src/Util.cpp +++ b/src/Util.cpp @@ -4,6 +4,7 @@ #include "vuk/Future.hpp" #include "vuk/RenderGraph.hpp" 
#include "vuk/SampledImage.hpp" +#include "vuk/Swapchain.hpp" #include #ifndef DOCTEST_CONFIG_DISABLE @@ -14,61 +15,10 @@ #include namespace vuk { - struct QueueImpl { - // TODO: this recursive mutex should be changed to better queue handling - std::recursive_mutex queue_lock; - PFN_vkQueueSubmit queueSubmit; - PFN_vkQueueSubmit2KHR queueSubmit2KHR; - TimelineSemaphore submit_sync; - VkQueue queue; - std::array, 3> last_device_waits; - std::atomic last_host_wait; - uint32_t family_index; - - QueueImpl(PFN_vkQueueSubmit fn1, PFN_vkQueueSubmit2KHR fn2, VkQueue queue, uint32_t queue_family_index, TimelineSemaphore ts) : - queueSubmit(fn1), - queueSubmit2KHR(fn2), - submit_sync(ts), - queue(queue), - family_index(queue_family_index) {} - }; - - Queue::Queue(PFN_vkQueueSubmit fn1, PFN_vkQueueSubmit2KHR fn2, VkQueue queue, uint32_t queue_family_index, TimelineSemaphore ts) : - impl(new QueueImpl(fn1, fn2, queue, queue_family_index, ts)) {} - Queue::~Queue() { - delete impl; - } - - Queue::Queue(Queue&& o) noexcept : impl(std::exchange(o.impl, nullptr)) {} - - Queue& Queue::operator=(Queue&& o) noexcept { - impl = std::exchange(o.impl, nullptr); - return *this; - } - - TimelineSemaphore& Queue::get_submit_sync() { - return impl->submit_sync; - } - - std::recursive_mutex& Queue::get_queue_lock() { - return impl->queue_lock; - } - - Result Queue::submit(std::span sis, VkFence fence) { - VkResult result = impl->queueSubmit2KHR(impl->queue, (uint32_t)sis.size(), sis.data(), fence); - if (result != VK_SUCCESS) { - return { expected_error, VkException{ result } }; - } - return { expected_value }; - } - - Result Queue::submit(std::span sis, VkFence fence) { - std::lock_guard _(impl->queue_lock); - VkResult result = impl->queueSubmit(impl->queue, (uint32_t)sis.size(), sis.data(), fence); - if (result != VK_SUCCESS) { - return { expected_error, VkException{ result } }; - } - return { expected_value }; + Swapchain::Swapchain(Allocator alloc, size_t image_count) : + 
allocator(alloc){ + semaphores.resize(image_count * 2); + allocator.allocate_semaphores(std::span(semaphores)); } Result Context::wait_for_domains(std::span queue_waits) { @@ -77,14 +27,15 @@ namespace vuk { std::array values = {}; uint32_t count = 0; - for (auto& [domain, v] : queue_waits) { - auto idx = domain_to_queue_index(domain); + for (auto& [executor, v] : queue_waits) { + assert(executor->type == Executor::Type::eVulkanDeviceQueue); + auto vkq = static_cast(executor); + auto idx = vkq->get_queue_family_index(); auto& mapping = domain_to_sema_index[idx]; if (mapping == -1) { mapping = count++; } - auto& q = domain_to_queue(domain); - queue_timeline_semaphores[mapping] = q.impl->submit_sync.semaphore; + queue_timeline_semaphores[mapping] = vkq->get_semaphore(); values[mapping] = values[mapping] > v ? values[mapping] : v; } @@ -93,10 +44,6 @@ namespace vuk { swi.pValues = values.data(); swi.semaphoreCount = count; VkResult result = this->vkWaitSemaphores(device, &swi, UINT64_MAX); - for (auto [domain, v] : queue_waits) { - auto& q = domain_to_queue(domain); - q.impl->last_host_wait.store(v); - } if (result != VK_SUCCESS) { return { expected_error, VkException{ result } }; } @@ -109,42 +56,7 @@ namespace vuk { return erg; } std::pair erg_and_alloc = std::pair{ &allocator, &*erg }; - return execute_submit(allocator, std::span(&erg_and_alloc, 1), {}, {}, {}); - } - - Result> execute(std::span> ergs, - std::vector> swapchains_with_indexes) { - std::vector bundles; - for (auto& [alloc, rg] : ergs) { - auto sbundle = rg->execute(*alloc, swapchains_with_indexes); - if (!sbundle) { - return Result>(std::move(sbundle)); - } - bool has_waits = false; - for (auto& batch : sbundle->batches) { - for (auto& s : batch.submits) { - if (s.relative_waits.size() > 0) { - has_waits = true; - } - } - } - // in the case where there are no waits in the entire bundle, we can merge all the submits together - if (!has_waits && bundles.size() > 0) { - auto& last = bundles.back(); - for 
(auto& batch : sbundle->batches) { - auto tgt_domain = batch.domain; - auto it = std::find_if(last.batches.begin(), last.batches.end(), [=](auto& batch) { return batch.domain == tgt_domain; }); - if (it != last.batches.end()) { - it->submits.insert(it->submits.end(), batch.submits.begin(), batch.submits.end()); - } else { - last.batches.emplace_back(batch); - } - } - } else { - bundles.push_back(*sbundle); - } - } - return { expected_value, bundles }; + return execute_submit(allocator, std::span(&erg_and_alloc, 1)); } std::string_view to_name(vuk::DomainFlagBits d) { @@ -160,393 +72,24 @@ namespace vuk { } } - std::string to_dot(SubmitBundle& bundle) { - std::stringstream ss; - ss << "digraph {"; - for (auto& batch : bundle.batches) { - ss << "subgraph cluster_" << to_name(batch.domain) << " {"; - char name = 'A'; - - for (size_t i = 0; i < batch.submits.size(); i++) { - ss << to_name(batch.domain)[0] << name << ";"; - name++; - } - ss << "}"; - } - - for (auto& batch : bundle.batches) { - char name = 'A'; - - for (auto& sub : batch.submits) { - for (auto& wait : sub.relative_waits) { - char dst_name = wait.second == 0 ? 
'X' : 'A' + (char)wait.second - 1; - ss << to_name(batch.domain)[0] << name << "->" << to_name(wait.first)[0] << dst_name << ";"; - } - name++; - } - } - - ss << "}"; - return ss.str(); - } - - void flatten_transfer_and_compute_onto_graphics(SubmitBundle& bundle) { - if (bundle.batches.empty()) { - return; - } - auto domain_to_index = [](vuk::DomainFlagBits d) { - switch (d) { - case DomainFlagBits::eTransferQueue: - return 2; - case DomainFlagBits::eGraphicsQueue: - return 0; - case DomainFlagBits::eComputeQueue: - return 1; - default: - assert(0); - return 4; - } - }; - size_t num_submits = 0; - for (auto& batch : bundle.batches) { - num_submits += batch.submits.size(); - } - SubmitBatch dst_batch{ .domain = DomainFlagBits::eGraphicsQueue }; - uint64_t progress[3] = {}; - while (true) { - for (auto& batch : bundle.batches) { - auto queue = (DomainFlagBits)(batch.domain & DomainFlagBits::eQueueMask).m_mask; - auto our_id = domain_to_index(queue); - for (size_t i = progress[our_id]; i < batch.submits.size(); i++) { - auto b = batch.submits[i]; - bool all_waits_satisfied = true; - // check if all waits can be satisfied for this submit - for (auto& [queue, wait_id] : b.relative_waits) { - auto q_id = domain_to_index((DomainFlagBits)(queue & DomainFlagBits::eQueueMask).m_mask); - auto& progress_on_wait_queue = progress[q_id]; - if (progress_on_wait_queue < wait_id) { - all_waits_satisfied = false; - break; - } - } - if (all_waits_satisfied) { - if (!b.relative_waits.empty()) { - b.relative_waits = { { DomainFlagBits::eGraphicsQueue, dst_batch.submits.size() } }; // collapse into a single wait - } - dst_batch.submits.emplace_back(b); - progress[our_id]++; // retire this batch - } else { - // couldn't make progress - // break here is not correct, because there might be multiple waits with the same rank - // TODO: we need to break here anyways for unsorted - we need to sort - break; - } - } - } - if (dst_batch.submits.size() == num_submits) { // we have moved all the 
submits to the dst_batch - break; - } - } - bundle.batches = { dst_batch }; - } - -#ifndef DOCTEST_CONFIG_DISABLE - TEST_CASE("testing flattening submit graphs") { - { - SubmitBundle empty{}; - auto before = to_dot(empty); - flatten_transfer_and_compute_onto_graphics(empty); - auto after = to_dot(empty); - CHECK(before == after); - } - { - // transfer : TD -> TC -> TB -> TA - // everything moved to graphics - SubmitBundle only_transfer{ .batches = { SubmitBatch{ .domain = vuk::DomainFlagBits::eTransferQueue, - .submits = { { .relative_waits = {} }, - { .relative_waits = { { vuk::DomainFlagBits::eTransferQueue, 1 } } }, - { .relative_waits = { { vuk::DomainFlagBits::eTransferQueue, 2 } } }, - { .relative_waits = { { vuk::DomainFlagBits::eTransferQueue, 3 } } } } } } }; - - auto before = to_dot(only_transfer); - flatten_transfer_and_compute_onto_graphics(only_transfer); - auto after = to_dot(only_transfer); - CHECK(after == "digraph {subgraph cluster_Graphics {GA;GB;GC;GD;}GB->GA;GC->GB;GD->GC;}"); - } - { - // transfer : TD TC -> TB TA - // v ^ v - // graphics : GD->GC GB->GA - // flattens to - // graphics : TD -> GD -> GC -> TC -> TB -> GB -> GA TA - SubmitBundle two_queue{ .batches = { SubmitBatch{ .domain = vuk::DomainFlagBits::eTransferQueue, - .submits = { { .relative_waits = {} }, - { .relative_waits = { { vuk::DomainFlagBits::eGraphicsQueue, 2 } } }, - { .relative_waits = { { vuk::DomainFlagBits::eTransferQueue, 2 } } }, - { .relative_waits = { { vuk::DomainFlagBits::eGraphicsQueue, 4 } } } } }, - SubmitBatch{ .domain = vuk::DomainFlagBits::eGraphicsQueue, - .submits = { { .relative_waits = {} }, - { .relative_waits = { { vuk::DomainFlagBits::eGraphicsQueue, 1 } } }, - { .relative_waits = { { vuk::DomainFlagBits::eTransferQueue, 3 } } }, - { .relative_waits = { { vuk::DomainFlagBits::eGraphicsQueue, 3 } } } } } } }; - - auto before = to_dot(two_queue); - flatten_transfer_and_compute_onto_graphics(two_queue); - auto after = to_dot(two_queue); - CHECK(after == 
"digraph {subgraph cluster_Graphics {GA;GB;GC;GD;GE;GF;GG;GH;}GC->GB;GD->GC;GE->GD;GF->GE;GG->GF;GH->GG;}"); - } - } -#endif - - Result submit(Allocator& allocator, SubmitBundle bundle, VkSemaphore present_rdy, VkSemaphore render_complete) { - Context& ctx = allocator.get_context(); - - vuk::DomainFlags used_domains; - for (auto& batch : bundle.batches) { - used_domains |= batch.domain; - } - - std::array queue_progress_references; - std::unique_lock gfx_lock; - if (used_domains & DomainFlagBits::eGraphicsQueue) { - queue_progress_references[ctx.domain_to_queue_index(DomainFlagBits::eGraphicsQueue)] = *ctx.graphics_queue->impl->submit_sync.value; - gfx_lock = std::unique_lock{ ctx.graphics_queue->impl->queue_lock }; - } - std::unique_lock compute_lock; - if (used_domains & DomainFlagBits::eComputeQueue) { - queue_progress_references[ctx.domain_to_queue_index(DomainFlagBits::eComputeQueue)] = *ctx.compute_queue->impl->submit_sync.value; - compute_lock = std::unique_lock{ ctx.compute_queue->impl->queue_lock }; - } - std::unique_lock transfer_lock; - if (used_domains & DomainFlagBits::eTransferQueue) { - queue_progress_references[ctx.domain_to_queue_index(DomainFlagBits::eTransferQueue)] = *ctx.transfer_queue->impl->submit_sync.value; - transfer_lock = std::unique_lock{ ctx.transfer_queue->impl->queue_lock }; - } - bool needs_flatten = ((used_domains & DomainFlagBits::eTransferQueue) && - (ctx.domain_to_queue_index(DomainFlagBits::eTransferQueue) == ctx.domain_to_queue_index(DomainFlagBits::eGraphicsQueue) || - ctx.domain_to_queue_index(DomainFlagBits::eTransferQueue) == ctx.domain_to_queue_index(DomainFlagBits::eComputeQueue))) || - ((used_domains & DomainFlagBits::eComputeQueue) && - (ctx.domain_to_queue_index(DomainFlagBits::eComputeQueue) == ctx.domain_to_queue_index(DomainFlagBits::eGraphicsQueue))); - if (needs_flatten) { - bool needs_transfer_compute_flatten = - ctx.domain_to_queue_index(DomainFlagBits::eTransferQueue) == 
ctx.domain_to_queue_index(DomainFlagBits::eGraphicsQueue) && - ctx.domain_to_queue_index(DomainFlagBits::eComputeQueue) == ctx.domain_to_queue_index(DomainFlagBits::eGraphicsQueue); - if (needs_transfer_compute_flatten) { - flatten_transfer_and_compute_onto_graphics(bundle); - } else { - assert(false && "NYI"); - } - } else { - if (bundle.batches.size() > 1) { - std::swap(bundle.batches[0], bundle.batches[1]); // FIXME: silence some false positive validation - } - } - for (SubmitBatch& batch : bundle.batches) { - auto domain = batch.domain; - Queue& queue = ctx.domain_to_queue(domain); - Unique fence(allocator); - VUK_DO_OR_RETURN(allocator.allocate_fences({ &*fence, 1 })); - - uint64_t num_cbufs = 0; - uint64_t num_waits = 1; // 1 extra for present_rdy - for (uint64_t i = 0; i < batch.submits.size(); i++) { - SubmitInfo& submit_info = batch.submits[i]; - num_cbufs += submit_info.command_buffers.size(); - num_waits += submit_info.relative_waits.size(); - num_waits += submit_info.absolute_waits.size(); - } - - std::vector sis; - std::vector cbufsis; - cbufsis.reserve(num_cbufs); - std::vector wait_semas; - wait_semas.reserve(num_waits); - std::vector signal_semas; - signal_semas.reserve(batch.submits.size() + 1); // 1 extra for render_complete - - for (uint64_t i = 0; i < batch.submits.size(); i++) { - SubmitInfo& submit_info = batch.submits[i]; - - for (auto& fut : submit_info.future_signals) { - fut->status = Signal::Status::eSynchronizable; - } - - if (submit_info.command_buffers.size() == 0) { - continue; - } - - for (uint64_t i = 0; i < submit_info.command_buffers.size(); i++) { - cbufsis.emplace_back( - VkCommandBufferSubmitInfoKHR{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR, .commandBuffer = submit_info.command_buffers[i] }); - } - - uint32_t wait_sema_count = 0; - for (auto& w : submit_info.relative_waits) { - VkSemaphoreSubmitInfoKHR ssi{ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; - auto& wait_queue = 
ctx.domain_to_queue(w.first).impl->submit_sync; - ssi.semaphore = wait_queue.semaphore; - ssi.value = queue_progress_references[ctx.domain_to_queue_index(w.first)] + w.second; - ssi.stageMask = (VkPipelineStageFlagBits2KHR)PipelineStageFlagBits::eAllCommands; - wait_semas.emplace_back(ssi); - wait_sema_count++; - } - for (auto& w : submit_info.absolute_waits) { - VkSemaphoreSubmitInfoKHR ssi{ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; - auto& wait_queue = ctx.domain_to_queue(w.first).impl->submit_sync; - ssi.semaphore = wait_queue.semaphore; - ssi.value = w.second; - ssi.stageMask = (VkPipelineStageFlagBits2KHR)PipelineStageFlagBits::eAllCommands; - wait_semas.emplace_back(ssi); - wait_sema_count++; - } - if (domain == DomainFlagBits::eGraphicsQueue && i == 0 && present_rdy != VK_NULL_HANDLE) { // TODO: for first cbuf only that refs the swapchain attment - VkSemaphoreSubmitInfoKHR ssi{ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; - ssi.semaphore = present_rdy; - ssi.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR; - wait_semas.emplace_back(ssi); - wait_sema_count++; - } - - VkSemaphoreSubmitInfoKHR ssi{ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; - ssi.semaphore = queue.impl->submit_sync.semaphore; - ssi.value = ++(*queue.impl->submit_sync.value); - - ssi.stageMask = (VkPipelineStageFlagBits2KHR)PipelineStageFlagBits::eAllCommands; - - for (auto& fut : submit_info.future_signals) { - fut->status = Signal::Status::eSynchronizable; - fut->source = { domain, ssi.value }; - } - - uint32_t signal_sema_count = 1; - signal_semas.emplace_back(ssi); - if (domain == DomainFlagBits::eGraphicsQueue && i == batch.submits.size() - 1 && - render_complete != VK_NULL_HANDLE) { // TODO: for final cbuf only that refs the swapchain attment - ssi.semaphore = render_complete; - ssi.value = 0; // binary sema - signal_semas.emplace_back(ssi); - signal_sema_count++; - } - - VkSubmitInfo2KHR& si = sis.emplace_back(VkSubmitInfo2KHR{ 
VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR }); - VkCommandBufferSubmitInfoKHR* p_cbuf_infos = &cbufsis.back() - (submit_info.command_buffers.size() - 1); - VkSemaphoreSubmitInfoKHR* p_wait_semas = wait_sema_count > 0 ? &wait_semas.back() - (wait_sema_count - 1) : nullptr; - VkSemaphoreSubmitInfoKHR* p_signal_semas = &signal_semas.back() - (signal_sema_count - 1); - - si.pWaitSemaphoreInfos = p_wait_semas; - si.waitSemaphoreInfoCount = wait_sema_count; - si.pCommandBufferInfos = p_cbuf_infos; - si.commandBufferInfoCount = (uint32_t)submit_info.command_buffers.size(); - si.pSignalSemaphoreInfos = p_signal_semas; - si.signalSemaphoreInfoCount = signal_sema_count; - } - - VUK_DO_OR_RETURN(queue.submit(std::span{ sis }, *fence)); - } - - return { expected_value }; - } - // assume rgs are independent - they don't reference eachother Result execute_submit(Allocator& allocator, - std::span> rgs, - std::vector> swapchains_with_indexes, - VkSemaphore present_rdy, - VkSemaphore render_complete) { - auto bundles = execute(rgs, swapchains_with_indexes); - if (!bundles) { - return bundles; - } - - for (auto& bundle : *bundles) { - VUK_DO_OR_RETURN(submit(allocator, bundle, present_rdy, render_complete)); + std::span> rgs) { + for (auto& [alloc, rg] : rgs) { + rg->execute(*alloc); } return { expected_value }; } - Result present_to_one(Context& ctx, SwapchainRenderBundle&& bundle) { - VkPresentInfoKHR pi{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR }; - pi.swapchainCount = 1; - pi.pSwapchains = &bundle.swapchain->swapchain; - pi.pImageIndices = &bundle.image_index; - pi.waitSemaphoreCount = 1; - pi.pWaitSemaphores = &bundle.render_complete; - auto present_result = ctx.vkQueuePresentKHR(ctx.graphics_queue->impl->queue, &pi); - if (present_result != VK_SUCCESS && present_result != VK_SUBOPTIMAL_KHR) { - return { expected_error, VkException{ present_result } }; - } - if (present_result == VK_SUBOPTIMAL_KHR || bundle.acquire_result == VK_SUBOPTIMAL_KHR) { - return { expected_value, 
VK_SUBOPTIMAL_KHR }; - } - return { expected_value, VK_SUCCESS }; - } - - Result acquire_one(Allocator& allocator, SwapchainRef swapchain) { - Context& ctx = allocator.get_context(); - Unique> semas(allocator); - VUK_DO_OR_RETURN(allocator.allocate_semaphores(*semas)); - auto [present_rdy, render_complete] = *semas; - - uint32_t image_index = (uint32_t)-1; - VkResult acq_result = ctx.vkAcquireNextImageKHR(ctx.device, swapchain->swapchain, UINT64_MAX, present_rdy, VK_NULL_HANDLE, &image_index); - // VK_SUBOPTIMAL_KHR shouldn't stop presentation; it is handled at the end - if (acq_result != VK_SUCCESS && acq_result != VK_SUBOPTIMAL_KHR) { - return { expected_error, VkException{ acq_result } }; - } - - return { expected_value, SwapchainRenderBundle{ swapchain, image_index, present_rdy, render_complete, acq_result } }; - } - - Result acquire_one(Context& ctx, SwapchainRef swapchain, VkSemaphore present_ready, VkSemaphore render_complete) { - uint32_t image_index = (uint32_t)-1; - VkResult acq_result = ctx.vkAcquireNextImageKHR(ctx.device, swapchain->swapchain, UINT64_MAX, present_ready, VK_NULL_HANDLE, &image_index); - // VK_SUBOPTIMAL_KHR shouldn't stop presentation; it is handled at the end - if (acq_result != VK_SUCCESS && acq_result != VK_SUBOPTIMAL_KHR) { - return { expected_error, VkException{ acq_result } }; - } - - return { expected_value, SwapchainRenderBundle{ swapchain, image_index, present_ready, render_complete, acq_result } }; - } - - Result execute_submit(Allocator& allocator, ExecutableRenderGraph&& rg, SwapchainRenderBundle&& bundle) { - std::vector> swapchains_with_indexes = { { bundle.swapchain, bundle.image_index } }; - - std::pair v = { &allocator, &rg }; - VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 }, swapchains_with_indexes, bundle.present_ready, bundle.render_complete)); - - return { expected_value, std::move(bundle) }; - } - - Result execute_submit_and_present_to_one(Allocator& allocator, ExecutableRenderGraph&& rg, 
SwapchainRef swapchain) { - auto bundle = acquire_one(allocator, swapchain); - if (!bundle) { - return bundle; - } - auto bundle2 = execute_submit(allocator, std::move(rg), std::move(*bundle)); - if (!bundle2) { - return bundle2; - } - return present_to_one(allocator.get_context(), std::move(*bundle2)); - } - Result execute_submit_and_wait(Allocator& allocator, ExecutableRenderGraph&& rg) { Context& ctx = allocator.get_context(); std::pair v = { &allocator, &rg }; - VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 }, {}, {}, {})); + VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 })); ctx.wait_idle(); // TODO: return { expected_value }; } - Result present(Allocator& allocator, Compiler& compiler, SwapchainRef swapchain, FutureBase&& future, RenderGraphCompileOptions compile_options) { - auto ptr = future.get_render_graph(); - auto erg = compiler.link(std::span{ &ptr, 1 }, compile_options); - if (!erg) { - return erg; - } - return execute_submit_and_present_to_one(allocator, std::move(*erg), swapchain); - } - SampledImage make_sampled_image(ImageView iv, SamplerCreateInfo sci) { return { SampledImage::Global{ iv, sci, ImageLayout::eReadOnlyOptimalKHR } }; } @@ -574,7 +117,7 @@ namespace vuk { return erg; } std::pair v = { &allocator, &*erg }; - VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 }, {}, {}, {})); + VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 })); assert(acqrel.status != Signal::Status::eDisarmed); if (acqrel.status == Signal::Status::eSynchronizable) { allocator.get_context().wait_for_domains(std::span{ &acqrel.source, 1 }); @@ -596,7 +139,7 @@ namespace vuk { return erg; } std::pair v = { &allocator, &*erg }; - VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 }, {}, {}, {})); + VUK_DO_OR_RETURN(execute_submit(allocator, std::span{ &v, 1 })); return { expected_value }; } } diff --git a/src/runtime/vk/VulkanQueueExecutor.cpp b/src/runtime/vk/VulkanQueueExecutor.cpp new file mode 100644 index 
00000000..0b0c0fd3 --- /dev/null +++ b/src/runtime/vk/VulkanQueueExecutor.cpp @@ -0,0 +1,252 @@ +#include "vuk/runtime/vk/VulkanQueueExecutor.hpp" +#include "vuk/Allocator.hpp" +#include "vuk/Exception.hpp" +#include "vuk/RenderGraph.hpp" + +#include +#include +#include +#include + +namespace vuk::rtvk { + struct QueueImpl { /* Private state behind QueueExecutor: the VkQueue, the device entry points it calls, the lock guarding it and its timeline semaphore. */ + VkDevice device; + // TODO: this recursive mutex should be changed to better queue handling + std::recursive_mutex queue_lock; + PFN_vkQueueSubmit queueSubmit; + PFN_vkQueueSubmit2KHR queueSubmit2KHR; + PFN_vkQueueWaitIdle queueWaitIdle; + PFN_vkDestroySemaphore destroySemaphore; + PFN_vkQueuePresentKHR queuePresentKHR; + TimelineSemaphore submit_sync; + VkQueue queue; + uint32_t family_index; + + QueueImpl(VkDevice device, const FunctionPointers& fps, VkQueue queue, uint32_t queue_family_index, TimelineSemaphore ts) : + device(device), + queueSubmit(fps.vkQueueSubmit), + queueSubmit2KHR(fps.vkQueueSubmit2KHR), + queueWaitIdle(fps.vkQueueWaitIdle), + destroySemaphore(fps.vkDestroySemaphore), + queuePresentKHR(fps.vkQueuePresentKHR), + submit_sync(ts), + queue(queue), + family_index(queue_family_index) {} + + ~QueueImpl() { /* NOTE(review): only the semaphore is destroyed; the counter allocated with new in create_vkqueue_executor is not freed here - confirm who owns submit_sync.value */ + destroySemaphore(device, submit_sync.semaphore, nullptr); + } + }; + + std::unique_ptr + create_vkqueue_executor(const FunctionPointers& fps, VkDevice device, VkQueue queue, uint32_t queue_family_index, DomainFlagBits domain) { /* Creates the queue's timeline semaphore (initial value 0), names the queue when the debug-utils entry point is available, and wraps everything in a QueueExecutor; returns null if the semaphore cannot be created. */ + TimelineSemaphore ts; + VkSemaphoreCreateInfo sci{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO }; + VkSemaphoreTypeCreateInfo stci{ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO }; + stci.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; + stci.initialValue = 0; + sci.pNext = &stci; + VkResult res = fps.vkCreateSemaphore(device, &sci, nullptr, &ts.semaphore); + if (res != VK_SUCCESS) { + return { nullptr }; + } + ts.value = new uint64_t{ 0 }; + + if (fps.vkSetDebugUtilsObjectNameEXT) { + VkDebugUtilsObjectNameInfoEXT info = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT }; +
switch (domain) { + case DomainFlagBits::eGraphicsQueue: + info.pObjectName = "Graphics Queue"; + break; + case DomainFlagBits::eComputeQueue: + info.pObjectName = "Compute Queue"; + break; + case DomainFlagBits::eTransferQueue: + info.pObjectName = "Transfer Queue"; + break; + } + info.objectType = VK_OBJECT_TYPE_QUEUE; + info.objectHandle = reinterpret_cast(queue); + fps.vkSetDebugUtilsObjectNameEXT(device, &info); + } + + return std::make_unique(device, domain, fps, queue, queue_family_index, ts); + } + + QueueExecutor::QueueExecutor(VkDevice device, + DomainFlagBits domain, + const FunctionPointers& fps, + VkQueue queue, + uint32_t queue_family_index, + TimelineSemaphore ts) : + Executor(Executor::Type::eVulkanDeviceQueue, domain, reinterpret_cast(queue)), + impl(new QueueImpl(device, fps, queue, queue_family_index, ts)) {} + + QueueExecutor::~QueueExecutor() { + delete impl; + } + + QueueExecutor::QueueExecutor(QueueExecutor&& o) noexcept : Executor(o.type, o.tag.domain, o.tag.executor_id), impl(std::exchange(o.impl, nullptr)) {} + + QueueExecutor& QueueExecutor::operator=(QueueExecutor&& o) noexcept { /* Move-assign: takes over o's impl and tag, leaving o without an impl. */ + delete std::exchange(impl, std::exchange(o.impl, nullptr)); /* free the impl we owned (it used to leak); the nested exchange keeps self-move a no-op */ + type = o.type; + tag.domain = o.tag.domain; + tag.executor_id = o.tag.executor_id; + return *this; + } + + Result QueueExecutor::submit(std::span sis, VkFence fence) { + std::lock_guard _(impl->queue_lock); + VkResult result = impl->queueSubmit2KHR(impl->queue, (uint32_t)sis.size(), sis.data(), fence); + if (result != VK_SUCCESS) { + return { expected_error, VkException{ result } }; + } + return { expected_value }; + } + + Result QueueExecutor::submit(std::span sis, VkFence fence) { + std::lock_guard _(impl->queue_lock); + VkResult result = impl->queueSubmit(impl->queue, (uint32_t)sis.size(), sis.data(), fence); + if (result != VK_SUCCESS) { + return { expected_error, VkException{ result } }; + } + return { expected_value }; + } + + Result QueueExecutor::queue_present(VkPresentInfoKHR pi) { + std::lock_guard
_(impl->queue_lock); + auto present_result = impl->queuePresentKHR(impl->queue, &pi); + if (present_result != VK_SUCCESS && present_result != VK_SUBOPTIMAL_KHR) { + return { expected_error, VkException{ present_result } }; + } + if (present_result == VK_SUBOPTIMAL_KHR) { + return { expected_value, present_result }; + } + return { expected_value, VK_SUCCESS }; + } + + uint64_t QueueExecutor::get_sync_value() { + return (*impl->submit_sync.value)++; /* post-increment: yields the counter value from before the bump */ + } + + VkSemaphore QueueExecutor::get_semaphore() { + return impl->submit_sync.semaphore; + } + + uint32_t QueueExecutor::get_queue_family_index() { + return impl->family_index; + } + + Result QueueExecutor::submit_batch(std::vector batch) { /* Turns every SubmitInfo that has command buffers into one VkSubmitInfo2KHR and submits them all in a single call while holding the queue lock. */ + std::unique_lock _(*this); + + uint64_t num_cbufs = 0; + uint64_t num_waits = 0; /* timeline waits + presentation waits */ + uint64_t num_signals = 0; /* one timeline signal per submit + presentation signals */ + for (SubmitInfo& submit_info : batch) { + num_cbufs += submit_info.command_buffers.size(); + num_waits += submit_info.waits.size() + submit_info.pres_wait.size(); + num_signals += 1 + submit_info.pres_signal.size(); + } + + std::vector sis; + std::vector cbufsis; + cbufsis.reserve(num_cbufs); + std::vector wait_semas; + wait_semas.reserve(num_waits); /* upper bound on all pushes below: the submit infos keep raw pointers into this vector, so it must never reallocate */ + std::vector signal_semas; + signal_semas.reserve(num_signals); /* same reasoning: reserve for every signal so earlier pointers stay valid */ + + for (uint64_t i = 0; i < batch.size(); i++) { + SubmitInfo& submit_info = batch[i]; + + for (auto& fut : submit_info.signals) { + fut->status = Signal::Status::eSynchronizable; + } + + if (submit_info.command_buffers.size() == 0) { + continue; + } + + for (uint64_t i = 0; i < submit_info.command_buffers.size(); i++) { + cbufsis.emplace_back( + VkCommandBufferSubmitInfoKHR{ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR, .commandBuffer = submit_info.command_buffers[i] }); + } + + uint32_t wait_sema_count = 0; + for (auto& w : submit_info.waits) { + assert(w->source.executor->type == Executor::Type::eVulkanDeviceQueue); + rtvk::QueueExecutor* executor = static_cast(w->source.executor); + VkSemaphoreSubmitInfoKHR ssi{
VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + ssi.semaphore = executor->get_semaphore(); + ssi.value = w->source.visibility; + ssi.stageMask = (VkPipelineStageFlagBits2KHR)PipelineStageFlagBits::eAllCommands; // TODO: w now has stage info + wait_semas.emplace_back(ssi); + wait_sema_count++; + } + + for (auto& w : submit_info.pres_wait) { + VkSemaphoreSubmitInfoKHR ssi{ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + ssi.semaphore = w; + ssi.stageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR; + wait_semas.emplace_back(ssi); + wait_sema_count++; + } + + VkSemaphoreSubmitInfoKHR ssi{ VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR }; + ssi.semaphore = impl->submit_sync.semaphore; + ssi.value = ++(*impl->submit_sync.value); + + ssi.stageMask = (VkPipelineStageFlagBits2KHR)PipelineStageFlagBits::eAllCommands; + + for (auto& fut : submit_info.signals) { + fut->status = Signal::Status::eSynchronizable; + fut->source = { this, ssi.value }; + } + + uint32_t signal_sema_count = 1; + signal_semas.emplace_back(ssi); + + for (auto& w : submit_info.pres_signal) { + ssi.semaphore = w; + ssi.value = 0; // binary sema + signal_semas.emplace_back(ssi); + signal_sema_count++; + } + + VkSubmitInfo2KHR& si = sis.emplace_back(VkSubmitInfo2KHR{ VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR }); + VkCommandBufferSubmitInfoKHR* p_cbuf_infos = &cbufsis.back() - (submit_info.command_buffers.size() - 1); + VkSemaphoreSubmitInfoKHR* p_wait_semas = wait_sema_count > 0 ?
&wait_semas.back() - (wait_sema_count - 1) : nullptr; + VkSemaphoreSubmitInfoKHR* p_signal_semas = &signal_semas.back() - (signal_sema_count - 1); + + si.pWaitSemaphoreInfos = p_wait_semas; + si.waitSemaphoreInfoCount = wait_sema_count; + si.pCommandBufferInfos = p_cbuf_infos; + si.commandBufferInfoCount = (uint32_t)submit_info.command_buffers.size(); + si.pSignalSemaphoreInfos = p_signal_semas; + si.signalSemaphoreInfoCount = signal_sema_count; + } + VUK_DO_OR_RETURN(submit(std::span{ sis }, VK_NULL_HANDLE)); + + return { expected_value }; + } + + void QueueExecutor::lock() { + impl->queue_lock.lock(); + } + void QueueExecutor::unlock() { + impl->queue_lock.unlock(); + } + + Result QueueExecutor::wait_idle() { + std::scoped_lock _{ *this }; + + auto result = impl->queueWaitIdle(impl->queue); + if (result < 0) { + return { expected_error, VkException(result) }; + } else { + return { expected_value }; + } + } +} // namespace vuk::rtvk \ No newline at end of file diff --git a/src/tests/TestContext.hpp b/src/tests/TestContext.hpp index a8db2cd7..6352fbd3 100644 --- a/src/tests/TestContext.hpp +++ b/src/tests/TestContext.hpp @@ -7,6 +7,7 @@ #include "vuk/Context.hpp" #include "vuk/RenderGraph.hpp" #include "vuk/resources/DeviceFrameResource.hpp" +#include "vuk/runtime/ThisThreadExecutor.hpp" #ifdef WIN32 #include #endif @@ -26,6 +27,7 @@ namespace vuk { vkb::Device vkbdevice; std::optional sfa_resource; std::optional allocator; + std::vector> executors; RENDERDOC_API_1_6_0* rdoc_api = NULL; void bringup() { @@ -112,19 +114,16 @@ namespace vuk { transfer_queue = vkbdevice.get_queue(vkb::QueueType::transfer).value(); auto transfer_queue_family_index = vkbdevice.get_queue_index(vkb::QueueType::transfer).value(); device = vkbdevice.device; - ContextCreateParameters::FunctionPointers fps; + vuk::rtvk::FunctionPointers fps; fps.vkGetInstanceProcAddr = vkbinstance.fp_vkGetInstanceProcAddr; fps.vkGetDeviceProcAddr = vkbinstance.fp_vkGetDeviceProcAddr; -
context.emplace(ContextCreateParameters{ instance, - device, - physical_device, - graphics_queue, - graphics_queue_family_index, - VK_NULL_HANDLE, - VK_QUEUE_FAMILY_IGNORED, - transfer_queue, - transfer_queue_family_index, - fps }); + fps.load_pfns(instance, device, true); + + executors.push_back(rtvk::create_vkqueue_executor(fps, device, graphics_queue, graphics_queue_family_index, DomainFlagBits::eGraphicsQueue)); + executors.push_back(rtvk::create_vkqueue_executor(fps, device, transfer_queue, transfer_queue_family_index, DomainFlagBits::eTransferQueue)); + executors.push_back(std::make_unique()); + + context.emplace(ContextCreateParameters{ instance, device, physical_device, std::move(executors), fps }); needs_bringup = false; needs_teardown = true; #ifdef WIN32 diff --git a/src/tests/arrays.cpp b/src/tests/arrays.cpp index 5e17e9bd..5330b059 100644 --- a/src/tests/arrays.cpp +++ b/src/tests/arrays.cpp @@ -87,11 +87,11 @@ TEST_CASE("arrayed images, commands") { CHECK(std::all_of(updata.begin(), updata.end(), [](auto& elem) { return elem == 5; })); } { - auto futc2 = clear_image(arr[1], vuk::ClearColor(5u, 5u, 5u, 5u)); + auto futc2 = clear_image(arr[1], vuk::ClearColor(6u, 6u, 6u, 6u)); auto dst_buf = declare_buf("dst", *dst); auto res = download_buffer(image2buf(futc2, dst_buf)).get(*test_context.allocator, test_context.compiler); auto updata = std::span((uint32_t*)res->mapped_ptr, 4); - CHECK(std::all_of(updata.begin(), updata.end(), [](auto& elem) { return elem == 5; })); + CHECK(std::all_of(updata.begin(), updata.end(), [](auto& elem) { return elem == 6; })); } } } \ No newline at end of file