Skip to content

Commit

Permalink
some multiqueue tests, scheduling info
Browse files Browse the repository at this point in the history
  • Loading branch information
martty committed Jan 6, 2024
1 parent 5c11a58 commit 15b9029
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 26 deletions.
5 changes: 5 additions & 0 deletions CMakeSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@
"name": "VUK_USE_DXC",
"value": "False",
"type": "BOOL"
},
{
"name": "VUK_BUILD_TESTS",
"value": "True",
"type": "BOOL"
}
]
},
Expand Down
12 changes: 9 additions & 3 deletions include/vuk/Future.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,24 @@ namespace vuk {

class UntypedFuture {
public:
// Creates a future for `ref` inside render graph `rg`, with its own freshly allocated control block.
// Diff view: the next line is the previous signature, shown alongside the signature that replaces it.
UntypedFuture(std::shared_ptr<RG> rg, Ref ref, Ref def) {
// Replacement signature: additionally receives the control blocks this future depends on.
UntypedFuture(std::shared_ptr<RG> rg, Ref ref, Ref def, std::vector<std::shared_ptr<FutureControlBlock>> dependent_blocks) {
this->control = std::make_shared<FutureControlBlock>();

// head is built from a release node on `ref` that points at this control block's acqrel.
// This has to happen before `rg` is moved from below.
this->head = { rg->make_release(ref, &this->control->acqrel, Access::eNone, DomainFlagBits::eAny), 0 };

// The control block takes over the shared_ptr to the graph.
this->control->rg = std::move(rg);
this->def = def;
// Stores the shared_ptrs that were passed in (presumably to keep those control blocks alive — TODO confirm intent).
this->dependent_blocks = std::move(dependent_blocks);
}

// Copy constructor: the new future gets its own control block (copy-constructed from o's),
// shares o's def and list of dependent control blocks, and creates a new release node on o's head.
UntypedFuture(const UntypedFuture& o) noexcept :
    control{ std::make_shared<FutureControlBlock>(*o.control) },
    dependent_blocks{ o.dependent_blocks },
    def{ o.def } {
	// The release node is tied to the acqrel of the control block created above, not to o's.
	head = { control->rg->make_release(o.get_head(), &control->acqrel, Access::eNone, DomainFlagBits::eAny), 0 };
}

// Move constructor: takes over o's control block, dependent blocks, def and head.
// Afterwards o holds a null control pointer, an empty dependent-block list and value-initialized def/head.
UntypedFuture(UntypedFuture&& o) noexcept :
    // Move-constructing from a shared_ptr / vector leaves the source empty.
    control{ std::move(o.control) },
    dependent_blocks{ std::move(o.dependent_blocks) },
    // def and head are reset explicitly in the source object.
    def{ std::exchange(o.def, {}) },
    head{ std::exchange(o.head, {}) } {}

Expand All @@ -61,12 +64,14 @@ namespace vuk {
def = { o.def };

head = { control->rg->make_release(o.get_head(), &this->control->acqrel, Access::eNone, DomainFlagBits::eAny), 0 };
dependent_blocks = o.dependent_blocks;

return *this;
}

UntypedFuture& operator=(UntypedFuture&& o) noexcept {
std::swap(o.control, control);
std::swap(o.dependent_blocks, dependent_blocks);
std::swap(o.def, def);
std::swap(o.head, head);

Expand Down Expand Up @@ -126,6 +131,7 @@ namespace vuk {

// TODO: remove this from public API
std::shared_ptr<FutureControlBlock> control;
std::vector<std::shared_ptr<FutureControlBlock>> dependent_blocks;

protected:
Ref def;
Expand Down Expand Up @@ -258,7 +264,7 @@ namespace vuk {
// Returns a future for the size of this buffer.
// Only available when T is Buffer (enforced by the requires-clause).
Future<uint64_t> get_size()
requires std::is_same_v<T, Buffer>
{
// Diff view: previous return statement, shown alongside its replacement below.
return { get_render_graph(), def.node->valloc.args[1], {} };
// Replacement: same graph and same ref (args[1] of the defining node's valloc), third argument left empty,
// plus this future's own control block passed as the only dependent block.
return { get_render_graph(), def.node->valloc.args[1], {}, {control} };
}

void set_size(Future<uint64_t> arg)
Expand All @@ -275,7 +281,7 @@ namespace vuk {
Ref item = control->rg->make_array_indexing(def.type()->array.T, get_head(), control->rg->make_constant(index));
assert(def.node->kind == Node::AALLOC);
assert(def.type()->kind == Type::ARRAY_TY);
return Future<std::remove_reference_t<decltype(std::declval<T>()[0])>>(get_render_graph(), item, item_def);
return Future<std::remove_reference_t<decltype(std::declval<T>()[0])>>(get_render_graph(), item, item_def, {control});
}
};

Expand Down
5 changes: 4 additions & 1 deletion include/vuk/IR.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ namespace vuk {
};

// Scheduling constraints attached to a node: the domains required for it.
struct SchedulingInfo {
// Diff view: previous member name, replaced by `required_domains` below.
DomainFlags required_domain;
// Construct from a set of domain flags.
SchedulingInfo(DomainFlags required_domains) : required_domains(required_domains) {}
// Construct from a single domain bit. Deliberately not explicit: a DomainFlagBits value can be passed
// directly where a SchedulingInfo is expected.
SchedulingInfo(DomainFlagBits required_domain) : required_domains(required_domain) {}

DomainFlags required_domains;
};

struct NodeDebugInfo {
Expand Down
34 changes: 20 additions & 14 deletions include/vuk/RenderGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,10 @@ private:
struct is_tuple<std::tuple<T...>> : std::true_type {};

// Builds the typed futures returned from a pass call: one Future<typename T::type> per element of `us`.
// Each future refers to an output of `node` and reuses the def of the matching element of `us`.
// When the pack is empty, nothing is returned (the deduced return type is void).
template<typename... T>
// Diff view: previous signature, followed by the replacement that also takes the dependent control blocks.
static auto make_ret(std::shared_ptr<RG> rg, Node* node, const std::tuple<T...>& us) {
static auto make_ret(std::shared_ptr<RG> rg, Node* node, const std::tuple<T...>& us, std::vector<std::shared_ptr<FutureControlBlock>> dependent_blocks) {
if constexpr (sizeof...(T) > 0) {
// i is incremented once per pack element. Elements of a braced-init-list are evaluated left to right,
// so the output indices run from sizeof...(T) - 1 for the first element down to 0 for the last.
size_t i = 0;
// Diff view: previous return statement, then its replacement.
return std::tuple{ Future<typename T::type>{ rg, { node, sizeof...(T) - (++i) }, std::get<T>(us).def }... };
// Replacement: every future in the tuple receives its own copy of dependent_blocks.
return std::tuple{ Future<typename T::type>{ rg, { node, sizeof...(T) - (++i) }, std::get<T>(us).def, dependent_blocks }... };
}
}

Expand All @@ -541,7 +541,7 @@ private:
using ret_tuple = std::tuple<Future<typename T::type>...>;

template<class Ret, class F>
static auto make_lam(Name name, F&& body) {
static auto make_lam(Name name, F&& body, SchedulingInfo scheduling_info) {
auto callback = [typed_cb = std::move(body)](CommandBuffer& cb, std::span<void*> args, std::span<void*> meta, std::span<void*> rets) {
// we do type recovery here -> convert untyped args to typed ones
alignas(alignof(std::tuple<CommandBuffer&, T...>)) char storage[sizeof(std::tuple<CommandBuffer&, T...>)];
Expand All @@ -556,14 +556,19 @@ private:
};

// when this function is called, we weave in this call into the IR
return [untyped_cb = std::move(callback), name](Future<typename T::type>... args) mutable {
return [untyped_cb = std::move(callback), name, scheduling_info](Future<typename T::type>... args) mutable {
auto& first = [](auto& first, auto&...) -> auto& {
return first;
}(args...);
auto& rgp = first.get_render_graph();
RG& rg = *rgp.get();
[](auto& first, auto&... rest) {

// TODO: write a test that this doesn't pass...
std::vector<std::shared_ptr<FutureControlBlock>> dependent_blocks;
[&dependent_blocks](auto& first, auto&... rest) {
(first.get_render_graph()->subgraphs.push_back(rest.get_render_graph()), ...);
dependent_blocks.push_back(first.control);
(dependent_blocks.push_back(rest.control), ...);
}(args...);

std::vector<Type*> arg_types;
Expand All @@ -582,35 +587,36 @@ private:
opaque_fn_ty->debug_info = new TypeDebugInfo{ .name = name.c_str() };
auto opaque_fn = rg.make_declare_fn(opaque_fn_ty);
Node* node = rg.make_call(opaque_fn, args.get_head()...);
node->scheduling_info = new SchedulingInfo(scheduling_info);
if constexpr (is_tuple<Ret>::value) {
auto [idxs, ret_tuple] = intersect_tuples<std::tuple<T...>, Ret>(arg_tuple_as_a);
return make_ret(rgp, node, ret_tuple);
return make_ret(rgp, node, ret_tuple, std::move(dependent_blocks));
} else if constexpr (!std::is_same_v<Ret, void>) {
auto [idxs, ret_tuple] = intersect_tuples<std::tuple<T...>, std::tuple<Ret>>(arg_tuple_as_a);
return std::get<0>(make_ret(rgp, node, ret_tuple));
return std::get<0>(make_ret(rgp, node, ret_tuple, std::move(dependent_blocks)));
}
};
}
};

// Turns the callable `body` into a named pass by forwarding it to TupleMap<...>::make_lam.
template<class F>
// Diff view: previous signature, followed by the replacement that adds an optional scheduling_info
// (defaults to DomainFlagBits::eAny).
[[nodiscard]] auto make_pass(Name name, F&& body) {
[[nodiscard]] auto make_pass(Name name, F&& body, SchedulingInfo scheduling_info = SchedulingInfo(DomainFlagBits::eAny)) {
// Parameter and result types are recovered from F::operator(), so F needs a single,
// non-template call operator.
using traits = closure_traits<decltype(&F::operator())>;
// drop_t<1, ...> removes the first parameter type before mapping the rest
// (presumably the CommandBuffer& every pass callback takes — TODO confirm).
// Diff view: previous return statement, then its replacement which forwards scheduling_info as well.
return TupleMap<drop_t<1, typename traits::types>>::template make_lam<typename traits::result_type, F>(name, std::forward<F>(body));
return TupleMap<drop_t<1, typename traits::types>>::template make_lam<typename traits::result_type, F>(name, std::forward<F>(body), scheduling_info);
}

// Creates a new render graph containing one declared image and returns a future for it.
// `name` is attached to the output of the declaring node; `ia` defaults to a value-initialized ImageAttachment.
[[nodiscard]] inline Future<ImageAttachment> declare_ia(Name name, ImageAttachment ia = {}) {
std::shared_ptr<RG> rg = std::make_shared<RG>();
Ref ref = rg->make_declare_image(ia);
rg->name_outputs(ref.node, { name.c_str() });
// Diff view: previous return statement, followed by its replacement.
return { rg, ref, ref };
// Replacement: the declaring ref serves as both ref and def; the dependent-block list is empty.
return { rg, ref, ref, {} };
}

// Creates a new render graph containing one declared buffer and returns a future for it.
// `name` is attached to the output of the declaring node; `buf` defaults to a value-initialized Buffer.
[[nodiscard]] inline Future<Buffer> declare_buf(Name name, Buffer buf = {}) {
std::shared_ptr<RG> rg = std::make_shared<RG>();
Ref ref = rg->make_declare_buffer(buf);
rg->name_outputs(ref.node, { name.c_str() });
// Diff view: previous return statement, followed by its replacement.
return { rg, ref, ref };
// Replacement: the declaring ref serves as both ref and def; the dependent-block list is empty.
return { rg, ref, ref, {} };
}

template<class T, class... Args>
Expand All @@ -623,7 +629,7 @@ private:
(args.abandon(), ...);
Ref ref = rg->make_declare_array(Type::stripped(refs[0].type()), refs, defs);
rg->name_outputs(ref.node, { name.c_str() });
return { rg, ref, ref };
return { rg, ref, ref, {} };
}

template<class T>
Expand All @@ -639,13 +645,13 @@ private:
}
Ref ref = rg->make_declare_array(Type::stripped(refs[0].type()), refs, defs);
rg->name_outputs(ref.node, { name.c_str() });
return { rg, ref, ref };
return { rg, ref, ref, {} };
}

// Creates a new render graph containing one declared swapchain and returns a future for it.
// Unlike declare_ia / declare_buf, no name is attached to the declaring node's output here.
[[nodiscard]] inline Future<Swapchain> declare_swapchain(Swapchain bundle) {
std::shared_ptr<RG> rg = std::make_shared<RG>();
Ref ref = rg->make_declare_swapchain(bundle);
// Diff view: previous return statement, followed by its replacement.
return { rg, ref, ref };
// Replacement: the declaring ref serves as both ref and def; the dependent-block list is empty.
return { rg, ref, ref, {} };
}

[[nodiscard]] inline Future<ImageAttachment> acquire_next_image(Name name, Future<Swapchain> in) {
Expand Down
14 changes: 13 additions & 1 deletion src/ExecutableRenderGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1045,7 +1045,11 @@ namespace vuk {
}

// Returns the stream registered for `domain`.
Stream* stream_for_domain(DomainFlagBits domain) {
// Diff view: previous body. `at` does not tolerate a missing key
// (it throws std::out_of_range if `streams` is a standard map — TODO confirm container type).
return streams.at(domain).get();
// Replacement body: a domain without a registered stream yields nullptr, so callers must check the result.
auto it = streams.find(domain);
if (it != streams.end()) {
return it->second.get();
}
return nullptr;
}

Stream* stream_for_executor(Executor* executor) {
Expand Down Expand Up @@ -1158,6 +1162,14 @@ namespace vuk {
recorder.streams.emplace(DomainFlagBits::eGraphicsQueue,
std::make_unique<VkQueueStream>(alloc, static_cast<rtvk::QueueExecutor*>(exe), &impl->callbacks));
}
if (auto exe = ctx.get_executor(DomainFlagBits::eComputeQueue)) {
recorder.streams.emplace(DomainFlagBits::eComputeQueue,
std::make_unique<VkQueueStream>(alloc, static_cast<rtvk::QueueExecutor*>(exe), &impl->callbacks));
}
if (auto exe = ctx.get_executor(DomainFlagBits::eTransferQueue)) {
recorder.streams.emplace(DomainFlagBits::eTransferQueue,
std::make_unique<VkQueueStream>(alloc, static_cast<rtvk::QueueExecutor*>(exe), &impl->callbacks));
}
auto host_stream = recorder.streams.at(DomainFlagBits::eHost).get();

std::deque<VkPEStream> pe_streams;
Expand Down
4 changes: 2 additions & 2 deletions src/RenderGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ namespace vuk {
auto& execable = schedule_items[pop_idx];
ScheduledItem item{ .execable = execable,
.scheduled_domain =
execable->scheduling_info ? pick_first_domain(execable->scheduling_info->required_domain) : vuk::DomainFlagBits::eAny };
execable->scheduling_info ? pick_first_domain(execable->scheduling_info->required_domains) : vuk::DomainFlagBits::eAny };
if (execable->kind != Node::VALLOC) { // we use def nodes for deps, but we don't want to schedule them later as their ordering doesn't matter
scheduled_execables.push_back(item);
}
Expand Down Expand Up @@ -640,7 +640,7 @@ namespace vuk {
if (!node->scheduling_info) {
return;
}
auto& domain = node->scheduling_info->required_domain;
auto& domain = node->scheduling_info->required_domains;
if (domain != last_domain && domain != DomainFlagBits::eDevice && domain != DomainFlagBits::eAny) {
last_domain = domain;
}
Expand Down
77 changes: 72 additions & 5 deletions src/tests/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,13 @@ TEST_CASE("multi return pass") {
TEST_CASE("scheduling single-queue") {
{
std::string execution;

auto buf0 = allocate_buffer(*test_context.allocator, { .mem_usage = MemoryUsage::eGPUonly, .size = sizeof(uint32_t) * 4 });

auto write = make_pass("write", [&](CommandBuffer& cbuf, VUK_BA(Access::eTransferWrite) dst) {
execution += "w";
return dst;
});
return dst;
});
auto read = make_pass("read", [&](CommandBuffer& cbuf, VUK_BA(Access::eTransferRead) dst) {
execution += "r";
return dst;
Expand Down Expand Up @@ -254,11 +254,78 @@ TEST_CASE("scheduling with submitted") {
{
auto written = write(declare_buf("src0", **buf0));
written.wait(*test_context.allocator, test_context.compiler);
write(std::move(written)).wait(*test_context.allocator, test_context.compiler);
auto res = write(std::move(written));
res.wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "ww");
execution = "";
}
}
}

// Exercises passes pinned to different queues: "write_A" on the transfer queue and "read_B" on the
// graphics queue. Each pass callback appends a letter to `execution` when invoked, and every scenario
// checks the resulting order before resetting the string.
TEST_CASE("multi-queue buffers") {
{
std::string execution;

auto buf0 = allocate_buffer(*test_context.allocator, { .mem_usage = MemoryUsage::eGPUonly, .size = sizeof(uint32_t) * 4 });

// Transfer-queue pass: fills the buffer and records "w".
auto write = make_pass(
"write_A",
[&](CommandBuffer& cbuf, VUK_BA(Access::eTransferWrite) dst) {
cbuf.fill_buffer(dst, 0xf);
execution += "w";
return dst;
},
DomainFlagBits::eTransferQueue);
// Graphics-queue pass: records a copy involving a scratch buffer and records "r".
auto read = make_pass(
"read_B",
[&](CommandBuffer& cbuf, VUK_BA(Access::eTransferRead) dst) {
// NOTE(review): `dummy` goes out of scope when this callback returns — confirm the allocation
// outlives execution of the recorded copy.
auto dummy = allocate_buffer(*test_context.allocator, { .mem_usage = MemoryUsage::eGPUonly, .size = sizeof(uint32_t) * 4 });
// NOTE(review): `dst` is declared with eTransferRead; if copy_buffer takes (source, destination),
// this writes into `dst` instead of reading from it — confirm the argument order.
cbuf.copy_buffer(**dummy, dst);
execution += "r";
return dst;
},
DomainFlagBits::eGraphicsQueue);

// Write is waited on first; the read then consumes a copy of the already-waited future.
{
auto written = write(declare_buf("src0", **buf0));
written.wait(*test_context.allocator, test_context.compiler);
read(written).wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "wr");
execution = "";
}
// Same as above, but the waited future is moved into the read.
{
auto written = write(declare_buf("src0", **buf0));
written.wait(*test_context.allocator, test_context.compiler);
read(std::move(written)).wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "wr");
execution = "";
}
// Disabled scenario: write -> wait -> read -> write across queues.
/* {
auto written = write(declare_buf("src0", **buf0));
written.wait(*test_context.allocator, test_context.compiler);
write(read(std::move(written))).wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "wrw");
execution = "";
}*/
// No intermediate wait: write and read are waited on together, read takes a copy of the future.
{
auto written = write(declare_buf("src0", **buf0));
read(written).wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "wr");
execution = "";
}
// No intermediate wait, future moved into the read.
{
auto written = write(declare_buf("src0", **buf0));
read(std::move(written)).wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "wr");
execution = "";
}
// Three passes alternating queues (transfer -> graphics -> transfer) in a single wait.
{
auto written = write(declare_buf("src0", **buf0));
write(read(std::move(written))).wait(*test_context.allocator, test_context.compiler);
CHECK(execution == "wrw");
execution = "";
}
}
}

// TEST TODOS: image2image copy, resolve

0 comments on commit 15b9029

Please sign in to comment.