Skip to content

Commit

Permalink
Merge pull request #45672 from reduz/barrier-optimization
Browse files Browse the repository at this point in the history
Rewrote how barriers work for faster rendering
  • Loading branch information
akien-mga authored Feb 4, 2021
2 parents a405a24 + f20999f commit 2ba66c1
Show file tree
Hide file tree
Showing 25 changed files with 2,109 additions and 1,171 deletions.
296 changes: 262 additions & 34 deletions drivers/vulkan/rendering_device_vulkan.cpp

Large diffs are not rendered by default.

17 changes: 16 additions & 1 deletion drivers/vulkan/rendering_device_vulkan.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ class RenderingDeviceVulkan : public RenderingDevice {

VkImageLayout layout;

uint64_t used_in_frame = 0;
bool used_in_transfer = false;
bool used_in_raster = false;
bool used_in_compute = false;

uint32_t read_aspect_mask = 0;
uint32_t barrier_aspect_mask = 0;
bool bound = false; //bound to framebuffer
Expand Down Expand Up @@ -528,6 +533,8 @@ class RenderingDeviceVulkan : public RenderingDevice {

PushConstant push_constant;

uint32_t compute_local_size[3] = { 0, 0, 0 };

bool is_compute = false;
int max_output = 0;
Vector<Set> sets;
Expand Down Expand Up @@ -686,6 +693,7 @@ class RenderingDeviceVulkan : public RenderingDevice {
VkPipeline pipeline = VK_NULL_HANDLE;
uint32_t push_constant_size = 0;
uint32_t push_constant_stages = 0;
uint32_t local_group_size[3] = { 0, 0, 0 };
};

RID_Owner<ComputePipeline, true> compute_pipeline_owner;
Expand Down Expand Up @@ -808,8 +816,10 @@ class RenderingDeviceVulkan : public RenderingDevice {
uint32_t set_count = 0;
RID pipeline;
RID pipeline_shader;
uint32_t local_group_size[3] = { 0, 0, 0 };
VkPipelineLayout pipeline_layout = VK_NULL_HANDLE;
uint32_t pipeline_push_constant_stages = 0;
bool allow_draw_overlap;
} state;

#ifdef DEBUG_ENABLED
Expand Down Expand Up @@ -1028,13 +1038,14 @@ class RenderingDeviceVulkan : public RenderingDevice {
/**** COMPUTE LISTS ****/
/***********************/

virtual ComputeListID compute_list_begin();
virtual ComputeListID compute_list_begin(bool p_allow_draw_overlap = false);
virtual void compute_list_bind_compute_pipeline(ComputeListID p_list, RID p_compute_pipeline);
virtual void compute_list_bind_uniform_set(ComputeListID p_list, RID p_uniform_set, uint32_t p_index);
virtual void compute_list_set_push_constant(ComputeListID p_list, const void *p_data, uint32_t p_data_size);
virtual void compute_list_add_barrier(ComputeListID p_list);

virtual void compute_list_dispatch(ComputeListID p_list, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
virtual void compute_list_dispatch_threads(ComputeListID p_list, uint32_t p_x_threads, uint32_t p_y_threads, uint32_t p_z_threads);
virtual void compute_list_dispatch_indirect(ComputeListID p_list, RID p_buffer, uint32_t p_offset);
virtual void compute_list_end(uint32_t p_post_barrier = BARRIER_MASK_ALL);

Expand Down Expand Up @@ -1085,6 +1096,10 @@ class RenderingDeviceVulkan : public RenderingDevice {
virtual void draw_command_insert_label(String p_label_name, const Color p_color = Color(1, 1, 1, 1));
virtual void draw_command_end_label();

virtual String get_device_vendor_name() const;
virtual String get_device_name() const;
virtual String get_device_pipeline_cache_uuid() const;

RenderingDeviceVulkan();
~RenderingDeviceVulkan();
};
Expand Down
62 changes: 54 additions & 8 deletions drivers/vulkan/vulkan_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ Error VulkanContext::_create_physical_device() {
ERR_FAIL_V(ERR_CANT_CREATE);
}
/* for now, just grab the first physical device */
gpu = physical_devices[0];
uint32_t device_index = 0;
gpu = physical_devices[device_index];
free(physical_devices);

/* Look for device extensions */
Expand All @@ -389,6 +390,40 @@ Error VulkanContext::_create_physical_device() {
enabled_extension_count = 0;
memset(extension_names, 0, sizeof(extension_names));

/* Get identifier properties */
vkGetPhysicalDeviceProperties(gpu, &gpu_props);

static const struct {
uint32_t id;
const char *name;
} vendor_names[] = {
{ 0x1002, "AMD" },
{ 0x1010, "ImgTec" },
{ 0x10DE, "NVIDIA" },
{ 0x13B5, "ARM" },
{ 0x5143, "Qualcomm" },
{ 0x8086, "INTEL" },
{ 0, nullptr },
};
device_name = gpu_props.deviceName;
pipeline_cache_id = String::hex_encode_buffer(gpu_props.pipelineCacheUUID, VK_UUID_SIZE);
pipeline_cache_id += "-driver-" + itos(gpu_props.driverVersion);
{
device_vendor = "Unknown";
uint32_t vendor_idx = 0;
while (vendor_names[vendor_idx].name != nullptr) {
if (gpu_props.vendorID == vendor_names[vendor_idx].id) {
device_vendor = vendor_names[vendor_idx].name;
break;
}
vendor_idx++;
}
}
#ifdef DEBUG_ENABLED
print_line("Using Vulkan Device #" + itos(device_index) + ": " + device_vendor + " - " + device_name);
#endif
device_api_version = gpu_props.apiVersion;

err = vkEnumerateDeviceExtensionProperties(gpu, nullptr, &device_extension_count, nullptr);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);

Expand Down Expand Up @@ -498,7 +533,6 @@ Error VulkanContext::_create_physical_device() {
break;
}
}
vkGetPhysicalDeviceProperties(gpu, &gpu_props);

/* Call with NULL data to get count */
vkGetPhysicalDeviceQueueFamilyProperties(gpu, &queue_family_count, nullptr);
Expand Down Expand Up @@ -565,6 +599,7 @@ Error VulkanContext::_create_device() {
}
err = vkCreateDevice(gpu, &sdevice, nullptr, &device);
ERR_FAIL_COND_V(err, ERR_CANT_CREATE);

return OK;
}

Expand Down Expand Up @@ -1590,11 +1625,12 @@ void VulkanContext::command_begin_label(VkCommandBuffer p_command_buffer, String
if (!enabled_debug_utils) {
return;
}

CharString cs = p_label_name.utf8().get_data();
VkDebugUtilsLabelEXT label;
label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
label.pNext = nullptr;
CharString label_name = p_label_name.utf8();
label.pLabelName = label_name.get_data();
label.pLabelName = cs.get_data();
label.color[0] = p_color[0];
label.color[1] = p_color[1];
label.color[2] = p_color[2];
Expand All @@ -1606,11 +1642,11 @@ void VulkanContext::command_insert_label(VkCommandBuffer p_command_buffer, Strin
if (!enabled_debug_utils) {
return;
}
CharString cs = p_label_name.utf8().get_data();
VkDebugUtilsLabelEXT label;
label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
label.pNext = nullptr;
CharString label_name = p_label_name.utf8();
label.pLabelName = label_name.get_data();
label.pLabelName = cs.get_data();
label.color[0] = p_color[0];
label.color[1] = p_color[1];
label.color[2] = p_color[2];
Expand All @@ -1629,16 +1665,26 @@ void VulkanContext::set_object_name(VkObjectType p_object_type, uint64_t p_objec
if (!enabled_debug_utils) {
return;
}
CharString obj_data = p_object_name.utf8();
VkDebugUtilsObjectNameInfoEXT name_info;
name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
name_info.pNext = nullptr;
name_info.objectType = p_object_type;
name_info.objectHandle = p_object_handle;
CharString object_name = p_object_name.utf8();
name_info.pObjectName = object_name.get_data();
name_info.pObjectName = obj_data.get_data();
SetDebugUtilsObjectNameEXT(device, &name_info);
}

// Returns a copy of the GPU vendor name cached in `device_vendor`.
// _create_physical_device() fills that member by matching gpu_props.vendorID
// against a fixed table of known vendor IDs; it stays "Unknown" when no entry
// matches.
String VulkanContext::get_device_vendor_name() const {
return device_vendor;
}
// Returns a copy of the device name cached in `device_name`, which
// _create_physical_device() assigns from gpu_props.deviceName.
String VulkanContext::get_device_name() const {
return device_name;
}
// Returns a copy of `pipeline_cache_id`. Despite the method name this is not
// the bare UUID: _create_physical_device() builds it as the hex-encoded
// gpu_props.pipelineCacheUUID followed by "-driver-" and
// itos(gpu_props.driverVersion).
String VulkanContext::get_device_pipeline_cache_uuid() const {
return pipeline_cache_id;
}

VulkanContext::VulkanContext() {
use_validation_layers = Engine::get_singleton()->is_validation_layers_enabled();

Expand Down
9 changes: 9 additions & 0 deletions drivers/vulkan/vulkan_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ class VulkanContext {
bool device_initialized = false;
bool inst_initialized = false;

String device_vendor;
String device_name;
String pipeline_cache_id;
uint32_t device_api_version = 0;

bool buffers_prepared = false;

// Present queue.
Expand Down Expand Up @@ -215,6 +220,10 @@ class VulkanContext {
void command_end_label(VkCommandBuffer p_command_buffer);
void set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name);

String get_device_vendor_name() const;
String get_device_name() const;
String get_device_pipeline_cache_uuid() const;

VulkanContext();
virtual ~VulkanContext();
};
Expand Down
1 change: 0 additions & 1 deletion scene/resources/sky_material.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -597,5 +597,4 @@ PhysicalSkyMaterial::PhysicalSkyMaterial() {

// Destructor: frees the `shader` RID through the RS singleton, then sets the
// shader of this material (_get_material()) to an empty RID.
// NOTE(review): the shader is freed before it is detached from the material;
// confirm this order is intended.
PhysicalSkyMaterial::~PhysicalSkyMaterial() {
RS::get_singleton()->free(shader);
RS::get_singleton()->material_set_shader(_get_material(), RID());
}
22 changes: 14 additions & 8 deletions servers/rendering/renderer_rd/cluster_builder_rd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -400,12 +400,14 @@ void ClusterBuilderRD::begin(const Transform &p_view_transform, const CameraMatr
void ClusterBuilderRD::bake_cluster() {
RENDER_TIMESTAMP(">Bake Cluster");

RD::get_singleton()->draw_command_begin_label("Bake Light Cluster");

//clear cluster buffer
RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size);
RD::get_singleton()->buffer_clear(cluster_buffer, 0, cluster_buffer_size, 0);

if (render_element_count > 0) {
//clear render buffer
RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size);
RD::get_singleton()->buffer_clear(cluster_render_buffer, 0, cluster_render_buffer_size, 0);

{ //fill state uniform

Expand All @@ -420,15 +422,16 @@ void ClusterBuilderRD::bake_cluster() {
state.cluster_depth_offset = (render_element_max / 32);
state.cluster_data_size = state.cluster_depth_offset + render_element_max;

RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state);
RD::get_singleton()->buffer_update(state_uniform, 0, sizeof(StateUniform), &state, 0);
}

//update instances

RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements);
RD::get_singleton()->buffer_update(element_buffer, 0, sizeof(RenderElementData) * render_element_count, render_elements, 0);

RENDER_TIMESTAMP("Render Elements");

RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER);
//render elements
{
RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(framebuffer, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD, RD::INITIAL_ACTION_DROP, RD::FINAL_ACTION_DISCARD);
Expand Down Expand Up @@ -469,7 +472,7 @@ void ClusterBuilderRD::bake_cluster() {
RD::get_singleton()->draw_list_draw(draw_list, true, instances);
i += instances;
}
RD::get_singleton()->draw_list_end();
RD::get_singleton()->draw_list_end(RD::BARRIER_MASK_COMPUTE);
}
//store elements
RENDER_TIMESTAMP("Pack Elements");
Expand All @@ -491,12 +494,15 @@ void ClusterBuilderRD::bake_cluster() {

RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterStore::PushConstant));

RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1, 8, 8, 1);
RD::get_singleton()->compute_list_dispatch_threads(compute_list, cluster_screen_size.x, cluster_screen_size.y, 1);

RD::get_singleton()->compute_list_end();
RD::get_singleton()->compute_list_end(RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE);
}
} else {
RD::get_singleton()->barrier(RD::BARRIER_MASK_TRANSFER, RD::BARRIER_MASK_RASTER | RD::BARRIER_MASK_COMPUTE);
}
RENDER_TIMESTAMP("<Bake Cluster");
RD::get_singleton()->draw_command_end_label();
}

void ClusterBuilderRD::debug(ElementType p_element) {
Expand All @@ -519,7 +525,7 @@ void ClusterBuilderRD::debug(ElementType p_element) {

RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(ClusterBuilderSharedDataRD::ClusterDebug::PushConstant));

RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1, 8, 8, 1);
RD::get_singleton()->compute_list_dispatch_threads(compute_list, screen_size.x, screen_size.y, 1);

RD::get_singleton()->compute_list_end();
}
Expand Down
Loading

0 comments on commit 2ba66c1

Please sign in to comment.