diff --git a/video/out/opengl/ra.h b/video/out/opengl/ra.h
index ae7fb9aea730a..1f716d98f8bdc 100644
--- a/video/out/opengl/ra.h
+++ b/video/out/opengl/ra.h
@@ -146,6 +146,7 @@ enum ra_buf_type {
RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object)
RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
+ RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage)
};
struct ra_buf_params {
@@ -369,10 +370,10 @@ struct ra_fns {
void (*buf_destroy)(struct ra *ra, struct ra_buf *buf);
- // Update the contents of a buffer, starting at a given offset and up to a
- // given size, with the contents of *data. This is an extremely common
- // operation. Calling this while the buffer is considered "in use" is an
- // error. (See: buf_poll)
+ // Update the contents of a buffer, starting at a given offset (*must* be a
+ // multiple of 4) and up to a given size, with the contents of *data. This
+ // is an extremely common operation. Calling this while the buffer is
+ // considered "in use" is an error. (See: buf_poll)
void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
const void *data, size_t size);
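
The 4-byte offset requirement matches vkCmdUpdateBuffer's alignment rule (see
vk_buf_update in ra_vk.c below), so callers that compute offsets dynamically
have to round them. A minimal caller-side sketch (buf_append is a hypothetical
helper; assumes the buffer is host-mutable, large enough, and currently idle
per buf_poll):

    // Append `size` bytes at the next 4-byte-aligned offset and return the
    // new write position. MP_ALIGN_UP is mpv's existing alignment macro.
    static ptrdiff_t buf_append(struct ra *ra, struct ra_buf *buf,
                                ptrdiff_t pos, const void *data, size_t size)
    {
        ptrdiff_t offset = MP_ALIGN_UP(pos, 4); // required by buf_update now
        assert(offset + size <= buf->params.size);
        ra->fns->buf_update(ra, buf, offset, data, size);
        return offset + size;
    }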
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
index b8fc24a52e133..aeadd346b9408 100644
--- a/video/out/opengl/utils.c
+++ b/video/out/opengl/utils.c
@@ -64,7 +64,8 @@ static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool)
return false;
MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf);
- MP_VERBOSE(ra, "Resized buffer pool to size %d\n", pool->num_buffers);
+ MP_VERBOSE(ra, "Resized buffer pool of type %u to size %d\n",
+ pool->current_params.type, pool->num_buffers);
return true;
}
diff --git a/video/out/vo.c b/video/out/vo.c
index f9c5d04e24be0..06507c7f87694 100644
--- a/video/out/vo.c
+++ b/video/out/vo.c
@@ -60,6 +60,7 @@ extern const struct vo_driver video_out_drm;
extern const struct vo_driver video_out_direct3d;
extern const struct vo_driver video_out_sdl;
extern const struct vo_driver video_out_vaapi;
+extern const struct vo_driver video_out_vulkan;
extern const struct vo_driver video_out_wayland;
extern const struct vo_driver video_out_rpi;
extern const struct vo_driver video_out_tct;
@@ -78,6 +79,9 @@ const struct vo_driver *const video_out_drivers[] =
#if HAVE_DIRECT3D
&video_out_direct3d,
#endif
+#if HAVE_VULKAN
+ &video_out_vulkan,
+#endif
#if HAVE_WAYLAND
&video_out_wayland,
#endif
diff --git a/video/out/vo_vulkan.c b/video/out/vo_vulkan.c
new file mode 100644
index 0000000000000..9e6c7984c6a3d
--- /dev/null
+++ b/video/out/vo_vulkan.c
@@ -0,0 +1,335 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "mpv_talloc.h"
+#include "options/m_config.h"
+#include "osdep/timer.h"
+#include "video/mp_image.h"
+#include "video/out/x11_common.h"
+#include "vo.h"
+#include "sub/osd.h"
+
+#include "opengl/ra.h"
+#include "opengl/video.h"
+
+#include "vulkan/common.h"
+#include "vulkan/utils.h"
+#include "vulkan/ra_vk.h"
+
+struct vo_vulkan_opts {
+ int debug; // whether to load the validation layers or not
+ int allow_sw; // whether to allow software devices
+ char *device; // force a specific GPU
+ int swsize; // swapchain size
+ int swdepth; // swapchain depth
+};
+
+struct vk_priv {
+ struct vo *vo;
+ struct mp_log *log;
+
+ struct vo_vulkan_opts opts;
+
+ struct mpvk_ctx vk;
+ struct ra *ra;
+ struct gl_video *renderer;
+
+ struct vk_swchain swchain;
+ int frames_in_flight;
+};
+
+static bool resize(struct vo *vo)
+{
+ struct vk_priv *p = vo->priv;
+
+ MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight);
+
+ if (!vk_swchain_resize(&p->swchain, vo->dwidth, vo->dheight)) {
+ MP_ERR(vo, "Failed resizing swapchain!\n");
+ return false;
+ }
+
+ struct mp_rect src, dst;
+ struct mp_osd_res osd;
+ vo_get_src_dst_rects(vo, &src, &dst, &osd);
+
+ gl_video_resize(p->renderer, &src, &dst, &osd);
+
+ vo->want_redraw = true;
+ return true;
+}
+
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+ struct vk_priv *p = vo->priv;
+
+ if (vo->x11)
+ vo_x11_config_vo_window(vo);
+
+ if (!resize(vo))
+ return VO_ERROR;
+
+ gl_video_config(p->renderer, params);
+
+ return 0;
+}
+
+static void uninit(struct vo *vo)
+{
+ struct vk_priv *p = vo->priv;
+ struct mpvk_ctx *vk = &p->vk;
+
+ gl_video_uninit(p->renderer);
+
+ if (p->ra) {
+ vk_swchain_uninit(p->ra, &p->swchain);
+ p->ra->fns->destroy(p->ra);
+ }
+
+    // Clean up platform-specific windowing stuff. Do this before uninitializing
+    // the device etc. so we don't keep the window around for longer than
+    // necessary.
+ if (vo->x11)
+ vo_x11_uninit(vo);
+
+ mpvk_uninit(vk);
+}
+
+static int preinit(struct vo *vo)
+{
+ struct vk_priv *p = vo->priv;
+ struct mpvk_ctx *vk = &p->vk;
+ p->vo = vo;
+ p->log = vk->log = vo->log;
+
+ if (!mpvk_instance_init(vk, p->opts.debug))
+ goto error;
+ if (!mpvk_surface_init(vo, vk))
+ goto error;
+ if (!mpvk_find_phys_device(vk, p->opts.device, p->opts.allow_sw))
+ goto error;
+ if (!mpvk_pick_surface_format(vk))
+ goto error;
+ if (!mpvk_device_init(vk))
+ goto error;
+ p->ra = ra_create_vk(vk, p->log);
+ if (!p->ra)
+ goto error;
+ if (!vk_swchain_init(vk, p->ra, p->opts.swsize, &p->swchain))
+ goto error;
+
+ p->renderer = gl_video_init(p->ra, vo->log, vo->global);
+ gl_video_set_osd_source(p->renderer, vo->osd);
+ gl_video_configure_queue(p->renderer, vo);
+
+ return 0;
+
+error:
+ uninit(vo);
+ return -1;
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+ struct vk_priv *p = vo->priv;
+
+ switch (request) {
+ case VOCTRL_SET_PANSCAN:
+ return resize(vo) ? VO_TRUE : VO_ERROR;
+ case VOCTRL_SET_EQUALIZER:
+ vo->want_redraw = true;
+ return VO_TRUE;
+ case VOCTRL_UPDATE_RENDER_OPTS: {
+ gl_video_update_options(p->renderer);
+ gl_video_configure_queue(p->renderer, p->vo);
+ p->vo->want_redraw = true;
+ return true;
+ }
+ case VOCTRL_RESET:
+ gl_video_reset(p->renderer);
+ return true;
+ case VOCTRL_PAUSE:
+ if (gl_video_showing_interpolated_frame(p->renderer))
+ vo->want_redraw = true;
+ return true;
+ case VOCTRL_PERFORMANCE_DATA:
+ gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data);
+ return true;
+ }
+
+ int events = 0, r = 0;
+
+ if (vo->x11)
+ r |= vo_x11_control(vo, &events, request, data);
+
+ if (events & VO_EVENT_RESIZE)
+ r |= resize(vo) ? 0 : VO_ERROR;
+
+ if (events & VO_EVENT_EXPOSE)
+ vo->want_redraw = true;
+
+ vo_event(vo, events);
+ return r;
+}
+
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+ struct vk_priv *p = vo->priv;
+ struct vk_swimg swimg;
+ if (!vk_swchain_get(&p->swchain, &swimg))
+ goto error;
+
+ struct fbodst target = {
+ .tex = swimg.image,
+ .flip = false,
+ };
+
+ gl_video_render_frame(p->renderer, frame, target);
+ if (!ra_vk_present_frame(p->ra, &swimg, &p->frames_in_flight)) {
+ MP_ERR(vo, "Failed presenting frame!\n");
+ goto error;
+ }
+
+error:
+ return;
+}
+
+static void flip_page(struct vo *vo)
+{
+ struct vk_priv *p = vo->priv;
+ while (p->frames_in_flight >= p->opts.swdepth)
+ mpvk_poll_cmds(&p->vk, p->vk.pool, UINT64_MAX);
+}
+
+static int query_format(struct vo *vo, int format)
+{
+ struct vk_priv *p = vo->priv;
+ if (!gl_video_check_format(p->renderer, format))
+ return 0;
+ return 1;
+}
+
+static void wakeup(struct vo *vo)
+{
+ if (vo->x11)
+ vo_x11_wakeup(vo);
+}
+
+static void wait_events(struct vo *vo, int64_t until_time_us)
+{
+ if (vo->x11) {
+ vo_x11_wait_events(vo, until_time_us);
+ } else {
+ vo_wait_default(vo, until_time_us);
+ }
+}
+
+static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
+ int stride_align)
+{
+ struct vk_priv *p = vo->priv;
+ return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align);
+}
+
+static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
+ struct bstr name, struct bstr param)
+{
+ int ret = M_OPT_INVALID;
+ VkResult res;
+
+ // Create a dummy instance to validate/list the devices
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ VkInstance inst;
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+
+ res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ res = vkEnumeratePhysicalDevices(inst, &num, NULL);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ devices = talloc_array(NULL, VkPhysicalDevice, num);
+    res = vkEnumeratePhysicalDevices(inst, &num, devices);
+ if (res != VK_SUCCESS)
+ goto error;
+
+ bool help = bstr_equals0(param, "help");
+ if (help) {
+ mp_info(log, "Available vulkan devices:\n");
+ ret = M_OPT_EXIT;
+ }
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop;
+ vkGetPhysicalDeviceProperties(devices[i], &prop);
+
+ if (help) {
+ mp_info(log, " '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, i,
+ prop.vendorID, prop.deviceID);
+ } else if (bstr_equals0(param, prop.deviceName)) {
+ ret = 0;
+ break;
+ }
+ }
+
+    if (!help && ret != 0)
+ mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param));
+
+error:
+ talloc_free(devices);
+ return ret;
+}
+
+#define OPT_BASE_STRUCT struct vk_priv
+
+const struct vo_driver video_out_vulkan = {
+ .description = "Vulkan Renderer",
+ .name = "vulkan",
+ .preinit = preinit,
+ .query_format = query_format,
+ .reconfig = reconfig,
+ .control = control,
+ .get_image = get_image,
+ .draw_frame = draw_frame,
+ .flip_page = flip_page,
+ .wait_events = wait_events,
+ .wakeup = wakeup,
+ .uninit = uninit,
+ .priv_size = sizeof(struct vk_priv),
+ .options = (const m_option_t[]) {
+ OPT_FLAG("vulkan-debug", opts.debug, 0),
+ OPT_FLAG("vulkan-sw", opts.allow_sw, 0),
+ OPT_STRING_VALIDATE("vulkan-device", opts.device, 0, vk_validate_dev),
+ OPT_INTRANGE("vulkan-swapchain-size", opts.swsize, 0, 1,
+ MPVK_MAX_STREAMING_DEPTH),
+ OPT_INTRANGE("vulkan-swapchain-depth", opts.swdepth, 0, 1,
+ MPVK_MAX_STREAMING_DEPTH),
+ {0}
+ },
+ .priv_defaults = &(const struct vk_priv) {
+ .opts = {
+ .swsize = 8,
+ .swdepth = 1,
+ },
+ },
+};
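
For reference, the new VO is selected with --vo=vulkan, and the option table
above maps directly onto struct vo_vulkan_opts; illustrative command lines:

    mpv --vo=vulkan --vulkan-swapchain-depth=2 video.mkv
    mpv --vo=vulkan --vulkan-device=help    # lists devices via vk_validate_dev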
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
new file mode 100644
index 0000000000000..9113d27a6a201
--- /dev/null
+++ b/video/out/vulkan/common.h
@@ -0,0 +1,50 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+
+// We need to define all platforms we want to support. Since we have
+// our own mechanism for checking this, we re-define the right symbols
+#if HAVE_VULKAN_XLIB
+#define VK_USE_PLATFORM_XLIB_KHR
+#endif
+
+#include <vulkan/vulkan.h>
+
+// Vulkan allows the optional use of a custom allocator. We don't need one but
+// mark this parameter with a better name in case we ever decide to change this
+// in the future. (And to make the code more readable)
+#define MPVK_ALLOCATOR NULL
+
+// A lot of things depend on streaming resources across frames. Depending on
+// how many frames we render ahead of time, we need to pick enough to avoid
+// any conflicts, so make all of these tunable relative to this constant in
+// order to centralize them.
+#define MPVK_MAX_STREAMING_DEPTH 8
+
+// Shared struct used to hold vulkan context information
+struct mpvk_ctx {
+ struct mp_log *log;
+ VkInstance inst;
+ VkPhysicalDevice physd;
+ VkDebugReportCallbackEXT dbg;
+ VkDevice dev;
+
+    // Surface, must be initialized after the context itself
+ VkSurfaceKHR surf;
+ VkSurfaceFormatKHR surf_format; // picked at surface initialization time
+
+ struct vk_malloc *alloc; // memory allocator for this device
+ struct vk_cmdpool *pool; // command pool for this device
+
+ // Cached capabilities
+ VkPhysicalDeviceLimits limits;
+};
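
The fields are filled in stages. A condensed sketch of the intended
initialization order, mirroring preinit() in vo_vulkan.c above (init_vulkan is
a hypothetical wrapper; error handling is simplified and the comments note
which fields each step is expected to populate):

    static bool init_vulkan(struct vo *vo, struct mpvk_ctx *vk,
                            int debug, const char *device, int allow_sw)
    {
        vk->log = vo->log;
        if (!mpvk_instance_init(vk, debug) ||               // inst (and dbg)
            !mpvk_surface_init(vo, vk) ||                   // surf
            !mpvk_find_phys_device(vk, device, allow_sw) || // physd
            !mpvk_pick_surface_format(vk) ||                // surf_format
            !mpvk_device_init(vk))                          // dev, alloc, pool
        {
            mpvk_uninit(vk);
            return false;
        }
        return true;
    }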
diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c
new file mode 100644
index 0000000000000..b44bead99cc80
--- /dev/null
+++ b/video/out/vulkan/formats.c
@@ -0,0 +1,55 @@
+#include "formats.h"
+
+const struct vk_format vk_formats[] = {
+ // Regular, byte-aligned integer formats
+ {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM },
+ {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM },
+ {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM },
+ {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM },
+ {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM },
+ {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM },
+ {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM },
+ {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM },
+
+ // Special, integer-only formats
+ {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT },
+ {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT },
+ {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT },
+ {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT },
+ {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT },
+ {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT },
+ {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 64 }, RA_CTYPE_UINT },
+ {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT },
+
+ // Packed integer formats
+ {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM },
+ {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM },
+ {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM },
+ {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM },
+
+ // Float formats (native formats, hf = half float, df = double float)
+ {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT },
+ {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT },
+ {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT },
+ {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT },
+ {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT },
+ {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT },
+ {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT },
+ {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT },
+ {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT },
+ {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT },
+ {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT },
+ {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT },
+
+ // "Swapped" component order images
+ {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true },
+ {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true },
+ {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true },
+ {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true },
+ {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true },
+ {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true },
+ {0}
+};
diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h
new file mode 100644
index 0000000000000..e57275a153a12
--- /dev/null
+++ b/video/out/vulkan/formats.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "video/out/opengl/ra.h"
+#include "common.h"
+
+struct vk_format {
+ const char *name;
+ VkFormat iformat; // vulkan format enum
+ int components; // how many components are there
+ int bytes; // how many bytes is a texel
+ int bits[4]; // how many bits per component
+ enum ra_ctype ctype; // format representation type
+ bool fucked_order; // used for formats which are not simply rgba
+};
+
+extern const struct vk_format vk_formats[];
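
The table is terminated by a zeroed entry (name == NULL), so consumers scan it
linearly; a minimal lookup sketch (vk_find_format is a hypothetical helper, not
part of this patch):

    // Find a vk_format by name, or return NULL if the table has no such entry.
    static const struct vk_format *vk_find_format(const char *name)
    {
        for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
            if (strcmp(fmt->name, name) == 0)
                return fmt;
        }
        return NULL;
    }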
diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c
new file mode 100644
index 0000000000000..cdab6eb590e63
--- /dev/null
+++ b/video/out/vulkan/malloc.c
@@ -0,0 +1,315 @@
+#include "malloc.h"
+#include "utils.h"
+#include "osdep/timer.h"
+
+// Controls how much more space we will allocate than actually necessary.
+// Increasing this number increases the amount of memory used in total, but
+// decreases the frequency at which slabs need to be allocated and freed. A
+// value of 4 means the slabs will be allocated 4 times as large as they need
+// to be.
+#define MPVK_HEAP_SLAB_OVERCOMMIT 4
+
+// Controls the minimum slab size, to avoid allocating lots of tiny slabs when
+// serving many small allocations. (Default: 1 MB)
+#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20)
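+// For example: a 300 KiB request on an empty heap gets a fresh slab of
+// MPMAX(1 MiB, 4 * 300 KiB) = 1200 KiB (see resize_heap below).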
+
+// A single slab represents a contiguous region of allocated memory. Actual
+// allocations are served as slices of this. Slabs are organized into linked
+// lists, which represent individual heaps.
+struct vk_slab {
+ struct vk_slab *next; // pointer to next vk_slab, or NULL
+ VkDeviceMemory mem; // underlying device allocation
+ VkDeviceSize size; // total size of `slab`
+ VkDeviceSize used; // number of bytes actually in use (for GC accounting)
+ VkDeviceSize index; // next free byte in `slab`
+ // optional, depends on the memory type:
+ VkBuffer buffer; // buffer spanning the entire slab
+ void *data; // mapped memory corresponding to `mem`
+};
+
+struct vk_heap {
+ VkBufferUsageFlagBits usage; // or 0 for generic heaps
+ struct vk_slab *tip; // linked list of slabs that form this heap
+};
+
+// Represents a single memory type. All allocations of this memory type are
+// grouped together into heaps; one per buffer usage type and one for generic
+// allocations (e.g. images).
+struct vk_memtype {
+ int index; // the memory type index
+ int heapIndex; // the memory heap index
+ VkMemoryPropertyFlagBits flags; // the memory type bits
+ struct vk_heap generic_heap; // the heap for generic allocations
+    // An array of heaps, one per buffer usage type (grows dynamically). Each
+    // heap is allocated separately, so buf_heaps can be resized without
+    // invalidating the vk_heap pointers stored in memslice.priv.
+ struct vk_heap **buf_heaps;
+ int num_buf_heaps;
+};
+
+// The overall state of the allocator, which keeps track of a vk_heap for each
+// memory type supported by the device.
+struct vk_malloc {
+ struct vk_memtype types[VK_MAX_MEMORY_TYPES];
+ int num_types;
+};
+
+void vk_malloc_init(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+
+ struct vk_malloc *ma = vk->alloc = talloc_zero(NULL, struct vk_malloc);
+
+ VkPhysicalDeviceMemoryProperties prop;
+ vkGetPhysicalDeviceMemoryProperties(vk->physd, &prop);
+
+ ma->num_types = prop.memoryTypeCount;
+ for (int i = 0; i < prop.memoryTypeCount; i++) {
+ ma->types[i] = (struct vk_memtype) {
+ .index = i,
+ .heapIndex = prop.memoryTypes[i].heapIndex,
+ .flags = prop.memoryTypes[i].propertyFlags,
+ };
+ }
+}
+
+// "Unlinks" a slab. The slab_ptr is updated to the next link in the chain,
+// or NULL if none left.
+static void slab_free(struct mpvk_ctx *vk, struct vk_slab **slab_ptr)
+{
+ struct vk_slab *slab = *slab_ptr;
+ if (!slab)
+ return;
+
+ assert(slab->used == 0);
+
+ int64_t start = mp_time_us();
+ vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR);
+ // also implicitly unmaps the memory if needed
+ vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR);
+ int64_t stop = mp_time_us();
+
+ MP_VERBOSE(vk, "Freeing slab of size %lu took %ld μs.\n",
+ slab->size, stop - start);
+
+ *slab_ptr = slab->next;
+ talloc_free(slab);
+}
+
+static void heap_uninit(struct mpvk_ctx *vk, struct vk_heap *heap)
+{
+ while (heap->tip)
+ slab_free(vk, &heap->tip);
+}
+
+void vk_malloc_uninit(struct mpvk_ctx *vk)
+{
+ struct vk_malloc *ma = vk->alloc;
+ if (!ma)
+ return;
+
+ for (int i = 0; i < ma->num_types; i++) {
+ heap_uninit(vk, &ma->types[i].generic_heap);
+ for (int b = 0; b < ma->types[i].num_buf_heaps; b++) {
+ heap_uninit(vk, ma->types[i].buf_heaps[b]);
+ talloc_free(ma->types[i].buf_heaps[b]);
+ }
+ talloc_free(ma->types[i].buf_heaps);
+ }
+
+ talloc_free(vk->alloc);
+}
+
+// reqs: optional
+static struct vk_memtype *find_best_memtype(struct mpvk_ctx *vk,
+ VkMemoryPropertyFlagBits flags,
+ VkMemoryRequirements *reqs)
+{
+ struct vk_malloc *ma = vk->alloc;
+
+ // The vulkan spec requires memory types to be sorted in the "optimal"
+ // order, so the first matching type we find will be the best/fastest one.
+ for (int i = 0; i < ma->num_types; i++) {
+ // The memory type flags must include our properties
+ if ((ma->types[i].flags & flags) != flags)
+ continue;
+ // The memory type must be supported by the requirements (bitfield)
+ if (reqs && !(reqs->memoryTypeBits & (1 << i)))
+ continue;
+
+ return &ma->types[i];
+ }
+
+ MP_ERR(vk, "Found no memory type matching property flags 0x%x!\n", flags);
+ return NULL;
+}
+
+// Resizes a heap to make sure we have enough free bytes to serve an allocation
+static bool resize_heap(struct mpvk_ctx *vk, struct vk_memtype *type,
+ struct vk_heap *heap, VkDeviceSize size,
+ VkDeviceSize align)
+{
+ // If the tip already exists and is large enough, we can return right away
+ if (heap->tip) {
+ if (MP_ALIGN_UP(heap->tip->index, align) + size <= heap->tip->size)
+ return true;
+
+ // If the tip exists but is not large enough and has no other current
+ // allocations, free it right away to avoid accumulating garbage.
+ if (heap->tip->used == 0)
+ slab_free(vk, &heap->tip);
+ }
+
+ // Otherwise, allocate a new vk_slab and prepend it to the linked list
+ struct vk_slab *slab = talloc_ptrtype(NULL, slab);
+
+ VkDeviceSize minSize = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE,
+ MPVK_HEAP_SLAB_OVERCOMMIT * size);
+ *slab = (struct vk_slab) {
+ .next = heap->tip,
+ .size = heap->tip ? MPMAX(heap->tip->size, minSize) : minSize,
+ };
+
+ MP_VERBOSE(vk, "Allocating %lu memory of type 0x%x (id %d) in heap %d.\n",
+ slab->size, type->flags, type->index, type->heapIndex);
+
+ VkMemoryAllocateInfo minfo = {
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .memoryTypeIndex = type->index,
+ .allocationSize = slab->size,
+ };
+
+ if (heap->usage) {
+ VkBufferCreateInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = slab->size,
+ .usage = heap->usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ };
+
+ VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer));
+
+ VkMemoryRequirements reqs;
+ vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs);
+ minfo.allocationSize = reqs.size; // this can be larger than slab->size
+
+ // Sanity check the memory requirements to make sure we didn't screw up
+ if (!(reqs.memoryTypeBits & (1 << type->index))) {
+ MP_ERR(vk, "Chosen memory type %d does not support buffer usage "
+ "0x%x!\n", type->index, heap->usage);
+ goto error;
+ }
+ }
+
+ VK(vkAllocateMemory(vk->dev, &minfo, MPVK_ALLOCATOR, &slab->mem));
+
+ if (type->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+ VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data));
+
+ if (heap->usage)
+ VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0));
+
+ heap->tip = slab;
+ return true;
+
+error:
+ slab_free(vk, &slab);
+ return false;
+}
+
+void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice)
+{
+ struct vk_heap *heap = slice.priv;
+
+ // Find the slab containing this allocation, while also keeping track
+ // of the pointer to it (so we can unlink it from the list if needed)
+ struct vk_slab **slab_ptr = &heap->tip;
+ struct vk_slab *slab = *slab_ptr;
+ while (slab) {
+ if (slab->mem == slice.vkmem)
+ break;
+ slab_ptr = &slab->next;
+ slab = *slab_ptr;
+ }
+
+ assert(slab);
+ assert(slab->used >= slice.size);
+ slab->used -= slice.size;
+
+ MP_DBG(vk, "Freeing slice %lu + %lu from slab with size %lu\n",
+ slice.offset, slice.size, slab->size);
+
+ if (slab->used == 0 && slab != heap->tip)
+ slab_free(vk, slab_ptr);
+}
+
+static bool slice_heap(struct mpvk_ctx *vk, struct vk_memtype *type,
+ struct vk_heap *heap, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_memslice *out)
+{
+ if (!resize_heap(vk, type, heap, size, alignment))
+ return false;
+
+ struct vk_slab *tip = heap->tip;
+ assert(tip);
+ *out = (struct vk_memslice) {
+ .vkmem = tip->mem,
+ .offset = MP_ALIGN_UP(tip->index, alignment),
+ .size = size,
+ .priv = heap,
+ };
+
+ MP_DBG(vk, "Sub-allocating slice %lu + %lu from slab with size %lu\n",
+ out->offset, out->size, tip->size);
+
+ tip->index = out->offset + size;
+ tip->used += size;
+ return true;
+}
+
+bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
+ VkMemoryPropertyFlagBits flags, struct vk_memslice *out)
+{
+ struct vk_memtype *type = find_best_memtype(vk, flags, &reqs);
+ if (!type)
+ return false;
+
+ struct vk_heap *heap = &type->generic_heap;
+ return slice_heap(vk, type, heap, reqs.size, reqs.alignment, out);
+}
+
+bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlagBits bufFlags,
+ VkMemoryPropertyFlagBits memFlags, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_bufslice *out)
+{
+ struct vk_memtype *type = find_best_memtype(vk, memFlags, NULL);
+ if (!type)
+ return false;
+
+ struct vk_heap *heap = NULL;
+ for (int i = 0; i < type->num_buf_heaps; i++) {
+ if (type->buf_heaps[i]->usage == bufFlags) {
+ heap = type->buf_heaps[i];
+ goto found;
+ }
+ }
+
+ // no buffer heap with this type => add it
+ MP_TARRAY_GROW(NULL, type->buf_heaps, type->num_buf_heaps + 1);
+ heap = type->buf_heaps[type->num_buf_heaps++] = talloc_ptrtype(NULL, heap);
+
+ *heap = (struct vk_heap) {
+ .usage = bufFlags,
+ };
+
+found:
+ if (!slice_heap(vk, type, heap, size, alignment, &out->mem))
+ return false;
+
+ struct vk_slab *tip = heap->tip;
+ out->buf = tip->buffer;
+ if (tip->data)
+ out->data = (void *)((uintptr_t)tip->data + (ptrdiff_t)out->mem.offset);
+
+ return true;
+}
diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h
new file mode 100644
index 0000000000000..1963950d54f3a
--- /dev/null
+++ b/video/out/vulkan/malloc.h
@@ -0,0 +1,35 @@
+#pragma once
+
+#include "common.h"
+
+void vk_malloc_init(struct mpvk_ctx *vk);
+void vk_malloc_uninit(struct mpvk_ctx *vk);
+
+// Represents a single "slice" of generic (non-buffer) memory, plus some
+// metadata for accounting. This struct is essentially read-only.
+struct vk_memslice {
+ VkDeviceMemory vkmem;
+ VkDeviceSize offset;
+ VkDeviceSize size;
+ void *priv;
+};
+
+void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice);
+bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs,
+ VkMemoryPropertyFlagBits flags, struct vk_memslice *out);
+
+// Represents a single "slice" of a larger buffer
+struct vk_bufslice {
+ struct vk_memslice mem; // must be freed by the user when done
+ VkBuffer buf; // the buffer this memory was sliced from
+ // For persistently mapped buffers, this points to the first usable byte of
+ // this slice.
+ void *data;
+};
+
+// Allocate a buffer slice. This is more efficient than vk_malloc_generic when
+// the user needs lots of buffers, since it doesn't require creating/destroying
+// lots of (little) VkBuffers.
+bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlagBits bufFlags,
+ VkMemoryPropertyFlagBits memFlags, VkDeviceSize size,
+ VkDeviceSize alignment, struct vk_bufslice *out);
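
A minimal usage sketch of the buffer path (illustrative only; `vk` is an
initialized mpvk_ctx, and the slice must not be released while the GPU may
still be reading from it):

    // Sub-allocate a small host-visible staging slice, write to it, release it.
    const char msg[] = "hello";
    struct vk_bufslice slice;
    if (vk_malloc_buffer(vk, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
                         sizeof(msg), 4, &slice))
    {
        if (slice.data)                      // persistently mapped host memory
            memcpy(slice.data, msg, sizeof(msg));
        // ... record a transfer from slice.buf at offset slice.mem.offset ...
        vk_free_memslice(vk, slice.mem);     // return the slice to its slab
    }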
diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c
new file mode 100644
index 0000000000000..853d868a861d6
--- /dev/null
+++ b/video/out/vulkan/ra_vk.c
@@ -0,0 +1,1588 @@
+#include "ra_vk.h"
+#include "malloc.h"
+#include "video/out/opengl/utils.h"
+
+// For ra.priv
+struct ra_vk {
+ struct mpvk_ctx *vk;
+ struct ra_tex *clear_tex; // stupid hack for clear()
+ // "Currently recording" command buffer
+ struct vk_cmd *active_cmd;
+};
+
+static struct mpvk_ctx *vk_get(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ return p->vk;
+}
+
+static struct vk_cmd *vk_require_cmd(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct vk_cmdpool *pool = vk->pool;
+
+ if (p->active_cmd) {
+ assert(p->active_cmd->pool == pool);
+ return p->active_cmd;
+ }
+
+ struct vk_cmd *cmd = vk_cmd_begin(vk, pool);
+ return p->active_cmd = cmd;
+}
+
+// Note: This technically follows the flush() API, but we don't need
+// to expose that (and in fact, it's a bad idea) since we control flushing
+// behavior with ra_vk_present_frame already.
+static void vk_flush(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ if (!p->active_cmd)
+ return;
+
+ vk_cmd_submit(vk, p->active_cmd, NULL);
+ p->active_cmd = NULL;
+}
+
+// the callback's *priv will always be set to `ra`
+static void vk_callback(struct ra *ra, vk_cb callback, void *arg)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ if (p->active_cmd) {
+ vk_cmd_callback(p->active_cmd, callback, ra, arg);
+ } else {
+ vk_dev_callback(vk, callback, ra, arg);
+ }
+}
+
+#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \
+ static void fun##_lazy(struct ra *ra, argtype *arg) { \
+ vk_callback(ra, (vk_cb) fun, arg); \
+ }
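+// e.g. MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex) defines
+// vk_tex_destroy_lazy(), which runs the real destructor via vk_callback() only
+// once the GPU is done with any commands that might still use the object.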
+
+static void vk_destroy_ra(struct ra *ra)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ vk_flush(ra);
+ mpvk_wait_idle(vk);
+ ra_tex_free(ra, &p->clear_tex);
+
+ talloc_free(ra);
+}
+
+static bool vk_setup_formats(struct ra *ra)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) {
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop);
+
+ // As a bare minimum, we need to sample from an allocated image
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+ if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT))
+ continue;
+
+ VkFormatFeatureFlags linear_bits, render_bits;
+ linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+
+ struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+ *fmt = (struct ra_format) {
+ .name = vk_fmt->name,
+ .priv = (void *)vk_fmt,
+ .ctype = vk_fmt->ctype,
+ .ordered = !vk_fmt->fucked_order,
+ .num_components = vk_fmt->components,
+ .pixel_size = vk_fmt->bytes,
+ .linear_filter = !!(flags & linear_bits),
+ .renderable = !!(flags & render_bits),
+ };
+
+ for (int i = 0; i < 4; i++)
+ fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i];
+
+ MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+ }
+
+ // Populate some other capabilities related to formats while we're at it
+ VkImageType imgType[3] = {
+ VK_IMAGE_TYPE_1D,
+ VK_IMAGE_TYPE_2D,
+ VK_IMAGE_TYPE_3D
+ };
+
+ // R8_UNORM is supported on literally every single vulkan implementation
+ const VkFormat testfmt = VK_FORMAT_R8_UNORM;
+
+ for (int d = 0; d < 3; d++) {
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL,
+ VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop);
+
+ switch (imgType[d]) {
+ case VK_IMAGE_TYPE_1D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_1D;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ // 2D formats must be supported by RA, so ensure this is the case
+ VK_ASSERT(res, "Querying 2D format limits");
+ ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (res == VK_SUCCESS)
+ ra->caps |= RA_CAP_TEX_3D;
+ break;
+ }
+ }
+
+    // RA_CAP_BLIT implies both blitting between images and blitting directly
+    // to the swapchain image, so check all three operations
+ bool blittable = true;
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT))
+ blittable = false;
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop);
+ if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT))
+ blittable = false;
+
+ if (blittable)
+ ra->caps |= RA_CAP_BLIT;
+
+ return true;
+
+error:
+ return false;
+}
+
+static struct ra_fns ra_fns_vk;
+
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log)
+{
+ assert(vk->dev);
+ assert(vk->alloc);
+
+ struct ra *ra = talloc_zero(NULL, struct ra);
+ ra->log = log;
+ ra->fns = &ra_fns_vk;
+
+ struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk);
+ p->vk = vk;
+
+ // There's no way to query the supported GLSL version from VK_NV_glsl_shader
+ // (thanks nvidia), so just pick the GL version that modern nvidia devices
+    // support.
+ ra->glsl_version = 450;
+ ra->glsl_vulkan = true;
+ ra->max_shmem = vk->limits.maxComputeSharedMemorySize;
+ ra->caps = RA_CAP_NESTED_ARRAY;
+
+ if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT)
+ ra->caps |= RA_CAP_COMPUTE;
+
+ if (!vk_setup_formats(ra))
+ goto error;
+
+ // UBO support is required
+ ra->caps |= RA_CAP_BUF_RO;
+
+ // Try creating a shader storage buffer
+ struct ra_buf_params ssbo_params = {
+ .type = RA_BUF_TYPE_SHADER_STORAGE,
+ .size = 16,
+ };
+
+ struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params);
+ if (ssbo) {
+ ra->caps |= RA_CAP_BUF_RW;
+ ra_buf_free(ra, &ssbo);
+ }
+
+ // To support clear() by region, we need to allocate a dummy 1x1 image that
+ // will be used as the source of blit operations
+ struct ra_tex_params clear_params = {
+ .dimensions = 1, // no point in using a 2D image if height = 1
+ .w = 1,
+ .h = 1,
+ .d = 1,
+ .format = ra_find_float16_format(ra, 4),
+ .blit_src = 1,
+ .host_mutable = 1,
+ };
+
+ p->clear_tex = ra_tex_create(ra, &clear_params);
+ if (!p->clear_tex) {
+ MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n");
+ goto error;
+ }
+
+ return ra;
+
+error:
+ vk_destroy_ra(ra);
+ return NULL;
+}
+
+// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain
+// compatible
+static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt,
+ bool load_fbo, VkRenderPass *out)
+{
+ struct vk_format *vk_fmt = fmt->priv;
+ assert(fmt->renderable);
+
+ VkRenderPassCreateInfo rinfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &(VkAttachmentDescription) {
+ .format = vk_fmt->iformat,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .loadOp = load_fbo ? VK_ATTACHMENT_LOAD_OP_LOAD
+ : VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses = &(VkSubpassDescription) {
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .colorAttachmentCount = 1,
+ .pColorAttachments = &(VkAttachmentReference) {
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ };
+
+ return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out);
+}
+
+// For ra_tex.priv
+struct ra_tex_vk {
+ bool external_img;
+ VkImageType type;
+ VkImage img;
+ struct vk_memslice mem;
+ // for sampling
+ VkImageView view;
+ VkSampler sampler;
+ // for rendering
+ VkFramebuffer framebuffer;
+ VkRenderPass dummyPass;
+ // for uploading
+ struct ra_buf_pool pbo;
+ // "current" metadata, can change during the course of execution
+ VkImageLayout current_layout;
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+// Small helper to ease image barrier creation. If `discard` is set, the
+// contents of the image will be undefined after the barrier.
+static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, VkImageLayout newLayout,
+ bool discard)
+{
+ VkImageMemoryBarrier imgBarrier = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .oldLayout = tex_vk->current_layout,
+ .newLayout = newLayout,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .srcAccessMask = tex_vk->current_access,
+ .dstAccessMask = newAccess,
+ .image = tex_vk->img,
+ .subresourceRange = vk_range,
+ };
+
+ if (discard) {
+ imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+ imgBarrier.srcAccessMask = 0;
+ }
+
+ vkCmdPipelineBarrier(cmd->buf, tex_vk->current_stage, newStage, 0,
+ 0, NULL, 0, NULL, 1, &imgBarrier);
+
+ tex_vk->current_stage = newStage;
+ tex_vk->current_layout = newLayout;
+ tex_vk->current_access = newAccess;
+}
+
+static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+ if (!tex)
+ return;
+
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ ra_buf_pool_uninit(ra, &tex_vk->pbo);
+ vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR);
+ vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR);
+ vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR);
+ if (!tex_vk->external_img) {
+ vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR);
+ vk_free_memslice(vk, tex_vk->mem);
+ }
+
+ talloc_free(tex);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex);
+
+// Initializes non-VkImage values like the image view, samplers, etc.
+static bool vk_init_image(struct ra *ra, struct ra_tex *tex)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ struct ra_tex_params *params = &tex->params;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex_vk->img);
+
+ tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+ tex_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ tex_vk->current_access = 0;
+
+ if (params->render_src || params->render_dst) {
+ static const VkImageViewType viewType[] = {
+ [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D,
+ [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D,
+ [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D,
+ };
+
+ const struct vk_format *fmt = params->format->priv;
+ VkImageViewCreateInfo vinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = tex_vk->img,
+ .viewType = viewType[tex_vk->type],
+ .format = fmt->iformat,
+ .subresourceRange = vk_range,
+ };
+
+ VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view));
+ }
+
+ if (params->render_src) {
+ assert(params->format->linear_filter || !params->src_linear);
+ VkFilter filter = params->src_linear
+ ? VK_FILTER_LINEAR
+ : VK_FILTER_NEAREST;
+ VkSamplerAddressMode wrap = params->src_repeat
+ ? VK_SAMPLER_ADDRESS_MODE_REPEAT
+ : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+ VkSamplerCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filter,
+ .minFilter = filter,
+ .addressModeU = wrap,
+ .addressModeV = wrap,
+ .addressModeW = wrap,
+ .maxAnisotropy = 1.0,
+ };
+
+ VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler));
+ }
+
+ if (params->render_dst) {
+ // Framebuffers need to be created against a specific render pass
+ // layout, so we need to temporarily create a skeleton/dummy render
+ // pass for vulkan to figure out the compatibility
+ VK(vk_create_render_pass(vk->dev, params->format, false, &tex_vk->dummyPass));
+
+ VkFramebufferCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .renderPass = tex_vk->dummyPass,
+ .attachmentCount = 1,
+ .pAttachments = &tex_vk->view,
+ .width = tex->params.w,
+ .height = tex->params.h,
+ .layers = 1,
+ };
+
+ VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR,
+ &tex_vk->framebuffer));
+
+ // NOTE: Normally we would free the dummyPass again here, but a bug
+ // in the nvidia vulkan driver causes a segfault if you do.
+ }
+
+ return true;
+
+error:
+ return false;
+}
+
+static struct ra_tex *vk_tex_create(struct ra *ra,
+ const struct ra_tex_params *params)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = *params;
+ tex->params.initial_data = NULL;
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+
+ const struct vk_format *fmt = params->format->priv;
+ switch (params->dimensions) {
+ case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break;
+ case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break;
+ case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break;
+ default: abort();
+ }
+
+ VkImageUsageFlags usage = 0;
+ if (params->render_src)
+ usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
+ if (params->render_dst)
+ usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+ if (params->storage_dst)
+ usage |= VK_IMAGE_USAGE_STORAGE_BIT;
+ if (params->blit_src)
+ usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
+ if (params->host_mutable || params->blit_dst || params->initial_data)
+ usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+
+ // Double-check image usage support and fail immediately if invalid
+ VkImageFormatProperties iprop;
+ VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd,
+ fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0,
+ &iprop);
+ if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ return NULL;
+ } else {
+ VK_ASSERT(res, "Querying image format properties");
+ }
+
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+ VkFormatFeatureFlags flags = prop.optimalTilingFeatures;
+
+ bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT,
+ has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+
+ if (params->w > iprop.maxExtent.width ||
+ params->h > iprop.maxExtent.height ||
+ params->d > iprop.maxExtent.depth ||
+ (params->blit_src && !has_blit_src) ||
+ (params->src_linear && !has_src_linear))
+ {
+ return NULL;
+ }
+
+ VkImageCreateInfo iinfo = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = tex_vk->type,
+ .format = fmt->iformat,
+ .extent = (VkExtent3D) { params->w, params->h, params->d },
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ };
+
+ VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img));
+
+ VkMemoryPropertyFlagBits memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ VkMemoryRequirements reqs;
+ vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs);
+
+ struct vk_memslice *mem = &tex_vk->mem;
+ if (!vk_malloc_generic(vk, reqs, memFlags, mem))
+ goto error;
+
+ VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset));
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ if (params->initial_data) {
+ struct ra_tex_upload_params ul_params = {
+ .tex = tex,
+ .invalidate = true,
+ .src = params->initial_data,
+ .stride = params->w * fmt->bytes,
+ };
+ if (!ra->fns->tex_upload(ra, &ul_params))
+ goto error;
+ }
+
+ return tex;
+
+error:
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+struct ra_tex *ra_vk_wrap_swchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct ra_tex *tex = NULL;
+
+ const struct ra_format *format = NULL;
+ for (int i = 0; i < ra->num_formats; i++) {
+ const struct vk_format *fmt = ra->formats[i]->priv;
+ if (fmt->iformat == vk->surf_format.format) {
+ format = ra->formats[i];
+ break;
+ }
+ }
+
+ if (!format) {
+ MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image "
+ "with surface format %d\n", vk->surf_format.format);
+ goto error;
+ }
+
+ tex = talloc_zero(NULL, struct ra_tex);
+ tex->params = (struct ra_tex_params) {
+ .format = format,
+ .dimensions = 2,
+ .w = info.imageExtent.width,
+ .h = info.imageExtent.height,
+ .d = 1,
+ .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+ .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT),
+ .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT),
+ .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT),
+ };
+
+ struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk);
+ tex_vk->type = VK_IMAGE_TYPE_2D;
+ tex_vk->external_img = true;
+ tex_vk->img = vkimg;
+
+ if (!vk_init_image(ra, tex))
+ goto error;
+
+ return tex;
+
+error:
+ vk_tex_destroy(ra, tex);
+ return NULL;
+}
+
+// For ra_buf.priv
+struct ra_buf_vk {
+ struct vk_bufslice slice;
+ bool inuse;
+ bool needsflush;
+ // "current" metadata, can change during course of execution
+ VkPipelineStageFlagBits current_stage;
+ VkAccessFlagBits current_access;
+};
+
+static void buf_free_to_use(void *priv, struct ra_buf_vk *buf_vk)
+{
+ buf_vk->inuse = false;
+}
+
+static void buf_barrier(struct vk_cmd *cmd, struct ra_buf *buf,
+ VkPipelineStageFlagBits newStage,
+ VkAccessFlagBits newAccess, int offset, size_t size)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferMemoryBarrier buffBarrier = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+ .srcAccessMask = buf_vk->current_access,
+ .dstAccessMask = newAccess,
+ .buffer = buf_vk->slice.buf,
+ .offset = offset,
+ .size = size,
+ };
+
+ if (buf_vk->needsflush || buf->params.host_mapped) {
+ buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
+ buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT;
+ buf_vk->needsflush = false;
+ }
+
+ vkCmdPipelineBarrier(cmd->buf, buf_vk->current_stage, newStage, 0,
+ 0, NULL, 1, &buffBarrier, 0, NULL);
+
+ buf_vk->current_stage = newStage;
+ buf_vk->current_access = newAccess;
+ buf_vk->inuse = true;
+
+ vk_cmd_callback(cmd, (vk_cb) buf_free_to_use, NULL, buf_vk);
+}
+
+static void vk_buf_destroy(struct ra *ra, struct ra_buf *buf)
+{
+ if (!buf)
+ return;
+
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ if (buf_vk->slice.buf)
+ vk_free_memslice(vk, buf_vk->slice.mem);
+
+ talloc_free(buf);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf);
+
+static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+ const void *data, size_t size)
+{
+ assert(buf->params.host_mutable || buf->params.initial_data);
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+    // For host-mapped buffers, we can just directly memcpy the buffer contents.
+    // Otherwise, we update the buffer on the GPU using a command buffer.
+ if (buf_vk->slice.data) {
+ assert(offset + size <= buf->params.size);
+ uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset;
+ memcpy((void *)addr, data, size);
+ buf_vk->needsflush = true;
+ } else {
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd) {
+ MP_ERR(ra, "Failed updating buffer!\n");
+ return;
+ }
+
+ VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset;
+ assert(bufOffset == MP_ALIGN_UP(bufOffset, 4));
+ vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data);
+ }
+}
+
+static struct ra_buf *vk_buf_create(struct ra *ra,
+ const struct ra_buf_params *params)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+ buf->params = *params;
+
+ struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk);
+ buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ buf_vk->current_access = 0;
+
+ VkBufferUsageFlagBits bufFlags = 0;
+ VkMemoryPropertyFlagBits memFlags = 0;
+ VkDeviceSize align = 4; // alignment 4 is needed for buf_update
+
+ switch (params->type) {
+ case RA_BUF_TYPE_TEX_UPLOAD:
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
+ break;
+ case RA_BUF_TYPE_UNIFORM:
+ bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_SHADER_STORAGE:
+ bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment);
+ break;
+ case RA_BUF_TYPE_VERTEX:
+ bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
+ memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
+ break;
+ default: abort();
+ }
+
+ if (params->host_mutable || params->initial_data) {
+ bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+ align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment);
+ }
+
+ if (params->host_mapped) {
+ memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+ }
+
+ if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align,
+ &buf_vk->slice))
+ {
+ goto error;
+ }
+
+ if (params->host_mapped)
+ buf->data = buf_vk->slice.data;
+
+ if (params->initial_data)
+ vk_buf_update(ra, buf, 0, params->initial_data, params->size);
+
+ buf->params.initial_data = NULL; // do this after vk_buf_update
+ return buf;
+
+error:
+ vk_buf_destroy(ra, buf);
+ return NULL;
+}
+
+static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf)
+{
+ struct ra_buf_vk *buf_vk = buf->priv;
+ return !buf_vk->inuse;
+}
+
+static bool vk_tex_upload(struct ra *ra,
+ const struct ra_tex_upload_params *params)
+{
+
+ struct ra_tex *tex = params->tex;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ if (!params->buf)
+ return ra_tex_upload_pbo(ra, &tex_vk->pbo, params);
+
+ assert(!params->src);
+ assert(params->buf);
+ struct ra_buf *buf = params->buf;
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ VkBufferImageCopy region = {
+ .bufferOffset = buf_vk->slice.mem.offset + params->buf_offset,
+ .bufferRowLength = tex->params.w,
+ .bufferImageHeight = tex->params.h,
+ .imageSubresource = vk_layers,
+ .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d},
+ };
+
+ if (tex->params.dimensions == 2) {
+ int pix_size = tex->params.format->pixel_size;
+ region.bufferRowLength = params->stride / pix_size;
+ if (region.bufferRowLength * pix_size != params->stride) {
+ MP_ERR(ra, "Texture upload strides must be a multiple of the texel "
+ "size!\n");
+ goto error;
+ }
+
+ if (params->rc) {
+ struct mp_rect *rc = params->rc;
+ region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0};
+ region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1};
+ }
+ }
+
+ uint64_t size = region.bufferRowLength * region.bufferImageHeight *
+ region.imageExtent.depth;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ buf_barrier(cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size);
+
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ params->invalidate);
+
+    vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img,
+                           tex_vk->current_layout, 1, &region);
+
+ return true;
+
+error:
+ return false;
+}
+
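+// Number of descriptor sets allocated per renderpass (see dss/dindex below).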
+#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH
+
+// For ra_renderpass.priv
+struct ra_renderpass_vk {
+ // Compiled shaders
+ VkShaderModule vert;
+ VkShaderModule frag;
+ VkShaderModule comp;
+ // Pipeline / render pass
+ VkPipeline pipe;
+ VkPipelineLayout pipeLayout;
+ VkPipelineCache pipeCache;
+ VkRenderPass renderPass;
+ // Descriptor set (bindings)
+ VkDescriptorSetLayout dsLayout;
+ VkDescriptorPool dsPool;
+ VkDescriptorSet dss[MPVK_NUM_DS];
+ int dindex;
+ // Vertex buffers (vertices)
+ struct ra_buf_pool vbo;
+
+ // For updating
+ VkWriteDescriptorSet *dswrite;
+ VkDescriptorImageInfo *dsiinfo;
+ VkDescriptorBufferInfo *dsbinfo;
+};
+
+static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+ if (!pass)
+ return;
+
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ ra_buf_pool_uninit(ra, &pass_vk->vbo);
+ vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR);
+ vkDestroyPipelineCache(vk->dev, pass_vk->pipeCache, MPVK_ALLOCATOR);
+ vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR);
+ vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR);
+ vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR);
+ vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->vert, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->frag, MPVK_ALLOCATOR);
+ vkDestroyShaderModule(vk->dev, pass_vk->comp, MPVK_ALLOCATOR);
+
+ talloc_free(pass);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass);
+
+static const VkDescriptorType dsType[] = {
+ [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+};
+
+static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp,
+ VkFormat *out_fmt)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ enum ra_ctype ctype;
+ switch (inp->type) {
+ case RA_VARTYPE_FLOAT: ctype = RA_CTYPE_FLOAT; break;
+ case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break;
+ default: abort();
+ }
+
+ assert(inp->dim_m == 1);
+ for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) {
+ if (fmt->ctype != ctype)
+ continue;
+ if (fmt->components != inp->dim_v)
+ continue;
+ if (fmt->bytes != ra_renderpass_input_layout(inp).size)
+ continue;
+
+ // Ensure this format is valid for vertex attributes
+ VkFormatProperties prop;
+ vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop);
+ if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT))
+ continue;
+
+ *out_fmt = fmt->iformat;
+ return true;
+ }
+
+ return false;
+}
+
+static const VkShaderStageFlagBits stageFlags[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT,
+};
+
+static struct ra_renderpass *vk_renderpass_create(struct ra *ra,
+ const struct ra_renderpass_params *params)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+ pass->params = *ra_renderpass_params_copy(pass, params);
+ pass->params.cached_program = (bstr){0};
+ struct ra_renderpass_vk *pass_vk = pass->priv =
+ talloc_zero(pass, struct ra_renderpass_vk);
+
+    int dsCount[RA_VARTYPE_COUNT] = {0};
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ int num_bindings = 0;
+
+ for (int i = 0; i < params->num_inputs; i++) {
+        struct ra_renderpass_input *inp = &params->inputs[i];
+ switch (inp->type) {
+ case RA_VARTYPE_TEX:
+ case RA_VARTYPE_IMG_W:
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ VkDescriptorSetLayoutBinding desc = {
+ .binding = inp->binding,
+ .descriptorType = dsType[inp->type],
+ .descriptorCount = 1,
+ .stageFlags = stageFlags[params->type],
+ };
+
+ MP_TARRAY_APPEND(pass, bindings, num_bindings, desc);
+ dsCount[inp->type]++;
+ break;
+ }
+ default: abort();
+ }
+ }
+
+ VkDescriptorPoolSize *dsPoolSizes = NULL;
+ int poolSizeCount = 0;
+ for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) {
+ if (dsCount[t] > 0) {
+ VkDescriptorPoolSize dssize = {
+ .type = dsType[t],
+ .descriptorCount = dsCount[t] * MPVK_NUM_DS,
+ };
+
+ MP_TARRAY_APPEND(pass, dsPoolSizes, poolSizeCount, dssize);
+ }
+ }
+
+ VkDescriptorPoolCreateInfo pinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .maxSets = MPVK_NUM_DS,
+ .pPoolSizes = dsPoolSizes,
+ .poolSizeCount = poolSizeCount,
+ };
+
+ VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool));
+ talloc_free(dsPoolSizes);
+
+ pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, num_bindings);
+ pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings);
+ pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings);
+
+ VkDescriptorSetLayoutCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .pBindings = bindings,
+ .bindingCount = num_bindings,
+ };
+
+ VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR,
+ &pass_vk->dsLayout));
+
+ VkDescriptorSetAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = pass_vk->dsPool,
+ .descriptorSetCount = 1,
+ .pSetLayouts = &pass_vk->dsLayout,
+ };
+
+ for (int i = 0; i < MPVK_NUM_DS; i++)
+ VK(vkAllocateDescriptorSets(vk->dev, &ainfo, &pass_vk->dss[i]));
+
+ VkPipelineLayoutCreateInfo linfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &pass_vk->dsLayout,
+ };
+
+ VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR,
+ &pass_vk->pipeLayout));
+
+ VkPipelineCacheCreateInfo pcinfo = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+ .pInitialData = params->cached_program.start,
+ .initialDataSize = params->cached_program.len,
+ };
+
+ VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pass_vk->pipeCache));
+
+ VkShaderModuleCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ };
+
+ switch (params->type) {
+ case RA_RENDERPASS_TYPE_RASTER: {
+ sinfo.pCode = (uint32_t *)params->vertex_shader;
+ sinfo.codeSize = strlen(params->vertex_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->vert));
+
+ sinfo.pCode = (uint32_t *)params->frag_shader;
+ sinfo.codeSize = strlen(params->frag_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->frag));
+
+ VK(vk_create_render_pass(vk->dev, params->target_format,
+ params->enable_blend, &pass_vk->renderPass));
+
+ VkPipelineShaderStageCreateInfo stages[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = pass_vk->vert,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = pass_vk->frag,
+ .pName = "main",
+ }
+ };
+
+ VkVertexInputAttributeDescription *attrs = talloc_array(pass,
+ VkVertexInputAttributeDescription, params->num_vertex_attribs);
+
+ for (int i = 0; i < params->num_vertex_attribs; i++) {
+            struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+ attrs[i] = (VkVertexInputAttributeDescription) {
+ .location = i,
+ .binding = 0,
+ .offset = inp->offset,
+ };
+
+ if (!vk_get_input_format(ra, inp, &attrs[i].format)) {
+ MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n",
+ inp->name);
+ goto error;
+ }
+ }
+
+ static const VkBlendFactor blendFactors[] = {
+ [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO,
+ [RA_BLEND_ONE] = VK_BLEND_FACTOR_ONE,
+ [RA_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA,
+ [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ };
+
+ VkPipelineColorBlendAttachmentState binfo = {
+ .blendEnable = params->enable_blend,
+ .colorBlendOp = VK_BLEND_OP_ADD,
+ .srcColorBlendFactor = blendFactors[params->blend_src_rgb],
+ .dstColorBlendFactor = blendFactors[params->blend_dst_rgb],
+ .alphaBlendOp = VK_BLEND_OP_ADD,
+ .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha],
+ .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha],
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT |
+ VK_COLOR_COMPONENT_A_BIT,
+ };
+
+ VkGraphicsPipelineCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = MP_ARRAY_SIZE(stages),
+ .pStages = &stages[0],
+ .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 1,
+ .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) {
+ .binding = 0,
+ .stride = params->vertex_stride,
+ .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+ },
+ .vertexAttributeDescriptionCount = params->num_vertex_attribs,
+ .pVertexAttributeDescriptions = attrs,
+ },
+ .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+ },
+ .pViewportState = &(VkPipelineViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .lineWidth = 1.0f,
+ },
+ .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ },
+ .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = &binfo,
+ },
+ .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates = (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = pass_vk->pipeLayout,
+ .renderPass = pass_vk->renderPass,
+ };
+
+ VK(vkCreateGraphicsPipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo,
+ MPVK_ALLOCATOR, &pass_vk->pipe));
+ break;
+ }
+ case RA_RENDERPASS_TYPE_COMPUTE: {
+ sinfo.pCode = (uint32_t *)params->compute_shader;
+ sinfo.codeSize = strlen(params->compute_shader);
+ VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &pass_vk->comp));
+
+ VkComputePipelineCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = pass_vk->comp,
+ .pName = "main",
+ },
+ .layout = pass_vk->pipeLayout,
+ };
+
+ VK(vkCreateComputePipelines(vk->dev, pass_vk->pipeCache, 1, &cinfo,
+ MPVK_ALLOCATOR, &pass_vk->pipe));
+ break;
+ }
+ }
+
+ // Update cached program
+ bstr *prog = &pass->params.cached_program;
+ VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, NULL));
+ prog->start = talloc_size(pass, prog->len);
+ VK(vkGetPipelineCacheData(vk->dev, pass_vk->pipeCache, &prog->len, prog->start));
+
+ return pass;
+
+error:
+ vk_renderpass_destroy(ra, pass);
+ return NULL;
+}
+
+static void vk_update_descriptor(struct vk_cmd *cmd,
+ struct ra_renderpass *pass,
+ struct ra_renderpass_input_val val,
+ VkDescriptorSet ds, int idx)
+{
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+ struct ra_renderpass_input *inp = &pass->params.inputs[val.index];
+
+ VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx];
+ *wds = (VkWriteDescriptorSet) {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = ds,
+ .dstBinding = inp->binding,
+ .descriptorCount = 1,
+ .descriptorType = dsType[inp->type],
+ };
+
+ switch (inp->type) {
+ case RA_VARTYPE_TEX: {
+ struct ra_tex *tex = *(struct ra_tex **)val.data;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ assert(tex->params.render_src);
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+ VK_ACCESS_SHADER_READ_BIT,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false);
+
+ VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
+ *iinfo = (VkDescriptorImageInfo) {
+ .sampler = tex_vk->sampler,
+ .imageView = tex_vk->view,
+ .imageLayout = tex_vk->current_layout,
+ };
+
+ wds->pImageInfo = iinfo;
+ break;
+ }
+ case RA_VARTYPE_IMG_W: {
+ struct ra_tex *tex = *(struct ra_tex **)val.data;
+ struct ra_tex_vk *tex_vk = tex->priv;
+
+ assert(tex->params.storage_dst);
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+ VK_ACCESS_SHADER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_GENERAL, false);
+
+ VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
+ *iinfo = (VkDescriptorImageInfo) {
+ .imageView = tex_vk->view,
+ .imageLayout = tex_vk->current_layout,
+ };
+
+ wds->pImageInfo = iinfo;
+ break;
+ }
+ case RA_VARTYPE_BUF_RO:
+ case RA_VARTYPE_BUF_RW: {
+ struct ra_buf *buf = *(struct ra_buf **)val.data;
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+        VkAccessFlags access = VK_ACCESS_SHADER_READ_BIT;
+ if (inp->type == RA_VARTYPE_BUF_RW)
+ access |= VK_ACCESS_SHADER_WRITE_BIT;
+
+ buf_barrier(cmd, buf, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
+ access, buf_vk->slice.mem.offset, buf->params.size);
+
+ VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
+ *binfo = (VkDescriptorBufferInfo) {
+ .buffer = buf_vk->slice.buf,
+ .offset = buf_vk->slice.mem.offset,
+ .range = buf->params.size,
+ };
+
+ wds->pBufferInfo = binfo;
+ break;
+ }
+ }
+}
+
+static void vk_renderpass_run(struct ra *ra,
+ const struct ra_renderpass_run_params *params)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct ra_renderpass *pass = params->pass;
+ struct ra_renderpass_vk *pass_vk = pass->priv;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ goto error;
+
+ static const VkPipelineBindPoint bindPoint[] = {
+ [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE,
+ };
+
+ vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe);
+
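+    // Cycle through the small, fixed ring of descriptor sets allocated at
+    // renderpass creation (MPVK_NUM_DS), presumably so the set updated here
+    // is not one that a previously submitted, still-pending frame is using.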
+ VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++];
+ pass_vk->dindex %= MPVK_NUM_DS;
+
+ for (int i = 0; i < params->num_values; i++)
+ vk_update_descriptor(cmd, pass, params->values[i], ds, i);
+
+ if (params->num_values > 0) {
+ vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite,
+ 0, NULL);
+ }
+
+ vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type],
+ pass_vk->pipeLayout, 0, 1, &ds, 0, NULL);
+
+ switch (pass->params.type) {
+ case RA_RENDERPASS_TYPE_COMPUTE:
+ vkCmdDispatch(cmd->buf, params->compute_groups[0],
+ params->compute_groups[1],
+ params->compute_groups[2]);
+ break;
+ case RA_RENDERPASS_TYPE_RASTER: {
+ struct ra_tex *tex = params->target;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex->params.render_dst);
+
+ struct ra_buf_params buf_params = {
+ .type = RA_BUF_TYPE_VERTEX,
+ .size = params->vertex_count * pass->params.vertex_stride,
+ .host_mutable = true,
+ };
+
+ struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params);
+ if (!buf) {
+ MP_ERR(ra, "Failed allocating vertex buffer!\n");
+ goto error;
+ }
+ struct ra_buf_vk *buf_vk = buf->priv;
+
+ vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size);
+
+ buf_barrier(cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+ VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
+ buf_vk->slice.mem.offset, buf_params.size);
+
+ vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf,
+ &buf_vk->slice.mem.offset);
+
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, false);
+
+ VkViewport viewport = {
+ .x = params->viewport.x0,
+ .y = params->viewport.y0,
+ .width = mp_rect_w(params->viewport),
+ .height = mp_rect_h(params->viewport),
+ };
+
+ VkRect2D scissor = {
+ .offset = {params->scissors.x0, params->scissors.y0},
+ .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)},
+ };
+
+ vkCmdSetViewport(cmd->buf, 0, 1, &viewport);
+ vkCmdSetScissor(cmd->buf, 0, 1, &scissor);
+
+ VkRenderPassBeginInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = pass_vk->renderPass,
+ .framebuffer = tex_vk->framebuffer,
+ .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}},
+ };
+
+ vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE);
+ vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0);
+ vkCmdEndRenderPass(cmd->buf);
+ break;
+ }
+ default: abort();
+ };
+
+error:
+ return;
+}
+
+static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+ struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+ assert(src->params.blit_src);
+ assert(dst->params.blit_dst);
+
+ struct ra_tex_vk *src_vk = src->priv;
+ struct ra_tex_vk *dst_vk = dst->priv;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ tex_barrier(cmd, src_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_READ_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ false);
+
+ bool discard = dst_rc->x0 == 0 &&
+ dst_rc->y0 == 0 &&
+ dst_rc->x1 == dst->params.w &&
+ dst_rc->y1 == dst->params.h;
+
+ tex_barrier(cmd, dst_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ discard);
+
+ VkImageBlit region = {
+ .srcSubresource = vk_layers,
+ .srcOffsets = {{src_rc->x0, src_rc->y0, 0}, {src_rc->x1, src_rc->y1, 1}},
+ .dstSubresource = vk_layers,
+ .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0}, {dst_rc->x1, dst_rc->y1, 1}},
+ };
+
+ vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img,
+                   dst_vk->current_layout, 1, &region, VK_FILTER_NEAREST);
+}
+
+static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4],
+ struct mp_rect *rc)
+{
+ struct ra_vk *p = ra->priv;
+ struct ra_tex_vk *tex_vk = tex->priv;
+ assert(tex->params.blit_dst);
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ struct mp_rect full = {0, 0, tex->params.w, tex->params.h};
+ if (!rc || mp_rect_equals(rc, &full)) {
+ // To clear the entire image, we can use the efficient clear command
+ tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT,
+ VK_ACCESS_TRANSFER_WRITE_BIT,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true);
+
+ VkClearColorValue clearColor = {0};
+ for (int c = 0; c < 4; c++)
+ clearColor.float32[c] = color[c];
+
+ vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout,
+ &clearColor, 1, &vk_range);
+ } else {
+ // To simulate per-region clearing, we blit from a 1x1 texture instead
+ struct ra_tex_upload_params ul_params = {
+ .tex = p->clear_tex,
+ .invalidate = true,
+ .src = &color[0],
+ };
+ vk_tex_upload(ra, &ul_params);
+ vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1});
+ }
+}
+
+#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4)
+
+struct vk_timer {
+ VkQueryPool pool;
+ int index;
+ uint64_t result;
+};
+
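+// Note on the design: the query pool is used as a ring of timestamp pairs.
+// vk_timer_start advances to the next pair, reads back (without blocking)
+// whatever result that pair still holds from an earlier use (0 if it is not
+// ready yet), and then reuses the pair for the new measurement. Reported
+// times therefore trail the actual GPU work by a number of frames.
+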
+static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer)
+{
+ if (!ratimer)
+ return;
+
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct vk_timer *timer = ratimer;
+
+ vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR);
+
+ talloc_free(timer);
+}
+
+MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer);
+
+static ra_timer *vk_timer_create(struct ra *ra)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+
+ struct vk_timer *timer = talloc_zero(NULL, struct vk_timer);
+
+ struct VkQueryPoolCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .queryType = VK_QUERY_TYPE_TIMESTAMP,
+ .queryCount = VK_QUERY_POOL_SIZE,
+ };
+
+ VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool));
+
+ return (ra_timer *)timer;
+
+error:
+ vk_timer_destroy(ra, timer);
+ return NULL;
+}
+
+static void vk_timer_start(struct ra *ra, ra_timer *ratimer)
+{
+ struct mpvk_ctx *vk = vk_get(ra);
+ struct vk_timer *timer = ratimer;
+
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+ if (!cmd)
+ return;
+
+ timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE;
+
+ uint64_t out[2];
+ VkResult res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2,
+ sizeof(out), &out[0], sizeof(uint64_t),
+ VK_QUERY_RESULT_64_BIT);
+ switch (res) {
+ case VK_SUCCESS:
+ timer->result = out[1] - out[0];
+ break;
+ case VK_NOT_READY:
+ timer->result = 0;
+ break;
+ default:
+ MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res));
+ return;
+ };
+
+ vkCmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ timer->pool, timer->index);
+}
+
+static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+ struct vk_timer *timer = ratimer;
+ struct vk_cmd *cmd = vk_require_cmd(ra);
+
+ if (cmd) {
+ vkCmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ timer->pool, timer->index + 1);
+ }
+
+ return timer->result;
+}
+
+static struct ra_fns ra_fns_vk = {
+ .destroy = vk_destroy_ra,
+ .tex_create = vk_tex_create,
+ .tex_destroy = vk_tex_destroy_lazy,
+ .tex_upload = vk_tex_upload,
+ .buf_create = vk_buf_create,
+ .buf_destroy = vk_buf_destroy_lazy,
+ .buf_update = vk_buf_update,
+ .buf_poll = vk_buf_poll,
+ .clear = vk_clear,
+ .blit = vk_blit,
+ .renderpass_create = vk_renderpass_create,
+ .renderpass_destroy = vk_renderpass_destroy_lazy,
+ .renderpass_run = vk_renderpass_run,
+ .timer_create = vk_timer_create,
+ .timer_destroy = vk_timer_destroy_lazy,
+ .timer_start = vk_timer_start,
+ .timer_stop = vk_timer_stop,
+};
+
+static void present_cb(struct ra *ra, int *inflight)
+{
+ *inflight -= 1;
+}
+
+bool ra_vk_present_frame(struct ra *ra, struct vk_swimg *swimg, int *inflight)
+{
+ struct ra_vk *p = ra->priv;
+ struct mpvk_ctx *vk = vk_get(ra);
+ assert(p->active_cmd);
+
+ if (inflight) {
+ *inflight += 1;
+ vk_callback(ra, (vk_cb)present_cb, inflight);
+ }
+
+ struct ra_tex *img = swimg->image;
+
+ tex_barrier(p->active_cmd, img->priv, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ 0, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, false);
+
+ // These are the only two stages that we use/support for actually
+    // outputting to swapchain images, so just add a dependency
+ // on both of them. In theory, we could maybe come up with some more
+ // advanced mechanism of tracking dynamic dependencies, but that seems
+ // like overkill.
+ vk_cmd_dep(p->active_cmd, swimg->acquired,
+ VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+ VK_PIPELINE_STAGE_TRANSFER_BIT);
+
+ VkSemaphore done;
+ if (!vk_cmd_submit(vk, p->active_cmd, &done))
+ goto error;
+ p->active_cmd = NULL;
+
+ struct vk_cmdpool *pool = vk->pool;
+ VkQueue queue = pool->queues[pool->qindex];
+ pool->qindex %= pool->qcount;
+
+ VkPresentInfoKHR pinfo = {
+ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &done,
+ .swapchainCount = 1,
+ .pSwapchains = &swimg->chain->swchain,
+ .pImageIndices = &swimg->index,
+ };
+
+ VK(vkQueuePresentKHR(queue, &pinfo));
+
+ return true;
+
+error:
+ return false;
+}
diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h
new file mode 100644
index 0000000000000..214a9af6f3552
--- /dev/null
+++ b/video/out/vulkan/ra_vk.h
@@ -0,0 +1,25 @@
+#pragma once
+
+#include "common.h"
+#include "utils.h"
+#include "video/out/opengl/ra.h"
+
+struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log);
+
+// Access to the VkDevice is needed for swapchain creation
+VkDevice ra_vk_get_dev(struct ra *ra);
+
+// Allocates a ra_tex that wraps a swapchain image. The contents of the image
+// will be invalidated, and access to it will only be internally synchronized.
+// So the calling code should not do anything else with the VkImage.
+struct ra_tex *ra_vk_wrap_swchain_img(struct ra *ra, VkImage vkimg,
+ VkSwapchainCreateInfoKHR info);
+
+// This function flushes the command buffers, and enqueues the image for
+// presentation. This command must only be used after drawing to the vk_swchain,
+// but before the command buffers are flushed for other reasons (for
+// synchronization). The frames_in_flight pointer will be used to track how
+// many frames are currently in flight. (That is, it will be incremented when
+// this function is called, and decremented when the command completes)
+bool ra_vk_present_frame(struct ra *ra, struct vk_swimg *swimg,
+ int *frames_in_flight);
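+
+// Illustrative sketch (not part of this patch): a VO could throttle itself on
+// the in-flight counter before presenting, roughly like this. The names
+// `p->frames_in_flight`, `p->swap_depth` and `swimg` are hypothetical:
+//
+//   while (p->frames_in_flight >= p->swap_depth)
+//       mpvk_poll_cmds(p->vk, p->vk->pool, 1000000000); // wait up to 1s
+//   // ... draw to swimg.image via the RA ...
+//   if (!ra_vk_present_frame(p->ra, &swimg, &p->frames_in_flight))
+//       goto error;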
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
new file mode 100644
index 0000000000000..6c14bce2455d4
--- /dev/null
+++ b/video/out/vulkan/utils.c
@@ -0,0 +1,936 @@
+#include
+
+#include "utils.h"
+#include "malloc.h"
+#include "ra_vk.h"
+#include "video/out/x11_common.h"
+
+const char* vk_err(VkResult res)
+{
+ switch (res) {
+ // These are technically success codes, but include them nonetheless
+ case VK_SUCCESS: return "VK_SUCCESS";
+ case VK_NOT_READY: return "VK_NOT_READY";
+ case VK_TIMEOUT: return "VK_TIMEOUT";
+ case VK_EVENT_SET: return "VK_EVENT_SET";
+ case VK_EVENT_RESET: return "VK_EVENT_RESET";
+ case VK_INCOMPLETE: return "VK_INCOMPLETE";
+
+ // Actual error codes
+ case VK_ERROR_OUT_OF_HOST_MEMORY: return "VK_ERROR_OUT_OF_HOST_MEMORY";
+ case VK_ERROR_OUT_OF_DEVICE_MEMORY: return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
+ case VK_ERROR_INITIALIZATION_FAILED: return "VK_ERROR_INITIALIZATION_FAILED";
+ case VK_ERROR_DEVICE_LOST: return "VK_ERROR_DEVICE_LOST";
+ case VK_ERROR_MEMORY_MAP_FAILED: return "VK_ERROR_MEMORY_MAP_FAILED";
+ case VK_ERROR_LAYER_NOT_PRESENT: return "VK_ERROR_LAYER_NOT_PRESENT";
+ case VK_ERROR_EXTENSION_NOT_PRESENT: return "VK_ERROR_EXTENSION_NOT_PRESENT";
+ case VK_ERROR_FEATURE_NOT_PRESENT: return "VK_ERROR_FEATURE_NOT_PRESENT";
+ case VK_ERROR_INCOMPATIBLE_DRIVER: return "VK_ERROR_INCOMPATIBLE_DRIVER";
+ case VK_ERROR_TOO_MANY_OBJECTS: return "VK_ERROR_TOO_MANY_OBJECTS";
+ case VK_ERROR_FORMAT_NOT_SUPPORTED: return "VK_ERROR_FORMAT_NOT_SUPPORTED";
+ case VK_ERROR_FRAGMENTED_POOL: return "VK_ERROR_FRAGMENTED_POOL";
+ }
+
+ return "Unknown error!";
+}
+
+static const char* vk_dbg_type(VkDebugReportObjectTypeEXT type)
+{
+ switch (type) {
+ case VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT:
+ return "VkInstance";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT:
+ return "VkPhysicalDevice";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT:
+ return "VkDevice";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT:
+ return "VkQueue";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT:
+ return "VkSemaphore";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT:
+ return "VkCommandBuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT:
+ return "VkFence";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT:
+ return "VkDeviceMemory";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT:
+ return "VkBuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT:
+ return "VkImage";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT:
+ return "VkEvent";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT:
+ return "VkQueryPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT:
+ return "VkBufferView";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT:
+ return "VkImageView";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT:
+ return "VkShaderModule";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT:
+ return "VkPipelineCache";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT:
+ return "VkPipelineLayout";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT:
+ return "VkRenderPass";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT:
+ return "VkPipeline";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT:
+ return "VkDescriptorSetLayout";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT:
+ return "VkSampler";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT:
+ return "VkDescriptorPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT:
+ return "VkDescriptorSet";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT:
+ return "VkFramebuffer";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT:
+ return "VkCommandPool";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT:
+ return "VkSurfaceKHR";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT:
+ return "VkSwapchainKHR";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT:
+ return "VkDebugReportCallbackEXT";
+ case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT:
+ default:
+ return "unknown object";
+ }
+}
+
+static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags,
+ VkDebugReportObjectTypeEXT objType,
+ uint64_t obj, size_t loc, int32_t msgCode,
+ const char *layer, const char *msg, void *priv)
+{
+ struct mpvk_ctx *vk = priv;
+ int lev = MSGL_V;
+
+ switch (flags) {
+ case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = MSGL_ERR; break;
+ case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = MSGL_WARN; break;
+ case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = MSGL_TRACE; break;
+ case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = MSGL_WARN; break;
+ case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = MSGL_DEBUG; break;
+ };
+
+ MP_MSG(vk, lev, "vk [%s] %d: %s (obj 0x%lx (%s), loc 0x%lx)\n",
+ layer, msgCode, msg, obj, vk_dbg_type(objType), loc);
+
+    // The return value of this function determines whether the call will
+    // be explicitly aborted (to prevent GPU errors) or not. We only want
+    // that behavior for actual errors.
+ return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT);
+}
+
+void mpvk_uninit(struct mpvk_ctx *vk)
+{
+ if (!vk->inst)
+ return;
+
+ if (vk->dev) {
+ struct vk_cmdpool *pool = vk->pool;
+ // also frees associated command buffers
+ vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR);
+ for (int n = 0; n < MPVK_MAX_CMDS; n++) {
+ vkDestroyFence(vk->dev, pool->cmds[n].fence, MPVK_ALLOCATOR);
+ vkDestroySemaphore(vk->dev, pool->cmds[n].done, MPVK_ALLOCATOR);
+ talloc_free(pool->cmds[n].callbacks);
+ }
+ talloc_free(vk->pool);
+ vk_malloc_uninit(vk);
+ vkDestroyDevice(vk->dev, MPVK_ALLOCATOR);
+ }
+
+ if (vk->dbg) {
+ // Same deal as creating the debug callback, we need to load this
+ // first.
+ VK_LOAD_PFN(vkDestroyDebugReportCallbackEXT)
+ pfn_vkDestroyDebugReportCallbackEXT(vk->inst, vk->dbg, MPVK_ALLOCATOR);
+ }
+
+ vkDestroySurfaceKHR(vk->inst, vk->surf, MPVK_ALLOCATOR);
+ vkDestroyInstance(vk->inst, MPVK_ALLOCATOR);
+
+ *vk = (struct mpvk_ctx){0};
+}
+
+bool mpvk_instance_init(struct mpvk_ctx *vk, bool debug)
+{
+ VkInstanceCreateInfo info = {
+ .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+ };
+
+ if (debug) {
+ // Enables the LunarG standard validation layer, which
+ // is a meta-layer that loads lots of other validators
+ static const char* layers[] = {
+ "VK_LAYER_LUNARG_standard_validation",
+ };
+
+ info.ppEnabledLayerNames = layers;
+ info.enabledLayerCount = MP_ARRAY_SIZE(layers);
+ }
+
+ // Enable whatever extensions were compiled in.
+ static const char *extensions[] = {
+ VK_KHR_SURFACE_EXTENSION_NAME,
+#if HAVE_VULKAN_XLIB
+ VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
+#endif
+
+ // Extra extensions only used for debugging. These are toggled by
+ // decreasing the enabledExtensionCount, so the number needs to be
+ // synchronized with the code below.
+ VK_EXT_DEBUG_REPORT_EXTENSION_NAME,
+ };
+
+ const int debugExtensionCount = 1;
+
+ info.ppEnabledExtensionNames = extensions;
+ info.enabledExtensionCount = MP_ARRAY_SIZE(extensions);
+
+ if (!debug)
+ info.enabledExtensionCount -= debugExtensionCount;
+
+ VkResult res = vkCreateInstance(&info, MPVK_ALLOCATOR, &vk->inst);
+ if (res != VK_SUCCESS) {
+ MP_VERBOSE(vk, "failed creating instance: %s\n", vk_err(res));
+ return false;
+ }
+
+ if (debug) {
+ // Set up a debug callback to catch validation messages
+ VkDebugReportCallbackCreateInfoEXT dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
+ .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
+ VK_DEBUG_REPORT_WARNING_BIT_EXT |
+ VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT |
+ VK_DEBUG_REPORT_ERROR_BIT_EXT |
+ VK_DEBUG_REPORT_DEBUG_BIT_EXT,
+ .pfnCallback = vk_dbg_callback,
+ .pUserData = vk,
+ };
+
+ // Since this is not part of the core spec, we need to load it. This
+ // can't fail because we've already successfully created an instance
+ // with this extension enabled.
+ VK_LOAD_PFN(vkCreateDebugReportCallbackEXT)
+ pfn_vkCreateDebugReportCallbackEXT(vk->inst, &dinfo, MPVK_ALLOCATOR,
+ &vk->dbg);
+ }
+
+ return true;
+}
+
+#define MPVK_MAX_DEVICES 16
+
+static bool physd_supports_surface(struct mpvk_ctx *vk, VkPhysicalDevice physd)
+{
+ uint32_t qfnum;
+ vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL);
+
+ for (int i = 0; i < qfnum; i++) {
+ VkBool32 sup;
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, vk->surf, &sup));
+ if (sup)
+ return true;
+ }
+
+error:
+ return false;
+}
+
+bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw)
+{
+ assert(vk->surf);
+
+ MP_VERBOSE(vk, "Probing for vulkan devices..\n");
+
+ VkPhysicalDevice *devices = NULL;
+ uint32_t num = 0;
+ VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL));
+    devices = talloc_array(NULL, VkPhysicalDevice, num);
+    VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices));
+    num = MPMIN(num, MPVK_MAX_DEVICES); // props[] below has a fixed size
+
+ // Sorted by "priority". Reuses some m_opt code for convenience
+ static const struct m_opt_choice_alternatives types[] = {
+ {"discrete", VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU},
+ {"integrated", VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU},
+ {"virtual", VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU},
+ {"software", VK_PHYSICAL_DEVICE_TYPE_CPU},
+ {"unknown", VK_PHYSICAL_DEVICE_TYPE_OTHER},
+ {0}
+ };
+
+ VkPhysicalDeviceProperties props[MPVK_MAX_DEVICES];
+ for (int i = 0; i < num; i++) {
+ vkGetPhysicalDeviceProperties(devices[i], &props[i]);
+ MP_VERBOSE(vk, "GPU %d: %s (%s)\n", i, props[i].deviceName,
+ m_opt_choice_str(types, props[i].deviceType));
+ }
+
+ // Iterate through each type in order of decreasing preference
+ for (int t = 0; types[t].name; t++) {
+ // Disallow SW rendering unless explicitly enabled
+ if (types[t].value == VK_PHYSICAL_DEVICE_TYPE_CPU && !sw)
+ continue;
+
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties prop = props[i];
+ if (prop.deviceType != types[t].value)
+ continue;
+ if (name && strcmp(name, prop.deviceName) != 0)
+ continue;
+ if (!physd_supports_surface(vk, devices[i]))
+ continue;
+
+ MP_VERBOSE(vk, "Found device:\n");
+ MP_VERBOSE(vk, " Device Name: %s\n", prop.deviceName);
+ MP_VERBOSE(vk, " Device ID: %x:%x\n", prop.vendorID, prop.deviceID);
+ MP_VERBOSE(vk, " Driver version: %d\n", prop.driverVersion);
+ MP_VERBOSE(vk, " API version: %d.%d.%d\n",
+ VK_VERSION_MAJOR(prop.apiVersion),
+ VK_VERSION_MINOR(prop.apiVersion),
+ VK_VERSION_PATCH(prop.apiVersion));
+ vk->physd = devices[i];
+ vk->limits = prop.limits;
+ talloc_free(devices);
+ return true;
+ }
+ }
+
+error:
+ MP_VERBOSE(vk, "Found no suitable device, giving up.\n");
+ talloc_free(devices);
+ return false;
+}
+
+bool mpvk_pick_surface_format(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+
+ VkSurfaceFormatKHR *formats = NULL;
+    uint32_t num;
+
+ // Enumerate through the surface formats and find one that we can map to
+ // a ra_format
+ VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, NULL));
+ formats = talloc_array(NULL, VkSurfaceFormatKHR, num);
+ VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, formats));
+
+ for (int i = 0; i < num; i++) {
+ // A value of VK_FORMAT_UNDEFINED means we can pick anything we want
+ if (formats[i].format == VK_FORMAT_UNDEFINED) {
+ vk->surf_format = (VkSurfaceFormatKHR) {
+ .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
+ .format = VK_FORMAT_R8G8B8A8_UNORM,
+ };
+ break;
+ }
+
+ if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR)
+ continue;
+
+ vk->surf_format = formats[i];
+ break;
+ }
+
+    talloc_free(formats);
+    formats = NULL;
+
+ if (!vk->surf_format.format)
+ goto error;
+
+ return true;
+
+error:
+ MP_ERR(vk, "Failed picking surface format!\n");
+ talloc_free(formats);
+ return false;
+}
+
+bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk)
+{
+ assert(vk->inst);
+ VkResult res;
+
+#if HAVE_VULKAN_XLIB
+ if (!vo_x11_init(vo))
+ goto xlib_uninit;
+
+ if (!vo_x11_create_vo_window(vo, NULL, "mpvk"))
+ goto xlib_uninit;
+
+ VkXlibSurfaceCreateInfoKHR xinfo = {
+ .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+ .dpy = vo->x11->display,
+ .window = vo->x11->window,
+ };
+
+ res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR, &vk->surf);
+ if (res != VK_SUCCESS) {
+ MP_VERBOSE(vo, "Failed creating Xlib surface: %s\n", vk_err(res));
+ goto xlib_uninit;
+ }
+
+ MP_VERBOSE(vo, "Using Xlib surface.\n");
+ return true;
+
+xlib_uninit:
+ vo_x11_uninit(vo);
+#endif
+
+    // If we've reached this point, then none of the above surface probes
+ // were successful
+ MP_ERR(vo, "Failed creating any useful vulkan surface!\n");
+ return false;
+}
+
+bool mpvk_device_init(struct mpvk_ctx *vk)
+{
+ assert(vk->physd);
+
+ VkQueueFamilyProperties *qfs = NULL;
+    uint32_t qfnum;
+
+ // Enumerate the queue families and find suitable families for each task
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL);
+ qfs = talloc_array(NULL, VkQueueFamilyProperties, qfnum);
+ vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs);
+
+ MP_VERBOSE(vk, "Queue families supported by device:\n");
+
+ for (int i = 0; i < qfnum; i++) {
+ MP_VERBOSE(vk, "QF %d: flags 0x%x num %d\n", i, qfs[i].queueFlags,
+ qfs[i].queueCount);
+ }
+
+    // Since using multiple queue families is devilishly difficult, we just
+    // pick a single queue family and stick with it. Given that, it's best to
+    // pick the one that supports the most features.
+
+ int idx = -1;
+ for (int i = 0; i < qfnum; i++) {
+ if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT))
+ continue;
+
+ // QF supports more features
+ if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags)
+ idx = i;
+
+ // QF supports more queues (at the same specialization level)
+ if (qfs[i].queueFlags == qfs[idx].queueFlags &&
+ qfs[i].queueCount > qfs[idx].queueCount)
+ {
+ idx = i;
+ }
+ }
+
+ // Vulkan requires at least one GRAPHICS queue, so if this fails something
+ // is horribly wrong.
+ assert(idx >= 0);
+
+ // Now that we know which queue family we want, we can create the logical
+ // device
+ static const float priorities[MPVK_MAX_QUEUES] = {0};
+ VkDeviceQueueCreateInfo qinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+ .queueFamilyIndex = idx,
+ .queueCount = MPMIN(qfs[idx].queueCount, MPVK_MAX_QUEUES),
+ .pQueuePriorities = priorities,
+ };
+
+ static const char *exts[] = {
+ VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+ VK_NV_GLSL_SHADER_EXTENSION_NAME,
+ };
+
+ VkDeviceCreateInfo dinfo = {
+ .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+ .queueCreateInfoCount = 1,
+ .pQueueCreateInfos = &qinfo,
+ .ppEnabledExtensionNames = exts,
+ .enabledExtensionCount = MP_ARRAY_SIZE(exts),
+ };
+
+ MP_VERBOSE(vk, "Creating vulkan device...\n");
+ VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev));
+
+ vk_malloc_init(vk);
+
+ // Create the vk_cmdpool and all required queues / synchronization objects
+ struct vk_cmdpool *pool = vk->pool = talloc_zero(NULL, struct vk_cmdpool);
+ *pool = (struct vk_cmdpool) {
+ .qf = qinfo.queueFamilyIndex,
+ .props = qfs[qinfo.queueFamilyIndex],
+ .qcount = qinfo.queueCount,
+ };
+
+    talloc_free(qfs);
+    qfs = NULL;
+
+ for (int n = 0; n < pool->qcount; n++)
+ vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]);
+
+ VkCommandPoolCreateInfo cinfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
+ VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = pool->qf,
+ };
+
+ VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool));
+
+ VkCommandBufferAllocateInfo ainfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = pool->pool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = MPVK_MAX_CMDS,
+ };
+
+ VkCommandBuffer cmdbufs[MPVK_MAX_CMDS];
+ VK(vkAllocateCommandBuffers(vk->dev, &ainfo, cmdbufs));
+
+ for (int n = 0; n < MPVK_MAX_CMDS; n++) {
+ struct vk_cmd *cmd = &pool->cmds[n];
+ cmd->pool = pool;
+ cmd->buf = cmdbufs[n];
+
+ VkFenceCreateInfo finfo = {
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .flags = VK_FENCE_CREATE_SIGNALED_BIT,
+ };
+
+ VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence));
+
+ VkSemaphoreCreateInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+
+ VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &cmd->done));
+ }
+
+ // Ensure we can actually present to the surface using this queue
+ VkBool32 sup;
+ VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, pool->qf, vk->surf, &sup));
+ if (!sup) {
+ MP_ERR(vk, "Queue family does not support surface presentation!\n");
+ goto error;
+ }
+
+ return true;
+
+error:
+ MP_ERR(vk, "Failed creating logical device!\n");
+ talloc_free(qfs);
+ return false;
+}
+
+static void run_callbacks(struct vk_cmd *cmd)
+{
+ for (int i = 0; i < cmd->num_callbacks; i++) {
+ struct vk_callback *cb = &cmd->callbacks[i];
+ cb->run(cb->priv, cb->arg);
+ *cb = (struct vk_callback){0};
+ }
+
+ cmd->num_callbacks = 0;
+}
+
+static void wait_for_cmds(struct mpvk_ctx *vk, struct vk_cmd cmds[], int num)
+{
+ if (!num)
+ return;
+
+ VkFence fences[MPVK_MAX_CMDS];
+ for (int i = 0; i < num; i++)
+ fences[i] = cmds[i].fence;
+
+ vkWaitForFences(vk->dev, num, fences, true, UINT64_MAX);
+
+ for (int i = 0; i < num; i++)
+ run_callbacks(&cmds[i]);
+}
+
+void mpvk_wait_idle(struct mpvk_ctx *vk)
+{
+ struct vk_cmdpool *pool = vk->pool;
+
+ int idx = pool->cindex, pidx = pool->cindex_pending;
+ if (pidx < idx) { // range doesn't wrap
+ wait_for_cmds(vk, &pool->cmds[pidx], idx - pidx);
+ } else if (pidx > idx) { // range wraps
+ wait_for_cmds(vk, &pool->cmds[pidx], MPVK_MAX_CMDS - pidx);
+ wait_for_cmds(vk, &pool->cmds[0], idx);
+ }
+ pool->cindex_pending = pool->cindex;
+}
+
+void mpvk_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
+ uint64_t timeout)
+{
+ // If requested, hard block until at least one command completes
+ if (timeout > 0 && pool->cindex_pending != pool->cindex) {
+ vkWaitForFences(vk->dev, 1, &pool->cmds[pool->cindex_pending].fence,
+ true, timeout);
+ }
+
+ // Lazily garbage collect the commands based on their status
+ while (pool->cindex_pending != pool->cindex) {
+ struct vk_cmd *cmd = &pool->cmds[pool->cindex_pending];
+ VkResult res = vkGetFenceStatus(vk->dev, cmd->fence);
+ if (res != VK_SUCCESS)
+ break;
+ run_callbacks(cmd);
+ pool->cindex_pending++;
+ pool->cindex_pending %= MPVK_MAX_CMDS;
+ }
+}
+
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg)
+{
+ struct vk_cmdpool *pool = vk->pool;
+ if (pool->cindex_pending == pool->cindex) {
+ // The device was already idle, so we can just immediately call it
+ callback(p, arg);
+ return;
+ }
+
+ int prev_idx = pool->cindex - 1;
+ if (prev_idx < 0)
+ prev_idx += MPVK_MAX_CMDS;
+
+ struct vk_cmd *last_cmd = &pool->cmds[prev_idx];
+ vk_cmd_callback(last_cmd, callback, p, arg);
+}
+
+const VkImageSubresourceRange vk_range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .levelCount = 1,
+ .layerCount = 1,
+};
+
+const VkImageSubresourceLayers vk_layers = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .layerCount = 1,
+};
+
+void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg)
+{
+ MP_TARRAY_GROW(NULL, cmd->callbacks, cmd->num_callbacks);
+ cmd->callbacks[cmd->num_callbacks++] = (struct vk_callback) {
+ .run = callback,
+ .priv = p,
+ .arg = arg,
+ };
+}
+
+void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep,
+ VkPipelineStageFlagBits depstage)
+{
+ assert(cmd->num_deps < MPVK_MAX_CMD_DEPS);
+ cmd->deps[cmd->num_deps] = dep;
+ cmd->depstages[cmd->num_deps++] = depstage;
+}
+
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool)
+{
+ // Garbage collect the cmdpool first
+ mpvk_poll_cmds(vk, pool, 0);
+
+ int next = (pool->cindex + 1) % MPVK_MAX_CMDS;
+ if (next == pool->cindex_pending) {
+ MP_ERR(vk, "No free command buffers!\n");
+ goto error;
+ }
+
+ struct vk_cmd *cmd = &pool->cmds[pool->cindex];
+ pool->cindex = next;
+
+ VK(vkResetCommandBuffer(cmd->buf, 0));
+
+ VkCommandBufferBeginInfo binfo = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+
+ VK(vkBeginCommandBuffer(cmd->buf, &binfo));
+
+ return cmd;
+
+error:
+ return NULL;
+}
+
+bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done)
+{
+ VK(vkEndCommandBuffer(cmd->buf));
+
+ struct vk_cmdpool *pool = cmd->pool;
+ VkQueue queue = pool->queues[pool->qindex++];
+ pool->qindex %= pool->qcount;
+
+ VkSubmitInfo sinfo = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd->buf,
+ .waitSemaphoreCount = cmd->num_deps,
+ .pWaitSemaphores = cmd->deps,
+ .pWaitDstStageMask = cmd->depstages,
+ };
+
+ if (done) {
+ sinfo.signalSemaphoreCount = 1;
+ sinfo.pSignalSemaphores = &cmd->done;
+ *done = cmd->done;
+ }
+
+ VK(vkResetFences(vk->dev, 1, &cmd->fence));
+ VK(vkQueueSubmit(queue, 1, &sinfo, cmd->fence));
+ MP_TRACE(vk, "Submitted command on queue %p\n", (void *)queue);
+
+ for (int i = 0; i < cmd->num_deps; i++)
+ cmd->deps[i] = NULL;
+ cmd->num_deps = 0;
+
+ return true;
+
+error:
+ return false;
+}
+
+static bool vk_swchain_update_info(struct vk_swchain *chain,
+ VkSwapchainCreateInfoKHR *info)
+{
+ struct mpvk_ctx *vk = chain->vk;
+
+ // Query the supported capabilities and update this struct as needed
+ VkSurfaceCapabilitiesKHR caps;
+ VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps));
+
+ // Sorted by preference
+ static const VkCompositeAlphaFlagBitsKHR alphaModes[] = {
+ VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
+ VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) {
+ if (caps.supportedCompositeAlpha & alphaModes[i]) {
+ info->compositeAlpha = alphaModes[i];
+ break;
+ }
+ }
+
+ if (!info->compositeAlpha) {
+ MP_ERR(vk, "Failed picking alpha compositing mode (caps: %d)\n",
+ caps.supportedCompositeAlpha);
+ goto error;
+ }
+
+ static const VkSurfaceTransformFlagBitsKHR rotModes[] = {
+ VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+ VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR,
+ };
+
+ for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) {
+ if (caps.supportedTransforms & rotModes[i]) {
+ info->preTransform = rotModes[i];
+ break;
+ }
+ }
+
+ if (!info->preTransform) {
+ MP_ERR(vk, "Failed picking surface transform mode (caps: %d)\n",
+ caps.supportedTransforms);
+ goto error;
+ }
+
+ // Image count as required
+ info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount);
+ if (caps.maxImageCount)
+ info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount);
+
+    // Check the extent against the allowed parameters
+ if (caps.currentExtent.width != info->imageExtent.width &&
+ caps.currentExtent.width != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested width %d does not match current width %d\n",
+ info->imageExtent.width, caps.currentExtent.width);
+ info->imageExtent.width = caps.currentExtent.width;
+ }
+
+ if (caps.currentExtent.height != info->imageExtent.height &&
+ caps.currentExtent.height != 0xFFFFFFFF)
+ {
+ MP_WARN(vk, "Requested height %d does not match current height %d\n",
+ info->imageExtent.height, caps.currentExtent.height);
+ info->imageExtent.height = caps.currentExtent.height;
+ }
+
+ if (caps.minImageExtent.width > info->imageExtent.width ||
+ caps.minImageExtent.height > info->imageExtent.height)
+ {
+        MP_ERR(vk, "Requested size %dx%d smaller than device minimum %dx%d\n",
+ info->imageExtent.width, info->imageExtent.height,
+ caps.minImageExtent.width, caps.minImageExtent.height);
+ goto error;
+ }
+
+ if (caps.maxImageExtent.width < info->imageExtent.width ||
+ caps.maxImageExtent.height < info->imageExtent.height)
+ {
+        MP_ERR(vk, "Requested size %dx%d larger than device maximum %dx%d\n",
+ info->imageExtent.width, info->imageExtent.height,
+ caps.maxImageExtent.width, caps.maxImageExtent.height);
+ goto error;
+ }
+
+ // We just request whatever usage we can, and let the ra_vk decide what
+ // ra_tex_params that translates to. This makes the images as flexible
+ // as possible.
+ info->imageUsage = caps.supportedUsageFlags;
+ return true;
+
+error:
+ return false;
+}
+
+bool vk_swchain_init(struct mpvk_ctx *vk, struct ra *ra, int size,
+ struct vk_swchain *chain)
+{
+ assert(vk->dev);
+ assert(vk->surf_format.format);
+
+ struct VkSwapchainCreateInfoKHR dummy = {
+ .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
+ .surface = vk->surf,
+ .minImageCount = size,
+ .imageFormat = vk->surf_format.format,
+ .imageColorSpace = vk->surf_format.colorSpace,
+ .imageArrayLayers = 1, // non-stereoscopic
+ .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .presentMode = VK_PRESENT_MODE_FIFO_KHR,
+ .clipped = true,
+ };
+
+ *chain = (struct vk_swchain) {
+ .vk = vk,
+ .ra = ra,
+ .protoInfo = dummy,
+ };
+
+ return true;
+}
+
+void vk_swchain_uninit(struct ra *ra, struct vk_swchain *chain)
+{
+ struct mpvk_ctx *vk = chain->vk;
+ if (!vk)
+ return;
+
+ // Note: We technically don't even need the struct *ra, it's just there
+ // to "force" the correct uninitialization order at the API level. Either
+ // way, make sure the RA actually matches..
+ assert(ra == chain->ra);
+
+ mpvk_wait_idle(vk);
+
+ for (int i = 0; i < chain->num_images; i++)
+ ra_tex_free(ra, &chain->images[i]);
+ for (int i = 0; i < chain->num_acquired; i++)
+ vkDestroySemaphore(vk->dev, chain->acquired[i], MPVK_ALLOCATOR);
+
+ vkDestroySwapchainKHR(vk->dev, chain->swchain, MPVK_ALLOCATOR);
+
+ talloc_free(chain->images);
+ talloc_free(chain->acquired);
+ *chain = (struct vk_swchain){0};
+}
+
+static void destroy_swapchain(struct mpvk_ctx *vk, VkSwapchainKHR swchain)
+{
+ vkDestroySwapchainKHR(vk->dev, swchain, MPVK_ALLOCATOR);
+}
+
+bool vk_swchain_resize(struct vk_swchain *chain, int w, int h)
+{
+ if (w == chain->w && h == chain->h)
+ return true;
+
+ struct mpvk_ctx *vk = chain->vk;
+ VkImage *vkimages = NULL;
+ bool ret = false;
+
+ VkSwapchainCreateInfoKHR sinfo = chain->protoInfo;
+ sinfo.imageExtent = (VkExtent2D){ w, h };
+ sinfo.oldSwapchain = chain->swchain;
+
+ if (!vk_swchain_update_info(chain, &sinfo))
+ goto error;
+
+ VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &chain->swchain));
+ chain->w = w;
+ chain->h = h;
+
+ // Freeing the old swapchain while it's still in use is an error, so do
+ // it asynchronously once the device is idle.
+ if (sinfo.oldSwapchain)
+ vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, sinfo.oldSwapchain);
+
+ // Get the new swapchain images
+    uint32_t num;
+ VK(vkGetSwapchainImagesKHR(vk->dev, chain->swchain, &num, NULL));
+ vkimages = talloc_array(NULL, VkImage, num);
+ VK(vkGetSwapchainImagesKHR(vk->dev, chain->swchain, &num, vkimages));
+
+ // If needed, allocate some more semaphores
+ while (num > chain->num_acquired) {
+ VkSemaphore sem;
+ static const VkSemaphoreCreateInfo seminfo = {
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ };
+ VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem));
+ MP_TARRAY_APPEND(NULL, chain->acquired, chain->num_acquired, sem);
+ }
+
+ // Recreate the ra_tex wrappers
+ for (int i = 0; i < chain->num_images; i++)
+ ra_tex_free(chain->ra, &chain->images[i]);
+
+ chain->num_images = num;
+ MP_TARRAY_GROW(NULL, chain->images, chain->num_images);
+ for (int i = 0; i < num; i++) {
+ chain->images[i] = ra_vk_wrap_swchain_img(chain->ra, vkimages[i], sinfo);
+ if (!chain->images[i])
+ goto error;
+ }
+
+ ret = true;
+
+error:
+ talloc_free(vkimages);
+ return ret;
+}
+
+bool vk_swchain_get(struct vk_swchain *chain, struct vk_swimg *out)
+{
+ struct mpvk_ctx *vk = chain->vk;
+
+ int semidx = chain->idx_acquired++;
+ chain->idx_acquired %= chain->num_acquired;
+
+ uint32_t imgidx = 0;
+ VK(vkAcquireNextImageKHR(vk->dev, chain->swchain, UINT64_MAX,
+ chain->acquired[semidx], NULL, &imgidx));
+
+ *out = (struct vk_swimg) {
+ .chain = chain,
+ .index = imgidx,
+ .image = chain->images[imgidx],
+ .acquired = chain->acquired[semidx],
+ };
+ return true;
+
+error:
+ return false;
+}
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
new file mode 100644
index 0000000000000..6273ebca95ef4
--- /dev/null
+++ b/video/out/vulkan/utils.h
@@ -0,0 +1,178 @@
+#pragma once
+
+#include "video/out/vo.h"
+#include "video/mp_image.h"
+
+#include "common.h"
+#include "formats.h"
+
+#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \
+ vkGetInstanceProcAddr(vk->inst, #name);
+
+// Return a human-readable name for a VkResult error code
+const char* vk_err(VkResult res);
+
+// Convenience macros to simplify a lot of common boilerplate
+#define VK_ASSERT(res, str) \
+ if (res != VK_SUCCESS) { \
+ MP_ERR(vk, str ": %s\n", vk_err(res)); \
+ goto error; \
+ }
+
+#define VK(cmd) \
+ { \
+ MP_TRACE(vk, #cmd "\n"); \
+ VkResult res ## __LINE__ = (cmd); \
+ VK_ASSERT(res ## __LINE__, #cmd); \
+ }
+
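+// Illustrative example (not part of this patch) of how these macros are meant
+// to be used: the enclosing function needs a `vk` visible for logging and an
+// `error:` label to jump to. `example_create_fence` is a hypothetical helper:
+//
+//   static bool example_create_fence(struct mpvk_ctx *vk, VkFence *out)
+//   {
+//       VkFenceCreateInfo finfo = {
+//           .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+//       };
+//       VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, out));
+//       return true;
+//   error:
+//       return false;
+//   }
+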
+// Uninits everything in the correct order
+void mpvk_uninit(struct mpvk_ctx *vk);
+
+// Initialization functions: As a rule of thumb, these need to be called in
+// this order, followed by vk_malloc_init, followed by RA initialization, and
+// finally followed by vk_swchain initialization.
+
+// Create a vulkan instance. Returns false on failure.
+bool mpvk_instance_init(struct mpvk_ctx *vk, bool validate);
+
+// Create a VkSurfaceKHR usable for video output. Returns false on failure.
+// Must be called after mpvk_instance_init.
+bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk);
+
+// Find a suitable physical device for use with rendering and which supports
+// the surface.
+// name: only match a device with this name
+// sw: also allow software/virtual devices
+bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw);
+
+// Pick a suitable surface format that's supported by this physical device.
+bool mpvk_pick_surface_format(struct mpvk_ctx *vk);
+
+// Create a logical device and initialize the vk_cmdpools
+bool mpvk_device_init(struct mpvk_ctx *vk);
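+
+// Illustrative sketch of the init order described above (error handling
+// trimmed), roughly as a VO's preinit might do it:
+//
+//   if (!mpvk_instance_init(vk, debug))          goto error;
+//   if (!mpvk_surface_init(vo, vk))              goto error;
+//   if (!mpvk_find_phys_device(vk, NULL, false)) goto error;
+//   if (!mpvk_pick_surface_format(vk))           goto error;
+//   if (!mpvk_device_init(vk))                   goto error;
+//   // ... then create the RA (ra_create_vk) and the vk_swchain (vk_swchain_init)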
+
+// Wait until all commands submitted to all queues have completed
+void mpvk_wait_idle(struct mpvk_ctx *vk);
+
+// Wait until at least one command submitted to the given command pool has
+// completed, and process the callbacks. Good for event loops that need to
+// delay until a command completes. Will block at most `timeout` nanoseconds.
+// If used with 0, it only garbage collects completed commands without blocking.
+void mpvk_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool,
+ uint64_t timeout);
+
+// Predefined structs for a simple non-layered, non-mipped image
+extern const VkImageSubresourceRange vk_range;
+extern const VkImageSubresourceLayers vk_layers;
+
+// Since lots of vulkan operations need to be done lazily once the affected
+// resources are no longer in use, provide an abstraction for tracking these.
+// In practice, these are only checked and run when submitting new commands, so
+// the actual execution may be delayed by a frame.
+typedef void (*vk_cb)(void *priv, void *arg);
+
+struct vk_callback {
+ vk_cb run;
+ void *priv;
+ void *arg; // as a convenience, you also get to pass an arg for "free"
+};
+
+// Associate a callback with the completion of all currently pending commands.
+// This will essentially run once the device is completely idle.
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg);
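+
+// Illustrative example, mirroring the destroy_swapchain pattern in utils.c:
+// deferring destruction of a resource that pending commands may still be
+// using. `destroy_buffer` and `buf` are hypothetical:
+//
+//   static void destroy_buffer(struct mpvk_ctx *vk, VkBuffer buf)
+//   {
+//       vkDestroyBuffer(vk->dev, buf, MPVK_ALLOCATOR);
+//   }
+//
+//   vk_dev_callback(vk, (vk_cb) destroy_buffer, vk, buf);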
+
+#define MPVK_MAX_CMD_DEPS 8
+
+// Helper wrapper around command buffers that also track dependencies,
+// callbacks and synchronization primitives
+struct vk_cmd {
+ struct vk_cmdpool *pool; // pool it was allocated from
+ VkCommandBuffer buf;
+ VkFence fence; // the fence guards cmd buffer reuse
+ VkSemaphore done; // the semaphore signals when execution is done
+ // The semaphores represent dependencies that need to complete before
+ // this command can be executed. These are *not* owned by the vk_cmd
+ VkSemaphore deps[MPVK_MAX_CMD_DEPS];
+ VkPipelineStageFlags depstages[MPVK_MAX_CMD_DEPS];
+ int num_deps;
+ // Since VkFences are useless, we have to manually track "callbacks"
+ // to fire once the VkFence completes. These are used for multiple purposes,
+ // ranging from garbage collection (resource deallocation) to fencing.
+ struct vk_callback *callbacks;
+ int num_callbacks;
+};
+
+// Associate a callback with the completion of the current command. The
+// callback will be run once the command completes, or shortly thereafter.
+void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg);
+
+// Associate a dependency for the current command. This semaphore must signal
+// by the corresponding stage before the command may execute.
+void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep,
+ VkPipelineStageFlagBits depstage);
+
+#define MPVK_MAX_QUEUES 8
+#define MPVK_MAX_CMDS 16
+
+// Command pool / queue family hybrid abstraction
+struct vk_cmdpool {
+ VkQueueFamilyProperties props;
+ uint32_t qf; // queue family index
+ VkCommandPool pool;
+ VkQueue queues[MPVK_MAX_QUEUES];
+ int qcount;
+ int qindex;
+    // Command buffers associated with this queue. (No, a VkCommandPool is
+    // not a pool of command buffers; you still have to pool them manually.)
+    // We also have to keep track of the "in flight" (pending) command buffers
+    // separately, to work around vkQueueWaitIdle being completely fucking
+    // useless when using a queue for presentation.
+ struct vk_cmd cmds[MPVK_MAX_CMDS];
+ int cindex;
+ int cindex_pending;
+};
+
+// Fetch the next command buffer from a command pool and begin recording to it.
+// Returns NULL on failure.
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool);
+
+// Finish the currently recording command buffer and submit it for execution.
+// If `done` is not NULL, it will be set to a semaphore that will signal once
+// the command completes. (And MUST have a corresponding semaphore wait)
+// Returns whether successful.
+bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done);
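+
+// Illustrative sketch of the usual record/submit cycle (recorded commands and
+// their arguments elided):
+//
+//   struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool);
+//   if (!cmd)
+//       goto error;
+//   // ... record commands into cmd->buf, e.g. vkCmdPipelineBarrier(...) ...
+//   if (!vk_cmd_submit(vk, cmd, NULL))
+//       goto error;
+//   mpvk_poll_cmds(vk, vk->pool, 0); // garbage collect completed commands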
+
+// Swapchain
+struct vk_swchain {
+ struct mpvk_ctx *vk;
+ struct ra *ra;
+ int w, h; // current size
+ VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype
+ VkSwapchainKHR swchain;
+ // state of the images:
+ struct ra_tex **images; // ra_tex wrappers for the vkimages
+ int num_images; // size of images
+ VkSemaphore *acquired; // pool of semaphores used to synchronize images
+ int num_acquired; // size of this pool
+ int idx_acquired; // index of next free semaphore within this pool
+};
+
+// depth: desired depth
+bool vk_swchain_init(struct mpvk_ctx *vk, struct ra *ra, int depth,
+ struct vk_swchain *chain);
+void vk_swchain_uninit(struct ra *ra, struct vk_swchain *chain);
+bool vk_swchain_resize(struct vk_swchain *chain, int w, int h);
+
+// Swapchain image
+struct vk_swimg {
+ struct vk_swchain *chain; // vk_swchain it was allocated from
+ int index; // index within that vk_swchain
+    struct ra_tex *image;       // ra_tex wrapper for this image
+ VkSemaphore acquired; // will be signalled once the image is ready
+};
+
+// Get the next vk_swimg. This may block if all swapchain images are already
+// in use, but normally the user should allocate a swapchain with more images
+// than they actually need at once.
+bool vk_swchain_get(struct vk_swchain *chain, struct vk_swimg *out);
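+
+// Illustrative sketch of a per-frame cycle, assuming the chain was created
+// with vk_swchain_init() and resized to the window size via vk_swchain_resize().
+// Presentation itself goes through ra_vk_present_frame() from ra_vk.h, with
+// `frames_in_flight` being the caller's counter:
+//
+//   struct vk_swimg swimg;
+//   if (!vk_swchain_get(&chain, &swimg))
+//       goto error;
+//   // ... render into swimg.image using the RA ...
+//   if (!ra_vk_present_frame(ra, &swimg, &frames_in_flight))
+//       goto error;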
diff --git a/wscript b/wscript
index 9d885884d7476..42e5e726650da 100644
--- a/wscript
+++ b/wscript
@@ -780,6 +780,16 @@ video_output_features = [
'fmsg': "No OpenGL video output found or enabled. " +
"Aborting. If you really mean to compile without OpenGL " +
"video outputs use --disable-gl."
+ }, {
+ 'name': '--vulkan-xlib',
+ 'desc': 'Vulkan Xlib backend',
+ 'func': check_true,
+ 'deps': ['x11'],
+ }, {
+ 'name': '--vulkan',
+ 'desc': 'Vulkan video output',
+ 'deps_any': [ 'vulkan-xlib' ],
+ 'func': check_cc(header_name='vulkan/vulkan.h', lib='vulkan'),
}, {
'name': 'egl-helpers',
'desc': 'EGL helper functions',
diff --git a/wscript_build.py b/wscript_build.py
index 3c5c00dc6415e..878b1faf02513 100644
--- a/wscript_build.py
+++ b/wscript_build.py
@@ -437,12 +437,17 @@ def build(ctx):
( "video/out/vo_tct.c" ),
( "video/out/vo_vaapi.c", "vaapi-x11" ),
( "video/out/vo_vdpau.c", "vdpau" ),
+ ( "video/out/vo_vulkan.c", "vulkan" ),
( "video/out/vo_wayland.c", "wayland" ),
( "video/out/vo_x11.c" , "x11" ),
( "video/out/vo_xv.c", "xv" ),
( "video/out/w32_common.c", "win32-desktop" ),
( "video/out/win32/displayconfig.c", "win32-desktop" ),
( "video/out/win32/droptarget.c", "win32-desktop" ),
+ ( "video/out/vulkan/utils.c", "vulkan" ),
+ ( "video/out/vulkan/malloc.c", "vulkan" ),
+ ( "video/out/vulkan/formats.c", "vulkan" ),
+ ( "video/out/vulkan/ra_vk.c", "vulkan" ),
( "video/out/win32/exclusive_hack.c", "gl-win32" ),
( "video/out/wayland_common.c", "wayland" ),
( "video/out/wayland/buffer.c", "wayland" ),