From dbf5bf820b932a5e20b7a14badb9c474dcfdf9a4 Mon Sep 17 00:00:00 2001 From: Ramesh Errabolu Date: Fri, 10 Nov 2023 11:36:34 -0600 Subject: [PATCH 1/2] amdgpu_plugin: Refactor code in preparation to support C&R for DRM devices Add a new compilation unit to host symbols and methods that will be needed to C&R DRM devices. Refactor code that indicates support for C&R and checkpoints KFD and DRM devices Signed-off-by: Ramesh Errabolu --- plugins/amdgpu/Makefile | 2 +- plugins/amdgpu/amdgpu_plugin.c | 245 ++++-------------------- plugins/amdgpu/amdgpu_plugin_drm.c | 63 ++++++ plugins/amdgpu/amdgpu_plugin_drm.h | 22 +++ plugins/amdgpu/amdgpu_plugin_topology.c | 41 ++-- plugins/amdgpu/amdgpu_plugin_topology.h | 2 + plugins/amdgpu/amdgpu_plugin_util.c | 208 ++++++++++++++++++++ plugins/amdgpu/amdgpu_plugin_util.h | 106 ++++++++++ plugins/amdgpu/criu-amdgpu.proto | 18 +- 9 files changed, 460 insertions(+), 247 deletions(-) create mode 100644 plugins/amdgpu/amdgpu_plugin_drm.c create mode 100644 plugins/amdgpu/amdgpu_plugin_drm.h create mode 100755 plugins/amdgpu/amdgpu_plugin_util.c create mode 100755 plugins/amdgpu/amdgpu_plugin_util.h diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile index 64a923d388..5efa8fb0ba 100644 --- a/plugins/amdgpu/Makefile +++ b/plugins/amdgpu/Makefile @@ -28,7 +28,7 @@ endif criu-amdgpu.pb-c.c: criu-amdgpu.proto protoc-c --proto_path=. --c_out=. criu-amdgpu.proto -amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_topology.c criu-amdgpu.pb-c.c +amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) amdgpu_plugin_clean: diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index 32ff8f9364..01207d080b 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -30,55 +30,14 @@ #include "files.h" #include "common/list.h" +#include "amdgpu_plugin_drm.h" +#include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" #include "img-streamer.h" #include "image.h" #include "cr_options.h" -#define AMDGPU_KFD_DEVICE "/dev/kfd" -#define PROCPIDMEM "/proc/%d/mem" -#define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem" -#define HSAKMT_SHM "/hsakmt_shared_mem" -#define HSAKMT_SEM_PATH "/dev/shm/sem.hsakmt_semaphore" -#define HSAKMT_SEM "hsakmt_semaphore" - -#define KFD_IOCTL_MAJOR_VERSION 1 -#define MIN_KFD_IOCTL_MINOR_VERSION 8 - -#define IMG_KFD_FILE "amdgpu-kfd-%d.img" -#define IMG_RENDERD_FILE "amdgpu-renderD-%d.img" -#define IMG_PAGES_FILE "amdgpu-pages-%d-%04x.img" - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 -#endif - -#ifdef LOG_PREFIX -#undef LOG_PREFIX -#endif -#define LOG_PREFIX "amdgpu_plugin: " - -#ifdef DEBUG -#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__) -#else -#define plugin_log_msg(fmt, ...) \ - { \ - } -#endif - -#define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0)) - -#define SDMA_OPCODE_COPY 1 -#define SDMA_COPY_SUB_OPCODE_LINEAR 0 -#define SDMA_NOP 0 -#define SDMA_LINEAR_COPY_MAX_SIZE (1ULL << 21) - -enum sdma_op_type { - SDMA_OP_VRAM_READ, - SDMA_OP_VRAM_WRITE, -}; - struct vma_metadata { struct list_head list; uint64_t old_pgoff; @@ -89,143 +48,13 @@ struct vma_metadata { }; /************************************ Global Variables ********************************************/ -struct tp_system src_topology; -struct tp_system dest_topology; - -struct device_maps checkpoint_maps; -struct device_maps restore_maps; - -extern int fd_next; static LIST_HEAD(update_vma_info_list); -extern bool kfd_fw_version_check; -extern bool kfd_sdma_fw_version_check; -extern bool kfd_caches_count_check; -extern bool kfd_num_gws_check; -extern bool kfd_vram_size_check; -extern bool kfd_numa_check; -extern bool kfd_capability_check; - size_t kfd_max_buffer_size; /**************************************************************************************************/ -int write_fp(FILE *fp, const void *buf, const size_t buf_len) -{ - size_t len_write; - - len_write = fwrite(buf, 1, buf_len, fp); - if (len_write != buf_len) { - pr_perror("Unable to write file (wrote:%ld buf_len:%ld)", len_write, buf_len); - return -EIO; - } - return 0; -} - -int read_fp(FILE *fp, void *buf, const size_t buf_len) -{ - size_t len_read; - - len_read = fread(buf, 1, buf_len, fp); - if (len_read != buf_len) { - pr_perror("Unable to read file (read:%ld buf_len:%ld)", len_read, buf_len); - return -EIO; - } - return 0; -} - -/** - * @brief Open an image file - * - * We store the size of the actual contents in the first 8-bytes of the file. This allows us to - * determine the file size when using criu_image_streamer when fseek and fstat are not available. - * The FILE * returned is already at the location of the first actual contents. - * - * @param path The file path - * @param write False for read, true for write - * @param size Size of actual contents - * @return FILE *if successful, NULL if failed - */ -FILE *open_img_file(char *path, bool write, size_t *size) -{ - FILE *fp = NULL; - int fd, ret; - - if (opts.stream) - fd = img_streamer_open(path, write ? O_DUMP : O_RSTR); - else - fd = openat(criu_get_image_dir(), path, write ? (O_WRONLY | O_CREAT) : O_RDONLY, 0600); - - if (fd < 0) { - pr_perror("%s: Failed to open for %s", path, write ? "write" : "read"); - return NULL; - } - - fp = fdopen(fd, write ? "w" : "r"); - if (!fp) { - pr_perror("%s: Failed get pointer for %s", path, write ? "write" : "read"); - close(fd); - return NULL; - } - - if (write) - ret = write_fp(fp, size, sizeof(*size)); - else - ret = read_fp(fp, size, sizeof(*size)); - - if (ret) { - pr_perror("%s:Failed to access file size", path); - fclose(fp); - return NULL; - } - - pr_debug("%s:Opened file for %s with size:%ld\n", path, write ? "write" : "read", *size); - return fp; -} - -/** - * @brief Write an image file - * - * We store the size of the actual contents in the first 8-bytes of the file. This allows us to - * determine the file size when using criu_image_streamer when fseek and fstat are not available. - * - * @param path The file path - * @param buf pointer to data to be written - * @param buf_len size of buf - * @return 0 if successful. -errno on failure - */ -int write_img_file(char *path, const void *buf, const size_t buf_len) -{ - int ret; - FILE *fp; - size_t len = buf_len; - - fp = open_img_file(path, true, &len); - if (!fp) - return -errno; - - ret = write_fp(fp, buf, buf_len); - fclose(fp); /* this will also close fd */ - return ret; -} - -int read_file(const char *file_path, void *buf, const size_t buf_len) -{ - int ret; - FILE *fp; - - fp = fopen(file_path, "r"); - if (!fp) { - pr_perror("Cannot fopen %s", file_path); - return -errno; - } - - ret = read_fp(fp, buf, buf_len); - fclose(fp); /* this will also close fd */ - return ret; -} - /* Call ioctl, restarting if it is interrupted */ int kmtIoctl(int fd, unsigned long request, void *arg) { @@ -263,21 +92,21 @@ static void free_e(CriuKfd *e) static int allocate_device_entries(CriuKfd *e, int num_of_devices) { - e->device_entries = xmalloc(sizeof(DeviceEntry *) * num_of_devices); + e->device_entries = xmalloc(sizeof(KfdDeviceEntry *) * num_of_devices); if (!e->device_entries) { pr_err("Failed to allocate device_entries\n"); return -ENOMEM; } for (int i = 0; i < num_of_devices; i++) { - DeviceEntry *entry = xzalloc(sizeof(*entry)); + KfdDeviceEntry *entry = xzalloc(sizeof(*entry)); if (!entry) { pr_err("Failed to allocate entry\n"); return -ENOMEM; } - device_entry__init(entry); + kfd_device_entry__init(entry); e->device_entries[i] = entry; e->n_device_entries++; @@ -287,21 +116,21 @@ static int allocate_device_entries(CriuKfd *e, int num_of_devices) static int allocate_bo_entries(CriuKfd *e, int num_bos, struct kfd_criu_bo_bucket *bo_bucket_ptr) { - e->bo_entries = xmalloc(sizeof(BoEntry *) * num_bos); + e->bo_entries = xmalloc(sizeof(KfdBoEntry *) * num_bos); if (!e->bo_entries) { pr_err("Failed to allocate bo_info\n"); return -ENOMEM; } for (int i = 0; i < num_bos; i++) { - BoEntry *entry = xzalloc(sizeof(*entry)); + KfdBoEntry *entry = xzalloc(sizeof(*entry)); if (!entry) { pr_err("Failed to allocate botest\n"); return -ENOMEM; } - bo_entry__init(entry); + kfd_bo_entry__init(entry); e->bo_entries[i] = entry; e->n_bo_entries++; @@ -309,13 +138,13 @@ static int allocate_bo_entries(CriuKfd *e, int num_bos, struct kfd_criu_bo_bucke return 0; } -int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, DeviceEntry **deviceEntries) +int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, KfdDeviceEntry **deviceEntries) { uint32_t devinfo_index = 0; struct tp_node *node; list_for_each_entry(node, &sys->nodes, listm_system) { - DeviceEntry *devinfo = deviceEntries[devinfo_index++]; + KfdDeviceEntry *devinfo = deviceEntries[devinfo_index++]; devinfo->node_id = node->id; @@ -383,11 +212,11 @@ int topology_to_devinfo(struct tp_system *sys, struct device_maps *maps, DeviceE return 0; } -int devinfo_to_topology(DeviceEntry *devinfos[], uint32_t num_devices, struct tp_system *sys) +int devinfo_to_topology(KfdDeviceEntry *devinfos[], uint32_t num_devices, struct tp_system *sys) { for (int i = 0; i < num_devices; i++) { struct tp_node *node; - DeviceEntry *devinfo = devinfos[i]; + KfdDeviceEntry *devinfo = devinfos[i]; node = sys_add_node(sys, devinfo->node_id, devinfo->gpu_id); if (!node) @@ -549,7 +378,7 @@ struct thread_data { uint32_t gpu_id; pid_t pid; struct kfd_criu_bo_bucket *bo_buckets; - BoEntry **bo_entries; + KfdBoEntry **bo_entries; int drm_fd; int ret; int id; /* File ID used by CRIU to identify KFD image for this process */ @@ -557,8 +386,7 @@ struct thread_data { int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf) { - struct stat st_kfd, st_dri_min; - char img_path[128]; + struct stat st_kfd; int ret = 0; pr_debug("Enter %s\n", __func__); @@ -568,27 +396,18 @@ int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf) return ret; } - snprintf(img_path, sizeof(img_path), "/dev/dri/renderD%d", DRM_FIRST_RENDER_NODE); - - ret = stat(img_path, &st_dri_min); - if (ret == -1) { - pr_perror("stat error for %s", img_path); - return ret; - } - - if (major(st_buf->st_rdev) == major(st_kfd.st_rdev) || ((major(st_buf->st_rdev) == major(st_dri_min.st_rdev)) && - (minor(st_buf->st_rdev) >= minor(st_dri_min.st_rdev) && - minor(st_buf->st_rdev) >= DRM_FIRST_RENDER_NODE))) { + /* If input device is KFD return device as supported */ + if (major(st_buf->st_rdev) == major(st_kfd.st_rdev)) { pr_debug("Known non-regular mapping, kfd-renderD%d -> OK\n", minor(st_buf->st_rdev)); - pr_debug("AMD KFD(maj) = %d, DRI(maj,min) = %d:%d VMA Device fd(maj,min) = %d:%d\n", - major(st_kfd.st_rdev), major(st_dri_min.st_rdev), minor(st_dri_min.st_rdev), - major(st_buf->st_rdev), minor(st_buf->st_rdev)); - /* VMA belongs to kfd */ return 0; } - pr_perror("Can't handle the VMA mapping"); - return -ENOTSUP; + /* Determine if input is a DRM device and therefore is supported */ + ret = amdgpu_plugin_drm_handle_device_vma(fd, st_buf); + if (ret) + pr_perror("%s(), Can't handle VMAs of input device\n", __func__); + + return ret; } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, amdgpu_plugin_handle_device_vma) @@ -655,8 +474,9 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va, amdgpu_bo_free(h_bo); } -int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, void *buffer, size_t buffer_size, - amdgpu_device_handle h_dev, uint64_t max_copy_size, enum sdma_op_type type) +static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, + void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, + uint64_t max_copy_size, enum sdma_op_type type) { uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size; @@ -954,7 +774,7 @@ void *dump_bo_contents(void *_thread_data) goto exit; } - snprintf(img_path, sizeof(img_path), IMG_PAGES_FILE, thread_data->id, thread_data->gpu_id); + snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, thread_data->id, thread_data->gpu_id); bo_contents_fp = open_img_file(img_path, true, &image_size); if (!bo_contents_fp) { pr_perror("Cannot fopen %s", img_path); @@ -1027,7 +847,7 @@ void *restore_bo_contents(void *_thread_data) max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE : SDMA_LINEAR_COPY_MAX_SIZE - 1; - snprintf(img_path, sizeof(img_path), IMG_PAGES_FILE, thread_data->id, thread_data->gpu_id); + snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, thread_data->id, thread_data->gpu_id); bo_contents_fp = open_img_file(img_path, false, &image_size); if (!bo_contents_fp) { pr_perror("Cannot fopen %s", img_path); @@ -1234,7 +1054,7 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd for (i = 0; i < e->num_of_bos; i++) { struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i]; - BoEntry *boinfo = e->bo_entries[i]; + KfdBoEntry *boinfo = e->bo_entries[i]; boinfo->gpu_id = bo_bucket->gpu_id; boinfo->addr = bo_bucket->addr; @@ -1391,7 +1211,7 @@ int amdgpu_plugin_dump_file(int fd, int id) criu_render_node__pack(&rd, buf); - snprintf(img_path, sizeof(img_path), IMG_RENDERD_FILE, id); + snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id); ret = write_img_file(img_path, buf, len); if (ret) { xfree(buf); @@ -1399,6 +1219,7 @@ int amdgpu_plugin_dump_file(int fd, int id) } xfree(buf); + /* Need to return success here so that criu can call plugins for renderD nodes */ return ret; } @@ -1531,7 +1352,7 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e) for (int entries_i = 0; entries_i < e->num_of_cpus + e->num_of_gpus; entries_i++) { struct kfd_criu_device_bucket *device_bucket; - DeviceEntry *devinfo = e->device_entries[entries_i]; + KfdDeviceEntry *devinfo = e->device_entries[entries_i]; struct tp_node *tp_node; if (!devinfo->gpu_id) @@ -1581,7 +1402,7 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) for (int i = 0; i < args->num_bos; i++) { struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i]; - BoEntry *bo_entry = e->bo_entries[i]; + KfdBoEntry *bo_entry = e->bo_entries[i]; bo_bucket->gpu_id = bo_entry->gpu_id; bo_bucket->addr = bo_entry->addr; @@ -1736,7 +1557,7 @@ int amdgpu_plugin_restore_file(int id) * TODO: Currently, this code will only work if this function is called for /dev/kfd * first as we assume restore_maps is already filled. Need to fix this later. */ - snprintf(img_path, sizeof(img_path), IMG_RENDERD_FILE, id); + snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id); pr_info("Restoring RenderD %s\n", img_path); img_fp = open_img_file(img_path, false, &img_size); diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c new file mode 100644 index 0000000000..a48dc68f0f --- /dev/null +++ b/plugins/amdgpu/amdgpu_plugin_drm.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "common/list.h" + +#include "criu-amdgpu.pb-c.h" + +#include +#include + +#include "xmalloc.h" +#include "criu-log.h" +#include "kfd_ioctl.h" +#include "amdgpu_plugin_drm.h" +#include "amdgpu_plugin_util.h" +#include "amdgpu_plugin_topology.h" + + +int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st) +{ + char path[PATH_MAX]; + struct stat drm; + int ret = 0; + + snprintf(path, sizeof(path), AMDGPU_DRM_DEVICE, DRM_FIRST_RENDER_NODE); + ret = stat(path, &drm); + if (ret == -1) { + pr_err("Error in getting stat for: %s", path); + return ret; + } + + if ((major(st->st_rdev) != major(drm.st_rdev)) || + (minor(st->st_rdev) < minor(drm.st_rdev)) || + (minor(st->st_rdev) > DRM_LAST_RENDER_NODE)) { + pr_err("Can't handle VMA mapping of input device\n"); + return -ENOTSUP; + } + + pr_debug("AMD DRI(maj,min) = %d:%d VMA Device FD(maj,min) = %d:%d\n", + major(drm.st_rdev), minor(drm.st_rdev), + major(st->st_rdev), minor(st->st_rdev)); + + return 0; +} + + diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h new file mode 100644 index 0000000000..37009c8ba7 --- /dev/null +++ b/plugins/amdgpu/amdgpu_plugin_drm.h @@ -0,0 +1,22 @@ +#ifndef __AMDGPU_PLUGIN_DRM_H__ +#define __AMDGPU_PLUGIN_DRM_H__ + +#include +#include "common/list.h" + +#include "xmalloc.h" +#include "criu-log.h" +#include "kfd_ioctl.h" +#include "amdgpu_plugin_util.h" +#include "amdgpu_plugin_topology.h" + + +/** + * Determines if VMA's of input file descriptor belong to amdgpu's + * DRM device and are therefore supported + */ +int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm); + + +#endif /* __AMDGPU_PLUGIN_DRM_H__ */ + diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c index 6d004247be..bc52e3e63e 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.c +++ b/plugins/amdgpu/amdgpu_plugin_topology.c @@ -16,35 +16,11 @@ #include "xmalloc.h" #include "kfd_ioctl.h" +#include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" #define TOPOLOGY_PATH "/sys/class/kfd/kfd/topology/nodes/" -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 -#endif - -#ifdef COMPILE_TESTS -#undef pr_err -#define pr_err(format, arg...) fprintf(stdout, "%s:%d ERROR:" format, __FILE__, __LINE__, ##arg) -#undef pr_info -#define pr_info(format, arg...) fprintf(stdout, "%s:%d INFO:" format, __FILE__, __LINE__, ##arg) -#undef pr_debug -#define pr_debug(format, arg...) fprintf(stdout, "%s:%d DBG:" format, __FILE__, __LINE__, ##arg) - -#undef pr_perror -#define pr_perror(format, arg...) \ - fprintf(stdout, "%s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno)) -#endif - -#ifdef DEBUG -#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__) -#else -#define plugin_log_msg(fmt, ...) \ - { \ - } -#endif - /* User override options */ /* Skip firmware version check */ bool kfd_fw_version_check = true; @@ -840,6 +816,9 @@ void topology_free(struct tp_system *sys) list_del(&p2pgroup->listm_system); xfree(p2pgroup); } + + /* Update Topology as being freed */ + sys->parsed = false; } /** @@ -1461,3 +1440,15 @@ int set_restore_gpu_maps(struct tp_system *src_sys, struct tp_system *dest_sys, return ret; } + +int topology_gpu_count(struct tp_system *sys) +{ + struct tp_node *node; + int count = 0; + + list_for_each_entry(node, &sys->nodes, listm_system) + if (NODE_IS_GPU(node)) + count++; + return count; +} + diff --git a/plugins/amdgpu/amdgpu_plugin_topology.h b/plugins/amdgpu/amdgpu_plugin_topology.h index 9d99cda1c2..c890e3ddae 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.h +++ b/plugins/amdgpu/amdgpu_plugin_topology.h @@ -107,6 +107,8 @@ int topology_parse(struct tp_system *topology, const char *msg); int topology_determine_iolinks(struct tp_system *sys); void topology_print(const struct tp_system *sys, const char *msg); +int topology_gpu_count(struct tp_system *topology); + struct id_map *maps_add_gpu_entry(struct device_maps *maps, const uint32_t src_id, const uint32_t dest_id); struct tp_node *sys_add_node(struct tp_system *sys, uint32_t id, uint32_t gpu_id); diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c new file mode 100755 index 0000000000..48ff705556 --- /dev/null +++ b/plugins/amdgpu/amdgpu_plugin_util.c @@ -0,0 +1,208 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "common/list.h" + +#include +#include + +#include "criu-plugin.h" +#include "plugin.h" +#include "criu-amdgpu.pb-c.h" + +#include "img-streamer.h" +#include "image.h" +#include "cr_options.h" + +#include "xmalloc.h" +#include "criu-log.h" +#include "kfd_ioctl.h" +#include "amdgpu_drm.h" +#include "amdgpu_plugin_util.h" +#include "amdgpu_plugin_topology.h" + +/* Tracks number of device files that need to be checkpointed */ +static int dev_file_cnt = 0; + +/* Helper structures to encode device topology of SRC and DEST platforms */ +struct tp_system src_topology; +struct tp_system dest_topology; + +/* Helper structures to encode device maps during Checkpoint and Restore operations */ +struct device_maps checkpoint_maps; +struct device_maps restore_maps; + +bool checkpoint_is_complete() +{ + return (dev_file_cnt == 0); +} + +void decrement_checkpoint_count() +{ + dev_file_cnt--; +} + +void init_gpu_count(struct tp_system *topo) +{ + if (dev_file_cnt != 0) + return; + + /* We add ONE to include checkpointing of KFD device */ + dev_file_cnt = 1 + topology_gpu_count(topo); +} + +int read_fp(FILE *fp, void *buf, const size_t buf_len) +{ + size_t len_read; + + len_read = fread(buf, 1, buf_len, fp); + if (len_read != buf_len) { + pr_err("Unable to read file (read:%ld buf_len:%ld)", len_read, buf_len); + return -EIO; + } + return 0; +} + +int write_fp(FILE *fp, const void *buf, const size_t buf_len) +{ + size_t len_write; + + len_write = fwrite(buf, 1, buf_len, fp); + if (len_write != buf_len) { + pr_err("Unable to write file (wrote:%ld buf_len:%ld)", len_write, buf_len); + return -EIO; + } + return 0; +} + +/** + * @brief Open an image file + * + * We store the size of the actual contents in the first 8-bytes of + * the file. This allows us to determine the file size when using + * criu_image_streamer when fseek and fstat are not available. The + * FILE * returned is already at the location of the first actual + * contents. + * + * @param path The file path + * @param write False for read, true for write + * @param size Size of actual contents + * @return FILE *if successful, NULL if failed + */ +FILE *open_img_file(char *path, bool write, size_t *size) +{ + FILE *fp = NULL; + int fd, ret; + + if (opts.stream) + fd = img_streamer_open(path, write ? O_DUMP : O_RSTR); + else + fd = openat(criu_get_image_dir(), path, write ? (O_WRONLY | O_CREAT) : O_RDONLY, 0600); + + if (fd < 0) { + pr_err("%s: Failed to open for %s", path, write ? "write" : "read"); + return NULL; + } + + fp = fdopen(fd, write ? "w" : "r"); + if (!fp) { + pr_err("%s: Failed get pointer for %s", path, write ? "write" : "read"); + return NULL; + } + + if (write) + ret = write_fp(fp, size, sizeof(*size)); + else + ret = read_fp(fp, size, sizeof(*size)); + + if (ret) { + pr_err("%s:Failed to access file size", path); + fclose(fp); + return NULL; + } + + pr_debug("%s:Opened file for %s with size:%ld\n", path, write ? "write" : "read", *size); + return fp; +} + +int read_file(const char *file_path, void *buf, const size_t buf_len) +{ + int ret; + FILE *fp; + + fp = fopen(file_path, "r"); + if (!fp) { + pr_err("Cannot fopen %s", file_path); + return -errno; + } + + ret = read_fp(fp, buf, buf_len); + fclose(fp); /* this will also close fd */ + return ret; +} + + +/** + * @brief Write an image file + * + * We store the size of the actual contents in the first 8-bytes of the file. This allows us to + * determine the file size when using criu_image_streamer when fseek and fstat are not available. + * + * @param path The file path + * @param buf pointer to data to be written + * @param buf_len size of buf + * @return 0 if successful. -errno on failure + */ +int write_img_file(char *path, const void *buf, const size_t buf_len) +{ + int ret; + FILE *fp; + size_t len = buf_len; + + fp = open_img_file(path, true, &len); + if (!fp) + return -errno; + + ret = write_fp(fp, buf, buf_len); + fclose(fp); /* this will also close fd */ + return ret; +} + +void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list) +{ + struct kfd_criu_bo_bucket *bo; + + pr_info("\n"); + for (int idx = 0; idx < bo_cnt; idx++) { + bo = &bo_list[idx]; + pr_info("\n"); + pr_info("%s(), %d. KFD BO Addr: %llx \n", __func__, idx, bo->addr); + pr_info("%s(), %d. KFD BO Size: %llx \n", __func__, idx, bo->size); + pr_info("%s(), %d. KFD BO Offset: %llx \n", __func__, idx, bo->offset); + pr_info("%s(), %d. KFD BO Restored Offset: %llx \n", __func__, idx, bo->restored_offset); + pr_info("%s(), %d. KFD BO Alloc Flags: %x \n", __func__, idx, bo->alloc_flags); + pr_info("%s(), %d. KFD BO Gpu ID: %x \n", __func__, idx, bo->gpu_id); + pr_info("%s(), %d. KFD BO Dmabuf FD: %x \n", __func__, idx, bo->dmabuf_fd); + pr_info("\n"); + } + pr_info("\n"); +} + + diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h new file mode 100755 index 0000000000..aacca3a28c --- /dev/null +++ b/plugins/amdgpu/amdgpu_plugin_util.h @@ -0,0 +1,106 @@ +#ifndef __AMDGPU_PLUGIN_UTIL_H__ +#define __AMDGPU_PLUGIN_UTIL_H__ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif + +#ifdef COMPILE_TESTS +#undef pr_err +#define pr_err(format, arg...) fprintf(stdout, "%s:%d ERROR:" format, __FILE__, __LINE__, ##arg) +#undef pr_info +#define pr_info(format, arg...) fprintf(stdout, "%s:%d INFO:" format, __FILE__, __LINE__, ##arg) +#undef pr_debug +#define pr_debug(format, arg...) fprintf(stdout, "%s:%d DBG:" format, __FILE__, __LINE__, ##arg) + +#undef pr_perror +#define pr_perror(format, arg...) \ + fprintf(stdout, "%s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno)) +#endif + +#ifdef LOG_PREFIX +#undef LOG_PREFIX +#endif +#define LOG_PREFIX "amdgpu_plugin: " + +#ifdef DEBUG +#define plugin_log_msg(fmt, ...) pr_debug(fmt, ##__VA_ARGS__) +#else +#define plugin_log_msg(fmt, ...) \ + { \ + } +#endif + + +/* Path where KFD device is surfaced */ +#define AMDGPU_KFD_DEVICE "/dev/kfd" + +/* Path where DRM devices are surfaced */ +#define AMDGPU_DRM_DEVICE "/dev/dri/renderD%d" + +/* Minimum version of KFD IOCTL's that supports C&R */ +#define KFD_IOCTL_MAJOR_VERSION 1 +#define MIN_KFD_IOCTL_MINOR_VERSION 8 + +/* Name of file having serialized data of KFD device */ +#define IMG_KFD_FILE "amdgpu-kfd-%d.img" + +/* Name of file having serialized data of KFD buffer objects (BOs) */ +#define IMG_KFD_PAGES_FILE "amdgpu-pages-%d-%04x.img" + +/* Name of file having serialized data of DRM device */ +#define IMG_DRM_FILE "amdgpu-renderD-%d.img" + +/* Name of file having serialized data of DRM device buffer objects (BOs) */ +#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img" + +/* Helper macros to Checkpoint and Restore a ROCm file */ +#define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem" +#define HSAKMT_SHM "/hsakmt_shared_mem" +#define HSAKMT_SEM_PATH "/dev/shm/sem.hsakmt_semaphore" +#define HSAKMT_SEM "hsakmt_semaphore" + +/* Help macros to build sDMA command packets */ +#define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0)) + +#define SDMA_OPCODE_COPY 1 +#define SDMA_COPY_SUB_OPCODE_LINEAR 0 +#define SDMA_NOP 0 +#define SDMA_LINEAR_COPY_MAX_SIZE (1ULL << 21) + +enum sdma_op_type { + SDMA_OP_VRAM_READ, + SDMA_OP_VRAM_WRITE, +}; + +/* Helper structures to encode device topology of SRC and DEST platforms */ +extern struct tp_system src_topology; +extern struct tp_system dest_topology; + +/* Helper structures to encode device maps during Checkpoint and Restore operations */ +extern struct device_maps checkpoint_maps; +extern struct device_maps restore_maps; + +extern int fd_next; + +extern bool kfd_fw_version_check; +extern bool kfd_sdma_fw_version_check; +extern bool kfd_caches_count_check; +extern bool kfd_num_gws_check; +extern bool kfd_vram_size_check; +extern bool kfd_numa_check; +extern bool kfd_capability_check; + +int read_fp(FILE *fp, void *buf, const size_t buf_len); +int write_fp(FILE *fp, const void *buf, const size_t buf_len); +int read_file(const char *file_path, void *buf, const size_t buf_len); +int write_img_file(char *path, const void *buf, const size_t buf_len); +FILE *open_img_file(char *path, bool write, size_t *size); + +bool checkpoint_is_complete(); +void decrement_checkpoint_count(); +void init_gpu_count(struct tp_system *topology); + +void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket *bo_list); + +#endif /* __AMDGPU_PLUGIN_UTIL_H__ */ diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto index 81d00d3ff1..078b676500 100644 --- a/plugins/amdgpu/criu-amdgpu.proto +++ b/plugins/amdgpu/criu-amdgpu.proto @@ -5,7 +5,7 @@ message dev_iolink { required uint32 node_to_id = 2; } -message device_entry { +message kfd_device_entry { required uint32 node_id = 1; required uint32 gpu_id = 2; required uint32 cpu_cores_count = 3; @@ -40,10 +40,10 @@ message device_entry { repeated dev_iolink iolinks = 32; } -message bo_entry { - required uint64 addr = 1; - required uint64 size = 2; - required uint64 offset = 3; +message kfd_bo_entry { + required uint64 addr = 1; + required uint64 size = 2; + required uint64 offset = 3; required uint32 alloc_flags = 4; required uint32 gpu_id = 5; } @@ -52,10 +52,10 @@ message criu_kfd { required uint32 pid = 1; required uint32 num_of_gpus = 2; required uint32 num_of_cpus = 3; - repeated device_entry device_entries = 4; - required uint64 num_of_bos = 5; - repeated bo_entry bo_entries = 6; - required uint32 num_of_objects = 7; + repeated kfd_device_entry device_entries = 4; + required uint64 num_of_bos = 5; + repeated kfd_bo_entry bo_entries = 6; + required uint32 num_of_objects = 7; required uint64 shared_mem_size = 8; required uint32 shared_mem_magic = 9; required bytes priv_data = 10; From ce551bc6f5ae2628e88a0fc85de798d0e234472f Mon Sep 17 00:00:00 2001 From: Ramesh Errabolu Date: Fri, 10 Nov 2023 13:02:49 -0600 Subject: [PATCH 2/2] amdgpu_plugin: Refactor code used to implement Checkpoint Refactor code used to Checkpoint DRM devices. Code is moved into amdgpu_plugin_drm.c file which hosts various methods to checkpoint and restore a workload. Signed-off-by: Ramesh Errabolu --- plugins/amdgpu/amdgpu_plugin.c | 62 ++++++++++++++---------------- plugins/amdgpu/amdgpu_plugin_drm.c | 38 ++++++++++++++++++ plugins/amdgpu/amdgpu_plugin_drm.h | 6 +++ 3 files changed, 73 insertions(+), 33 deletions(-) diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index 01207d080b..500187abd4 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -49,6 +49,13 @@ struct vma_metadata { /************************************ Global Variables ********************************************/ +/** + * FD of KFD device used to checkpoint. On a multi-process + * tree the order of checkpointing goes from parent to child + * and so on - so saving the FD will not be overwritten + */ +static int kfd_checkpoint_fd; + static LIST_HEAD(update_vma_info_list); size_t kfd_max_buffer_size; @@ -990,6 +997,10 @@ static int unpause_process(int fd) goto exit; } + // Reset the KFD FD + kfd_checkpoint_fd = -1; + sys_close_drm_render_devices(&src_topology); + exit: pr_info("Process unpaused %s (ret:%d)\n", ret ? "Failed" : "Ok", ret); @@ -1181,44 +1192,25 @@ int amdgpu_plugin_dump_file(int fd, int id) return -1; } + /* Initialize number of device files that will be checkpointed */ + init_gpu_count(&src_topology); + /* Check whether this plugin was called for kfd or render nodes */ if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) { + /* This is RenderD dumper plugin, for now just save renderD * minor number to be used during restore. In later phases this * needs to save more data for video decode etc. */ - - CriuRenderNode rd = CRIU_RENDER_NODE__INIT; - struct tp_node *tp_node; - - pr_info("Dumper called for /dev/dri/renderD%d, FD = %d, ID = %d\n", minor(st.st_rdev), fd, id); - - tp_node = sys_get_node_by_render_minor(&src_topology, minor(st.st_rdev)); - if (!tp_node) { - pr_err("Failed to find a device with minor number = %d\n", minor(st.st_rdev)); - - return -ENODEV; - } - - rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); - if (!rd.gpu_id) - return -ENODEV; - - len = criu_render_node__get_packed_size(&rd); - buf = xmalloc(len); - if (!buf) - return -ENOMEM; - - criu_render_node__pack(&rd, buf); - - snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id); - ret = write_img_file(img_path, buf, len); - if (ret) { - xfree(buf); + ret = amdgpu_plugin_drm_dump_file(fd, id, &st); + if (ret) return ret; - } - xfree(buf); + /* Invoke unpause process if needed */ + decrement_checkpoint_count(); + if (checkpoint_is_complete()) { + ret = unpause_process(kfd_checkpoint_fd); + } /* Need to return success here so that criu can call plugins for renderD nodes */ return ret; @@ -1315,11 +1307,15 @@ int amdgpu_plugin_dump_file(int fd, int id) ret = write_img_file(img_path, buf, len); xfree(buf); + exit: - /* Restore all queues */ - unpause_process(fd); + /* Restore all queues if conditions permit */ + kfd_checkpoint_fd = fd; + decrement_checkpoint_count(); + if (checkpoint_is_complete()) { + ret = unpause_process(fd); + } - sys_close_drm_render_devices(&src_topology); xfree((void *)args.devices); xfree((void *)args.bos); xfree((void *)args.priv_data); diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c index a48dc68f0f..689d620720 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.c +++ b/plugins/amdgpu/amdgpu_plugin_drm.c @@ -61,3 +61,41 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st) } +int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) +{ + CriuRenderNode rd = CRIU_RENDER_NODE__INIT; + struct tp_node *tp_node; + char path[PATH_MAX]; + unsigned char *buf; + int minor; + int len; + int ret; + + /* Get the topology node of the DRM device */ + minor = minor(drm->st_rdev); + tp_node = sys_get_node_by_render_minor(&src_topology, minor); + if (!tp_node) { + pr_err("Failed to find a device with minor number = %d\n", minor); + return -ENODEV; + } + + /* Get the GPU_ID of the DRM device */ + rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); + if (!rd.gpu_id) { + pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id); + return -ENODEV; + } + + len = criu_render_node__get_packed_size(&rd); + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + + criu_render_node__pack(&rd, buf); + + snprintf(path, sizeof(path), IMG_DRM_FILE, id); + ret = write_img_file(path, buf, len); + xfree(buf); + return ret; +} + diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h index 37009c8ba7..6f0c1a9a63 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.h +++ b/plugins/amdgpu/amdgpu_plugin_drm.h @@ -17,6 +17,12 @@ */ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm); +/** + * Serialize meta-data about a particular DRM device, its number of BOs, + * etc into a file. The serialized filename has in it the value ID that + * is passed in as a parameter + */ +int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm); #endif /* __AMDGPU_PLUGIN_DRM_H__ */