From 40d6f82f8fc0bfdc3359303048aa7fcca0c9eb17 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 6 Nov 2024 22:08:24 +0530 Subject: [PATCH 1/2] pidfd: one process creates a helper and opens all fds to it Currently, the `waitpid()` call on the tmp process can be made by a process which is not its parent. This causes restore to fail. This patch instead selects one process to create the tmp process and open all the fds that point to it. These fds are sent to the correct process(es). Fixes: #2496 Signed-off-by: Andrei Vagin Signed-off-by: Bhavik Sachdev --- criu/files.c | 7 +-- criu/include/pidfd.h | 2 +- criu/pidfd.c | 124 +++++++++++++++++++++---------------------- 3 files changed, 62 insertions(+), 71 deletions(-) diff --git a/criu/files.c b/criu/files.c index a57fb860fb..31e705bcc5 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1811,11 +1811,6 @@ int prepare_files(void) { init_fdesc_hash(); init_sk_info_hash(); - - if (init_dead_pidfd_hash()) { - pr_err("Could not initialise hash map for dead pidfds\n"); - return -1; - } - + init_dead_pidfd_hash(); return collect_image(&files_cinfo); } diff --git a/criu/include/pidfd.h b/criu/include/pidfd.h index 4d2d71700e..bcc0fb45ab 100644 --- a/criu/include/pidfd.h +++ b/criu/include/pidfd.h @@ -7,7 +7,7 @@ extern const struct fdtype_ops pidfd_dump_ops; extern struct collect_image_info pidfd_cinfo; extern int is_pidfd_link(char *link); -extern int init_dead_pidfd_hash(void); +extern void init_dead_pidfd_hash(void); struct pidfd_dump_info { PidfdEntry pidfe; pid_t pid; diff --git a/criu/pidfd.c b/criu/pidfd.c index 3ea3c93094..53b9bcf71a 100644 --- a/criu/pidfd.c +++ b/criu/pidfd.c @@ -21,32 +21,26 @@ struct pidfd_info { PidfdEntry *pidfe; struct file_desc d; + + struct dead_pidfd *dead; + struct pidfd_info *next; }; struct dead_pidfd { unsigned int ino; - int pid; - size_t count; - mutex_t pidfd_lock; + int creator_id; + struct hlist_node hash; + struct pidfd_info *list; }; #define DEAD_PIDFD_HASH_SIZE 32 static struct hlist_head dead_pidfd_hash[DEAD_PIDFD_HASH_SIZE]; -static mutex_t *dead_pidfd_hash_lock; -int init_dead_pidfd_hash(void) +void init_dead_pidfd_hash(void) { for (int i = 0; i < DEAD_PIDFD_HASH_SIZE; i++) INIT_HLIST_HEAD(&dead_pidfd_hash[i]); - - dead_pidfd_hash_lock = shmalloc(sizeof(*dead_pidfd_hash_lock)); - if (!dead_pidfd_hash_lock) - return -1; - - mutex_init(dead_pidfd_hash_lock); - - return 0; } static struct dead_pidfd *lookup_dead_pidfd(unsigned int ino) @@ -54,15 +48,12 @@ static struct dead_pidfd *lookup_dead_pidfd(unsigned int ino) struct dead_pidfd *dead; struct hlist_head *chain; - mutex_lock(dead_pidfd_hash_lock); chain = &dead_pidfd_hash[ino % DEAD_PIDFD_HASH_SIZE]; hlist_for_each_entry(dead, chain, hash) { if (dead->ino == ino) { - mutex_unlock(dead_pidfd_hash_lock); return dead; } } - mutex_unlock(dead_pidfd_hash_lock); return NULL; } @@ -142,7 +133,7 @@ static int create_tmp_process(void) return tmp_process; } -static int free_dead_pidfd(struct dead_pidfd *dead) +static int kill_helper(pid_t pid) { int status; sigset_t blockmask, oldmask; @@ -160,15 +151,13 @@ static int free_dead_pidfd(struct dead_pidfd *dead) goto err; } - if (kill(dead->pid, SIGKILL) < 0) { - pr_perror("Could not kill temporary process with pid: %d", - dead->pid); + if (kill(pid, SIGKILL) < 0) { + pr_perror("Could not kill temporary process with pid: %d", pid); goto err; } - if (waitpid(dead->pid, &status, 0) != dead->pid) { - pr_perror("Could not wait on temporary process with pid: %d", - dead->pid); + if (waitpid(pid, &status, 0) != pid) { + pr_perror("Could not wait on temporary process with pid: %d", pid); goto err; } @@ -188,9 +177,6 @@ static int free_dead_pidfd(struct dead_pidfd *dead) goto err; } - mutex_lock(dead_pidfd_hash_lock); - hlist_del(&dead->hash); - mutex_unlock(dead_pidfd_hash_lock); return 0; err: return -1; @@ -198,8 +184,9 @@ static int free_dead_pidfd(struct dead_pidfd *dead) static int open_one_pidfd(struct file_desc *d, int *new_fd) { - struct pidfd_info *info; + struct pidfd_info *info, *child; struct dead_pidfd *dead = NULL; + pid_t pid; int pidfd; info = container_of(d, struct pidfd_info, d); @@ -215,34 +202,44 @@ static int open_one_pidfd(struct file_desc *d, int *new_fd) dead = lookup_dead_pidfd(info->pidfe->ino); BUG_ON(!dead); - mutex_lock(&dead->pidfd_lock); - BUG_ON(dead->count == 0); - dead->count--; - if (dead->pid == -1) { - dead->pid = create_tmp_process(); - if (dead->pid < 0) { - mutex_unlock(&dead->pidfd_lock); - goto err_close; + if (info->dead && info->dead->creator_id != info->pidfe->id) { + int ret = recv_desc_from_peer(&info->d, &pidfd); + if (ret != 0) { + if (ret != 1) + pr_err("Can't get fd\n"); + return ret; } + goto out; } - pidfd = pidfd_open(dead->pid, info->pidfe->flags); - if (pidfd < 0) { - pr_perror("Could not open pidfd for %d", info->pidfe->nspid); - mutex_unlock(&dead->pidfd_lock); + pid = create_tmp_process(); + if (pid < 0) goto err_close; - } - if (dead->count == 0) { - if (free_dead_pidfd(dead)) { - pr_err("Failed to delete dead_pidfd struct\n"); - mutex_unlock(&dead->pidfd_lock); - close(pidfd); + for (child = dead->list; child; child = child->next) { + if (child == info) + continue; + pidfd = pidfd_open(pid, child->pidfe->flags); + if (pidfd < 0) { + pr_perror("Could not open pidfd for %d", child->pidfe->nspid); goto err_close; } + + if (send_desc_to_peer(pidfd, &child->d)) { + pr_perror("Can't send file descriptor"); + close(pidfd); + return -1; + } + close(pidfd); } - mutex_unlock(&dead->pidfd_lock); + pidfd = pidfd_open(pid, info->pidfe->flags); + if (pidfd < 0) { + pr_perror("Could not open pidfd for %d", info->pidfe->nspid); + goto err_close; + } + if (kill_helper(pid)) + goto err_close; out: if (rst_file_params(pidfd, info->pidfe->fown, info->pidfe->flags)) { goto err_close; @@ -269,32 +266,31 @@ static int collect_one_pidfd(void *obj, ProtobufCMessage *msg, struct cr_img *i) info->pidfe = pb_msg(msg, PidfdEntry); pr_info_pidfd("Collected ", info->pidfe); + info->dead = NULL; if (info->pidfe->nspid != -1) goto out; dead = lookup_dead_pidfd(info->pidfe->ino); - if (dead) { - mutex_lock(&dead->pidfd_lock); - dead->count++; - mutex_unlock(&dead->pidfd_lock); - goto out; - } - - dead = shmalloc(sizeof(*dead)); if (!dead) { - pr_err("Could not allocate shared memory..\n"); - return -1; + dead = xmalloc(sizeof(*dead)); + if (!dead) { + pr_err("Could not allocate memory..\n"); + return -1; + } + + INIT_HLIST_NODE(&dead->hash); + dead->list = NULL; + dead->ino = info->pidfe->ino; + dead->creator_id = info->pidfe->id; + hlist_add_head(&dead->hash, &dead_pidfd_hash[dead->ino % DEAD_PIDFD_HASH_SIZE]); } - INIT_HLIST_NODE(&dead->hash); - dead->ino = info->pidfe->ino; - dead->count = 1; - dead->pid = -1; - mutex_init(&dead->pidfd_lock); + info->dead = dead; + info->next = dead->list; + dead->list = info; + if (dead->creator_id > info->pidfe->id) + dead->creator_id = info->pidfe->id; - mutex_lock(dead_pidfd_hash_lock); - hlist_add_head(&dead->hash, &dead_pidfd_hash[dead->ino % DEAD_PIDFD_HASH_SIZE]); - mutex_unlock(dead_pidfd_hash_lock); out: return file_desc_add(&info->d, info->pidfe->id, &pidfd_desc_ops); } From 54b8bd02fd4ac47affa66ddd73db401e33e96d7a Mon Sep 17 00:00:00 2001 From: Bhavik Sachdev Date: Wed, 6 Nov 2024 22:10:08 +0530 Subject: [PATCH 2/2] zdtm: Check many processes with common dead pidfd We have multiple processes open a pidfd to a common dead process. After C/R we check that the inode numbers for these pidfds are equal or not. Signed-off-by: Bhavik Sachdev --- test/zdtm/static/Makefile | 1 + test/zdtm/static/pidfd_diffdead.c | 228 ++++++++++++++++++++++++++++++ 2 files changed, 229 insertions(+) create mode 100644 test/zdtm/static/pidfd_diffdead.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 44ac64fe57..71a1b6a535 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -56,6 +56,7 @@ TST_NOFILE := \ pidfd_self \ pidfd_of_thread \ pidfd_dead \ + pidfd_diffdead \ pidfd_child \ pidfd_kill \ fd_from_pidfd \ diff --git a/test/zdtm/static/pidfd_diffdead.c b/test/zdtm/static/pidfd_diffdead.c new file mode 100644 index 0000000000..5bc1911a51 --- /dev/null +++ b/test/zdtm/static/pidfd_diffdead.c @@ -0,0 +1,228 @@ +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check C/R of processes that point to a common dead pidfd\n"; +const char *test_author = "Bhavik Sachdev "; + +#ifndef PID_FS_MAGIC +#define PID_FS_MAGIC 0x50494446 +#endif + +/* + * main + * `- child + * `- grandchild + * + * main and child open a pidfd for grandchild. + * Before C/R we kill grandchild. + * We end up with two pidfds in two diff processes that point to the same dead process. + */ + +static long get_fs_type(int lfd) +{ + struct statfs fst; + + if (fstatfs(lfd, &fst)) { + return -1; + } + return fst.f_type; +} + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t *info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static int check_for_pidfs(void) +{ + long type; + int pidfd = pidfd_open(getpid(), 0); + if (pidfd < 0) { + pr_perror("pidfd open() failed"); + return -1; + } + type = get_fs_type(pidfd); + close(pidfd); + return type == PID_FS_MAGIC; +} + +int main(int argc, char *argv[]) +{ +#define READ 0 +#define WRITE 1 + + int child, ret, gchild, status; + struct statx stat; + task_waiter_t t; + unsigned long long ino; + + /* + * We use the inop pipe to send the inode number of the + * pidfd opened in the child to the main process for + * comparison. + */ + int p[2]; + int pidfd; + + test_init(argc, argv); + task_waiter_init(&t); + + ret = check_for_pidfs(); + if (ret < 0) + return 1; + + if (ret == 0) { + test_daemon(); + test_waitsig(); + skip("Test requires pidfs. skipping..."); + pass(); + return 0; + } + + if (pipe(p)) { + pr_perror("pipe"); + return 1; + } + + child = test_fork(); + if (child < 0) { + pr_perror("fork"); + return 1; + } else if (child == 0) { + int gchild; + gchild = test_fork(); + if (gchild < 0) { + pr_perror("fork"); + return 1; + } else if (gchild == 0) { + close(p[READ]); + close(p[WRITE]); + while (1) + sleep(1000); + } else { + if (write(p[WRITE], &gchild, sizeof(int)) != sizeof(int)) { + pr_perror("write"); + return 1; + } + + pidfd = pidfd_open(gchild, 0); + if (pidfd < 0) { + pr_perror("pidfd_open"); + return 1; + } + + if (waitpid(gchild, &status, 0) != gchild) { + pr_perror("waitpid"); + return 1; + } + + if (!WIFSIGNALED(status)) { + fail("Expected grandchild to be terminated by a signal"); + return 1; + } + + if (WTERMSIG(status) != SIGKILL) { + fail("Expected grandchild to be terminated by SIGKILL"); + return 1; + } + task_waiter_complete(&t, 1); + + test_waitsig(); + + if (statx(pidfd, "", AT_EMPTY_PATH, STATX_ALL, &stat) < 0) { + pr_perror("statx"); + return 1; + } + + close(p[WRITE]); + if (read(p[READ], &ino, sizeof(ino)) != sizeof(ino)) { + pr_perror("read"); + return 1; + } + close(p[READ]); + close(pidfd); + + /* ino number should be same because both pidfds were for the same process */ + if (ino != stat.stx_ino) { + exit(1); + } + exit(0); + } + } + + if (read(p[READ], &gchild, sizeof(int)) != sizeof(int)) { + pr_perror("write"); + return 1; + } + + pidfd = pidfd_open(gchild, 0); + if (pidfd < 0) { + pr_perror("pidfd_open"); + return 1; + } + + /* + * We kill grandchild process only after opening pidfd. + */ + if (pidfd_send_signal(pidfd, SIGKILL, NULL, 0)) { + pr_perror("pidfd_send_signal"); + return 1; + } + + /* Wait for child to waitpid on gchild */ + task_waiter_wait4(&t, 1); + + test_daemon(); + test_waitsig(); + + close(p[READ]); + if (statx(pidfd, "", AT_EMPTY_PATH, STATX_ALL, &stat) < 0) { + pr_perror("statx"); + goto err; + } + + /* Send inode number of pidfd to child for comparison */ + if (write(p[WRITE], &stat.stx_ino, sizeof(stat.stx_ino)) != sizeof(stat.stx_ino)) { + pr_perror("write"); + goto err; + } + close(p[WRITE]); + + if (kill(child, SIGTERM)) { + pr_perror("kill"); + goto err; + } + + if (waitpid(child, &status, 0) != child) { + pr_perror("waitpid"); + goto err; + } + + if (!WIFEXITED(status)) { + fail("Expected child to terminate normally"); + goto err; + } + + if (WEXITSTATUS(status) != 0) { + fail("Child failed"); + goto err; + } + + pass(); + close(pidfd); + return 0; +err: + close(pidfd); + return 1; +}