diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 3ddf45cd70..ba6132d2f7 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -101,6 +101,7 @@ obj-$(CONFIG_COMPAT) += vdso-compat.o CFLAGS_REMOVE_vdso-compat.o += $(CFLAGS-ASAN) $(CFLAGS-GCOV) obj-y += pidfd-store.o obj-y += hugetlb.o +obj-y += pidfd.o PROTOBUF_GEN := scripts/protobuf-gen.sh diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 4d4dfbe6fe..d5b6c8037a 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -79,6 +79,7 @@ #include "timens.h" #include "bpfmap.h" #include "apparmor.h" +#include "pidfd.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -280,7 +281,7 @@ static struct collect_image_info *cinfos_files[] = { &unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo, &netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo, &tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo, - &fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, + &fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &pidfd_cinfo }; /* These images are required to restore namespaces */ diff --git a/criu/files.c b/criu/files.c index 3b653e24be..a57fb860fb 100644 --- a/criu/files.c +++ b/criu/files.c @@ -49,6 +49,7 @@ #include "kerndat.h" #include "fdstore.h" #include "bpfmap.h" +#include "pidfd.h" #include "protobuf.h" #include "util.h" @@ -544,6 +545,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, ops = &signalfd_dump_ops; else if (is_timerfd_link(link)) ops = &timerfd_dump_ops; + else if (is_pidfd_link(link)) + ops = &pidfd_dump_ops; #ifdef CONFIG_HAS_LIBBPF else if (is_bpfmap_link(link)) ops = &bpfmap_dump_ops; @@ -554,6 +557,11 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return do_dump_gen_file(&p, lfd, ops, e); } + if (p.fs_type == PID_FS_MAGIC) { + ops = &pidfd_dump_ops; + return do_dump_gen_file(&p, lfd, ops, e); + } + if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || S_ISLNK(p.stat.st_mode)) { if (fill_fdlink(lfd, &p, &link)) return -1; @@ -1778,6 +1786,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) case FD_TYPES__MEMFD: ret = collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo); break; + case FD_TYPES__PIDFD: + ret = collect_one_file_entry(fe, fe->pidfd->id, &fe->pidfd->base, &pidfd_cinfo); + break; #ifdef CONFIG_HAS_LIBBPF case FD_TYPES__BPFMAP: ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo); @@ -1800,5 +1811,11 @@ int prepare_files(void) { init_fdesc_hash(); init_sk_info_hash(); + + if (init_dead_pidfd_hash()) { + pr_err("Could not initialise hash map for dead pidfds\n"); + return -1; + } + return collect_image(&files_cinfo); } diff --git a/criu/image-desc.c b/criu/image-desc.c index d65d9c0986..2d87c73815 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -107,6 +107,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF), FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF), FD_ENTRY(APPARMOR, "apparmor"), + FD_ENTRY(PIDFD, "pidfd"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/include/fs-magic.h b/criu/include/fs-magic.h index ad34f48915..ffc0455d5f 100644 --- a/criu/include/fs-magic.h +++ b/criu/include/fs-magic.h @@ -57,4 +57,8 @@ #define OVERLAYFS_SUPER_MAGIC 0x794c7630 #endif +#ifndef PID_FS_MAGIC +#define PID_FS_MAGIC 0x50494446 +#endif + #endif /* __CR_FS_MAGIC_H__ */ diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 9f369be645..79e1ac1113 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -113,6 +113,7 @@ enum { CR_FD_PIPES, CR_FD_TTY_FILES, CR_FD_MEMFD_FILE, + CR_FD_PIDFD, CR_FD_AUTOFS, diff --git a/criu/include/magic.h b/criu/include/magic.h index 0e8c37234e..6f0aff26d8 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -100,6 +100,7 @@ #define BPFMAP_FILE_MAGIC 0x57506142 /* Alapayevsk */ #define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */ #define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */ +#define PIDFD_MAGIC 0x54435556 /* Ufa */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/pidfd.h b/criu/include/pidfd.h new file mode 100644 index 0000000000..4d2d71700e --- /dev/null +++ b/criu/include/pidfd.h @@ -0,0 +1,16 @@ +#ifndef __CR_PIDFD_H__ +#define __CR_PIDFD_H__ + +#include "files.h" +#include "pidfd.pb-c.h" + +extern const struct fdtype_ops pidfd_dump_ops; +extern struct collect_image_info pidfd_cinfo; +extern int is_pidfd_link(char *link); +extern int init_dead_pidfd_hash(void); +struct pidfd_dump_info { + PidfdEntry pidfe; + pid_t pid; +}; + +#endif /* __CR_PIDFD_H__ */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 3824de101f..c4241be557 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -70,6 +70,7 @@ enum { PB_BPFMAP_FILE, PB_BPFMAP_DATA, PB_APPARMOR, + PB_PIDFD, /* PB_AUTOGEN_STOP */ diff --git a/criu/pidfd.c b/criu/pidfd.c new file mode 100644 index 0000000000..fdf5dec60e --- /dev/null +++ b/criu/pidfd.c @@ -0,0 +1,287 @@ +#include "common/lock.h" +#include "imgset.h" +#include "pidfd.h" +#include "fdinfo.h" +#include "pidfd.pb-c.h" +#include "protobuf.h" +#include "pstree.h" +#include +#include +#include +#include "common/bug.h" +#include "rst-malloc.h" + +#undef LOG_PREFIX +#define LOG_PREFIX "pidfd: " + +#ifndef PIDFD_THREAD +#define PIDFD_THREAD O_EXCL +#endif + +struct pidfd_info { + PidfdEntry *pidfe; + struct file_desc d; +}; + +struct dead_pidfd { + unsigned int ino; + int pid; + size_t count; + mutex_t pidfd_lock; + struct hlist_node hash; +}; + +#define DEAD_PIDFD_HASH_SIZE 32 +static struct hlist_head dead_pidfd_hash[DEAD_PIDFD_HASH_SIZE]; +static mutex_t *dead_pidfd_hash_lock; + +int init_dead_pidfd_hash(void) +{ + for (int i = 0; i < DEAD_PIDFD_HASH_SIZE; i++) + INIT_HLIST_HEAD(&dead_pidfd_hash[i]); + + dead_pidfd_hash_lock = shmalloc(sizeof(*dead_pidfd_hash_lock)); + if (!dead_pidfd_hash_lock) + return -1; + + mutex_init(dead_pidfd_hash_lock); + + return 0; +} + +static struct dead_pidfd *lookup_dead_pidfd(unsigned int ino) +{ + struct dead_pidfd *dead; + struct hlist_head *chain; + + mutex_lock(dead_pidfd_hash_lock); + chain = &dead_pidfd_hash[ino % DEAD_PIDFD_HASH_SIZE]; + hlist_for_each_entry(dead, chain, hash) { + if (dead->ino == ino) { + mutex_unlock(dead_pidfd_hash_lock); + return dead; + } + } + mutex_unlock(dead_pidfd_hash_lock); + + return NULL; +} + +int is_pidfd_link(char *link) +{ + /* + * pidfs was introduced in Linux 6.9 + * before which anonymous-inodes were used + */ + return is_anon_link_type(link, "[pidfd]"); +} + +static void pr_info_pidfd(char *action, PidfdEntry *pidfe) +{ + pr_info("%s: id %#08x flags %u NSpid %d ino %u\n", + action, pidfe->id, pidfe->flags, pidfe->nspid, pidfe->ino + ); +} + +static int dump_one_pidfd(int pidfd, u32 id, const struct fd_parms *p) +{ + struct pidfd_dump_info pidfd_info = {.pidfe = PIDFD_ENTRY__INIT}; + FileEntry fe = FILE_ENTRY__INIT; + + if (parse_fdinfo(pidfd, FD_TYPES__PIDFD, &pidfd_info)) + return -1; + + if (p->flags & PIDFD_THREAD) { + pr_err("PIDFD_THREAD flag is currently not supported\n"); + return -1; + } + + /* + * Check if the pid pidfd refers to is part of process tree + * This ensures the process will exist on restore. + */ + if (pidfd_info.pid != -1 && !pstree_item_by_real(pidfd_info.pid)) { + pr_err("pidfd pid %d is not a part of process tree..\n", + pidfd_info.pid); + return -1; + } + + pidfd_info.pidfe.id = id; + pidfd_info.pidfe.flags = (p->flags & ~O_RDWR); + pidfd_info.pidfe.fown = (FownEntry *)&p->fown; + + fe.type = FD_TYPES__PIDFD; + fe.id = pidfd_info.pidfe.id; + fe.pidfd = &pidfd_info.pidfe; + + pr_info_pidfd("Dumping", &pidfd_info.pidfe); + return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); +} + +const struct fdtype_ops pidfd_dump_ops = { + .type = FD_TYPES__PIDFD, + .dump = dump_one_pidfd, +}; + +static int pidfd_open(pid_t pid, int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int create_tmp_process(void) +{ + int tmp_process; + tmp_process = fork(); + if (tmp_process < 0) { + pr_perror("Could not fork"); + return -1; + } else if (tmp_process == 0) { + while(1) + sleep(1); + } + return tmp_process; +} + +static int free_dead_pidfd(struct dead_pidfd *dead) +{ + int status; + + if (kill(dead->pid, SIGKILL) < 0) { + pr_perror("Could not kill temporary process with pid: %d", + dead->pid); + goto err; + } + + if (waitpid(dead->pid, &status, 0) != dead->pid) { + pr_perror("Could not wait on temporary process with pid: %d", + dead->pid); + goto err; + } + + if (!WIFSIGNALED(status)) { + pr_err("Expected temporary process to be terminated by a signal\n"); + goto err; + } + + if (WTERMSIG(status) != SIGKILL) { + pr_err("Expected temporary process to be terminated by SIGKILL\n"); + goto err; + } + + mutex_lock(dead_pidfd_hash_lock); + hlist_del(&dead->hash); + mutex_unlock(dead_pidfd_hash_lock); + return 0; +err: + return -1; +} + +static int open_one_pidfd(struct file_desc *d, int *new_fd) +{ + struct pidfd_info *info; + struct dead_pidfd *dead = NULL; + int pidfd; + + info = container_of(d, struct pidfd_info, d); + if (info->pidfe->nspid != -1) { + pidfd = pidfd_open(info->pidfe->nspid, info->pidfe->flags); + if (pidfd < 0) { + pr_perror("Could not open pidfd for %d", info->pidfe->nspid); + goto err_close; + } + goto out; + } + + dead = lookup_dead_pidfd(info->pidfe->ino); + BUG_ON(!dead); + + mutex_lock(&dead->pidfd_lock); + BUG_ON(dead->count == 0); + dead->count--; + if (dead->pid == -1) { + dead->pid = create_tmp_process(); + if (dead->pid < 0) { + mutex_unlock(&dead->pidfd_lock); + goto err_close; + } + } + + pidfd = pidfd_open(dead->pid, info->pidfe->flags); + if (pidfd < 0) { + pr_perror("Could not open pidfd for %d", info->pidfe->nspid); + mutex_unlock(&dead->pidfd_lock); + goto err_close; + } + + if (dead->count == 0) { + if (free_dead_pidfd(dead)) { + pr_err("Failed to delete dead_pidfd struct\n"); + mutex_unlock(&dead->pidfd_lock); + close(pidfd); + goto err_close; + } + } + mutex_unlock(&dead->pidfd_lock); + +out: + if (rst_file_params(pidfd, info->pidfe->fown, info->pidfe->flags)) { + goto err_close; + } + + *new_fd = pidfd; + return 0; +err_close: + pr_err("Can't create pidfd %#08x NSpid: %d flags: %u\n", + info->pidfe->id, info->pidfe->nspid, info->pidfe->flags); + return -1; +} + +static struct file_desc_ops pidfd_desc_ops = { + .type = FD_TYPES__PIDFD, + .open = open_one_pidfd +}; + +static int collect_one_pidfd(void *obj, ProtobufCMessage *msg, struct cr_img *i) +{ + struct dead_pidfd *dead; + struct pidfd_info *info = obj; + + info->pidfe = pb_msg(msg, PidfdEntry); + pr_info_pidfd("Collected ", info->pidfe); + + if (info->pidfe->nspid != -1) + goto out; + + dead = lookup_dead_pidfd(info->pidfe->ino); + if (dead) { + mutex_lock(&dead->pidfd_lock); + dead->count++; + mutex_unlock(&dead->pidfd_lock); + goto out; + } + + dead = shmalloc(sizeof(*dead)); + if (!dead) { + pr_err("Could not allocate shared memory..\n"); + return -1; + } + + INIT_HLIST_NODE(&dead->hash); + dead->ino = info->pidfe->ino; + dead->count = 1; + dead->pid = -1; + mutex_init(&dead->pidfd_lock); + + mutex_lock(dead_pidfd_hash_lock); + hlist_add_head(&dead->hash, &dead_pidfd_hash[dead->ino % DEAD_PIDFD_HASH_SIZE]); + mutex_unlock(dead_pidfd_hash_lock); +out: + return file_desc_add(&info->d, info->pidfe->id, &pidfd_desc_ops); +} + +struct collect_image_info pidfd_cinfo = { + .fd_type = CR_FD_PIDFD, + .pb_type = PB_PIDFD, + .priv_size = sizeof(struct pidfd_info), + .collect = collect_one_pidfd, +}; diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 55aefac7d7..95ebe3a411 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -42,10 +42,12 @@ #include "fault-injection.h" #include "memfd.h" #include "hugetlb.h" +#include "pidfd.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" #include "images/mnt.pb-c.h" +#include "pidfd.pb-c.h" #include "plugin.h" #include @@ -2165,6 +2167,33 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) if (ret) goto parse_err; + entry_met = true; + continue; + } + if (fdinfo_field(str, "ino") || fdinfo_field(str, "NSpid") || fdinfo_field(str, "Pid")) { + struct pidfd_dump_info *pidfd_info = arg; + + if (type != FD_TYPES__PIDFD) + continue; + + if (fdinfo_field(str, "ino")) { + ret = sscanf(str, "%*s %u", &pidfd_info->pidfe.ino); + if (ret != 1) + goto parse_err; + } else if (fdinfo_field(str, "Pid")) { + ret = sscanf(str, "%*s %d", &pidfd_info->pid); + if (ret != 1) + goto parse_err; + } else if (fdinfo_field(str, "NSpid")) { + char *last; + + last = strrchr(str, '\t'); + if (!last || sscanf(last, "%d", &pidfd_info->pidfe.nspid) != 1) { + pr_err("Unable to parse: %s\n", str); + goto parse_err; + } + } + entry_met = true; continue; } diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index ff16b9f5be..e0dbfccc21 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -68,6 +68,7 @@ #include "images/bpfmap-file.pb-c.h" #include "images/bpfmap-data.pb-c.h" #include "images/apparmor.pb-c.h" +#include "images/pidfd.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/images/Makefile b/images/Makefile index ca85b1a213..855d894da6 100644 --- a/images/Makefile +++ b/images/Makefile @@ -73,6 +73,7 @@ proto-obj-y += bpfmap-file.o proto-obj-y += bpfmap-data.o proto-obj-y += apparmor.o proto-obj-y += rseq.o +proto-obj-y += pidfd.o CFLAGS += -iquote $(obj)/ diff --git a/images/fdinfo.proto b/images/fdinfo.proto index 88f1c11860..32ec13cf48 100644 --- a/images/fdinfo.proto +++ b/images/fdinfo.proto @@ -17,6 +17,7 @@ import "ext-file.proto"; import "sk-unix.proto"; import "fifo.proto"; import "pipe.proto"; +import "pidfd.proto"; import "tty.proto"; import "memfd.proto"; import "bpfmap-file.proto"; @@ -42,6 +43,7 @@ enum fd_types { TIMERFD = 17; MEMFD = 18; BPFMAP = 19; + PIDFD = 20; /* Any number above the real used. Not stored to image */ CTL_TTY = 65534; @@ -78,4 +80,5 @@ message file_entry { optional tty_file_entry tty = 19; optional memfd_file_entry memfd = 20; optional bpfmap_file_entry bpf = 21; + optional pidfd_entry pidfd = 22; } diff --git a/images/pidfd.proto b/images/pidfd.proto new file mode 100644 index 0000000000..a9da3e4543 --- /dev/null +++ b/images/pidfd.proto @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: MIT + +syntax = "proto2"; + +import "fown.proto"; + +message pidfd_entry { + required uint32 id = 1; + required uint32 ino = 2; + required uint32 flags = 3; + required int32 nspid = 4; + required fown_entry fown = 5; +} diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 1e891f0ba4..44ac64fe57 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -53,6 +53,12 @@ TST_NOFILE := \ shm \ shm-mp \ ptrace_sig \ + pidfd_self \ + pidfd_of_thread \ + pidfd_dead \ + pidfd_child \ + pidfd_kill \ + fd_from_pidfd \ pipe00 \ pipe01 \ pipe02 \ diff --git a/test/zdtm/static/fd_from_pidfd.c b/test/zdtm/static/fd_from_pidfd.c new file mode 100644 index 0000000000..1f863d6c0e --- /dev/null +++ b/test/zdtm/static/fd_from_pidfd.c @@ -0,0 +1,108 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check if fd obtained from pidfd_get_fd is C/R correctly\n"; +const char *test_author = "Bhavik Sachdev "; + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_getfd(int pidfd, int targetfd, unsigned int flags) +{ + return syscall(__NR_pidfd_getfd, pidfd, targetfd, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +int main(int argc, char* argv[]) +{ + #define READ 0 + #define WRITE 1 + + int pidfd, child, p[2], child_read, read_data, status; + int data = 42; + + test_init(argc, argv); + + if (pipe(p)) { + pr_perror("pipe"); + return 1; + } + + child = fork(); + if (child < 0) { + pr_perror("fork"); + return 1; + } + + if (child == 0) { + close(p[WRITE]); + test_waitsig(); + return 0; + } + + pidfd = pidfd_open(child, 0); + if (pidfd < 0) { + pr_perror("pidfd_open failed"); + return 1; + } + + close(p[READ]); + if (write(p[WRITE], &data, sizeof(data)) != sizeof(data)) { + pr_perror("write"); + return 1; + } + close(p[WRITE]); + + child_read = pidfd_getfd(pidfd, p[READ], 0); + if (child_read < 0) { + pr_perror("pidfd_getfd"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (read(child_read, &read_data, sizeof(read_data)) != sizeof(read_data)) { + pr_perror("read"); + goto err_close; + } + + if (read_data != data) { + fail("data from fd obtained using pidfd_getfd incorrect"); + goto err_close; + } + + if (pidfd_send_signal(pidfd, SIGTERM, NULL, 0)) { + pr_perror("Could not send signal"); + goto err_close; + } + + if (waitpid(child, &status, 0) != child) { + pr_perror("waitpid()"); + return 1; + } + + if (status != 0) { + fail("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status)); + return 1; + } + + pass(); + close(child_read); + close(pidfd); + return 0; +err_close: + close(child_read); + close(pidfd); + return 1; +} diff --git a/test/zdtm/static/pidfd_child.c b/test/zdtm/static/pidfd_child.c new file mode 100644 index 0000000000..ec559605dc --- /dev/null +++ b/test/zdtm/static/pidfd_child.c @@ -0,0 +1,66 @@ +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Checks pidfd sends signal to child process after restore\n"; +const char *test_author = "Bhavik Sachdev "; + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +int main(int argc, char* argv[]) +{ + int pidfd, status; + pid_t child; + + test_init(argc, argv); + + child = fork(); + if (child < 0) { + pr_perror("Unable to fork a new process"); + return 1; + } else if (child == 0) { + test_waitsig(); + return 0; + } + + pidfd = pidfd_open(child, 0); + if (pidfd < 0) { + pr_perror("pidfd_open failed"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (pidfd_send_signal(pidfd, SIGTERM, NULL, 0)) { + fail("Could not send signal"); + goto err_close; + } + + if (waitpid(child, &status, 0) != child) { + pr_perror("waitpid()"); + goto err_close; + } + + if (status != 0) { + fail("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status)); + goto err_close; + } + + pass(); + close(pidfd); + return 0; +err_close: + close(pidfd); + return 1; +} diff --git a/test/zdtm/static/pidfd_dead.c b/test/zdtm/static/pidfd_dead.c new file mode 100644 index 0000000000..9c825899d1 --- /dev/null +++ b/test/zdtm/static/pidfd_dead.c @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check C/R of pidfds that point to dead processes\n"; +const char *test_author = "Bhavik Sachdev "; + +#ifndef PID_FS_MAGIC +#define PID_FS_MAGIC 0x50494446 +#endif + +/* + * main + * `- child + * `- grandchild + * + * main opens a pidfd for both child and grandchild. + * Before C/R we kill both child and grandchild. + * We end up with two unique dead pidfds. + */ + +static long get_fs_type(int lfd) +{ + struct statfs fst; + + if (fstatfs(lfd, &fst)) { + return -1; + } + return fst.f_type; +} + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static int open_pidfd_pair(int pidfd[2], int pid) +{ + pidfd[0] = pidfd_open(pid, 0); + if (pidfd[0] < 0) { + pr_perror("pidfd_open() failed"); + return 1; + } + + pidfd[1] = pidfd_open(pid, 0); + if (pidfd[1] < 0) { + close(pidfd[0]); + pr_perror("pidfd_open() failed"); + return 1; + } + return 0; +} + +static int compare_pidfds(int pidfd[2]) +{ + /* + * After linux 6.9 we can compare inode numbers + * to determine if two pidfds point to the same process. + * While the inode number may change before and after C/R + * pidfds pointing to the same pid should have the same inode number. + */ + struct statx stats[2]; + statx(pidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[0]); + statx(pidfd[1], "", AT_EMPTY_PATH, STATX_ALL, &stats[1]); + if (stats[0].stx_ino != stats[1].stx_ino) + return 1; + return 0; +} + +static int check_for_pidfs(void) +{ + long type; + int pidfd = pidfd_open(getpid(), 0); + if (pidfd < 0) { + pr_perror("pidfd open() failed"); + return -1; + } + type = get_fs_type(pidfd); + close(pidfd); + return type == PID_FS_MAGIC; +} + +int main(int argc, char* argv[]) +{ + #define READ 0 + #define WRITE 1 + + int child, ret, gchild, p[2], status; + int cpidfd[2], gpidfd[2]; + struct statx stats[2]; + + test_init(argc, argv); + + ret = check_for_pidfs(); + if (ret < 0) + return 1; + + if (ret == 0) { + test_daemon(); + test_waitsig(); + skip("Test requires pidfs. skipping..."); + pass(); + return 0; + } + + if (pipe(p)) { + pr_perror("pipe"); + return 1; + } + + child = test_fork(); + if (child < 0) { + pr_perror("fork"); + return 1; + } else if (child == 0) { + int gchild = test_fork(); + close(p[READ]); + if (gchild < 0) { + pr_perror("fork"); + return 1; + } else if (gchild == 0) { + close(p[WRITE]); + while(1) + sleep(1000); + } else { + if (write(p[WRITE], &gchild, sizeof(int)) != sizeof(int)) { + pr_perror("write"); + return 1; + } + close(p[WRITE]); + if (waitpid(gchild, &status, 0) != gchild) { + pr_perror("waitpid"); + return 1; + } + + if (!WIFSIGNALED(status)) { + fail("Expected grandchild to be terminated by a signal"); + return 1; + } + + if (WTERMSIG(status) != SIGKILL) { + fail("Expected grandchild to be terminated by SIGKILL"); + return 1; + } + + return 0; + } + } + + ret = open_pidfd_pair(cpidfd, child); + if (ret) + return 1; + + close(p[WRITE]); + if (read(p[READ], &gchild, sizeof(int)) != sizeof(int)) { + pr_perror("write"); + return 1; + } + close(p[READ]); + + ret = open_pidfd_pair(gpidfd, gchild); + if (ret) + return 1; + + /* + * We kill grandchild and child processes only after opening pidfds. + */ + if (pidfd_send_signal(gpidfd[0], SIGKILL, NULL, 0)) { + pr_perror("pidfd_send_signal"); + goto fail_close; + } + + if (waitpid(child, &status, 0) != child) { + pr_perror("waitpid"); + goto fail_close; + } + + if (!WIFEXITED(status)) { + fail("Expected child to exit normally"); + goto fail_close; + } + + if (WEXITSTATUS(status) != 0) { + fail("Expected child to exit with 0"); + goto fail_close; + } + usleep(1000); + + if (kill(gchild, 0) != -1 && errno != ESRCH) { + fail("Expected grand child to not exist"); + goto fail_close; + } + + if (kill(child, 0) != -1 && errno != ESRCH) { + fail("Expected child to not exist"); + goto fail_close; + } + + test_daemon(); + test_waitsig(); + + ret = compare_pidfds(cpidfd); + if (ret) { + fail("inodes not same for same pid"); + goto fail_close; + } + + ret = compare_pidfds(gpidfd); + if (ret) { + fail("inodes not same for same pid"); + goto fail_close; + } + + statx(cpidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[0]); + statx(gpidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[1]); + if (stats[0].stx_ino == stats[1].stx_ino) { + fail("pidfds pointing to diff pids should have diff inodes"); + goto fail_close; + } + + pass(); + close(cpidfd[0]); + close(cpidfd[1]); + close(gpidfd[0]); + close(gpidfd[1]); + return 0; + +fail_close: + close(cpidfd[0]); + close(cpidfd[1]); + close(gpidfd[0]); + close(gpidfd[1]); + return 1; +} diff --git a/test/zdtm/static/pidfd_kill.c b/test/zdtm/static/pidfd_kill.c new file mode 100644 index 0000000000..6232d033aa --- /dev/null +++ b/test/zdtm/static/pidfd_kill.c @@ -0,0 +1,128 @@ +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Kill child and grandchild process using pidfds\n"; +const char *test_author = "Bhavik Sachdev "; + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static int wait_for_child(int child) +{ + int status; + if (waitpid(child, &status, 0) != child) { + pr_perror("waitpid()"); + return 1; + } + + if (status != 0) { + test_msg("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), + WIFSIGNALED(status), WTERMSIG(status)); + } + + return 0; +} + +int main(int argc, char* argv[]) +{ + #define READ 0 + #define WRITE 1 + + int child, gchild, cpidfd, gpidfd, gchild_pid, ret; + int p[2]; + + if (pipe(p)) { + pr_perror("pipe"); + return 1; + } + + test_init(argc, argv); + + child = fork(); + if (child < 0) { + pr_perror("fork"); + return 1; + } + + if (child == 0) { + gchild = fork(); + if (gchild < 0) { + pr_perror("fork"); + return 1; + } + + if (gchild == 0) { + test_waitsig(); + return 0; + } + + close(p[READ]); + if (write(p[WRITE], &gchild, sizeof(gchild)) + != sizeof(gchild)) { + pr_perror("write"); + return 1; + } + close(p[WRITE]); + + test_waitsig(); + return wait_for_child(gchild); + } + + cpidfd = pidfd_open(child, 0); + if (cpidfd < 0) { + pr_perror("pidfd_open"); + return 1; + } + + close(p[WRITE]); + if (read(p[READ], &gchild_pid, sizeof(gchild_pid)) + != sizeof(gchild_pid)) { + pr_perror("read"); + return 1; + } + close(p[READ]); + + gpidfd = pidfd_open(gchild_pid, 0); + if (gpidfd < 0) { + pr_perror("pidfd_open"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (pidfd_send_signal(gpidfd, SIGKILL, NULL, 0)) { + pr_perror("Could not send signal"); + goto fail_close; + } + + if (pidfd_send_signal(cpidfd, SIGKILL, NULL, 0)) { + pr_perror("Could not send signal"); + goto fail_close; + } + + ret = wait_for_child(child); + if (ret) + goto fail_close; + + pass(); + close(cpidfd); + close(gpidfd); + return 0; + +fail_close: + fail(); + close(cpidfd); + close(gpidfd); + return 1; +} diff --git a/test/zdtm/static/pidfd_of_thread.c b/test/zdtm/static/pidfd_of_thread.c new file mode 100644 index 0000000000..d232c7ac1d --- /dev/null +++ b/test/zdtm/static/pidfd_of_thread.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" +#include "lock.h" + +const char *test_doc = "Check C/R of pidfds that point to threads\n"; +const char *test_author = "Bhavik Sachdev "; + +/* see also: https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/tst-clone3.c.html */ + +#ifndef PIDFD_THREAD +#define PIDFD_THREAD O_EXCL +#endif + +#ifndef PIDFD_SIGNAL_THREAD +#define PIDFD_SIGNAL_THREAD (1UL << 0) +#endif + +#ifndef PID_FS_MAGIC +#define PID_FS_MAGIC 0x50494446 +#endif + +static long get_fs_type(int lfd) +{ + struct statfs fst; + + if (fstatfs(lfd, &fst)) { + return -1; + } + return fst.f_type; +} + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static int thread_func(void *a) +{ + test_waitsig(); + return 0; +} + +#define CTID_INIT_VAL 1 + +int main(int argc, char* argv[]) +{ + char st[64 * 1024] __attribute__ ((aligned)); + pid_t tid; + int pidfd, test_pidfd; + futex_t exited; + + int clone_flags = CLONE_THREAD; + clone_flags |= CLONE_VM | CLONE_SIGHAND; + clone_flags |= CLONE_CHILD_CLEARTID; + + test_init(argc, argv); + + test_pidfd = pidfd_open(getpid(), 0); + if (test_pidfd < 0) { + pr_perror("pidfd_open() failed"); + return 1; + } + + /* PIDFD_THREAD, PIDFD_SIGNAL_THREAD are supported only with pidfs */ + if (get_fs_type(test_pidfd) != PID_FS_MAGIC) { + test_daemon(); + test_waitsig(); + skip("pidfs not supported."); + close(test_pidfd); + return 0; + } + close(test_pidfd); + + futex_set(&exited, CTID_INIT_VAL); + + tid = clone(thread_func, st + sizeof(st), clone_flags, NULL, NULL, NULL, &(exited.raw)); + if (tid == -1) { + pr_perror("clone() failed"); + return 1; + } + + test_msg("Successfully created a thread with tid: %d\n", tid); + pidfd = pidfd_open(tid, PIDFD_THREAD); + if (pidfd < 0) { + pr_perror("pidfd_open() failed"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (pidfd_send_signal(pidfd, SIGTERM, NULL, PIDFD_SIGNAL_THREAD)) { + pr_perror("pidfd_send_signal() failed"); + fail(); + close(pidfd); + return 1; + } + + test_msg("Waiting for thread to exit\n"); + futex_wait_until(&exited, 0); + + pass(); + close(pidfd); + return 0; +} diff --git a/test/zdtm/static/pidfd_of_thread.desc b/test/zdtm/static/pidfd_of_thread.desc new file mode 100644 index 0000000000..802caed655 --- /dev/null +++ b/test/zdtm/static/pidfd_of_thread.desc @@ -0,0 +1 @@ +{'flags': 'noauto crfail'} diff --git a/test/zdtm/static/pidfd_self.c b/test/zdtm/static/pidfd_self.c new file mode 100644 index 0000000000..2730ee123d --- /dev/null +++ b/test/zdtm/static/pidfd_self.c @@ -0,0 +1,140 @@ +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check pidfd /proc/self/fdinfo/ entry remains consistent after checkpoint/restore\n"; +const char *test_author = "Bhavik Sachdev "; + +struct pidfd_status { + unsigned int flags; + pid_t pid; +}; + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static void show_pidfd(char *prefix, struct pidfd_status *s) +{ + test_msg("\n\t%s\n\tflags: 0%o\n\tpid: %d\n", prefix, s->flags, s->pid); +} + +static int parse_self_fdinfo(int pidfd, struct pidfd_status *s) +{ + char buf[256]; + int ret = -1; + FILE *f; + + sprintf(buf, "/proc/self/fdinfo/%d", pidfd); + f = fopen(buf, "r"); + if (!f) { + perror("Can't open /proc/self/fdinfo/ to parse"); + return -1; + } + + memset(s, 0, sizeof(*s)); + + /* + * flags: file access mode (octal) 02000002 => [O_RDWR | O_CLOEXEC] + * pid: the pid to which we have pidfd open + */ + while (fgets(buf, sizeof(buf), f)) { + if (!fgets(buf, sizeof(buf), f)) + goto parse_err; + + if (sscanf(buf, "flags: 0%o", &s->flags) != 1) { + goto parse_err; + } + + if (!fgets(buf, sizeof(buf), f)) + goto parse_err; + if (!fgets(buf, sizeof(buf), f)) + goto parse_err; + + if (!fgets(buf, sizeof(buf), f)) + goto parse_err; + + if (sscanf(buf, "Pid: %d", &s->pid) != 1) + goto parse_err; + ret = 0; + break; + } + + if (ret) + goto parse_err; +err: + fclose(f); + return ret; + +parse_err: + pr_perror("Format error"); + goto err; +} + +static int check_pidfd(int fd, struct pidfd_status *old) +{ + struct pidfd_status new; + + if (parse_self_fdinfo(fd, &new)) + return -1; + + show_pidfd("restored", &new); + + if (old->flags != new.flags || old->pid != new.pid) + return -1; + + return 0; +} + +int main(int argc, char* argv[]) +{ + struct pidfd_status old; + int pidfd, ret; + + test_init(argc, argv); + + pidfd = pidfd_open(getpid(), 0); + if (pidfd < 0) { + pr_perror("pidfd_open failed"); + return 1; + } + + parse_self_fdinfo(pidfd, &old); + + show_pidfd("old", &old); + + if (pidfd_send_signal(pidfd, 0, NULL, 0)) { + pr_perror("Could not send signal"); + return 1; + } + + test_daemon(); + test_waitsig(); + + ret = check_pidfd(pidfd, &old); + if (ret) { + fail(); + goto err; + } + + if (pidfd_send_signal(pidfd, 0, NULL, 0)) { + pr_perror("Could not send signal"); + fail(); + goto err; + } + + pass(); + close(pidfd); + return 0; +err: + close(pidfd); + return 1; +}