mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-20 21:51:05 +00:00
fork: extend clone3() to support setting a PID
The main motivation to add set_tid to clone3() is CRIU. To restore a process with the same PID/TID, CRIU currently uses /proc/sys/kernel/ns_last_pid. It writes the desired (PID - 1) to ns_last_pid and then (quickly) does a clone(). This works most of the time, but it is racy. It is also slow, as it requires multiple syscalls. Extending clone3() to support *set_tid makes it possible to restore a process using CRIU without accessing /proc/sys/kernel/ns_last_pid and race-free (as long as the desired PID/TID is available). This clone3() extension places the same restrictions (CAP_SYS_ADMIN) on clone3() with *set_tid as are currently in place for ns_last_pid. The original version of this change was using a single value for set_tid. At the 2019 LPC, after presenting set_tid, it was, however, decided to change set_tid to an array to enable setting the PID of a process in multiple PID namespaces at the same time. If a process is created in a PID namespace, it is possible to influence the PID inside and outside of the PID namespace. Details are also in the corresponding selftest. To create a process with the following PIDs (PID NS level -> requested PID): level 0 (host) -> 31496; level 1 -> 42; level 2 -> 1. For that example, the two newly introduced parameters to struct clone_args (set_tid and set_tid_size) would need to be: `set_tid[0] = 1; set_tid[1] = 42; set_tid[2] = 31496; set_tid_size = 3;`. If only the PIDs of the two innermost nested PID namespaces should be defined, it would look like this: `set_tid[0] = 1; set_tid[1] = 42; set_tid_size = 2;`. The PID of the newly created process would then be the next available free PID in the PID namespace level 0 (host) and 42 in the PID namespace at level 1, and the PID of the process in the innermost PID namespace would be 1. The set_tid array is used to specify the PID of a process starting from the innermost nested PID namespaces up to set_tid_size PID namespaces. set_tid_size cannot be larger than the current PID namespace level.
Signed-off-by: Adrian Reber <areber@redhat.com> Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com> Reviewed-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Acked-by: Andrei Vagin <avagin@gmail.com> Link: https://lore.kernel.org/r/20191115123621.142252-1-areber@redhat.com Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
parent
17a810699c
commit
49cb2fc42c
7 changed files with 121 additions and 36 deletions
|
@ -2087,7 +2087,8 @@ static __latent_entropy struct task_struct *copy_process(
|
|||
stackleak_task_init(p);
|
||||
|
||||
if (pid != &init_struct_pid) {
|
||||
pid = alloc_pid(p->nsproxy->pid_ns_for_children);
|
||||
pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
|
||||
args->set_tid_size);
|
||||
if (IS_ERR(pid)) {
|
||||
retval = PTR_ERR(pid);
|
||||
goto bad_fork_cleanup_thread;
|
||||
|
@ -2590,6 +2591,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
|
|||
{
|
||||
int err;
|
||||
struct clone_args args;
|
||||
pid_t *kset_tid = kargs->set_tid;
|
||||
|
||||
if (unlikely(usize > PAGE_SIZE))
|
||||
return -E2BIG;
|
||||
|
@ -2600,6 +2602,15 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(!args.set_tid && args.set_tid_size > 0))
|
||||
return -EINVAL;
|
||||
|
||||
if (unlikely(args.set_tid && args.set_tid_size == 0))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Verify that higher 32bits of exit_signal are unset and that
|
||||
* it is a valid signal
|
||||
|
@ -2617,8 +2628,16 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
|
|||
.stack = args.stack,
|
||||
.stack_size = args.stack_size,
|
||||
.tls = args.tls,
|
||||
.set_tid_size = args.set_tid_size,
|
||||
};
|
||||
|
||||
if (args.set_tid &&
|
||||
copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid),
|
||||
(kargs->set_tid_size * sizeof(pid_t))))
|
||||
return -EFAULT;
|
||||
|
||||
kargs->set_tid = kset_tid;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2662,6 +2681,9 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
|
|||
int err;
|
||||
|
||||
struct kernel_clone_args kargs;
|
||||
pid_t set_tid[MAX_PID_NS_LEVEL];
|
||||
|
||||
kargs.set_tid = set_tid;
|
||||
|
||||
err = copy_clone_args_from_user(&kargs, uargs, size);
|
||||
if (err)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue