pidfd patches for v5.1-rc1

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE7btrcuORLb1XUhEwjrBW1T7ssS0FAlx+nn4ACgkQjrBW1T7s
 sS2kwg//aJUCwLIhV91gXUFN2jHTCf0/+5fnigEk7JhAT5wmAykxLM8tprLlIlyp
 HtwNQx54hq/6p010Ulo9K50VS6JRii+2lNSpC6IkqXXdHXXm0ViH+5I9Nru8SVJ+
 avRCYWNjW9Gn1EtcB2yv6KP3XffgnQ6ZLIr4QJwglOxgAqUaWZ68woSUlrIR5yFj
 j48wAxjsC3g2qwGLvXPeiwYZHwk6VnYmrZ3eWXPDthWRDC4zkjyBdchZZzFJagSC
 6sX8T9s5ua5juZMokEJaWjuBQQyfg0NYu41hupSdVjV7/0D3E+5/DiReInvLmSup
 63bZ85uKRqWTNgl4cmJ1W3aVe2RYYemMZCXVVYYvU+IKpvTSzzYY7us+FyMAIRUV
 bT+XPGzTWcGrChzv9bHZcBrkL91XGqyxRJz56jLl6EhRtqxmzmywf6mO6pS2WK4N
 r+aBDgXeJbG39KguCzwUgVX8hC6YlSxSP8Md+2sK+UoAdfTUvFtdCYnjhuACofCt
 saRvDIPF8N9qn4Ch3InzCKkrUTL/H3BZKBl2jo6tYQ9smUsFZW7lQoip5Ui/0VS+
 qksJ91djOc9facGoOorPazojY5fO5Lj3Hg+cGIoxUV0jPH483z7hWH0ALynb0f6z
 EDsgNyEUpIO2nJMJJfm37ysbU/j1gOpzQdaAEaWeknwtfecFPzM=
 =yOWp
 -----END PGP SIGNATURE-----

Merge tag 'pidfd-v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull pidfd system call from Christian Brauner:
 "This introduces the ability to use file descriptors from /proc/<pid>/
  as stable handles on struct pid. Even if a pid is recycled the handle
  will not change. For a start these fds can be used to send signals to
  the processes they refer to.

  With the ability to use /proc/<pid> fds as stable handles on struct
  pid we can fix a long-standing issue where after a process has exited
  its pid can be reused by another process. If a caller sends a signal
  to a reused pid it will end up signaling the wrong process.

  With this patchset we enable a variety of use cases. One obvious
  example is that we can now safely delegate an important part of
  process management - sending signals - to processes other than the
  parent of a given process by sending file descriptors around via scm
  rights and not fearing that the given process will have been recycled
  in the meantime. It also allows for easy testing whether a given
  process is still alive or not by sending signal 0 to a pidfd which is
  quite handy.

  There has been some interest in this feature e.g. from systems
  management (systemd, glibc) and container managers. I have requested
  and gotten comments from glibc to make sure that this syscall is
  suitable for their needs as well. In the future I expect it to take on
  most other pid-based signal syscalls. But such features are left for
  the future once they are needed.

  This has been sitting in linux-next for quite a while and has not
  caused any issues. It comes with selftests which verify basic
  functionality and also test that a recycled pid cannot be signaled via
  a pidfd.

  Jon has written about a prior version of this patchset. It should
  cover the basic functionality since not a lot has changed since then:

      https://lwn.net/Articles/773459/

  The commit message for the syscall itself is extensively documenting
  the syscall, including it's functionality and extensibility"

* tag 'pidfd-v5.1-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  selftests: add tests for pidfd_send_signal()
  signal: add pidfd_send_signal() syscall
This commit is contained in:
Linus Torvalds 2019-03-16 13:47:14 -07:00
commit a9dce6679d
11 changed files with 538 additions and 6 deletions

View file

@ -19,7 +19,9 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/tty.h>
#include <linux/binfmts.h>
#include <linux/coredump.h>
@ -3487,6 +3489,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese,
#endif
#endif
static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
{
clear_siginfo(info);
info->si_signo = sig;
info->si_errno = 0;
info->si_code = SI_USER;
info->si_pid = task_tgid_vnr(current);
info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
}
/**
* sys_kill - send a signal to a process
* @pid: the PID of the process
@ -3496,16 +3508,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct kernel_siginfo info;
clear_siginfo(&info);
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_USER;
info.si_pid = task_tgid_vnr(current);
info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
prepare_kill_siginfo(sig, &info);
return kill_something_info(sig, &info, pid);
}
#ifdef CONFIG_PROC_FS
/*
* Verify that the signaler and signalee either are in the same pid namespace
* or that the signaler's pid namespace is an ancestor of the signalee's pid
* namespace.
*/
static bool access_pidfd_pidns(struct pid *pid)
{
struct pid_namespace *active = task_active_pid_ns(current);
struct pid_namespace *p = ns_of_pid(pid);
for (;;) {
if (!p)
return false;
if (p == active)
break;
p = p->parent;
}
return true;
}
static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
{
#ifdef CONFIG_COMPAT
/*
* Avoid hooking up compat syscalls and instead handle necessary
* conversions here. Note, this is a stop-gap measure and should not be
* considered a generic solution.
*/
if (in_compat_syscall())
return copy_siginfo_from_user32(
kinfo, (struct compat_siginfo __user *)info);
#endif
return copy_siginfo_from_user(kinfo, info);
}
/**
* sys_pidfd_send_signal - send a signal to a process through a task file
* descriptor
* @pidfd: the file descriptor of the process
* @sig: signal to be sent
* @info: the signal info
* @flags: future flags to be passed
*
* The syscall currently only signals via PIDTYPE_PID which covers
* kill(<positive-pid>, <signal>. It does not signal threads or process
* groups.
* In order to extend the syscall to threads and process groups the @flags
* argument should be used. In essence, the @flags argument will determine
* what is signaled and not the file descriptor itself. Put in other words,
* grouping is a property of the flags argument not a property of the file
* descriptor.
*
* Return: 0 on success, negative errno on failure
*/
SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
siginfo_t __user *, info, unsigned int, flags)
{
int ret;
struct fd f;
struct pid *pid;
kernel_siginfo_t kinfo;
/* Enforce flags be set to 0 until we add an extension. */
if (flags)
return -EINVAL;
f = fdget_raw(pidfd);
if (!f.file)
return -EBADF;
/* Is this a pidfd? */
pid = tgid_pidfd_to_pid(f.file);
if (IS_ERR(pid)) {
ret = PTR_ERR(pid);
goto err;
}
ret = -EINVAL;
if (!access_pidfd_pidns(pid))
goto err;
if (info) {
ret = copy_siginfo_from_user_any(&kinfo, info);
if (unlikely(ret))
goto err;
ret = -EINVAL;
if (unlikely(sig != kinfo.si_signo))
goto err;
if ((task_pid(current) != pid) &&
(kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
/* Only allow sending arbitrary signals to yourself. */
ret = -EPERM;
if (kinfo.si_code != SI_USER)
goto err;
/* Turn this into a regular kill signal. */
prepare_kill_siginfo(sig, &kinfo);
}
} else {
prepare_kill_siginfo(sig, &kinfo);
}
ret = kill_pid_info(sig, &kinfo, pid);
err:
fdput(f);
return ret;
}
#endif /* CONFIG_PROC_FS */
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
{