Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace changes from Eric Biederman:
 "This is an assorted mishmash of small cleanups, enhancements and bug
  fixes.

  The major theme is user namespace mount restrictions.  nsown_capable
  is killed as it encourages not thinking about details that need to be
  considered.  A very hard to hit pid namespace exiting bug was finally
  tracked and fixed.  A couple of cleanups to the basic namespace
  infrastructure.

  Finally there is an enhancement that makes per user namespace
  capabilities usable as capabilities, and an enhancement that allows
  the per userns root to nice other processes in the user namespace"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  userns:  Kill nsown_capable it makes the wrong thing easy
  capabilities: allow nice if we are privileged
  pidns: Don't have unshare(CLONE_NEWPID) imply CLONE_THREAD
  userns: Allow PR_CAPBSET_DROP in a user namespace.
  namespaces: Simplify copy_namespaces so it is clear what is going on.
  pidns: Fix hang in zap_pid_ns_processes by sending a potentially extra wakeup
  sysfs: Restrict mounting sysfs
  userns: Better restrictions on when proc and sysfs can be mounted
  vfs: Don't copy mount bind mounts of /proc/<pid>/ns/mnt between namespaces
  kernel/nsproxy.c: Improving a snippet of code.
  proc: Restrict mounting the proc filesystem
  vfs: Lock in place mounts from more privileged users
This commit is contained in:
Linus Torvalds 2013-09-07 14:35:32 -07:00
commit c7c4591db6
27 changed files with 177 additions and 104 deletions

View file

@ -831,6 +831,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
/* Don't allow unprivileged users to reveal what is under a mount */
if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
mnt->mnt.mnt_flags |= MNT_LOCKED;
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
@ -1327,6 +1331,8 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
@ -1349,14 +1355,11 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
#endif
static bool mnt_ns_loop(struct path *path)
static bool is_mnt_ns_file(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
struct inode *inode = path->dentry->d_inode;
/* Is this a proxy for a mount namespace? */
struct inode *inode = dentry->d_inode;
struct proc_ns *ei;
struct mnt_namespace *mnt_ns;
if (!proc_ns_inode(inode))
return false;
@ -1365,7 +1368,19 @@ static bool mnt_ns_loop(struct path *path)
if (ei->ns_ops != &mntns_operations)
return false;
mnt_ns = ei->ns;
return true;
}
static bool mnt_ns_loop(struct dentry *dentry)
{
/* Could bind mounting the mount namespace inode cause a
* mount namespace loop?
*/
struct mnt_namespace *mnt_ns;
if (!is_mnt_ns_file(dentry))
return false;
mnt_ns = get_proc_ns(dentry->d_inode)->ns;
return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
}
@ -1374,13 +1389,17 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
{
struct mount *res, *p, *q, *r, *parent;
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
return ERR_PTR(-EINVAL);
if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
return ERR_PTR(-EINVAL);
res = q = clone_mnt(mnt, dentry, flag);
if (IS_ERR(q))
return q;
q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
@ -1390,7 +1409,13 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;
for (s = r; s; s = next_mnt(s, r)) {
if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
continue;
}
if (!(flag & CL_COPY_MNT_NS_FILE) &&
is_mnt_ns_file(s->mnt.mnt_root)) {
s = skip_mnt_tree(s);
continue;
}
@ -1696,6 +1721,19 @@ static int do_change_type(struct path *path, int flag)
return err;
}
static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
if (!is_subdir(child->mnt_mountpoint, dentry))
continue;
if (child->mnt.mnt_flags & MNT_LOCKED)
return true;
}
return false;
}
/*
* do loopback mount.
*/
@ -1713,7 +1751,7 @@ static int do_loopback(struct path *path, const char *old_name,
return err;
err = -EINVAL;
if (mnt_ns_loop(&old_path))
if (mnt_ns_loop(old_path.dentry))
goto out;
mp = lock_mount(path);
@ -1731,8 +1769,11 @@ static int do_loopback(struct path *path, const char *old_name,
if (!check_mnt(parent) || !check_mnt(old))
goto out2;
if (!recurse && has_locked_children(old, old_path.dentry))
goto out2;
if (recurse)
mnt = copy_tree(old, old_path.dentry, 0);
mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
else
mnt = clone_mnt(old, old_path.dentry, 0);
@ -1741,6 +1782,8 @@ static int do_loopback(struct path *path, const char *old_name,
goto out2;
}
mnt->mnt.mnt_flags &= ~MNT_LOCKED;
err = graft_tree(mnt, parent, mp);
if (err) {
br_write_lock(&vfsmount_lock);
@ -1853,6 +1896,9 @@ static int do_move_mount(struct path *path, const char *old_name)
if (!check_mnt(p) || !check_mnt(old))
goto out1;
if (old->mnt.mnt_flags & MNT_LOCKED)
goto out1;
err = -EINVAL;
if (old_path.dentry != old_path.mnt->mnt_root)
goto out1;
@ -2389,7 +2435,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
namespace_lock();
/* First pass: copy the tree topology */
copy_flags = CL_COPY_ALL | CL_EXPIRE;
copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
if (user_ns != mnt_ns->user_ns)
copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
new = copy_tree(old, old->mnt.mnt_root, copy_flags);
@ -2424,6 +2470,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
}
p = next_mnt(p, old);
q = next_mnt(q, new);
if (!q)
break;
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(p, old);
}
namespace_unlock();
@ -2630,6 +2680,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out4;
if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
goto out4;
if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
goto out4;
error = -ENOENT;
if (d_unlinked(new.dentry))
goto out4;
@ -2653,6 +2705,10 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
detach_mnt(root_mnt, &root_parent);
if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
new_mnt->mnt.mnt_flags |= MNT_LOCKED;
root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
}
/* mount old root on put_old */
attach_mnt(root_mnt, old_mnt, old_mp);
/* mount new_root on / */
@ -2811,25 +2867,38 @@ bool current_chrooted(void)
return chrooted;
}
void update_mnt_policy(struct user_namespace *userns)
bool fs_fully_visible(struct file_system_type *type)
{
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mount *mnt;
bool visible = false;
down_read(&namespace_sem);
if (unlikely(!ns))
return false;
namespace_lock();
list_for_each_entry(mnt, &ns->list, mnt_list) {
switch (mnt->mnt.mnt_sb->s_magic) {
case SYSFS_MAGIC:
userns->may_mount_sysfs = true;
break;
case PROC_SUPER_MAGIC:
userns->may_mount_proc = true;
break;
struct mount *child;
if (mnt->mnt.mnt_sb->s_type != type)
continue;
/* This mount is not fully visible if there are any child mounts
* that cover anything except for empty directories.
*/
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
if (!S_ISDIR(inode->i_mode))
goto next;
if (inode->i_nlink != 2)
goto next;
}
if (userns->may_mount_sysfs && userns->may_mount_proc)
break;
visible = true;
goto found;
next: ;
}
up_read(&namespace_sem);
found:
namespace_unlock();
return visible;
}
static void *mntns_get(struct task_struct *task)
@ -2860,8 +2929,8 @@ static int mntns_install(struct nsproxy *nsproxy, void *ns)
struct path root;
if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_CHROOT) ||
!nsown_capable(CAP_SYS_ADMIN))
!ns_capable(current_user_ns(), CAP_SYS_CHROOT) ||
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
if (fs->users != 1)