devpts: Make each mount of devpts an independent filesystem.

The /dev/ptmx device node is changed to lookup the directory entry "pts"
in the same directory as the /dev/ptmx device node was opened in.  If
there is a "pts" entry and that entry is a devpts filesystem /dev/ptmx
uses that filesystem.  Otherwise the open of /dev/ptmx fails.

The DEVPTS_MULTIPLE_INSTANCES configuration option is removed, so that
userspace can now safely depend on each mount of devpts creating a new
instance of the filesystem.

Each mount of devpts is now a separate and equal filesystem.

Reserved ttys are now available to all instances of devpts where the
mounter is in the initial mount namespace.

A new vfs helper path_pts is introduced that finds a directory entry
named "pts" in the directory of the passed in path, and changes the
passed in path to point to it.  The helper path_pts uses a function
path_parent_directory that was factored out of follow_dotdot.

In the implementation of devpts:
 - devpts_mnt is killed as it is no longer meaningful if all mounts of
   devpts are equal.
 - pts_sb_from_inode is replaced by just inode->i_sb as all cached
   inodes in the tty layer are now from the devpts filesystem.
 - devpts_add_ref is rolled into the new function devpts_ptmx.  And the
   unnecessary inode hold is removed.
 - devpts_del_ref is renamed devpts_release and reduced to just a
   deacrivate_super.
 - The newinstance mount option continues to be accepted but is now
   ignored.

In devpts_fs.h definitions for when !CONFIG_UNIX98_PTYS are removed as
they are never used.

Documentation/filesystems/devices.txt is updated to describe the current
situation.

This has been verified to work properly on openwrt-15.05, centos5,
centos6, centos7, debian-6.0.2, debian-7.9, debian-8.2, ubuntu-14.04.3,
ubuntu-15.10, fedora23, magia-5, mint-17.3, opensuse-42.1,
slackware-14.1, gentoo-20151225 (13.0?), archlinux-2015-12-01.  With the
caveat that on centos6 and on slackware-14.1 that there wind up being
two instances of the devpts filesystem mounted on /dev/pts, the lower
copy does not end up getting used.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg KH <greg@kroah.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: One Thousand Gnomes <gnomes@lxorguk.ukuu.org.uk>
Cc: Jann Horn <jann@thejh.net>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Florian Weimer <fw@deneb.enyo.de>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Eric W. Biederman 2016-06-02 10:29:47 -05:00 committed by Linus Torvalds
parent 049ec1b5a7
commit eedf265aa0
7 changed files with 126 additions and 296 deletions

View file

@ -95,8 +95,6 @@ static struct ctl_table pty_root_table[] = {
static DEFINE_MUTEX(allocated_ptys_lock);
static struct vfsmount *devpts_mnt;
struct pts_mount_opts {
int setuid;
int setgid;
@ -104,7 +102,7 @@ struct pts_mount_opts {
kgid_t gid;
umode_t mode;
umode_t ptmxmode;
int newinstance;
int reserve;
int max;
};
@ -117,11 +115,9 @@ static const match_table_t tokens = {
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_mode, "mode=%o"},
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
{Opt_ptmxmode, "ptmxmode=%o"},
{Opt_newinstance, "newinstance"},
{Opt_max, "max=%d"},
#endif
{Opt_err, NULL}
};
@ -137,15 +133,48 @@ static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb)
return sb->s_fs_info;
}
static inline struct super_block *pts_sb_from_inode(struct inode *inode)
struct pts_fs_info *devpts_acquire(struct file *filp)
{
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
if (inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
return inode->i_sb;
#endif
if (!devpts_mnt)
return NULL;
return devpts_mnt->mnt_sb;
struct pts_fs_info *result;
struct path path;
struct super_block *sb;
int err;
path = filp->f_path;
path_get(&path);
/* Has the devpts filesystem already been found? */
sb = path.mnt->mnt_sb;
if (sb->s_magic != DEVPTS_SUPER_MAGIC) {
/* Is a devpts filesystem at "pts" in the same directory? */
err = path_pts(&path);
if (err) {
result = ERR_PTR(err);
goto out;
}
/* Is the path the root of a devpts filesystem? */
result = ERR_PTR(-ENODEV);
sb = path.mnt->mnt_sb;
if ((sb->s_magic != DEVPTS_SUPER_MAGIC) ||
(path.mnt->mnt_root != sb->s_root))
goto out;
}
/*
* pty code needs to hold extra references in case of last /dev/tty close
*/
atomic_inc(&sb->s_active);
result = DEVPTS_SB(sb);
out:
path_put(&path);
return result;
}
void devpts_release(struct pts_fs_info *fsi)
{
deactivate_super(fsi->sb);
}
#define PARSE_MOUNT 0
@ -154,9 +183,7 @@ static inline struct super_block *pts_sb_from_inode(struct inode *inode)
/*
* parse_mount_options():
* Set @opts to mount options specified in @data. If an option is not
* specified in @data, set it to its default value. The exception is
* 'newinstance' option which can only be set/cleared on a mount (i.e.
* cannot be changed during remount).
* specified in @data, set it to its default value.
*
* Note: @data may be NULL (in which case all options are set to default).
*/
@ -174,9 +201,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
opts->ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
opts->max = NR_UNIX98_PTY_MAX;
/* newinstance makes sense only on initial mount */
/* Only allow instances mounted from the initial mount
* namespace to tap the reserve pool of ptys.
*/
if (op == PARSE_MOUNT)
opts->newinstance = 0;
opts->reserve =
(current->nsproxy->mnt_ns == init_task.nsproxy->mnt_ns);
while ((p = strsep(&data, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
@ -211,16 +241,12 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return -EINVAL;
opts->mode = option & S_IALLUGO;
break;
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
case Opt_ptmxmode:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->ptmxmode = option & S_IALLUGO;
break;
case Opt_newinstance:
/* newinstance makes sense only on initial mount */
if (op == PARSE_MOUNT)
opts->newinstance = 1;
break;
case Opt_max:
if (match_int(&args[0], &option) ||
@ -228,7 +254,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return -EINVAL;
opts->max = option;
break;
#endif
default:
pr_err("called with bogus options\n");
return -EINVAL;
@ -238,7 +263,6 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
return 0;
}
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
static int mknod_ptmx(struct super_block *sb)
{
int mode;
@ -305,12 +329,6 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
}
}
#else
static inline void update_ptmx_mode(struct pts_fs_info *fsi)
{
return;
}
#endif
static int devpts_remount(struct super_block *sb, int *flags, char *data)
{
@ -344,11 +362,9 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",gid=%u",
from_kgid_munged(&init_user_ns, opts->gid));
seq_printf(seq, ",mode=%03o", opts->mode);
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
if (opts->max < NR_UNIX98_PTY_MAX)
seq_printf(seq, ",max=%d", opts->max);
#endif
return 0;
}
@ -410,40 +426,11 @@ fail:
return -ENOMEM;
}
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
static int compare_init_pts_sb(struct super_block *s, void *p)
{
if (devpts_mnt)
return devpts_mnt->mnt_sb == s;
return 0;
}
/*
* devpts_mount()
*
* If the '-o newinstance' mount option was specified, mount a new
* (private) instance of devpts. PTYs created in this instance are
* independent of the PTYs in other devpts instances.
*
* If the '-o newinstance' option was not specified, mount/remount the
* initial kernel mount of devpts. This type of mount gives the
* legacy, single-instance semantics.
*
* The 'newinstance' option is needed to support multiple namespace
* semantics in devpts while preserving backward compatibility of the
* current 'single-namespace' semantics. i.e all mounts of devpts
* without the 'newinstance' mount option should bind to the initial
* kernel mount, like mount_single().
*
* Mounts with 'newinstance' option create a new, private namespace.
*
* NOTE:
*
* For single-mount semantics, devpts cannot use mount_single(),
* because mount_single()/sget() find and use the super-block from
* the most recent mount of devpts. But that recent mount may be a
* 'newinstance' mount and mount_single() would pick the newinstance
* super-block instead of the initial super-block.
* Mount a new (private) instance of devpts. PTYs created in this
* instance are independent of the PTYs in other devpts instances.
*/
static struct dentry *devpts_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
@ -456,18 +443,7 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
if (error)
return ERR_PTR(error);
/* Require newinstance for all user namespace mounts to ensure
* the mount options are not changed.
*/
if ((current_user_ns() != &init_user_ns) && !opts.newinstance)
return ERR_PTR(-EINVAL);
if (opts.newinstance)
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
else
s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
NULL);
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s))
return ERR_CAST(s);
@ -491,18 +467,6 @@ out_undo_sget:
return ERR_PTR(error);
}
#else
/*
* This supports only the legacy single-instance semantics (no
* multiple-instance semantics)
*/
static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data)
{
return mount_single(fs_type, flags, data, devpts_fill_super);
}
#endif
static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
@ -516,9 +480,7 @@ static struct file_system_type devpts_fs_type = {
.name = "devpts",
.mount = devpts_mount,
.kill_sb = devpts_kill_sb,
#ifdef CONFIG_DEVPTS_MULTIPLE_INSTANCES
.fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
#endif
};
/*
@ -531,16 +493,13 @@ int devpts_new_index(struct pts_fs_info *fsi)
int index;
int ida_ret;
if (!fsi)
return -ENODEV;
retry:
if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&allocated_ptys_lock);
if (pty_count >= pty_limit -
(fsi->mount_opts.newinstance ? pty_reserve : 0)) {
if (pty_count >= (pty_limit -
(fsi->mount_opts.reserve ? 0 : pty_reserve))) {
mutex_unlock(&allocated_ptys_lock);
return -ENOSPC;
}
@ -571,30 +530,6 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
mutex_unlock(&allocated_ptys_lock);
}
/*
* pty code needs to hold extra references in case of last /dev/tty close
*/
struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
{
struct super_block *sb;
struct pts_fs_info *fsi;
sb = pts_sb_from_inode(ptmx_inode);
if (!sb)
return NULL;
fsi = DEVPTS_SB(sb);
if (!fsi)
return NULL;
atomic_inc(&sb->s_active);
return fsi;
}
void devpts_put_ref(struct pts_fs_info *fsi)
{
deactivate_super(fsi->sb);
}
/**
* devpts_pty_new -- create a new inode in /dev/pts/
* @ptmx_inode: inode of the master
@ -607,16 +542,12 @@ void devpts_put_ref(struct pts_fs_info *fsi)
struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
{
struct dentry *dentry;
struct super_block *sb;
struct super_block *sb = fsi->sb;
struct inode *inode;
struct dentry *root;
struct pts_mount_opts *opts;
char s[12];
if (!fsi)
return ERR_PTR(-ENODEV);
sb = fsi->sb;
root = sb->s_root;
opts = &fsi->mount_opts;
@ -676,20 +607,8 @@ void devpts_pty_kill(struct dentry *dentry)
static int __init init_devpts_fs(void)
{
int err = register_filesystem(&devpts_fs_type);
struct ctl_table_header *table;
if (!err) {
struct vfsmount *mnt;
table = register_sysctl_table(pty_root_table);
mnt = kern_mount(&devpts_fs_type);
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
unregister_filesystem(&devpts_fs_type);
unregister_sysctl_table(table);
} else {
devpts_mnt = mnt;
}
register_sysctl_table(pty_root_table);
}
return err;
}