mirror of
https://github.com/torvalds/linux.git
synced 2025-12-01 07:26:02 +07:00
procfs: add "pidns" mount option
Since the introduction of pid namespaces, their interaction with procfs
has been entirely implicit in ways that require a lot of dancing around
by programs that need to construct sandboxes with different PID
namespaces.
Being able to explicitly specify the pid namespace to use when
constructing a procfs super block will allow programs to no longer need
to fork off a process which then does unshare(2) / setns(2) and
forks again in order to construct a procfs in a pidns.
So, provide a "pidns" mount option which allows such users to just
explicitly state which pid namespace they want that procfs instance to
use. This interface can be used with fsconfig(2) either with a file
descriptor or a path:
fsconfig(procfd, FSCONFIG_SET_FD, "pidns", NULL, nsfd);
fsconfig(procfd, FSCONFIG_SET_STRING, "pidns", "/proc/self/ns/pid", 0);
or with classic mount(2) / mount(8):
// mount -t proc -o pidns=/proc/self/ns/pid proc /tmp/proc
mount("proc", "/tmp/proc", "proc", MS_..., "pidns=/proc/self/ns/pid");
As this new API is effectively shorthand for setns(2) followed by
mount(2), the permission model for this mirrors pidns_install() to avoid
opening up new attack surfaces by loosening the existing permission
model.
In order to avoid having to RCU-protect all users of proc_pid_ns() (to
avoid UAFs), attempting to reconfigure an existing procfs instance's pid
namespace will error out with -EBUSY. Creating new procfs instances is
quite cheap, so this should not be an impediment to most users, and lets
us avoid a lot of churn in fs/proc/* for a feature that it seems
unlikely userspace would use.
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/20250805-procfs-pidns-api-v4-2-705f984940e7@cyphar.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
committed by
Christian Brauner
parent
7df8782012
commit
fe49652e36
@@ -2362,6 +2362,7 @@ The following mount options are supported:
|
||||
hidepid= Set /proc/<pid>/ access mode.
|
||||
gid= Set the group authorized to learn processes information.
|
||||
subset= Show only the specified subset of procfs.
|
||||
pidns= Specify the pid namespace used by this procfs.
|
||||
========= ========================================================
|
||||
|
||||
hidepid=off or hidepid=0 means classic mode - everybody may access all
|
||||
@@ -2394,6 +2395,13 @@ information about processes information, just add identd to this group.
|
||||
subset=pid hides all top level files and directories in the procfs that
|
||||
are not related to tasks.
|
||||
|
||||
pidns= specifies a pid namespace (either as a string path to something like
|
||||
`/proc/$pid/ns/pid`, or a file descriptor when using `FSCONFIG_SET_FD`) that
|
||||
will be used by the procfs instance when translating pids. By default, procfs
|
||||
will use the calling process's active pid namespace. Note that the pid
|
||||
namespace of an existing procfs instance cannot be modified (attempting to do
|
||||
so will give an `-EBUSY` error).
|
||||
|
||||
Chapter 5: Filesystem behavior
|
||||
==============================
|
||||
|
||||
|
||||
@@ -38,12 +38,14 @@ enum proc_param {
|
||||
Opt_gid,
|
||||
Opt_hidepid,
|
||||
Opt_subset,
|
||||
Opt_pidns,
|
||||
};
|
||||
|
||||
/*
 * Mount parameter table for procfs; proc_parse_param() hands it to
 * fs_parse() to map an option name to its Opt_* value.
 *
 * "pidns" is declared with fsparam_file_or_string, matching the two
 * value types (file and string) that proc_parse_pidns_param() handles.
 *
 * NOTE(review): the "gid" entry appears twice below; presumably these are
 * the removed and re-added sides of a whitespace-only diff line rather
 * than two real table entries -- confirm against the actual tree.
 */
static const struct fs_parameter_spec proc_fs_parameters[] = {
|
||||
fsparam_u32("gid", Opt_gid),
|
||||
fsparam_u32("gid", Opt_gid),
|
||||
fsparam_string("hidepid", Opt_hidepid),
|
||||
fsparam_string("subset", Opt_subset),
|
||||
fsparam_file_or_string("pidns", Opt_pidns),
|
||||
{}
|
||||
};
|
||||
|
||||
@@ -109,11 +111,66 @@ static int proc_parse_subset_param(struct fs_context *fc, char *value)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PID_NS
|
||||
/*
 * proc_parse_pidns_param - handle the "pidns" mount parameter.
 * @fc:     filesystem context being configured; fc->fs_private holds the
 *          proc_fs_context whose pid_ns is replaced, and fc->user_ns is
 *          replaced with the target pid namespace's user namespace.
 * @param:  the raw parameter; either a file (fs_value_is_file) or a path
 *          string (fs_value_is_string) that is opened read-only.
 * @result: parse result from fs_parse(); not read by this function.
 *
 * The file must be an nsfs file whose namespace type is CLONE_NEWPID. The
 * caller must hold CAP_SYS_ADMIN in the target pid namespace's user
 * namespace, and pidns_is_ancestor(target, active) must hold, where active
 * is the caller's active pid namespace.
 *
 * Return: 0 on success. On failure: the PTR_ERR() of the file lookup
 * (-EBADF when no file was obtained), -EPERM when the capability check
 * fails, or the value of invalfc() for the remaining validation failures
 * (presumably -EINVAL -- confirm against the invalfc() definition).
 */
static int proc_parse_pidns_param(struct fs_context *fc,
|
||||
struct fs_parameter *param,
|
||||
struct fs_parse_result *result)
|
||||
{
|
||||
struct proc_fs_context *ctx = fc->fs_private;
|
||||
struct pid_namespace *target, *active = task_active_pid_ns(current);
|
||||
struct ns_common *ns;
|
||||
/* NOTE(review): __free(fput) presumably drops the file reference on every return path -- confirm against linux/cleanup.h. */
struct file *ns_filp __free(fput) = NULL;
|
||||
|
||||
switch (param->type) {
|
||||
case fs_value_is_file:
|
||||
/* came through fsconfig, steal the file reference */
|
||||
ns_filp = no_free_ptr(param->file);
|
||||
break;
|
||||
case fs_value_is_string:
|
||||
ns_filp = filp_open(param->string, O_RDONLY, 0);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(true);
|
||||
break;
|
||||
}
|
||||
/* A NULL file (including the unexpected-type case above) is reported as -EBADF. */
if (!ns_filp)
|
||||
ns_filp = ERR_PTR(-EBADF);
|
||||
if (IS_ERR(ns_filp)) {
|
||||
errorfc(fc, "could not get file from pidns argument");
|
||||
return PTR_ERR(ns_filp);
|
||||
}
|
||||
|
||||
if (!proc_ns_file(ns_filp))
|
||||
return invalfc(fc, "pidns argument is not an nsfs file");
|
||||
ns = get_proc_ns(file_inode(ns_filp));
|
||||
if (ns->ops->type != CLONE_NEWPID)
|
||||
return invalfc(fc, "pidns argument is not a pidns file");
|
||||
target = container_of(ns, struct pid_namespace, ns);
|
||||
|
||||
/*
|
||||
* pidns= is shorthand for joining the pidns to get a fsopen fd, so the
|
||||
* permission model should be the same as pidns_install().
|
||||
*/
|
||||
if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) {
|
||||
errorfc(fc, "insufficient permissions to set pidns");
|
||||
return -EPERM;
|
||||
}
|
||||
if (!pidns_is_ancestor(target, active))
|
||||
return invalfc(fc, "cannot set pidns to non-descendant pidns");
|
||||
|
||||
/* Drop the references currently held by the context, then take new ones on the target pidns and its user namespace. */
put_pid_ns(ctx->pid_ns);
|
||||
ctx->pid_ns = get_pid_ns(target);
|
||||
put_user_ns(fc->user_ns);
|
||||
fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_PID_NS */
|
||||
|
||||
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
{
|
||||
struct proc_fs_context *ctx = fc->fs_private;
|
||||
struct fs_parse_result result;
|
||||
int opt;
|
||||
int opt, err;
|
||||
|
||||
opt = fs_parse(fc, proc_fs_parameters, param, &result);
|
||||
if (opt < 0)
|
||||
@@ -125,15 +182,39 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
break;
|
||||
|
||||
case Opt_hidepid:
|
||||
if (proc_parse_hidepid_param(fc, param))
|
||||
return -EINVAL;
|
||||
err = proc_parse_hidepid_param(fc, param);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
|
||||
case Opt_subset:
|
||||
if (proc_parse_subset_param(fc, param->string) < 0)
|
||||
return -EINVAL;
|
||||
err = proc_parse_subset_param(fc, param->string);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
|
||||
case Opt_pidns:
|
||||
#ifdef CONFIG_PID_NS
|
||||
/*
|
||||
* We would have to RCU-protect every proc_pid_ns() or
|
||||
* proc_sb_info() access if we allowed this to be reconfigured
|
||||
* for an existing procfs instance. Luckily, procfs instances
|
||||
* are cheap to create, and mount-beneath would let you
|
||||
* atomically replace an instance even with overmounts.
|
||||
*/
|
||||
if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
|
||||
errorfc(fc, "cannot reconfigure pidns for existing procfs");
|
||||
return -EBUSY;
|
||||
}
|
||||
err = proc_parse_pidns_param(fc, param, &result);
|
||||
if (err)
|
||||
return err;
|
||||
break;
|
||||
#else
|
||||
errorfc(fc, "pidns mount flag not supported on this system");
|
||||
return -EOPNOTSUPP;
|
||||
#endif
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -154,6 +235,11 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
|
||||
fs_info->hide_pid = ctx->hidepid;
|
||||
if (ctx->mask & (1 << Opt_subset))
|
||||
fs_info->pidonly = ctx->pidonly;
|
||||
if (ctx->mask & (1 << Opt_pidns) &&
|
||||
!WARN_ON_ONCE(fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)) {
|
||||
put_pid_ns(fs_info->pid_ns);
|
||||
fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
|
||||
}
|
||||
}
|
||||
|
||||
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
|
||||
|
||||
Reference in New Issue
Block a user