mirror of
https://github.com/torvalds/linux.git
synced 2025-12-01 07:26:02 +07:00
procfs: add "pidns" mount option
Since the introduction of pid namespaces, their interaction with procfs
has been entirely implicit in ways that require a lot of dancing around
by programs that need to construct sandboxes with different PID
namespaces.
Being able to explicitly specify the pid namespace to use when
constructing a procfs super block will allow programs to no longer need
to fork off a process which then does unshare(2) / setns(2) and
forks again in order to construct a procfs in a pidns.
So, provide a "pidns" mount option which allows such users to just
explicitly state which pid namespace they want that procfs instance to
use. This interface can be used with fsconfig(2) either with a file
descriptor or a path:
fsconfig(procfd, FSCONFIG_SET_FD, "pidns", NULL, nsfd);
fsconfig(procfd, FSCONFIG_SET_STRING, "pidns", "/proc/self/ns/pid", 0);
or with classic mount(2) / mount(8):
// mount -t proc -o pidns=/proc/self/ns/pid proc /tmp/proc
mount("proc", "/tmp/proc", "proc", MS_..., "pidns=/proc/self/ns/pid");
As this new API is effectively shorthand for setns(2) followed by
mount(2), the permission model for this mirrors pidns_install() to avoid
opening up new attack surfaces by loosening the existing permission
model.
In order to avoid having to RCU-protect all users of proc_pid_ns() (to
avoid UAFs), attempting to reconfigure an existing procfs instance's pid
namespace will error out with -EBUSY. Creating new procfs instances is
quite cheap, so this should not be an impediment to most users, and lets
us avoid a lot of churn in fs/proc/* for a feature that it seems
unlikely userspace would use.
Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
Link: https://lore.kernel.org/20250805-procfs-pidns-api-v4-2-705f984940e7@cyphar.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
committed by
Christian Brauner
parent
7df8782012
commit
fe49652e36
@@ -2362,6 +2362,7 @@ The following mount options are supported:
|
|||||||
hidepid= Set /proc/<pid>/ access mode.
|
hidepid= Set /proc/<pid>/ access mode.
|
||||||
gid= Set the group authorized to learn processes information.
|
gid= Set the group authorized to learn processes information.
|
||||||
subset= Show only the specified subset of procfs.
|
subset= Show only the specified subset of procfs.
|
||||||
|
pidns= Specify the pid namespace used by this procfs.
|
||||||
========= ========================================================
|
========= ========================================================
|
||||||
|
|
||||||
hidepid=off or hidepid=0 means classic mode - everybody may access all
|
hidepid=off or hidepid=0 means classic mode - everybody may access all
|
||||||
@@ -2394,6 +2395,13 @@ information about processes information, just add identd to this group.
|
|||||||
subset=pid hides all top level files and directories in the procfs that
|
subset=pid hides all top level files and directories in the procfs that
|
||||||
are not related to tasks.
|
are not related to tasks.
|
||||||
|
|
||||||
|
pidns= specifies a pid namespace (either as a string path to something like
|
||||||
|
`/proc/$pid/ns/pid`, or a file descriptor when using `FSCONFIG_SET_FD`) that
|
||||||
|
will be used by the procfs instance when translating pids. By default, procfs
|
||||||
|
will use the calling process's active pid namespace. Note that the pid
|
||||||
|
namespace of an existing procfs instance cannot be modified (attempting to do
|
||||||
|
so will give an `-EBUSY` error).
|
||||||
|
|
||||||
Chapter 5: Filesystem behavior
|
Chapter 5: Filesystem behavior
|
||||||
==============================
|
==============================
|
||||||
|
|
||||||
|
|||||||
@@ -38,12 +38,14 @@ enum proc_param {
|
|||||||
Opt_gid,
|
Opt_gid,
|
||||||
Opt_hidepid,
|
Opt_hidepid,
|
||||||
Opt_subset,
|
Opt_subset,
|
||||||
|
Opt_pidns,
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct fs_parameter_spec proc_fs_parameters[] = {
|
static const struct fs_parameter_spec proc_fs_parameters[] = {
|
||||||
fsparam_u32("gid", Opt_gid),
|
fsparam_u32("gid", Opt_gid),
|
||||||
fsparam_string("hidepid", Opt_hidepid),
|
fsparam_string("hidepid", Opt_hidepid),
|
||||||
fsparam_string("subset", Opt_subset),
|
fsparam_string("subset", Opt_subset),
|
||||||
|
fsparam_file_or_string("pidns", Opt_pidns),
|
||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -109,11 +111,66 @@ static int proc_parse_subset_param(struct fs_context *fc, char *value)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_PID_NS
|
||||||
|
static int proc_parse_pidns_param(struct fs_context *fc,
|
||||||
|
struct fs_parameter *param,
|
||||||
|
struct fs_parse_result *result)
|
||||||
|
{
|
||||||
|
struct proc_fs_context *ctx = fc->fs_private;
|
||||||
|
struct pid_namespace *target, *active = task_active_pid_ns(current);
|
||||||
|
struct ns_common *ns;
|
||||||
|
struct file *ns_filp __free(fput) = NULL;
|
||||||
|
|
||||||
|
switch (param->type) {
|
||||||
|
case fs_value_is_file:
|
||||||
|
/* came through fsconfig, steal the file reference */
|
||||||
|
ns_filp = no_free_ptr(param->file);
|
||||||
|
break;
|
||||||
|
case fs_value_is_string:
|
||||||
|
ns_filp = filp_open(param->string, O_RDONLY, 0);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
WARN_ON_ONCE(true);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!ns_filp)
|
||||||
|
ns_filp = ERR_PTR(-EBADF);
|
||||||
|
if (IS_ERR(ns_filp)) {
|
||||||
|
errorfc(fc, "could not get file from pidns argument");
|
||||||
|
return PTR_ERR(ns_filp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!proc_ns_file(ns_filp))
|
||||||
|
return invalfc(fc, "pidns argument is not an nsfs file");
|
||||||
|
ns = get_proc_ns(file_inode(ns_filp));
|
||||||
|
if (ns->ops->type != CLONE_NEWPID)
|
||||||
|
return invalfc(fc, "pidns argument is not a pidns file");
|
||||||
|
target = container_of(ns, struct pid_namespace, ns);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pidns= is shorthand for joining the pidns to get a fsopen fd, so the
|
||||||
|
* permission model should be the same as pidns_install().
|
||||||
|
*/
|
||||||
|
if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) {
|
||||||
|
errorfc(fc, "insufficient permissions to set pidns");
|
||||||
|
return -EPERM;
|
||||||
|
}
|
||||||
|
if (!pidns_is_ancestor(target, active))
|
||||||
|
return invalfc(fc, "cannot set pidns to non-descendant pidns");
|
||||||
|
|
||||||
|
put_pid_ns(ctx->pid_ns);
|
||||||
|
ctx->pid_ns = get_pid_ns(target);
|
||||||
|
put_user_ns(fc->user_ns);
|
||||||
|
fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_PID_NS */
|
||||||
|
|
||||||
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||||
{
|
{
|
||||||
struct proc_fs_context *ctx = fc->fs_private;
|
struct proc_fs_context *ctx = fc->fs_private;
|
||||||
struct fs_parse_result result;
|
struct fs_parse_result result;
|
||||||
int opt;
|
int opt, err;
|
||||||
|
|
||||||
opt = fs_parse(fc, proc_fs_parameters, param, &result);
|
opt = fs_parse(fc, proc_fs_parameters, param, &result);
|
||||||
if (opt < 0)
|
if (opt < 0)
|
||||||
@@ -125,15 +182,39 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case Opt_hidepid:
|
case Opt_hidepid:
|
||||||
if (proc_parse_hidepid_param(fc, param))
|
err = proc_parse_hidepid_param(fc, param);
|
||||||
return -EINVAL;
|
if (err)
|
||||||
|
return err;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Opt_subset:
|
case Opt_subset:
|
||||||
if (proc_parse_subset_param(fc, param->string) < 0)
|
err = proc_parse_subset_param(fc, param->string);
|
||||||
return -EINVAL;
|
if (err)
|
||||||
|
return err;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case Opt_pidns:
|
||||||
|
#ifdef CONFIG_PID_NS
|
||||||
|
/*
|
||||||
|
* We would have to RCU-protect every proc_pid_ns() or
|
||||||
|
* proc_sb_info() access if we allowed this to be reconfigured
|
||||||
|
* for an existing procfs instance. Luckily, procfs instances
|
||||||
|
* are cheap to create, and mount-beneath would let you
|
||||||
|
* atomically replace an instance even with overmounts.
|
||||||
|
*/
|
||||||
|
if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
|
||||||
|
errorfc(fc, "cannot reconfigure pidns for existing procfs");
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
err = proc_parse_pidns_param(fc, param, &result);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
break;
|
||||||
|
#else
|
||||||
|
errorfc(fc, "pidns mount flag not supported on this system");
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
#endif
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
@@ -154,6 +235,11 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
|
|||||||
fs_info->hide_pid = ctx->hidepid;
|
fs_info->hide_pid = ctx->hidepid;
|
||||||
if (ctx->mask & (1 << Opt_subset))
|
if (ctx->mask & (1 << Opt_subset))
|
||||||
fs_info->pidonly = ctx->pidonly;
|
fs_info->pidonly = ctx->pidonly;
|
||||||
|
if (ctx->mask & (1 << Opt_pidns) &&
|
||||||
|
!WARN_ON_ONCE(fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)) {
|
||||||
|
put_pid_ns(fs_info->pid_ns);
|
||||||
|
fs_info->pid_ns = get_pid_ns(ctx->pid_ns);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
|
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
|
||||||
|
|||||||
Reference in New Issue
Block a user