/* vi: set sw=4 ts=4: */
/*
 * Mini unshare implementation for busybox.
 *
 * Copyright (C) 2016 by Bartosz Golaszewski <bartekgola@gmail.com>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
//config:config UNSHARE
//config: bool "unshare (7.2 kb)"
//config: default y
//config: depends on !NOMMU
//config: select PLATFORM_LINUX
//config: select LONG_OPTS
//config: help
//config: Run program with some namespaces unshared from parent.

// needs LONG_OPTS: it is awkward to exclude code which handles --propagation
// and --setgroups based on LONG_OPTS, so instead applet requires LONG_OPTS.
// depends on !NOMMU: we need fork()

//applet:IF_UNSHARE(APPLET(unshare, BB_DIR_USR_BIN, BB_SUID_DROP))

//kbuild:lib-$(CONFIG_UNSHARE) += unshare.o

//usage:#define unshare_trivial_usage
//usage: "[OPTIONS] [PROG [ARGS]]"
//usage:#define unshare_full_usage "\n"
//usage: "\n -m,--mount[=FILE] Unshare mount namespace"
//usage: "\n -u,--uts[=FILE] Unshare UTS namespace (hostname etc.)"
//usage: "\n -i,--ipc[=FILE] Unshare System V IPC namespace"
//usage: "\n -n,--net[=FILE] Unshare network namespace"
//usage: "\n -p,--pid[=FILE] Unshare PID namespace"
//usage: "\n -U,--user[=FILE] Unshare user namespace"
//usage: "\n -f,--fork Fork before execing PROG"
//usage: "\n -r,--map-root-user Map current user to root (implies -U)"
//usage: "\n --mount-proc[=DIR] Mount /proc filesystem first (implies -m)"
//usage: "\n --propagation slave|shared|private|unchanged"
//usage: "\n Modify mount propagation in mount namespace"
//usage: "\n --setgroups allow|deny Control the setgroups syscall in user namespaces"

/*
 * System headers, plus fallback definitions for constants that the
 * installed headers may not provide. Each fallback is used only when
 * the header did not already define the name.
 */
#include <sched.h>
#ifndef CLONE_NEWUTS
# define CLONE_NEWUTS  0x04000000
#endif
#ifndef CLONE_NEWIPC
# define CLONE_NEWIPC  0x08000000
#endif
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
#ifndef CLONE_NEWPID
# define CLONE_NEWPID  0x20000000
#endif
#ifndef CLONE_NEWNET
# define CLONE_NEWNET  0x40000000
#endif

#include <sys/mount.h>
#ifndef MS_REC
# define MS_REC     (1 << 14)
#endif
#ifndef MS_PRIVATE
# define MS_PRIVATE (1 << 18)
#endif
#ifndef MS_SLAVE
# define MS_SLAVE   (1 << 19)
#endif
#ifndef MS_SHARED
# define MS_SHARED  (1 << 20)
#endif

#include "libbb.h"

static void mount_or_die(const char *source, const char *target,
|
2018-07-17 15:04:17 +02:00
|
|
|
const char *fstype, unsigned long mountflags)
|
2016-04-01 19:41:13 +02:00
|
|
|
{
|
|
|
|
if (mount(source, target, fstype, mountflags, NULL)) {
|
|
|
|
bb_perror_msg_and_die("can't mount %s on %s (flags:0x%lx)",
|
|
|
|
source, target, mountflags);
|
|
|
|
/* fstype is always either NULL or "proc".
|
|
|
|
* "proc" is only used to mount /proc.
|
|
|
|
* No need to clutter up error message with fstype,
|
|
|
|
* it is easily deductible.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* procfs files written when setting up a user namespace */
#define PATH_PROC_SETGROUPS	"/proc/self/setgroups"
#define PATH_PROC_UIDMAP	"/proc/self/uid_map"
#define PATH_PROC_GIDMAP	"/proc/self/gid_map"

/* Static description of one namespace kind */
struct namespace_descr {
	/* CLONE_NEWxxx flag OR-ed into the unshare() argument */
	int flag;
	/* Name of the file under /proc/PID/ns/.
	 * Exactly 4 bytes and NOT necessarily NUL-terminated ("user"
	 * fills it completely): always print it with "%.4s".
	 */
	const char nsfile4[4];
};

/* Per-namespace run-time state */
struct namespace_ctx {
	/* FILE given to the long option (bind mount target), or NULL */
	char *path;
};

/* Option bits as tested against the getopt32long() result.
 * The first six bits are also used as (1 << i) for ns_list[i].
 */
enum {
	OPT_mount       = 1 << 0,
	OPT_uts         = 1 << 1,
	OPT_ipc         = 1 << 2,
	OPT_net         = 1 << 3,
	OPT_pid         = 1 << 4,
	OPT_user        = 1 << 5, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
	OPT_fork        = 1 << 6,
	OPT_map_root    = 1 << 7,
	OPT_mount_proc  = 1 << 8,
	OPT_propagation = 1 << 9,
	OPT_setgroups   = 1 << 10,
};
/* Indexes into ns_list[] and into the per-namespace context array */
enum {
	NS_MNT_POS = 0,
	NS_UTS_POS,
	NS_IPC_POS,
	NS_NET_POS,
	NS_PID_POS,
	NS_USR_POS, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
	NS_COUNT,
};
static const struct namespace_descr ns_list[] = {
|
|
|
|
{ CLONE_NEWNS, "mnt" },
|
|
|
|
{ CLONE_NEWUTS, "uts" },
|
|
|
|
{ CLONE_NEWIPC, "ipc" },
|
|
|
|
{ CLONE_NEWNET, "net" },
|
|
|
|
{ CLONE_NEWPID, "pid" },
|
|
|
|
{ CLONE_NEWUSER, "user" }, /* OPT_user, NS_USR_POS, and ns_list[] index must match! */
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Upstream unshare doesn't support short options for --mount-proc,
|
|
|
|
* --propagation, --setgroups.
|
|
|
|
* Optional arguments (namespace mountpoints) exist only for long opts,
|
|
|
|
* we are forced to use "fake" letters for them.
|
|
|
|
* '+': stop at first non-option.
|
|
|
|
*/
|
2017-08-08 21:55:02 +02:00
|
|
|
#define OPT_STR "+muinpU""fr""\xfd::""\xfe:""\xff:"
|
2016-04-01 19:41:13 +02:00
|
|
|
static const char unshare_longopts[] ALIGN1 =
|
|
|
|
"mount\0" Optional_argument "\xf0"
|
|
|
|
"uts\0" Optional_argument "\xf1"
|
|
|
|
"ipc\0" Optional_argument "\xf2"
|
2016-08-22 20:19:34 +02:00
|
|
|
"net\0" Optional_argument "\xf3"
|
2016-04-01 19:41:13 +02:00
|
|
|
"pid\0" Optional_argument "\xf4"
|
|
|
|
"user\0" Optional_argument "\xf5"
|
|
|
|
"fork\0" No_argument "f"
|
|
|
|
"map-root-user\0" No_argument "r"
|
|
|
|
"mount-proc\0" Optional_argument "\xfd"
|
|
|
|
"propagation\0" Required_argument "\xfe"
|
|
|
|
"setgroups\0" Required_argument "\xff"
|
|
|
|
;
|
|
|
|
|
|
|
|
/* Ugly-looking string reuse trick:
 * read as a C string, PRIVATE_STR is just "private" (it stops at the
 * first NUL); read as a NUL-separated list it holds all four keywords.
 * The order of the keywords defines the indexes used by
 * parse_propagation().
 */
#define PRIVATE_STR "private\0""unchanged\0""shared\0""slave\0"
#define PRIVATE_UNCHANGED_SHARED_SLAVE PRIVATE_STR

static unsigned long parse_propagation(const char *prop_str)
|
|
|
|
{
|
|
|
|
int i = index_in_strings(PRIVATE_UNCHANGED_SHARED_SLAVE, prop_str);
|
|
|
|
if (i < 0)
|
|
|
|
bb_error_msg_and_die("unrecognized: --%s=%s", "propagation", prop_str);
|
|
|
|
if (i == 0)
|
|
|
|
return MS_REC | MS_PRIVATE;
|
|
|
|
if (i == 1)
|
|
|
|
return 0;
|
|
|
|
if (i == 2)
|
|
|
|
return MS_REC | MS_SHARED;
|
|
|
|
return MS_REC | MS_SLAVE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mount_namespaces(pid_t pid, struct namespace_ctx *ns_ctx_list)
|
|
|
|
{
|
|
|
|
const struct namespace_descr *ns;
|
|
|
|
struct namespace_ctx *ns_ctx;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < NS_COUNT; i++) {
|
|
|
|
char nsf[sizeof("/proc/%u/ns/AAAA") + sizeof(int)*3];
|
|
|
|
|
|
|
|
ns = &ns_list[i];
|
|
|
|
ns_ctx = &ns_ctx_list[i];
|
|
|
|
if (!ns_ctx->path)
|
|
|
|
continue;
|
|
|
|
sprintf(nsf, "/proc/%u/ns/%.4s", (unsigned)pid, ns->nsfile4);
|
|
|
|
mount_or_die(nsf, ns_ctx->path, NULL, MS_BIND);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int unshare_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
|
|
|
int unshare_main(int argc UNUSED_PARAM, char **argv)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
unsigned int opts;
|
|
|
|
int unsflags;
|
|
|
|
uintptr_t need_mount;
|
|
|
|
const char *proc_mnt_target;
|
|
|
|
const char *prop_str;
|
|
|
|
const char *setgrp_str;
|
|
|
|
unsigned long prop_flags;
|
|
|
|
uid_t reuid = geteuid();
|
|
|
|
gid_t regid = getegid();
|
|
|
|
struct fd_pair fdp;
|
|
|
|
pid_t child = child; /* for compiler */
|
|
|
|
struct namespace_ctx ns_ctx_list[NS_COUNT];
|
|
|
|
|
|
|
|
memset(ns_ctx_list, 0, sizeof(ns_ctx_list));
|
|
|
|
proc_mnt_target = "/proc";
|
|
|
|
prop_str = PRIVATE_STR;
|
|
|
|
setgrp_str = NULL;
|
|
|
|
|
2017-08-08 21:55:02 +02:00
|
|
|
opts = getopt32long(argv, "^" OPT_STR "\0"
|
2016-04-01 19:41:13 +02:00
|
|
|
"\xf0""m" /* long opts (via their "fake chars") imply short opts */
|
|
|
|
":\xf1""u"
|
|
|
|
":\xf2""i"
|
|
|
|
":\xf3""n"
|
|
|
|
":\xf4""p"
|
|
|
|
":\xf5""U"
|
2017-11-09 15:59:22 +01:00
|
|
|
":rU" /* --map-root-user or -r implies -U */
|
2016-04-01 19:41:13 +02:00
|
|
|
":\xfd""m" /* --mount-proc implies -m */
|
2017-08-08 21:55:02 +02:00
|
|
|
, unshare_longopts,
|
|
|
|
&proc_mnt_target, &prop_str, &setgrp_str,
|
|
|
|
&ns_ctx_list[NS_MNT_POS].path,
|
|
|
|
&ns_ctx_list[NS_UTS_POS].path,
|
|
|
|
&ns_ctx_list[NS_IPC_POS].path,
|
|
|
|
&ns_ctx_list[NS_NET_POS].path,
|
|
|
|
&ns_ctx_list[NS_PID_POS].path,
|
|
|
|
&ns_ctx_list[NS_USR_POS].path
|
2016-04-01 19:41:13 +02:00
|
|
|
);
|
|
|
|
argv += optind;
|
|
|
|
//bb_error_msg("opts:0x%x", opts);
|
|
|
|
//bb_error_msg("mount:%s", ns_ctx_list[NS_MNT_POS].path);
|
|
|
|
//bb_error_msg("proc_mnt_target:%s", proc_mnt_target);
|
|
|
|
//bb_error_msg("prop_str:%s", prop_str);
|
|
|
|
//bb_error_msg("setgrp_str:%s", setgrp_str);
|
|
|
|
//exit(1);
|
|
|
|
|
|
|
|
if (setgrp_str) {
|
|
|
|
if (strcmp(setgrp_str, "allow") == 0) {
|
|
|
|
if (opts & OPT_map_root) {
|
libbb: reduce the overhead of single parameter bb_error_msg() calls
Back in 2007, commit 0c97c9d43707 ("'simple' error message functions by
Loic Grenie") introduced bb_simple_perror_msg() to allow for a lower
overhead call to bb_perror_msg() when only a string was being printed
with no parameters. This saves space for some CPU architectures because
it avoids the overhead of a call to a variadic function. However there
has never been a simple version of bb_error_msg(), and since 2007 many
new calls to bb_perror_msg() have been added that only take a single
parameter and so could have been using bb_simple_perror_message().
This changeset introduces 'simple' versions of bb_info_msg(),
bb_error_msg(), bb_error_msg_and_die(), bb_herror_msg() and
bb_herror_msg_and_die(), and replaces all calls that only take a
single parameter, or use something like ("%s", arg), with calls to the
corresponding 'simple' version.
Since it is likely that single parameter calls to the variadic functions
may be accidentally reintroduced in the future a new debugging config
option WARN_SIMPLE_MSG has been introduced. This uses some macro magic
which will cause any such calls to generate a warning, but this is
turned off by default to avoid use of the unpleasant macros in normal
circumstances.
This is a large changeset due to the number of calls that have been
replaced. The only files that contain changes other than simple
substitution of function calls are libbb.h, libbb/herror_msg.c,
libbb/verror_msg.c and libbb/xfuncs_printf.c. In miscutils/devfsd.c,
networking/udhcp/common.h and util-linux/mdev.c additonal macros have
been added for logging so that single parameter and multiple parameter
logging variants exist.
The amount of space saved varies considerably by architecture, and was
found to be as follows (for 'defconfig' using GCC 7.4):
Arm: -92 bytes
MIPS: -52 bytes
PPC: -1836 bytes
x86_64: -938 bytes
Note that for the MIPS architecture only an exception had to be made
disabling the 'simple' calls for 'udhcp' (in networking/udhcp/common.h)
because it made these files larger on MIPS.
Signed-off-by: James Byrne <james.byrne@origamienergy.com>
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
2019-07-02 11:35:03 +02:00
|
|
|
bb_simple_error_msg_and_die(
|
2016-04-01 19:41:13 +02:00
|
|
|
"--setgroups=allow and --map-root-user "
|
|
|
|
"are mutually exclusive"
|
|
|
|
);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* It's not "allow", must be "deny" */
|
|
|
|
if (strcmp(setgrp_str, "deny") != 0)
|
|
|
|
bb_error_msg_and_die("unrecognized: --%s=%s",
|
|
|
|
"setgroups", setgrp_str);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
unsflags = 0;
|
|
|
|
need_mount = 0;
|
|
|
|
for (i = 0; i < NS_COUNT; i++) {
|
|
|
|
const struct namespace_descr *ns = &ns_list[i];
|
|
|
|
struct namespace_ctx *ns_ctx = &ns_ctx_list[i];
|
|
|
|
|
|
|
|
if (opts & (1 << i))
|
|
|
|
unsflags |= ns->flag;
|
|
|
|
|
|
|
|
need_mount |= (uintptr_t)(ns_ctx->path);
|
|
|
|
}
|
|
|
|
/* need_mount != 0 if at least one FILE was given */
|
|
|
|
|
|
|
|
prop_flags = MS_REC | MS_PRIVATE;
|
|
|
|
/* Silently ignore --propagation if --mount is not requested. */
|
|
|
|
if (opts & OPT_mount)
|
|
|
|
prop_flags = parse_propagation(prop_str);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Special case: if we were requested to unshare the mount namespace
|
|
|
|
* AND to make any namespace persistent (by bind mounting it) we need
|
|
|
|
* to spawn a child process which will wait for the parent to call
|
|
|
|
* unshare(), then mount parent's namespaces while still in the
|
|
|
|
* previous namespace.
|
|
|
|
*/
|
|
|
|
fdp.wr = -1;
|
|
|
|
if (need_mount && (opts & OPT_mount)) {
|
|
|
|
/*
|
|
|
|
* Can't use getppid() in child, as we can be unsharing the
|
|
|
|
* pid namespace.
|
|
|
|
*/
|
|
|
|
pid_t ppid = getpid();
|
|
|
|
|
|
|
|
xpiped_pair(fdp);
|
|
|
|
|
|
|
|
child = xfork();
|
|
|
|
if (child == 0) {
|
|
|
|
/* Child */
|
|
|
|
close(fdp.wr);
|
|
|
|
|
|
|
|
/* Wait until parent calls unshare() */
|
|
|
|
read(fdp.rd, ns_ctx_list, 1); /* ...using bogus buffer */
|
|
|
|
/*close(fdp.rd);*/
|
|
|
|
|
|
|
|
/* Mount parent's unshared namespaces. */
|
|
|
|
mount_namespaces(ppid, ns_ctx_list);
|
|
|
|
return EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
/* Parent continues */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (unshare(unsflags) != 0)
|
|
|
|
bb_perror_msg_and_die("unshare(0x%x)", unsflags);
|
|
|
|
|
|
|
|
if (fdp.wr >= 0) {
|
|
|
|
close(fdp.wr); /* Release child */
|
2016-04-02 18:06:24 +02:00
|
|
|
close(fdp.rd); /* should close fd, to not confuse exec'ed PROG */
|
2016-04-01 19:41:13 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (need_mount) {
|
|
|
|
/* Wait for the child to finish mounting the namespaces. */
|
|
|
|
if (opts & OPT_mount) {
|
|
|
|
int exit_status = wait_for_exitstatus(child);
|
|
|
|
if (WIFEXITED(exit_status) &&
|
|
|
|
WEXITSTATUS(exit_status) != EXIT_SUCCESS)
|
|
|
|
return WEXITSTATUS(exit_status);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Regular way - we were requested to mount some other
|
|
|
|
* namespaces: mount them after the call to unshare().
|
|
|
|
*/
|
|
|
|
mount_namespaces(getpid(), ns_ctx_list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When we're unsharing the pid namespace, it's not the process that
|
|
|
|
* calls unshare() that is put into the new namespace, but its first
|
|
|
|
* child. The user may want to use this option to spawn a new process
|
|
|
|
* that'll become PID 1 in this new namespace.
|
|
|
|
*/
|
|
|
|
if (opts & OPT_fork) {
|
2016-04-02 18:06:24 +02:00
|
|
|
xvfork_parent_waits_and_exits();
|
2016-04-01 19:41:13 +02:00
|
|
|
/* Child continues */
|
|
|
|
}
|
|
|
|
|
|
|
|
if (opts & OPT_map_root) {
|
2017-11-09 16:06:33 +01:00
|
|
|
char uidmap_buf[sizeof("0 %u 1") + sizeof(int)*3];
|
2016-04-01 19:41:13 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Since Linux 3.19 unprivileged writing of /proc/self/gid_map
|
|
|
|
* has been disabled unless /proc/self/setgroups is written
|
|
|
|
* first to permanently disable the ability to call setgroups
|
|
|
|
* in that user namespace.
|
|
|
|
*/
|
|
|
|
xopen_xwrite_close(PATH_PROC_SETGROUPS, "deny");
|
2017-11-09 16:06:33 +01:00
|
|
|
sprintf(uidmap_buf, "0 %u 1", (unsigned)reuid);
|
2016-04-01 19:41:13 +02:00
|
|
|
xopen_xwrite_close(PATH_PROC_UIDMAP, uidmap_buf);
|
2017-11-09 16:06:33 +01:00
|
|
|
sprintf(uidmap_buf, "0 %u 1", (unsigned)regid);
|
2016-04-01 19:41:13 +02:00
|
|
|
xopen_xwrite_close(PATH_PROC_GIDMAP, uidmap_buf);
|
|
|
|
} else
|
|
|
|
if (setgrp_str) {
|
|
|
|
/* Write "allow" or "deny" */
|
|
|
|
xopen_xwrite_close(PATH_PROC_SETGROUPS, setgrp_str);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (opts & OPT_mount) {
|
|
|
|
mount_or_die("none", "/", NULL, prop_flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (opts & OPT_mount_proc) {
|
|
|
|
/*
|
|
|
|
* When creating a new pid namespace, we might want the pid
|
|
|
|
* subdirectories in /proc to remain consistent with the new
|
|
|
|
* process IDs. Without --mount-proc the pids in /proc would
|
|
|
|
* still reflect the old pid namespace. This is why we make
|
|
|
|
* /proc private here and then do a fresh mount.
|
|
|
|
*/
|
|
|
|
mount_or_die("none", proc_mnt_target, NULL, MS_PRIVATE | MS_REC);
|
|
|
|
mount_or_die("proc", proc_mnt_target, "proc", MS_NOSUID | MS_NOEXEC | MS_NODEV);
|
|
|
|
}
|
|
|
|
|
2016-04-02 18:06:24 +02:00
|
|
|
exec_prog_or_SHELL(argv);
|
2016-04-01 19:41:13 +02:00
|
|
|
}
|