new{g,u}idmap: align setuid and fscaps behavior

Commit 1ecca8439d ("new[ug]idmap: not require CAP_SYS_ADMIN in the parent userNS")
does contain a wrong commit message, is lacking an explanation of the
issue, misses some simplifications and hardening features. This commit
tries to rectify this.

In (crazy) environment where all capabilities are dropped from the
capability bounding set apart from CAP_SET{G,U}ID setuid- and
fscaps-based new{g,u}idmap binaries behave differently when writing
complex mappings for an unprivileged user:

1. newuidmap is setuid

unshare -U sleep infinity &
newuidmap $? 0 100000 65536

First file_ns_capable(file, ns, CAP_SYS_ADMIN) is hit. This calls into
cap_capable() and hits the loop

for (;;) {
        /* Do we have the necessary capabilities? */
        if (ns == cred->user_ns)
                return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;

        /*
         * If we're already at a lower level than we're looking for,
         * we're done searching.
         */
        if (ns->level <= cred->user_ns->level)
                return -EPERM;

        /*
         * The owner of the user namespace in the parent of the
         * user namespace has all caps.
        */
        if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
                return 0;

        /*
         * If you have a capability in a parent user ns, then you have
         * it over all children user namespaces as well.
        */
        ns = ns->parent;
}

The first check fails and falls through to the end of the loop and
retrieves the parent user namespace and checks whether CAP_SYS_ADMIN is
available there which isn't.

2. newuidmap has CAP_SETUID as fscaps set

unshare -U sleep infinity &
newuidmap $? 0 100000 65536

The first file_ns_capable() check for CAP_SYS_ADMIN is passed since the
euid has not been changed:

if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
        return 0;

Now new_idmap_permitted() is hit which calls ns_capable(ns->parent,
CAP_SET{G,U}ID). This check passes since CAP_SET{G,U}ID is available in
the parent user namespace.
Now file_ns_capable(file, ns->parent, CAP_SETUID) is hit and the
cap_capable() loop (see above) is entered again. This passes

if (ns == cred->user_ns)
        return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;

since CAP_SET{G,U}ID is available in the parent user namespace. Now the
mapping can be written.

There is no need for this descrepancy between setuid and fscaps based
new{g,u}idmap binaries. The solution is to do a
seteuid() back to the unprivileged uid and PR_SET_KEEPCAPS to keep
CAP_SET{G,U}ID. The seteuid() will cause the
file_ns_capable(file, ns, CAP_SYS_ADMIN) check to pass and the
PR_SET_KEEPCAPS for CAP_SET{G,U}ID will cause the CAP_SET{G,U}ID to
pass.

Fixes: 1ecca8439d ("new[ug]idmap: not require CAP_SYS_ADMIN in the parent userNS")
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
This commit is contained in:
Christian Brauner 2018-10-27 18:23:50 +02:00
parent bb3f810611
commit 52c081b02c
No known key found for this signature in database
GPG Key ID: 8EB056D53EECB12D
2 changed files with 48 additions and 30 deletions

View File

@ -38,7 +38,7 @@
#include "idmapping.h"
#include <sys/prctl.h>
#if HAVE_SYS_CAPABILITY_H
# include <sys/capability.h>
#include <sys/capability.h>
#endif
struct map_range *get_map_ranges(int ranges, int argc, char **argv)
@ -123,58 +123,76 @@ struct map_range *get_map_ranges(int ranges, int argc, char **argv)
*/
#define ULONG_DIGITS ((((sizeof(unsigned long) * CHAR_BIT) + 9)/10)*3)
/*
* The ruid refers to the caller's uid and is used to reset the effective uid
* back to the callers real uid.
* This clutch mainly exists for setuid-based new{g,u}idmap binaries that are
* called in contexts where all capabilities other than the necessary
* CAP_SET{G,U}ID capabilities are dropped. Since the kernel will require
* assurance that the caller holds CAP_SYS_ADMIN over the target user namespace
* the only way it can confirm is in this case is if the effective uid is
* equivalent to the uid owning the target user namespace.
* Note, we only support this when a) new{g,u}idmap is not called by root and
* b) if the caller's uid and the uid retrieved via system appropriate means
* (shadow file or other) are identical. Specifically, this does not support
* when the root user calls the new{g,u}idmap binary for an unprivileged user.
* If this is wanted: use file capabilities!
*/
void write_mapping(int proc_dir_fd, int ranges, struct map_range *mappings,
const char *map_file, uid_t uid)
const char *map_file, uid_t ruid)
{
int idx;
struct map_range *mapping;
size_t bufsize;
char *buf, *pos;
int fd;
#if HAVE_SYS_CAPABILITY_H
struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
struct __user_cap_data_struct data[2] = { { 0 } };
#endif
bufsize = ranges * ((ULONG_DIGITS + 1) * 3);
pos = buf = xmalloc(bufsize);
#if HAVE_SYS_CAPABILITY_H
int cap;
struct __user_cap_header_struct hdr = {_LINUX_CAPABILITY_VERSION_3, 0};
struct __user_cap_data_struct data[2] = {{0}};
if (strcmp(map_file, "uid_map") == 0) {
cap = CAP_SETUID;
} else if (strcmp(map_file, "gid_map") == 0) {
cap = CAP_SETGID;
} else {
fprintf(stderr, _("%s: Invalid map file %s specified\n"), Prog, map_file);
exit(EXIT_FAILURE);
}
if (capget(&hdr, data) < 0) {
fprintf(stderr, _("%s: Could not get capabilities\n"), Prog);
exit(EXIT_FAILURE);
}
if (!(data[0].effective & CAP_TO_MASK(CAP_SYS_ADMIN)) &&
uid != geteuid()) {
bool uid_map;
if (strcmp(map_file, "uid_map") == 0) {
uid_map = true;
} else if (strcmp(map_file, "gid_map") == 0) {
uid_map = false;
} else {
fprintf(stderr, _("%s: Invalid map file %s specified\n"), Prog, map_file);
exit(EXIT_FAILURE);
}
/* Align setuid- and fscaps-based new{g,u}idmap behavior. */
if (!(data[0].effective & CAP_TO_MASK(CAP_SYS_ADMIN)) && ruid != 0 &&
ruid == getuid() && ruid != geteuid()) {
if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) < 0) {
fprintf(stderr, _("%s: Could not prctl(PR_SET_KEEPCAPS)\n"), Prog);
exit(EXIT_FAILURE);
}
if (seteuid(uid) < 0) {
fprintf(stderr, _("%s: Could not seteuid to %d\n"), Prog, uid);
exit(EXIT_FAILURE);
}
memset(data, 0, sizeof(data));
data[0].effective = data[0].permitted = CAP_TO_MASK(uid_map ? CAP_SETUID : CAP_SETGID);
if (capset(&hdr, data) < 0) {
fprintf(stderr, _("%s: Could not set caps\n"), Prog);
if (seteuid(ruid) < 0) {
fprintf(stderr, _("%s: Could not seteuid to %d\n"), Prog, ruid);
exit(EXIT_FAILURE);
}
}
/* Lockdown new{g,u}idmap by dropping all unneeded capabilities. */
memset(data, 0, sizeof(data));
data[0].effective = CAP_TO_MASK(cap);
data[0].permitted = data[0].effective;
if (capset(&hdr, data) < 0) {
fprintf(stderr, _("%s: Could not set caps\n"), Prog);
exit(EXIT_FAILURE);
}
#endif
bufsize = ranges * ((ULONG_DIGITS + 1) * 3);
pos = buf = xmalloc(bufsize);
/* Build the mapping command */
mapping = mappings;
for (idx = 0; idx < ranges; idx++, mapping++) {

View File

@ -38,7 +38,7 @@ struct map_range {
extern struct map_range *get_map_ranges(int ranges, int argc, char **argv);
extern void write_mapping(int proc_dir_fd, int ranges,
struct map_range *mappings, const char *map_file, uid_t uid);
struct map_range *mappings, const char *map_file, uid_t ruid);
#endif /* _ID_MAPPING_H_ */