From 8bae90f4a274e439e3c9ab8716e9b970e1969ab1 Mon Sep 17 00:00:00 2001 From: Demi Marie Obenour Date: Sun, 3 Dec 2023 13:04:46 -0500 Subject: [PATCH] Use pivot_root() to clear old root filesystem from mount namespace This has two advantages over chroot(): - It allows chroot() to work in the sandbox, which allows OpenSSH's sshd to run. - It removes the old root filesystem from the mount namespace, so chroot escapes are not possible. --- runtime/examples/direct.rs | 2 + runtime/init-container/src/init.c | 64 ++++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/runtime/examples/direct.rs b/runtime/examples/direct.rs index 069a0144..868f7dc0 100644 --- a/runtime/examples/direct.rs +++ b/runtime/examples/direct.rs @@ -208,6 +208,8 @@ async fn main() -> io::Result<()> { ) .await?; + run_process_with_output(&mut ga, &notifications, "/bin/mount", &["mount"]).await?; + let fds = [ None, Some(RedirectFdType::RedirectFdFile( diff --git a/runtime/init-container/src/init.c b/runtime/init-container/src/init.c index ee1c217f..1902b505 100644 --- a/runtime/init-container/src/init.c +++ b/runtime/init-container/src/init.c @@ -59,7 +59,6 @@ #define DEV_VPN "eth0" #define DEV_INET "eth1" -#define SYSROOT "/mnt/newroot" #define MODE_RW_UGO (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) #define OUTPUT_PATH_PREFIX "/var/tmp/guest_agent_private/fds" @@ -675,7 +674,8 @@ static int child_pipe = -1; #define NAMESPACES \ (CLONE_NEWUSER | /* new user namespace */ \ - 0) + CLONE_NEWNS | /* new mount namespace */ \ + 0) static int capset(cap_user_header_t hdrp, cap_user_data_t datap) { return syscall(SYS_capset, hdrp, datap); @@ -762,22 +762,26 @@ static noreturn void child_wrapper(int parent_pipe[2], if (close(global_pidfd)) { goto out; } + + if (chdir("/") != 0) { + goto out; + } + + if (chroot(".") != 0) { + goto out; + } } else { if (syscall(SYS_close_range, 3U, ~0U, 0U) != 0) { abort(); } - } - - if (chdir(SYSROOT) != 0) { - goto out; - 
} - if (chroot(".") != 0) { - goto out; - } + if (chroot(SYSROOT) != 0) { + goto out; + } - if (chdir("/") != 0) { - goto out; + if (chdir("/") != 0) { + goto out; + } } if (new_proc_args->cwd) { @@ -958,6 +962,7 @@ static void copy_initramfs(void) { CHECK_BOOL(chdir("/" NEW_ROOT) == 0); CHECK_BOOL(mount(".", "/", NULL, MS_MOVE, NULL) == 0); CHECK_BOOL(chroot(".") == 0); + CHECK_BOOL(mount(NULL, "/", NULL, MS_SHARED, NULL) == 0); } static uint32_t spawn_new_process(struct new_process_args* new_proc_args, @@ -1951,10 +1956,38 @@ static void get_namespace_fd(void) { }; sigset_t set; CHECK(sigemptyset(&set)); + int fds[2], status = 0; + CHECK_BOOL(pipe2(fds, O_CLOEXEC) == 0); errno = 0; global_zombie_pid = syscall(SYS_clone3, &args, sizeof args); CHECK_BOOL(global_zombie_pid >= 0); if (global_zombie_pid == 0) { + if (close(fds[0])) + abort(); + if (mount(SYSROOT, SYSROOT, NULL, MS_BIND | MS_REC, NULL)) { + status = errno; + goto bad; + } + if (mount(NULL, SYSROOT, NULL, MS_SLAVE | MS_REC, NULL)) { + status = errno; + goto bad; + } + if (chdir(SYSROOT)) + abort(); + if (syscall(SYS_pivot_root, ".", ".")) { + status = errno; + goto bad; + } + if (umount2(".", MNT_DETACH)) { + status = errno; + goto bad; + } + if (chdir("/")) { + status = errno; + } +bad: + if (write(fds[1], &status, sizeof status) != sizeof status || close(fds[1]) != 0) + _exit(1); for (;;) { const struct timespec x = { .tv_sec = INT32_MAX, @@ -1963,8 +1996,13 @@ static void get_namespace_fd(void) { (void)(nanosleep(&x, NULL)); } } - /* parent */ CHECK(global_pidfd); + /* parent */ + CHECK_BOOL(close(fds[1]) == 0); + CHECK_BOOL(read(fds[0], &status, sizeof status) == sizeof status); + errno = status; + CHECK_BOOL(status == 0); + CHECK_BOOL(close(fds[0]) == 0); int snprintf_res = snprintf(buf, sizeof buf, "/proc/%d/uid_map", global_zombie_pid); CHECK_BOOL(snprintf_res > (int)sizeof buf - 10); CHECK_BOOL(snprintf_res < (int)sizeof buf);