Skip to content

Commit

Permalink
Use pivot_root() to clear old root filesystem from mount namespace
Browse files Browse the repository at this point in the history
This has two advantages over chroot():

- It lets processes inside the sandbox call chroot() themselves, which
  OpenSSH's sshd needs in order to run.
- It removes the old root filesystem from the mount namespace, so chroot
  escapes are not possible.
  • Loading branch information
DemiMarie committed Dec 10, 2023
1 parent 986d8a1 commit 8bae90f
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 13 deletions.
2 changes: 2 additions & 0 deletions runtime/examples/direct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ async fn main() -> io::Result<()> {
)
.await?;

run_process_with_output(&mut ga, &notifications, "/bin/mount", &["mount"]).await?;

let fds = [
None,
Some(RedirectFdType::RedirectFdFile(
Expand Down
64 changes: 51 additions & 13 deletions runtime/init-container/src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@

#define DEV_VPN "eth0"
#define DEV_INET "eth1"
#define SYSROOT "/mnt/newroot"

#define MODE_RW_UGO (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)
#define OUTPUT_PATH_PREFIX "/var/tmp/guest_agent_private/fds"
Expand Down Expand Up @@ -675,7 +674,8 @@ static int child_pipe = -1;

#define NAMESPACES \
(CLONE_NEWUSER | /* new user namespace */ \
0)
CLONE_NEWNS | /* new mount namespace */ \
0)

static int capset(cap_user_header_t hdrp, cap_user_data_t datap) {
return syscall(SYS_capset, hdrp, datap);
Expand Down Expand Up @@ -762,22 +762,26 @@ static noreturn void child_wrapper(int parent_pipe[2],
if (close(global_pidfd)) {
goto out;
}

if (chdir("/") != 0) {
goto out;
}

if (chroot(".") != 0) {
goto out;
}
} else {
if (syscall(SYS_close_range, 3U, ~0U, 0U) != 0) {
abort();
}
}

if (chdir(SYSROOT) != 0) {
goto out;
}

if (chroot(".") != 0) {
goto out;
}
if (chroot(SYSROOT) != 0) {
goto out;
}

if (chdir("/") != 0) {
goto out;
if (chdir("/") != 0) {
goto out;
}
}

if (new_proc_args->cwd) {
Expand Down Expand Up @@ -958,6 +962,7 @@ static void copy_initramfs(void) {
CHECK_BOOL(chdir("/" NEW_ROOT) == 0);
CHECK_BOOL(mount(".", "/", NULL, MS_MOVE, NULL) == 0);
CHECK_BOOL(chroot(".") == 0);
CHECK_BOOL(mount(NULL, "/", NULL, MS_SHARED, NULL) == 0);
}

static uint32_t spawn_new_process(struct new_process_args* new_proc_args,
Expand Down Expand Up @@ -1951,10 +1956,38 @@ static void get_namespace_fd(void) {
};
sigset_t set;
CHECK(sigemptyset(&set));
int fds[2], status = 0;
CHECK_BOOL(pipe2(fds, O_CLOEXEC) == 0);
errno = 0;
global_zombie_pid = syscall(SYS_clone3, &args, sizeof args);
CHECK_BOOL(global_zombie_pid >= 0);
if (global_zombie_pid == 0) {
if (close(fds[0]))
abort();
if (mount(SYSROOT, SYSROOT, NULL, MS_BIND | MS_REC, NULL)) {
status = errno;
goto bad;
}
if (mount(NULL, SYSROOT, NULL, MS_SLAVE | MS_REC, NULL)) {
status = errno;
goto bad;
}
if (chdir(SYSROOT))
abort();
if (syscall(SYS_pivot_root, ".", ".")) {
status = errno;
goto bad;
}
if (umount2(".", MNT_DETACH)) {
status = errno;
goto bad;
}
if (chdir("/")) {
status = errno;
}
bad:
if (write(fds[1], &status, sizeof status) != sizeof status || close(fds[1]) != 0)
_exit(1);
for (;;) {
const struct timespec x = {
.tv_sec = INT32_MAX,
Expand All @@ -1963,8 +1996,13 @@ static void get_namespace_fd(void) {
(void)(nanosleep(&x, NULL));
}
}
/* parent */
CHECK(global_pidfd);
/* parent */
CHECK_BOOL(close(fds[1]) == 0);
CHECK_BOOL(read(fds[0], &status, sizeof status) == sizeof status);
errno = status;
CHECK_BOOL(status == 0);
CHECK_BOOL(close(fds[0]) == 0);
int snprintf_res = snprintf(buf, sizeof buf, "/proc/%d/uid_map", global_zombie_pid);
CHECK_BOOL(snprintf_res > (int)sizeof buf - 10);
CHECK_BOOL(snprintf_res < (int)sizeof buf);
Expand Down

0 comments on commit 8bae90f

Please sign in to comment.