| diff --git a/libcontainer/cgroups/file.go b/libcontainer/cgroups/file.go |
| index 0cdaf747849f..0c186ba21117 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go |
| @@ -76,16 +76,16 @@ var ( |
| // TestMode is set to true by unit tests that need "fake" cgroupfs. |
| TestMode bool |
| |
| - cgroupFd int = -1 |
| - prepOnce sync.Once |
| - prepErr error |
| - resolveFlags uint64 |
| + cgroupRootHandle *os.File |
| + prepOnce sync.Once |
| + prepErr error |
| + resolveFlags uint64 |
| ) |
| |
| func prepareOpenat2() error { |
| prepOnce.Do(func() { |
| fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ |
| - Flags: unix.O_DIRECTORY | unix.O_PATH, |
| + Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC, |
| }) |
| if err != nil { |
| prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} |
| @@ -96,15 +96,16 @@ func prepareOpenat2() error { |
| } |
| return |
| } |
| + file := os.NewFile(uintptr(fd), cgroupfsDir) |
| + |
| var st unix.Statfs_t |
| - if err = unix.Fstatfs(fd, &st); err != nil { |
| + if err := unix.Fstatfs(int(file.Fd()), &st); err != nil { |
| prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err} |
| logrus.Warnf("falling back to securejoin: %s", prepErr) |
| return |
| } |
| |
| - cgroupFd = fd |
| - |
| + cgroupRootHandle = file |
| resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS |
| if st.Type == unix.CGROUP2_SUPER_MAGIC { |
| // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks |
| @@ -131,7 +132,7 @@ func openFile(dir, file string, flags int) (*os.File, error) { |
| return openFallback(path, flags, mode) |
| } |
| |
| - fd, err := unix.Openat2(cgroupFd, relPath, |
| + fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath, |
| &unix.OpenHow{ |
| Resolve: resolveFlags, |
| Flags: uint64(flags) | unix.O_CLOEXEC, |
| @@ -139,20 +140,20 @@ func openFile(dir, file string, flags int) (*os.File, error) { |
| }) |
| if err != nil { |
| err = &os.PathError{Op: "openat2", Path: path, Err: err} |
| - // Check if cgroupFd is still opened to cgroupfsDir |
| + // Check if cgroupRootHandle is still opened to cgroupfsDir |
| // (happens when this package is incorrectly used |
| // across the chroot/pivot_root/mntns boundary, or |
| // when /sys/fs/cgroup is remounted). |
| // |
| // TODO: if such usage will ever be common, amend this |
| - // to reopen cgroupFd and retry openat2. |
| - fdStr := strconv.Itoa(cgroupFd) |
| + // to reopen cgroupRootHandle and retry openat2. |
| + fdStr := strconv.Itoa(int(cgroupRootHandle.Fd())) |
| fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr) |
| if fdDest != cgroupfsDir { |
| - // Wrap the error so it is clear that cgroupFd |
| + // Wrap the error so it is clear that cgroupRootHandle |
| // is opened to an unexpected/wrong directory. |
| - err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w", |
| - fdStr, fdDest, cgroupfsDir, err) |
| + err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w", |
| + cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err) |
| } |
| return nil, err |
| } |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go |
| index 1092331b25d8..2cb970a3d55b 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/paths.go |
| @@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string { |
| if err != nil { |
| return "" |
| } |
| + defer dir.Close() |
| names, err := dir.Readdirnames(1) |
| if err != nil { |
| return "" |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go |
| index dd61dfd3c90c..c0d4d6e6f6f9 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go |
| @@ -351,6 +351,15 @@ func (c *linuxContainer) start(process *Process) (retErr error) { |
| }() |
| } |
| |
| + // Before starting "runc init", mark all non-stdio open files as O_CLOEXEC |
| + // to make sure we don't leak any files into "runc init". Any files to be |
| + // passed to "runc init" throuhg ExtraFiles will get dup2'd by the Go |
| + // runtime and thus their O_CLOEXEC flag will be cleared. This is some |
| + // additional protection against attacks like CVE-2024-21626, by making |
| + // sure we never leak files to "runc init" we didn't intend to. |
| + if err := utils.CloseExecFrom(3); err != nil { |
| + return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err) |
| + } |
| if err := parent.start(); err != nil { |
| return fmt.Errorf("unable to start container process: %w", err) |
| } |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go |
| index 2e4c59353c83..23899817ce92 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go |
| @@ -8,6 +8,7 @@ import ( |
| "io" |
| "net" |
| "os" |
| + "path/filepath" |
| "strings" |
| "unsafe" |
| |
| @@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error { |
| return nil |
| } |
| |
| +// verifyCwd ensures that the current directory is actually inside the mount |
| +// namespace root of the current process. |
| +func verifyCwd() error { |
| + // getcwd(2) on Linux detects if cwd is outside of the rootfs of the |
| + // current mount namespace root, and in that case prefixes "(unreachable)" |
| + // to the returned string. glibc's getcwd(3) and Go's Getwd() both detect |
| + // when this happens and return ENOENT rather than returning a non-absolute |
| + // path. In both cases we can therefore easily detect if we have an invalid |
| + // cwd by checking the return value of getcwd(3). See getcwd(3) for more |
| + // details, and CVE-2024-21626 for the security issue that motivated this |
| + // check. |
| + // |
| + // We have to use unix.Getwd() here because os.Getwd() has a workaround for |
| + // $PWD which involves doing stat(.), which can fail if the current |
| + // directory is inaccessible to the container process. |
| + if wd, err := unix.Getwd(); err == unix.ENOENT { |
| + return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected") |
| + } else if err != nil { |
| + return fmt.Errorf("failed to verify if current working directory is safe: %w", err) |
| + } else if !filepath.IsAbs(wd) { |
| + // We shouldn't ever hit this, but check just in case. |
| + return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", wd) |
| + } |
| + return nil |
| +} |
| + |
| // finalizeNamespace drops the caps, sets the correct user |
| // and working dir, and closes any leaked file descriptors |
| // before executing the command inside the namespace |
| @@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error { |
| return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) |
| } |
| } |
| + // Make sure our final working directory is inside the container. |
| + if err := verifyCwd(); err != nil { |
| + return err |
| + } |
| if err := system.ClearKeepCaps(); err != nil { |
| return fmt.Errorf("unable to clear keep caps: %w", err) |
| } |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go b/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go |
| index 95deb0d6ca7a..349a18ed3839 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go |
| @@ -4,10 +4,19 @@ import ( |
| "bufio" |
| "encoding/json" |
| "io" |
| + "os" |
| |
| "github.com/sirupsen/logrus" |
| ) |
| |
| +// IsLogrusFd returns whether the provided fd matches the one that logrus is |
| +// currently outputting to. This should only ever be called by UnsafeCloseFrom |
| +// from `runc init`. |
| +func IsLogrusFd(fd uintptr) bool { |
| + file, ok := logrus.StandardLogger().Out.(*os.File) |
| + return ok && file.Fd() == fd |
| +} |
| + |
| func ForwardLogs(logPipe io.ReadCloser) chan error { |
| done := make(chan error, 1) |
| s := bufio.NewScanner(logPipe) |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go |
| index 09ab552b3d12..d1bb12273c04 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go |
| @@ -4,6 +4,7 @@ import ( |
| "errors" |
| "fmt" |
| "os" |
| + "os/exec" |
| "strconv" |
| |
| "github.com/opencontainers/selinux/go-selinux" |
| @@ -14,6 +15,7 @@ import ( |
| "github.com/opencontainers/runc/libcontainer/keys" |
| "github.com/opencontainers/runc/libcontainer/seccomp" |
| "github.com/opencontainers/runc/libcontainer/system" |
| + "github.com/opencontainers/runc/libcontainer/utils" |
| ) |
| |
| // linuxSetnsInit performs the container's initialization for running a new process |
| @@ -82,6 +84,21 @@ func (l *linuxSetnsInit) Init() error { |
| if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { |
| return err |
| } |
| + |
| + // Check for the arg before waiting to make sure it exists and it is |
| + // returned as a create time error. |
| + name, err := exec.LookPath(l.config.Args[0]) |
| + if err != nil { |
| + return err |
| + } |
| + // exec.LookPath in Go < 1.20 might return no error for an executable |
| + // residing on a file system mounted with noexec flag, so perform this |
| + // extra check now while we can still return a proper error. |
| + // TODO: remove this once go < 1.20 is not supported. |
| + if err := eaccess(name); err != nil { |
| + return &os.PathError{Op: "eaccess", Path: name, Err: err} |
| + } |
| + |
| // Set seccomp as close to execve as possible, so as few syscalls take |
| // place afterward (reducing the amount of syscalls that users need to |
| // enable in their seccomp profiles). |
| @@ -101,5 +118,23 @@ func (l *linuxSetnsInit) Init() error { |
| return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err} |
| } |
| |
| - return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) |
| + // Close all file descriptors we are not passing to the container. This is |
| + // necessary because the execve target could use internal runc fds as the |
| + // execve path, potentially giving access to binary files from the host |
| + // (which can then be opened by container processes, leading to container |
| + // escapes). Note that because this operation will close any open file |
| + // descriptors that are referenced by (*os.File) handles from underneath |
| + // the Go runtime, we must not do any file operations after this point |
| + // (otherwise the (*os.File) finaliser could close the wrong file). See |
| + // CVE-2024-21626 for more information as to why this protection is |
| + // necessary. |
| + // |
| + // This is not needed for runc-dmz, because the extra execve(2) step means |
| + // that all O_CLOEXEC file descriptors have already been closed and thus |
| + // the second execve(2) from runc-dmz cannot access internal file |
| + // descriptors from runc. |
| + if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { |
| + return err |
| + } |
| + return system.Exec(name, l.config.Args[0:], os.Environ()) |
| } |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go |
| index c09a7bed30ea..d1d94352f93d 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go |
| @@ -17,6 +17,7 @@ import ( |
| "github.com/opencontainers/runc/libcontainer/keys" |
| "github.com/opencontainers/runc/libcontainer/seccomp" |
| "github.com/opencontainers/runc/libcontainer/system" |
| + "github.com/opencontainers/runc/libcontainer/utils" |
| ) |
| |
| type linuxStandardInit struct { |
| @@ -258,5 +259,23 @@ func (l *linuxStandardInit) Init() error { |
| return err |
| } |
| |
| + // Close all file descriptors we are not passing to the container. This is |
| + // necessary because the execve target could use internal runc fds as the |
| + // execve path, potentially giving access to binary files from the host |
| + // (which can then be opened by container processes, leading to container |
| + // escapes). Note that because this operation will close any open file |
| + // descriptors that are referenced by (*os.File) handles from underneath |
| + // the Go runtime, we must not do any file operations after this point |
| + // (otherwise the (*os.File) finaliser could close the wrong file). See |
| + // CVE-2024-21626 for more information as to why this protection is |
| + // necessary. |
| + // |
| + // This is not needed for runc-dmz, because the extra execve(2) step means |
| + // that all O_CLOEXEC file descriptors have already been closed and thus |
| + // the second execve(2) from runc-dmz cannot access internal file |
| + // descriptors from runc. |
| + if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil { |
| + return err |
| + } |
| return system.Exec(name, l.config.Args[0:], os.Environ()) |
| } |
| diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go |
| index 220d0b439379..15533df1f2b2 100644 |
| --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go |
| +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go |
| @@ -7,8 +7,11 @@ import ( |
| "fmt" |
| "os" |
| "strconv" |
| + _ "unsafe" // for go:linkname |
| |
| "golang.org/x/sys/unix" |
| + |
| + "github.com/opencontainers/runc/libcontainer/logs" |
| ) |
| |
| // EnsureProcHandle returns whether or not the given file handle is on procfs. |
| @@ -23,9 +26,11 @@ func EnsureProcHandle(fh *os.File) error { |
| return nil |
| } |
| |
| -// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for |
| -// the process (except for those below the given fd value). |
| -func CloseExecFrom(minFd int) error { |
| +type fdFunc func(fd int) |
| + |
| +// fdRangeFrom calls the passed fdFunc for each file descriptor that is open in |
| +// the current process. |
| +func fdRangeFrom(minFd int, fn fdFunc) error { |
| fdDir, err := os.Open("/proc/self/fd") |
| if err != nil { |
| return err |
| @@ -50,15 +55,66 @@ func CloseExecFrom(minFd int) error { |
| if fd < minFd { |
| continue |
| } |
| - // Intentionally ignore errors from unix.CloseOnExec -- the cases where |
| - // this might fail are basically file descriptors that have already |
| - // been closed (including and especially the one that was created when |
| - // os.ReadDir did the "opendir" syscall). |
| - unix.CloseOnExec(fd) |
| + // Ignore the file descriptor we used for readdir, as it will be closed |
| + // when we return. |
| + if uintptr(fd) == fdDir.Fd() { |
| + continue |
| + } |
| + // Run the closure. |
| + fn(fd) |
| } |
| return nil |
| } |
| |
| +// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or |
| +// equal to minFd in the current process. |
| +func CloseExecFrom(minFd int) error { |
| + return fdRangeFrom(minFd, unix.CloseOnExec) |
| +} |
| + |
| +//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor |
| + |
| +// In order to make sure we do not close the internal epoll descriptors the Go |
| +// runtime uses, we need to ensure that we skip descriptors that match |
| +// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing, |
| +// unfortunately there's no other way to be sure we're only keeping the file |
| +// descriptors the Go runtime needs. Hopefully nothing blows up doing this... |
| +func runtime_IsPollDescriptor(fd uintptr) bool |
| + |
| +// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the |
| +// current process, except for those critical to Go's runtime (such as the |
| +// netpoll management descriptors). |
| +// |
| +// NOTE: That this function is incredibly dangerous to use in most Go code, as |
| +// closing file descriptors from underneath *os.File handles can lead to very |
| +// bad behaviour (the closed file descriptor can be re-used and then any |
| +// *os.File operations would apply to the wrong file). This function is only |
| +// intended to be called from the last stage of runc init. |
| +func UnsafeCloseFrom(minFd int) error { |
| + // We must not close some file descriptors. |
| + return fdRangeFrom(minFd, func(fd int) { |
| + if runtime_IsPollDescriptor(uintptr(fd)) { |
| + // These are the Go runtimes internal netpoll file descriptors. |
| + // These file descriptors are operated on deep in the Go scheduler, |
| + // and closing those files from underneath Go can result in panics. |
| + // There is no issue with keeping them because they are not |
| + // executable and are not useful to an attacker anyway. Also we |
| + // don't have any choice. |
| + return |
| + } |
| + if logs.IsLogrusFd(uintptr(fd)) { |
| + // Do not close the logrus output fd. We cannot exec a pipe, and |
| + // the contents are quite limited (very little attacker control, |
| + // JSON-encoded) making shellcode attacks unlikely. |
| + return |
| + } |
| + // There's nothing we can do about errors from close(2), and the |
| + // only likely error to be seen is EBADF which indicates the fd was |
| + // already closed (in which case, we got what we wanted). |
| + _ = unix.Close(fd) |
| + }) |
| +} |
| + |
| // NewSockPair returns a new unix socket pair |
| func NewSockPair(name string) (parent *os.File, child *os.File, err error) { |
| fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) |