blob: 2eb64cf76ca4fba4dff3e4919857da964c5d4ff6 [file] [log] [blame] [edit]
package userns
import (
"fmt"
"os"
"sort"
"strings"
"sync"
"syscall"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
"github.com/opencontainers/runc/libcontainer/configs"
)
type Mapping struct {
UIDMappings []configs.IDMap
GIDMappings []configs.IDMap
}
func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) {
for _, uid := range m.UIDMappings {
uids = append(uids, syscall.SysProcIDMap{
ContainerID: int(uid.ContainerID),
HostID: int(uid.HostID),
Size: int(uid.Size),
})
}
for _, gid := range m.GIDMappings {
gids = append(gids, syscall.SysProcIDMap{
ContainerID: int(gid.ContainerID),
HostID: int(gid.HostID),
Size: int(gid.Size),
})
}
return
}
// id returns a unique identifier for this mapping, agnostic of the order of
// the uid and gid mappings (because the order doesn't matter to the kernel).
// The set of userns handles is indexed using this ID.
func (m Mapping) id() string {
var uids, gids []string
for _, idmap := range m.UIDMappings {
uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
}
for _, idmap := range m.GIDMappings {
gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size))
}
// We don't care about the sort order -- just sort them.
sort.Strings(uids)
sort.Strings(gids)
return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",")
}
type Handles struct {
m sync.Mutex
maps map[string]*os.File
}
// Release all resources associated with this Handle. All existing files
// returned from Get() will continue to work even after calling Release(). The
// same Handles can be re-used after calling Release().
func (hs *Handles) Release() {
hs.m.Lock()
defer hs.m.Unlock()
// Close the files for good measure, though GC will do that for us anyway.
for _, file := range hs.maps {
_ = file.Close()
}
hs.maps = nil
}
func spawnProc(req Mapping) (*os.Process, error) {
// We need to spawn a subprocess with the requested mappings, which is
// unfortunately quite expensive. The "safe" way of doing this is natively
// with Go (and then spawning something like "sleep infinity"), but
// execve() is a waste of cycles because we just need some process to have
// the right mapping, we don't care what it's executing. The "unsafe"
// option of doing a clone() behind the back of Go is probably okay in
// theory as long as we just do kill(getpid(), SIGSTOP). However, if we
// tell Go to put the new process into PTRACE_TRACEME mode, we can avoid
// the exec and not have to faff around with the mappings.
//
// Note that Go's stdlib does not support newuidmap, but in the case of
// id-mapped mounts, it seems incredibly unlikely that the user will be
// requesting us to do a remapping as an unprivileged user with mappings
// they have privileges over.
logrus.Debugf("spawning dummy process for id-mapping %s", req.id())
uidMappings, gidMappings := req.toSys()
// We don't need to use /proc/thread-self here because the exe mm of a
// thread-group is guaranteed to be the same for all threads by definition.
// This lets us avoid having to do runtime.LockOSThread.
return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{
Sys: &syscall.SysProcAttr{
Cloneflags: unix.CLONE_NEWUSER,
UidMappings: uidMappings,
GidMappings: gidMappings,
GidMappingsEnableSetgroups: false,
// Put the process into PTRACE_TRACEME mode to allow us to get the
// userns without having a proper execve() target.
Ptrace: true,
},
})
}
func dupFile(f *os.File) (*os.File, error) {
newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0)
if err != nil {
return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err)
}
return os.NewFile(uintptr(newFd), f.Name()), nil
}
// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested
// mapping. The processes spawned to produce userns nsfds are cached, so if
// equivalent user namespace mappings are requested, the same user namespace
// will be returned. The caller is responsible for closing the returned file
// descriptor.
func (hs *Handles) Get(req Mapping) (file *os.File, err error) {
hs.m.Lock()
defer hs.m.Unlock()
if hs.maps == nil {
hs.maps = make(map[string]*os.File)
}
file, ok := hs.maps[req.id()]
if !ok {
proc, err := spawnProc(req)
if err != nil {
return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err)
}
// Make sure we kill the helper process. We ignore errors because
// there's not much we can do about them anyway, and ultimately
defer func() {
_ = proc.Kill()
_, _ = proc.Wait()
}()
// Stash away a handle to the userns file. This is neater than keeping
// the process alive, because Go's GC can handle files much better than
// leaked processes, and having long-living useless processes seems
// less than ideal.
file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid))
if err != nil {
return nil, err
}
hs.maps[req.id()] = file
}
// Duplicate the file, to make sure the lifecycle of each *os.File we
// return is independent.
return dupFile(file)
}