pid-sandbox: execute pid-ns-init as pid 1 (bug 675312)
Execute pid-ns-init in the first process forked after unshare, as
required for it to have pid 1 and become the default reaper of
orphaned descendant processes. In _exec, also exec a separate
pid-ns-init process that acts as a supervisor, forwarding signals
to init and forwarding init's exit status to the parent process.
Fixes: a75d5546e3a4 ("Introduce a tiny init replacement for inside pid namespace")
Bug: https://bugs.gentoo.org/675312
Reviewed-by: Brian Dolbec <dolsen@gentoo.org>
Signed-off-by: Zac Medico <zmedico@gentoo.org>
diff --git a/bin/pid-ns-init b/bin/pid-ns-init
index 843257b..182d00a 100644
--- a/bin/pid-ns-init
+++ b/bin/pid-ns-init
@@ -1,23 +1,59 @@
#!/usr/bin/env python
-# Copyright 2018 Gentoo Authors
+# Copyright 2018-2019 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
+import functools
import os
+import signal
import sys
+KILL_SIGNALS = (
+ signal.SIGINT,
+ signal.SIGTERM,
+ signal.SIGHUP,
+)
+
+def forward_kill_signal(main_child_pid, signum, frame):
+ os.kill(main_child_pid, signum)
+
+
def main(argv):
if len(argv) < 2:
- return 'Usage: {} <main-child-pid>'.format(argv[0])
- main_child_pid = int(argv[1])
+ return 'Usage: {} <main-child-pid> or <binary> <argv0> [arg]..'.format(argv[0])
+
+ if len(argv) == 2:
+ # The child process is init (pid 1) in a child pid namespace, and
+ # the current process supervises from within the global pid namespace
+ # (forwarding signals to init and forwarding exit status to the parent
+ # process).
+ main_child_pid = int(argv[1])
+ else:
+ # The current process is init (pid 1) in a child pid namespace.
+ binary = argv[1]
+ args = argv[2:]
+
+ main_child_pid = os.fork()
+ if main_child_pid == 0:
+ os.execv(binary, args)
+
+ sig_handler = functools.partial(forward_kill_signal, main_child_pid)
+ for signum in KILL_SIGNALS:
+ signal.signal(signum, sig_handler)
# wait for child processes
while True:
- pid, status = os.wait()
+ try:
+ pid, status = os.wait()
+ except OSError as e:
+ if e.errno == errno.EINTR:
+ continue
+ raise
if pid == main_child_pid:
if os.WIFEXITED(status):
return os.WEXITSTATUS(status)
elif os.WIFSIGNALED(status):
+ signal.signal(os.WTERMSIG(status), signal.SIG_DFL)
os.kill(os.getpid(), os.WTERMSIG(status))
# go to the unreachable place
break
diff --git a/lib/portage/process.py b/lib/portage/process.py
index 7103b6b..6af3ac3 100644
--- a/lib/portage/process.py
+++ b/lib/portage/process.py
@@ -564,15 +564,28 @@
noiselevel=-1)
else:
if unshare_pid:
- # pid namespace requires us to become init
- fork_ret = os.fork()
- if fork_ret != 0:
- os.execv(portage._python_interpreter, [
+ main_child_pid = os.fork()
+ if main_child_pid == 0:
+ # pid namespace requires us to become init
+ binary, myargs = portage._python_interpreter, [
portage._python_interpreter,
os.path.join(portage._bin_path,
'pid-ns-init'),
- '%s' % fork_ret,
- ])
+ binary] + myargs
+ else:
+ # Execute a supervisor process which will forward
+ # signals to init and forward exit status to the
+ # parent process. The supervisor process runs in
+ # the global pid namespace, so skip /proc remount
+ # and other setup that's intended only for the
+ # init process.
+ binary, myargs = portage._python_interpreter, [
+ portage._python_interpreter,
+ os.path.join(portage._bin_path,
+ 'pid-ns-init'), str(main_child_pid)]
+
+ os.execve(binary, myargs, env)
+
if unshare_mount:
# mark the whole filesystem as slave to avoid
# mounts escaping the namespace