login: Wait for all descendants in browser process group
Since kernel 3.4, it's been possible for a process to wait on all
descendants, not just direct children, given the appropriate prctl().
The session_manager has always wanted to do this, so let's do it!
BUG=chromium:436549
TEST=bvt-inline, unittests
Change-Id: I7d2048cd11762d077f346c0d6e473ba9307c779d
Previous-Reviewed-on: https://chromium-review.googlesource.com/252803
Trybot-Ready: Chris Masone <cmasone@chromium.org>
(cherry picked from commit 1ebe027077ef6a2e0c090143bc5179a7f3531e62)
Reviewed-on: https://chromium-review.googlesource.com/253791
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
Commit-Queue: Yuly Novikov <ynovikov@chromium.org>
Tested-by: Yuly Novikov <ynovikov@chromium.org>
diff --git a/login_manager/browser_job.cc b/login_manager/browser_job.cc
index 9b11cc1..df03502 100644
--- a/login_manager/browser_job.cc
+++ b/login_manager/browser_job.cc
@@ -105,7 +105,7 @@
if (subprocess_.pid() < 0)
return;
- LOG(INFO) << "Terminating process: " << message;
+ LOG(INFO) << "Terminating process group: " << message;
subprocess_.KillEverything(signal);
}
@@ -113,19 +113,19 @@
if (subprocess_.pid() < 0)
return;
- LOG(INFO) << "Terminating process group: " << message;
+ LOG(INFO) << "Terminating process: " << message;
subprocess_.Kill(signal);
}
void BrowserJob::WaitAndAbort(base::TimeDelta timeout) {
if (subprocess_.pid() < 0)
return;
- if (!system_->ChildIsGone(subprocess_.pid(), timeout)) {
+ if (!system_->ProcessGroupIsGone(subprocess_.pid(), timeout)) {
LOG(WARNING) << "Aborting child process " << subprocess_.pid()
<< "'s process group " << timeout.InSeconds()
- << " seconds after sending TERM signal";
+ << " seconds after sending signal";
std::string message = base::StringPrintf("Browser took more than %" PRId64
- " seconds to exit after TERM.",
+ " seconds to exit after signal.",
timeout.InSeconds());
KillEverything(SIGABRT, message);
} else {
diff --git a/login_manager/browser_job_unittest.cc b/login_manager/browser_job_unittest.cc
index 1782cf7..8a63650 100644
--- a/login_manager/browser_job_unittest.cc
+++ b/login_manager/browser_job_unittest.cc
@@ -107,7 +107,7 @@
EXPECT_CALL(utils_, fork()).WillOnce(Return(kDummyPid));
EXPECT_CALL(utils_, kill(-kDummyPid, _, SIGABRT)).Times(1);
EXPECT_CALL(utils_, time(NULL)).WillRepeatedly(Return(0));
- EXPECT_CALL(utils_, ChildIsGone(kDummyPid, _)).WillOnce(Return(false));
+ EXPECT_CALL(utils_, ProcessGroupIsGone(kDummyPid, _)).WillOnce(Return(false));
EXPECT_CALL(metrics_, HasRecordedChromeExec()).WillRepeatedly(Return(false));
EXPECT_CALL(metrics_, RecordStats(_)).Times(AnyNumber());
@@ -120,7 +120,7 @@
pid_t kDummyPid = 4;
EXPECT_CALL(utils_, fork()).WillOnce(Return(kDummyPid));
EXPECT_CALL(utils_, time(NULL)).WillRepeatedly(Return(0));
- EXPECT_CALL(utils_, ChildIsGone(kDummyPid, _)).WillOnce(Return(true));
+ EXPECT_CALL(utils_, ProcessGroupIsGone(kDummyPid, _)).WillOnce(Return(true));
EXPECT_CALL(metrics_, HasRecordedChromeExec()).WillRepeatedly(Return(false));
EXPECT_CALL(metrics_, RecordStats(_)).Times(AnyNumber());
diff --git a/login_manager/generator_job.cc b/login_manager/generator_job.cc
index 95943aa..0f8bbcd 100644
--- a/login_manager/generator_job.cc
+++ b/login_manager/generator_job.cc
@@ -78,7 +78,7 @@
void GeneratorJob::WaitAndAbort(base::TimeDelta timeout) {
if (subprocess_.pid() < 0)
return;
- if (!system_->ChildIsGone(subprocess_.pid(), timeout))
+ if (!system_->ProcessGroupIsGone(subprocess_.pid(), timeout))
KillEverything(SIGABRT, std::string());
else
DLOG(INFO) << "Cleaned up child " << subprocess_.pid();
diff --git a/login_manager/mock_system_utils.h b/login_manager/mock_system_utils.h
index 215fb3a..10563c1 100644
--- a/login_manager/mock_system_utils.h
+++ b/login_manager/mock_system_utils.h
@@ -31,7 +31,8 @@
MOCK_METHOD1(time, time_t(time_t*)); // NOLINT
MOCK_METHOD0(fork, pid_t(void));
MOCK_METHOD0(IsDevMode, int(void));
- MOCK_METHOD2(ChildIsGone, bool(pid_t child_spec, base::TimeDelta timeout));
+ MOCK_METHOD2(ProcessGroupIsGone, bool(pid_t child_spec,
+ base::TimeDelta timeout));
// All filesystem-touching methods write to a ScopedTempDir that's owned by
// this class.
diff --git a/login_manager/session_manager_main.cc b/login_manager/session_manager_main.cc
index a921bbd..30e7cd7 100644
--- a/login_manager/session_manager_main.cc
+++ b/login_manager/session_manager_main.cc
@@ -5,6 +5,7 @@
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
+#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>
@@ -122,6 +123,10 @@
base::CommandLine* cl = base::CommandLine::ForCurrentProcess();
chromeos::InitLog(chromeos::kLogToSyslog | chromeos::kLogHeader);
+ // Allow waiting for all descendants, not just immediate children
+ if (::prctl(PR_SET_CHILD_SUBREAPER, 1))
+ PLOG(ERROR) << "Couldn't set child subreaper";
+
if (cl->HasSwitch(switches::kHelp)) {
LOG(INFO) << switches::kHelpMessage;
return 0;
diff --git a/login_manager/session_manager_service.cc b/login_manager/session_manager_service.cc
index 2801fed..0e5cad6 100644
--- a/login_manager/session_manager_service.cc
+++ b/login_manager/session_manager_service.cc
@@ -250,8 +250,9 @@
void SessionManagerService::HandleExit(const siginfo_t& ignored) {
LOG(INFO) << "Exiting process is " << browser_->GetName() << ".";
- // If I could wait for descendants here, I would. Instead, I kill them.
- browser_->KillEverything(SIGKILL, "Session termination");
+ // Clears up the whole job's process group.
+ browser_->KillEverything(SIGKILL, "Ensuring browser processes are gone.");
+ browser_->WaitAndAbort(GetKillTimeout());
browser_->ClearPid();
// Do nothing if already shutting down.
diff --git a/login_manager/system_utils.h b/login_manager/system_utils.h
index 9e3b056..6b6a45f 100644
--- a/login_manager/system_utils.h
+++ b/login_manager/system_utils.h
@@ -47,9 +47,10 @@
// Returns 0 if normal mode, 1 if developer mode, -1 if error.
virtual int IsDevMode() = 0;
- // Returns: true if child specified by |child_spec| exited,
+ // Returns: true if process group specified by |child_spec| exited,
// false if we time out.
- virtual bool ChildIsGone(pid_t child_spec, base::TimeDelta timeout) = 0;
+ virtual bool ProcessGroupIsGone(pid_t child_spec,
+ base::TimeDelta timeout) = 0;
virtual bool EnsureAndReturnSafeFileSize(const base::FilePath& file,
int32_t* file_size_32) = 0;
diff --git a/login_manager/system_utils_impl.cc b/login_manager/system_utils_impl.cc
index bbf978a..59fea8d 100644
--- a/login_manager/system_utils_impl.cc
+++ b/login_manager/system_utils_impl.cc
@@ -69,7 +69,8 @@
return ::fork();
}
-bool SystemUtilsImpl::ChildIsGone(pid_t child_spec, base::TimeDelta timeout) {
+bool SystemUtilsImpl::ProcessGroupIsGone(pid_t child_spec,
+ base::TimeDelta timeout) {
base::TimeTicks start = base::TimeTicks::Now();
base::TimeDelta elapsed;
int ret;
@@ -80,7 +81,7 @@
alarm(static_cast<int32_t>(timeout.InSeconds()));
do {
errno = 0;
- ret = ::waitpid(child_spec, NULL, 0);
+ ret = ::waitpid(-child_spec, NULL, 0);
elapsed = base::TimeTicks::Now() - start;
} while (ret > 0 || (errno == EINTR && elapsed < timeout));
diff --git a/login_manager/system_utils_impl.h b/login_manager/system_utils_impl.h
index 802f943..79c4d35 100644
--- a/login_manager/system_utils_impl.h
+++ b/login_manager/system_utils_impl.h
@@ -35,7 +35,7 @@
time_t time(time_t* t) override;
pid_t fork() override;
int IsDevMode() override;
- bool ChildIsGone(pid_t child_spec, base::TimeDelta timeout) override;
+ bool ProcessGroupIsGone(pid_t child_spec, base::TimeDelta timeout) override;
bool EnsureAndReturnSafeFileSize(const base::FilePath& file,
int32_t* file_size_32) override;