login: Wait for all descendants in browser process group

Since kernel 3.4, it's been possible for a process to wait on all
descendants, not just direct children, given the appropriate prctl().

The session_manager has always wanted to do this, so let's do it!

BUG=chromium:436549
TEST=bvt-inline, unittests

Change-Id: I7d2048cd11762d077f346c0d6e473ba9307c779d
Previous-Reviewed-on: https://chromium-review.googlesource.com/252803
Trybot-Ready: Chris Masone <cmasone@chromium.org>
(cherry picked from commit 1ebe027077ef6a2e0c090143bc5179a7f3531e62)
Reviewed-on: https://chromium-review.googlesource.com/253791
Reviewed-by: Yuly Novikov <ynovikov@chromium.org>
Commit-Queue: Yuly Novikov <ynovikov@chromium.org>
Tested-by: Yuly Novikov <ynovikov@chromium.org>
diff --git a/login_manager/browser_job.cc b/login_manager/browser_job.cc
index 9b11cc1..df03502 100644
--- a/login_manager/browser_job.cc
+++ b/login_manager/browser_job.cc
@@ -105,7 +105,7 @@
   if (subprocess_.pid() < 0)
     return;
 
-  LOG(INFO) << "Terminating process: " << message;
+  LOG(INFO) << "Terminating process group: " << message;
   subprocess_.KillEverything(signal);
 }
 
@@ -113,19 +113,19 @@
   if (subprocess_.pid() < 0)
     return;
 
-  LOG(INFO) << "Terminating process group: " << message;
+  LOG(INFO) << "Terminating process: " << message;
   subprocess_.Kill(signal);
 }
 
 void BrowserJob::WaitAndAbort(base::TimeDelta timeout) {
   if (subprocess_.pid() < 0)
     return;
-  if (!system_->ChildIsGone(subprocess_.pid(), timeout)) {
+  if (!system_->ProcessGroupIsGone(subprocess_.pid(), timeout)) {
     LOG(WARNING) << "Aborting child process " << subprocess_.pid()
                  << "'s process group " << timeout.InSeconds()
-                 << " seconds after sending TERM signal";
+                 << " seconds after sending signal";
     std::string message = base::StringPrintf("Browser took more than %" PRId64
-                                             " seconds to exit after TERM.",
+                                             " seconds to exit after signal.",
                                              timeout.InSeconds());
     KillEverything(SIGABRT, message);
   } else {
diff --git a/login_manager/browser_job_unittest.cc b/login_manager/browser_job_unittest.cc
index 1782cf7..8a63650 100644
--- a/login_manager/browser_job_unittest.cc
+++ b/login_manager/browser_job_unittest.cc
@@ -107,7 +107,7 @@
   EXPECT_CALL(utils_, fork()).WillOnce(Return(kDummyPid));
   EXPECT_CALL(utils_, kill(-kDummyPid, _, SIGABRT)).Times(1);
   EXPECT_CALL(utils_, time(NULL)).WillRepeatedly(Return(0));
-  EXPECT_CALL(utils_, ChildIsGone(kDummyPid, _)).WillOnce(Return(false));
+  EXPECT_CALL(utils_, ProcessGroupIsGone(kDummyPid, _)).WillOnce(Return(false));
 
   EXPECT_CALL(metrics_, HasRecordedChromeExec()).WillRepeatedly(Return(false));
   EXPECT_CALL(metrics_, RecordStats(_)).Times(AnyNumber());
@@ -120,7 +120,7 @@
   pid_t kDummyPid = 4;
   EXPECT_CALL(utils_, fork()).WillOnce(Return(kDummyPid));
   EXPECT_CALL(utils_, time(NULL)).WillRepeatedly(Return(0));
-  EXPECT_CALL(utils_, ChildIsGone(kDummyPid, _)).WillOnce(Return(true));
+  EXPECT_CALL(utils_, ProcessGroupIsGone(kDummyPid, _)).WillOnce(Return(true));
 
   EXPECT_CALL(metrics_, HasRecordedChromeExec()).WillRepeatedly(Return(false));
   EXPECT_CALL(metrics_, RecordStats(_)).Times(AnyNumber());
diff --git a/login_manager/generator_job.cc b/login_manager/generator_job.cc
index 95943aa..0f8bbcd 100644
--- a/login_manager/generator_job.cc
+++ b/login_manager/generator_job.cc
@@ -78,7 +78,7 @@
 void GeneratorJob::WaitAndAbort(base::TimeDelta timeout) {
   if (subprocess_.pid() < 0)
     return;
-  if (!system_->ChildIsGone(subprocess_.pid(), timeout))
+  if (!system_->ProcessGroupIsGone(subprocess_.pid(), timeout))
     KillEverything(SIGABRT, std::string());
   else
     DLOG(INFO) << "Cleaned up child " << subprocess_.pid();
diff --git a/login_manager/mock_system_utils.h b/login_manager/mock_system_utils.h
index 215fb3a..10563c1 100644
--- a/login_manager/mock_system_utils.h
+++ b/login_manager/mock_system_utils.h
@@ -31,7 +31,8 @@
   MOCK_METHOD1(time, time_t(time_t*));  // NOLINT
   MOCK_METHOD0(fork, pid_t(void));
   MOCK_METHOD0(IsDevMode, int(void));
-  MOCK_METHOD2(ChildIsGone, bool(pid_t child_spec, base::TimeDelta timeout));
+  MOCK_METHOD2(ProcessGroupIsGone, bool(pid_t child_spec,
+                                        base::TimeDelta timeout));
 
   // All filesystem-touching methods write to a ScopedTempDir that's owned by
   // this class.
diff --git a/login_manager/session_manager_main.cc b/login_manager/session_manager_main.cc
index a921bbd..30e7cd7 100644
--- a/login_manager/session_manager_main.cc
+++ b/login_manager/session_manager_main.cc
@@ -5,6 +5,7 @@
 #include <errno.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <sys/prctl.h>
 #include <sys/types.h>
 #include <unistd.h>
 
@@ -122,6 +123,10 @@
   base::CommandLine* cl = base::CommandLine::ForCurrentProcess();
   chromeos::InitLog(chromeos::kLogToSyslog | chromeos::kLogHeader);
 
+  // Allow waiting for all descendants, not just immediate children
+  if (::prctl(PR_SET_CHILD_SUBREAPER, 1))
+    PLOG(ERROR) << "Couldn't set child subreaper";
+
   if (cl->HasSwitch(switches::kHelp)) {
     LOG(INFO) << switches::kHelpMessage;
     return 0;
diff --git a/login_manager/session_manager_service.cc b/login_manager/session_manager_service.cc
index 2801fed..0e5cad6 100644
--- a/login_manager/session_manager_service.cc
+++ b/login_manager/session_manager_service.cc
@@ -250,8 +250,9 @@
 void SessionManagerService::HandleExit(const siginfo_t& ignored) {
   LOG(INFO) << "Exiting process is " << browser_->GetName() << ".";
 
-  // If I could wait for descendants here, I would.  Instead, I kill them.
-  browser_->KillEverything(SIGKILL, "Session termination");
+  // Clears up the whole job's process group.
+  browser_->KillEverything(SIGKILL, "Ensuring browser processes are gone.");
+  browser_->WaitAndAbort(GetKillTimeout());
   browser_->ClearPid();
 
   // Do nothing if already shutting down.
diff --git a/login_manager/system_utils.h b/login_manager/system_utils.h
index 9e3b056..6b6a45f 100644
--- a/login_manager/system_utils.h
+++ b/login_manager/system_utils.h
@@ -47,9 +47,10 @@
   // Returns 0 if normal mode, 1 if developer mode, -1 if error.
   virtual int IsDevMode() = 0;
 
-  // Returns: true if child specified by |child_spec| exited,
+  // Returns: true if process group specified by |child_spec| exited,
   //          false if we time out.
-  virtual bool ChildIsGone(pid_t child_spec, base::TimeDelta timeout) = 0;
+  virtual bool ProcessGroupIsGone(pid_t child_spec,
+                                  base::TimeDelta timeout) = 0;
 
   virtual bool EnsureAndReturnSafeFileSize(const base::FilePath& file,
                                            int32_t* file_size_32) = 0;
diff --git a/login_manager/system_utils_impl.cc b/login_manager/system_utils_impl.cc
index bbf978a..59fea8d 100644
--- a/login_manager/system_utils_impl.cc
+++ b/login_manager/system_utils_impl.cc
@@ -69,7 +69,8 @@
   return ::fork();
 }
 
-bool SystemUtilsImpl::ChildIsGone(pid_t child_spec, base::TimeDelta timeout) {
+bool SystemUtilsImpl::ProcessGroupIsGone(pid_t child_spec,
+                                         base::TimeDelta timeout) {
   base::TimeTicks start = base::TimeTicks::Now();
   base::TimeDelta elapsed;
   int ret;
@@ -80,7 +81,7 @@
   alarm(static_cast<int32_t>(timeout.InSeconds()));
   do {
     errno = 0;
-    ret = ::waitpid(child_spec, NULL, 0);
+    ret = ::waitpid(-child_spec, NULL, 0);
     elapsed = base::TimeTicks::Now() - start;
   } while (ret > 0 || (errno == EINTR && elapsed < timeout));
 
diff --git a/login_manager/system_utils_impl.h b/login_manager/system_utils_impl.h
index 802f943..79c4d35 100644
--- a/login_manager/system_utils_impl.h
+++ b/login_manager/system_utils_impl.h
@@ -35,7 +35,7 @@
   time_t time(time_t* t) override;
   pid_t fork() override;
   int IsDevMode() override;
-  bool ChildIsGone(pid_t child_spec, base::TimeDelta timeout) override;
+  bool ProcessGroupIsGone(pid_t child_spec, base::TimeDelta timeout) override;
 
   bool EnsureAndReturnSafeFileSize(const base::FilePath& file,
                                    int32_t* file_size_32) override;