test/python/lib/cmd_util.py - mirrors/cros/chromiumos/platform/dev-util - Git at Google

 # Copyright 2021 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Taring/cmd utilities to support container_util."""

 import contextlib
 import errno
 import functools
 import operator
 import os
 import signal
 import subprocess
 import sys
 import tempfile
 import time
 import logging


 from . import signals


 # For use by ShellQuote.  Match all characters that the shell might treat
 # specially.  This means a number of things:
 #  - Reserved characters.
 #  - Characters used in expansions (brace, variable, path, globs, etc...).
 #  - Characters used for command substitution (backtick).
 #  - Characters that an interactive shell might use (like !).
 #  - Whitespace (including newlines) so that one arg turns into multiple.
 # See the bash man page as well as the POSIX shell documentation for more info:
 #   http://www.gnu.org/software/bash/manual/bashref.html
 #   http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
 _SHELL_QUOTABLE_CHARS = frozenset('[|&;()<> \t\n!{}[]=*?~$"\'\\#^`')
 # The chars that, when used inside of double quotes, need escaping.
 # Order here matters as we need to escape backslashes first.
 _SHELL_ESCAPE_CHARS = r'\"`$'

 # When the number of files is larger than this, we hand the file list to tar
 # via the -T option (on stdin), so the files to be added will not show up on
 # the command line.
 _THRESHOLD_TO_USE_T_FOR_TAR = 50


 def ShellQuote(s):
   """Quote |s| in a way that is safe for use in a shell.

   We aim to be safe, but also to produce "nice" output.  That means we don't
   use quotes when we don't need to, and we prefer to use less quotes (like
   putting it all in single quotes) than more (using double quotes and escaping
   a bunch of stuff, or mixing the quotes).

   While python does provide a number of alternatives like:
    - pipes.quote
    - shlex.quote
   They suffer from various problems like:
    - Not widely available in different python versions.
    - Do not produce pretty output in many cases.
    - Are in modules that rarely otherwise get used.

   Note: We don't handle reserved shell words like "for" or "case".  This is
   because those only matter when they're the first element in a command, and
   there is no use case for that.  When we want to run commands, we tend to
   run real programs and not shell ones.

   Args:
     s: The string to quote.  Under Python 3, bytes are decoded as UTF-8
       (undecodable bytes are backslash-escaped) and any other non-string
       object is returned as its repr().

   Returns:
     A safely (possibly quoted) string.  The empty string becomes ''.
   """
   if sys.version_info.major < 3:
     # This is a bit of a hack.  Python 2 will display strings with u prefixes
     # when logging which makes things harder to work with.  Writing bytes to
     # stdout will be interpreted as UTF-8 content implicitly.
     if isinstance(s, str):
       try:
         s = s.encode('utf-8')
       except UnicodeDecodeError:
         # We tried our best.  Let Python's automatic mixed encoding kick in.
         pass
     else:
       return repr(s)
   else:
     # If callers pass down bad types, don't blow up.
     if isinstance(s, bytes):
       s = s.decode('utf-8', 'backslashreplace')
     elif not isinstance(s, str):
       return repr(s)

   # An empty argument must still show up as an argument.
   if not s:
     return "''"

   # See if no quoting is needed so we can return the string as-is.
   if _SHELL_QUOTABLE_CHARS.isdisjoint(s):
     return s

   # See if we can use single quotes first.  Output is nicer, and nothing is
   # expanded inside of single quotes.
   if "'" not in s:
     return "'%s'" % s

   # Have to use double quotes.  Escape the few chars that still expand when
   # used inside of double quotes.
   for c in _SHELL_ESCAPE_CHARS:
     if c in s:
       s = s.replace(c, r'\%s' % c)
   return '"%s"' % s


 def CmdToStr(cmd):
   """Render an argument vector as one space-separated, shell-quoted string.

   Every argument is passed through ShellQuote and the results are joined with
   single spaces, so the output is meant for log messages and for pasting into
   a terminal.  Arguments are only quoted when ShellQuote decides they need it.

   Examples:
     ['a', 'b'] ==> 'a b'
     ['a b', 'c'] ==> "'a b' c"
     [] ==> ''

   Args:
     cmd: The command arguments, as a list or tuple.

   Returns:
     String representing the full command.

   Raises:
     ValueError: If |cmd| is neither a list nor a tuple.
   """
   # If callers pass down bad types (e.g. a bare string), triage it a bit.
   if not isinstance(cmd, (list, tuple)):
     raise ValueError('cmd must be list or tuple, not %s: %r' %
                      (type(cmd), repr(cmd)))

   quoted_args = [ShellQuote(arg) for arg in cmd]
   return ' '.join(quoted_args)


 class CompletedProcess(getattr(subprocess, 'CompletedProcess', object)):
   """Record of a finished child process: its args, exit code and output.

   This mirrors subprocess.CompletedProcess, and derives from it whenever the
   running Python provides that class.
   """

   # The linter is confused by the getattr usage above.
   # TODO(vapier): Drop this once we're Python 3-only and we drop getattr.
   # pylint: disable=bad-option-value,super-on-old-class
   def __init__(self, args=None, returncode=None, stdout=None, stderr=None):
     if sys.version_info.major >= 3:
       # The real subprocess.CompletedProcess stores everything for us.
       super(CompletedProcess, self).__init__(
           args=args, returncode=returncode, stdout=stdout, stderr=stderr)
       return

     # Python 2 has no CompletedProcess base, so keep the fields ourselves.
     self.args = args
     self.returncode = returncode
     self.stdout = stdout
     self.stderr = stderr

   @property
   def cmd(self):
     """Alias to self.args to better match other subprocess APIs."""
     return self.args

   @property
   def cmdstr(self):
     """Return self.cmd shell-quoted for log messages ('' if unset)."""
     return '' if self.args is None else CmdToStr(self.args)

   def check_returncode(self):
     """Raise CalledProcessError if the exit code is non-zero."""
     if not self.returncode:
       return
     raise CalledProcessError(
         returncode=self.returncode, cmd=self.args, stdout=self.stdout,
         stderr=self.stderr, msg='check_returncode failed')


 # TODO(crbug.com/1006587): Migrate users to CompletedProcess and drop this.
 class CommandResult(CompletedProcess):
   """CompletedProcess variant that still accepts the legacy argument names.

   |cmd|, |output| and |error| are the older spellings of |args|, |stdout| and
   |stderr|.  Either spelling may be used for each field, but not both at once.
   """

   # The linter is confused by the getattr usage above.
   # TODO(vapier): Drop this once we're Python 3-only and we drop getattr.
   # pylint: disable=bad-option-value,super-on-old-class
   def __init__(self, cmd=None, error=None, output=None, returncode=None,
                args=None, stdout=None, stderr=None):
     # Reject ambiguous calls that supply both spellings of the same field.
     if args is not None and cmd is not None:
       raise TypeError('Only specify |args|, not |cmd|')
     if stdout is not None and output is not None:
       raise TypeError('Only specify |stdout|, not |output|')
     if stderr is not None and error is not None:
       raise TypeError('Only specify |stderr|, not |error|')

     # Fall back to the legacy spelling whenever the new one was not given.
     super(CommandResult, self).__init__(
         args=cmd if args is None else args,
         stdout=output if stdout is None else stdout,
         stderr=error if stderr is None else stderr,
         returncode=returncode)

   @property
   def output(self):
     """Backwards compat alias for self.stdout."""
     return self.stdout

   @property
   def error(self):
     """Backwards compat alias for self.stderr."""
     return self.stderr


 class CalledProcessError(subprocess.CalledProcessError):
   """Failure of a command started through run().

   Modeled on subprocess.CalledProcessError, except that captured output is
   exposed as |stdout| rather than |output|.

   Attributes:
     returncode: The exit code of the process.
     cmd: The command that triggered this exception.
     stdout: Captured stdout, if any.
     stderr: Captured stderr, if any.
     msg: Short explanation of the error.
     exception: The underlying Exception if available.
   """

   def __init__(self, returncode, cmd, stdout=None, stderr=None, msg=None,
                exception=None):
     if not (exception is None or isinstance(exception, Exception)):
       raise TypeError('exception must be an exception instance; got %r'
                       % (exception,))

     super(CalledProcessError, self).__init__(returncode, cmd, stdout)
     # The parent class will set |output|, so delete it.
     del self.output
     # TODO(vapier): When we're Python 3-only, delete this assignment as the
     # parent handles it for us.
     self.stdout = stdout
     # TODO(vapier): When we're Python 3-only, move stderr to the init above.
     self.stderr = stderr
     self.msg = msg
     self.exception = exception

   @property
   def cmdstr(self):
     """Return self.cmd shell-quoted for log messages ('' if unset)."""
     return '' if self.cmd is None else CmdToStr(self.cmd)

   def Stringify(self, stdout=True, stderr=True):
     """Build the human readable summary of this error.

     Args:
       stdout: Whether to include captured stdout in the return value.
       stderr: Whether to include captured stderr in the return value.

     Returns:
       A summary string: the return code and command on the first line,
       followed by stderr, stdout and msg (each only when requested and
       non-empty), one per line.
     """
     def _as_text(value):
       # Captured output may be bytes; never let decoding fail the summary.
       if isinstance(value, bytes):
         return value.decode('utf-8', 'replace')
       return value

     lines = [
         u'return code: %s; command: %s' % (self.returncode, self.cmdstr),
     ]
     sections = (
         (stderr, self.stderr),
         (stdout, self.stdout),
         (True, self.msg),
     )
     lines.extend(
         _as_text(text) for wanted, text in sections if wanted and text)
     return u'\n'.join(lines)

   def __str__(self):
     summary = self.Stringify()
     if sys.version_info.major < 3:
       # __str__ needs to return ascii, thus force a conversion to be safe.
       return summary.encode('ascii', 'xmlcharrefreplace')
     return summary

   def __eq__(self, other):
     compared = ('returncode', 'cmd', 'stdout', 'stderr', 'msg', 'exception')
     return (isinstance(other, type(self)) and
             all(getattr(self, name) == getattr(other, name)
                 for name in compared))

   def __ne__(self, other):
     return not self.__eq__(other)


 # TODO(crbug.com/1006587): Migrate users to CompletedProcess and drop this.
 class RunCommandError(CalledProcessError):
   """Error raised by run() when a command fails or cannot be started.

   Attributes:
     args: Tuple of (msg, result, exception).
     msg: Short explanation of the error.
     result: The CommandResult that triggered this error; an empty
       CommandResult when none was supplied.
     exception: The underlying Exception if available.
   """

   def __init__(self, msg, result=None, exception=None):
     # Defaulting to an empty result makes mocking tests easier.
     if result is None:
       result = CommandResult()
     if not isinstance(result, CommandResult):
       raise TypeError('result must be a CommandResult instance; got %r'
                       % (result,))

     self.result = result
     self.args = (msg, result, exception)
     super(RunCommandError, self).__init__(
         returncode=result.returncode, cmd=result.args, stdout=result.stdout,
         stderr=result.stderr, msg=msg, exception=exception)


 class TerminateRunCommandError(RunCommandError):
   """We were signaled to shutdown while running a command.

   Raised by _KillChildProcess when the signal it handled could not be relayed
   to the previously installed handler.

   Client code shouldn't generally know, nor care about this class.  It's
   used internally to suppress retry attempts when we're signaled to die.
   """

 def _KillChildProcess(proc, int_timeout, kill_timeout, cmd, original_handler,
                       signum, frame):
   """Used as a signal handler by run.

   This is internal to run.  No other code should use this.

   Besides being installed (via functools.partial) as a SIGINT/SIGTERM handler,
   run also calls this directly from its finally block with |original_handler|,
   |signum| and |frame| all set to None to make sure the child is gone.

   Args:
     proc: The _Popen instance for the child process.
     int_timeout: Seconds to wait for the child to exit on its own before
       calling terminate() on it.
     kill_timeout: Seconds to wait after terminate() before calling kill().
     cmd: The command that was run; only used to build the CommandResult
       attached to TerminateRunCommandError.
     original_handler: The signal handler that was installed before run
       replaced it; the signal is relayed to it via signals.RelaySignal.
     signum: The signal number being handled (falsy when called directly).
     frame: The stack frame passed to signal handlers.

   Raises:
     TerminateRunCommandError: If signals.RelaySignal reports that the signal
       was not relayed to |original_handler|.
   """
   if signum:
     # If we've been invoked because of a signal, ignore delivery of that signal
     # from this point forward.  The invoking context of _KillChildProcess
     # restores signal delivery to what it was prior; we suppress future delivery
     # till then since this code handles SIGINT/SIGTERM fully including
     # delivering the signal to the original handler on the way out.
     signal.signal(signum, signal.SIG_IGN)

   # Do not trust Popen's returncode alone; we can be invoked from contexts where
   # the Popen instance was created, but no process was generated.
   if proc.returncode is None and proc.pid is not None:
     try:
       # Phase 1: poll every 0.1s, giving the child |int_timeout| seconds to
       # exit on its own.
       while proc.poll_lock_breaker() is None and int_timeout >= 0:
         time.sleep(0.1)
         int_timeout -= 0.1

       # Phase 2: ask it to exit and give it |kill_timeout| seconds to comply.
       # (_Popen.send_signal is a no-op if the child has already been reaped.)
       proc.terminate()
       while proc.poll_lock_breaker() is None and kill_timeout >= 0:
         time.sleep(0.1)
         kill_timeout -= 0.1

       # Phase 3: force it.
       if proc.poll_lock_breaker() is None:
         # Still doesn't want to die.  Too bad, so sad, time to die.
         proc.kill()
     except EnvironmentError as e:
       logging.warning('Ignoring unhandled exception in _KillChildProcess: %s',
                       e)

     # Ensure our child process has been reaped.
     kwargs = {}
     if sys.version_info.major >= 3:
       # ... but don't wait forever.
       kwargs['timeout'] = 60
     proc.wait_lock_breaker(**kwargs)

   if not signals.RelaySignal(original_handler, signum, frame):
     # Mock up our own, matching exit code for signaling.
     cmd_result = CommandResult(args=cmd, returncode=signum << 8)
     raise TerminateRunCommandError('Received signal %i' % signum, cmd_result)


 class _Popen(subprocess.Popen):
   """subprocess.Popen derivative customized for our usage.

   Specifically, we fix terminate/send_signal/kill to work if the child process
   was a setuid binary; on vanilla kernels, the parent can wax the child
   regardless, on goobuntu this apparently isn't allowed, thus we fall back
   to running `sudo kill` through run().

   While we're overriding send_signal, we also suppress ESRCH being raised
   if the process has exited, and suppress signaling all together if the process
   has knowingly been waitpid'd already.
   """

   # Pylint seems to be buggy with the send_signal signature detection.
   # pylint: disable=arguments-differ
   def send_signal(self, sig):
     """Send |sig| to the child, escalating to `sudo kill` on EPERM.

     Does nothing if the child has already been reaped.

     Args:
       sig: The signal number to deliver.

     Raises:
       EnvironmentError: If os.kill fails with anything other than EPERM or
         ESRCH.
     """
     if self.returncode is not None:
       # The original implementation in Popen would allow signaling whatever
       # process now occupies this pid, even if the Popen object had waitpid'd.
       # Since we can escalate to sudo kill, we do not want to allow that.
       # Fixing this addresses that angle, and makes the API less sucky in the
       # process.
       return

     try:
       os.kill(self.pid, sig)
     except EnvironmentError as e:
       if e.errno == errno.EPERM:
         # Kill returns either 0 (signal delivered), or 1 (signal wasn't
         # delivered).  This isn't particularly informative, but we still
         # need that info to decide what to do, thus the check=False.
         #
         # This module has no sudo_run helper, so invoke sudo directly through
         # run(); '--' keeps sudo from parsing the kill arguments as its own.
         ret = run(['sudo', '--', 'kill', '-%i' % sig, str(self.pid)],
                   print_cmd=False, stdout=True,
                   stderr=True, check=False)
         if ret.returncode == 1:
           # The kill binary doesn't distinguish between permission denied,
           # and the pid is missing.  Denied can only occur under weird
           # grsec/selinux policies.  We ignore that potential and just
           # assume the pid was already dead and try to reap it.
           self.poll()
       elif e.errno == errno.ESRCH:
         # Since we know the process is dead, reap it now.
         # Normally Popen would throw this error- we suppress it since frankly
         # that's a misfeature and we're already overriding this method.
         self.poll()
       else:
         raise

   def _lock_breaker(self, func, *args, **kwargs):
     """Helper to manage the waitpid lock.

     Workaround https://bugs.python.org/issue25960.

     If Popen's internal _waitpid_lock is currently held, it is released for
     the duration of the call and re-acquired afterwards.

     Args:
       func: The callable to invoke (self.poll or self.wait).
       *args: Positional arguments forwarded to |func|.
       **kwargs: Keyword arguments forwarded to |func|.

     Returns:
       Whatever |func| returns.
     """
     # If the lock doesn't exist, or is not locked, call the func directly.
     lock = getattr(self, '_waitpid_lock', None)
     if lock is not None and lock.locked():
       try:
         lock.release()
         return func(*args, **kwargs)
       finally:
         # Restore the lock state we found, unless |func| left it held.
         if not lock.locked():
           lock.acquire()
     else:
       return func(*args, **kwargs)

   def poll_lock_breaker(self, *args, **kwargs):
     """Wrapper around poll() to break locks if needed."""
     return self._lock_breaker(self.poll, *args, **kwargs)

   def wait_lock_breaker(self, *args, **kwargs):
     """Wrapper around wait() to break locks if needed."""
     return self._lock_breaker(self.wait, *args, **kwargs)


 # pylint: disable=redefined-builtin
 def run(cmd, print_cmd=True, stdout=None, stderr=None,
         cwd=None, input=None,
         shell=False, env=None, extra_env=None, ignore_sigint=False,
         chroot_args=None, debug_level=logging.INFO,
         check=True, int_timeout=1, kill_timeout=1,
         log_output=False, capture_output=False,
         quiet=False, encoding=None, errors=None, dryrun=False,
         **kwargs):
   """Runs a command.

   Args:
     cmd: cmd to run.  Should be input to subprocess.Popen. If a string, shell
       must be true. Otherwise the command must be an array of arguments, and
       shell must be false.
     print_cmd: prints the command before running it.
     stdout: Where to send stdout.  This may be many things to control
       redirection:
         * None is the default; the existing stdout is used.
         * An existing file object (must be opened with mode 'w' or 'wb').
           Note that the file object is closed once the command finishes.
         * A string to a file (will be truncated & opened automatically).
         * subprocess.PIPE to capture & return the output.
         * A boolean to indicate whether to capture the output.
           True will capture the output via a tempfile (good for large output).
         * An open file descriptor (as a positive integer).
     stderr: Where to send stderr.  See |stdout| for possible values, except
       that a string path is not supported here.  This also may be
       subprocess.STDOUT to indicate stderr & stdout should be combined.
     cwd: the working directory to run this cmd.
     input: The data to pipe into this command through stdin.  If a file object
       or file descriptor, stdin will be connected directly to that.
     shell: Controls whether we add a shell as a command interpreter.  See cmd
       since it has to agree as to the type.  String commands are run via
       `/bin/bash -c`.
     env: If non-None, this is the environment for the new process.  Otherwise
       a copy of os.environ is used.  LC_MESSAGES=C is always forced.
     extra_env: If set, this is added to the environment for the new process.
       This dictionary is not used to clear any entries though.
     ignore_sigint: If True, we'll ignore signal.SIGINT before calling the
       child.  This is the desired behavior if we know our child will handle
       Ctrl-C.  If we don't do this, I think we and the child will both get
       Ctrl-C at the same time, which means we'll forcefully kill the child.
     chroot_args: Accepted for API compatibility; not used by this function.
     debug_level: The debug level of run's output.
     check: Whether to raise an exception when command returns a non-zero exit
       code, or return the CommandResult object containing the exit code.
       Note: will still raise an exception if the cmd file does not exist.
     int_timeout: If we're interrupted, how long (in seconds) should we give the
       invoked process to clean up before we send a SIGTERM.
     kill_timeout: If we're interrupted, how long (in seconds) should we give the
       invoked process to shutdown from a SIGTERM before we SIGKILL it.
     log_output: Log the command and its output automatically.
     capture_output: Set |stdout| and |stderr| to True.
     quiet: Set |print_cmd| to False, and |capture_output| to True.
     encoding: Encoding for stdin/stdout/stderr, otherwise bytes are used.  Most
       users want 'utf-8' here for string data.
     errors: How to handle errors when |encoding| is used.  Defaults to 'strict',
       but 'ignore' and 'replace' are common settings.
     dryrun: Only log the command, and return a stub result (`true` is run in
       place of |cmd| so all the options are still processed).
     **kwargs: Only the legacy |log_stdout_to_file| and |append_to_file|
       options are accepted here.

   Returns:
     A CommandResult object.

   Raises:
     RunCommandError: Raised on error (non-zero exit with |check|, or the
       command could not be started).
     ValueError: If |cmd| is empty or disagrees with |shell|.
     TypeError: If |cmd| is not a list/tuple of bytes/strings.
   """
   # Hide this function in pytest tracebacks when a RunCommandError is raised,
   # as seeing the contents of this function when a command fails is not helpful.
   # https://docs.pytest.org/en/latest/example/simple.html#writing-well-integrated-assertion-helpers
   __tracebackhide__ = operator.methodcaller('errisinstance', RunCommandError)

   # Handle backwards compatible settings.
   if 'log_stdout_to_file' in kwargs:
     logging.warning('run: log_stdout_to_file=X is now stdout=X')
     log_stdout_to_file = kwargs.pop('log_stdout_to_file')
     if log_stdout_to_file is not None:
       stdout = log_stdout_to_file
   stdout_file_mode = 'w+b'
   if 'append_to_file' in kwargs:
     # TODO(vapier): Enable this warning once chromite & users migrate.
     # logging.warning('run: append_to_file is now part of stdout')
     if kwargs.pop('append_to_file'):
       stdout_file_mode = 'a+b'
   assert not kwargs, 'Unknown arguments to run: %s' % (list(kwargs),)

   if quiet:
     print_cmd = False
     capture_output = True

   if capture_output:
     # TODO(vapier): Enable this once we migrate all the legacy arguments above.
     # if stdout is not None or stderr is not None:
     #   raise ValueError('capture_output may not be used with stdout & stderr')
     # TODO(vapier): Drop this specialization once we're Python 3-only as we can
     # pass this argument down to Popen directly.
     if stdout is None:
       stdout = True
     if stderr is None:
       stderr = True

   if encoding is not None and errors is None:
     errors = 'strict'

   # Set default for variables.
   popen_stdout = None
   popen_stderr = None
   stdin = None
   cmd_result = CommandResult()

   # Force the timeouts to float; in the process, if they're not convertible,
   # a self-explanatory exception will be thrown.
   int_timeout = float(int_timeout)
   kill_timeout = float(kill_timeout)

   def _get_tempfile():
     try:
       return UnbufferedTemporaryFile()
     except EnvironmentError as e:
       if e.errno != errno.ENOENT:
         raise
       # This can occur if we were pointed at a specific location for our
       # TMP, but that location has since been deleted.  Suppress that issue
       # in this particular case since our usage guarantees deletion,
       # and since this is primarily triggered during hard cgroups shutdown.
       return UnbufferedTemporaryFile(dir='/tmp')

   # Modify defaults based on parameters.
   # Note that tempfiles must be unbuffered else attempts to read
   # what a separate process did to that file can result in a bad
   # view of the file.
   log_stdout_to_file = False
   if isinstance(stdout, str):
     popen_stdout = open(stdout, stdout_file_mode)
     log_stdout_to_file = True
   elif hasattr(stdout, 'fileno'):
     popen_stdout = stdout
     log_stdout_to_file = True
   elif isinstance(stdout, bool):
     # This check must come before isinstance(int) because bool subclasses int.
     if stdout:
       popen_stdout = _get_tempfile()
   elif isinstance(stdout, int):
     popen_stdout = stdout
   elif log_output:
     popen_stdout = _get_tempfile()

   log_stderr_to_file = False
   if hasattr(stderr, 'fileno'):
     popen_stderr = stderr
     log_stderr_to_file = True
   elif isinstance(stderr, bool):
     # This check must come before isinstance(int) because bool subclasses int.
     if stderr:
       popen_stderr = _get_tempfile()
   elif isinstance(stderr, int):
     popen_stderr = stderr
   elif log_output:
     popen_stderr = _get_tempfile()

   # If subprocesses have direct access to stdout or stderr, they can bypass
   # our buffers, so we need to flush to ensure that output is not interleaved.
   if popen_stdout is None or popen_stderr is None:
     sys.stdout.flush()
     sys.stderr.flush()

   # If input is a string, we'll create a pipe and send it through that.
   # Otherwise we assume it's a file object that can be read from directly.
   if isinstance(input, (str, bytes)):
     stdin = subprocess.PIPE
     # Allow people to always pass in bytes or strings regardless of encoding.
     # Our Popen usage takes care of converting everything to bytes first.
     #
     # Linter can't see that we're using |input| as a var, not a builtin.
     # pylint: disable=input-builtin
     if encoding and isinstance(input, str):
       input = input.encode(encoding, errors)
     elif not encoding and isinstance(input, str):
       input = input.encode('utf-8')
   elif input is not None:
     stdin = input
     input = None

   # Sanity check the command.  This helps when RunCommand is deep in the call
   # chain, but the command itself was constructed along the way.
   if isinstance(cmd, (str, bytes)):
     if not shell:
       raise ValueError('Cannot run a string command without a shell')
     cmd = ['/bin/bash', '-c', cmd]
     shell = False
   elif shell:
     raise ValueError('Cannot run an array command with a shell')
   elif not cmd:
     raise ValueError('Missing command to run')
   elif not isinstance(cmd, (list, tuple)):
     raise TypeError('cmd must be list or tuple, not %s: %r' %
                     (type(cmd), repr(cmd)))
   elif not all(isinstance(x, (bytes, str)) for x in cmd):
     raise TypeError('All command elements must be bytes/strings: %r' % (cmd,))

   # Work on a copy so the caller's mapping (or os.environ) is never modified.
   env = env.copy() if env is not None else os.environ.copy()
   # Looking at localized error messages may be unexpectedly dangerous, so we
   # set LC_MESSAGES=C to make sure the output of commands is safe to inspect.
   env['LC_MESSAGES'] = 'C'
   env.update(extra_env if extra_env else {})

   # Print out the command before running.
   if dryrun or print_cmd or log_output:
     log = ''
     if dryrun:
       log += '(dryrun) '
     log += 'run: %s' % (CmdToStr(cmd),)
     if cwd:
       log += ' in %s' % (cwd,)
     logging.log(debug_level, '%s', log)

   cmd_result.args = cmd

   # We still want to run something in dryrun mode so we process all the
   # options and return appropriate values (e.g. output with correct encoding).
   popen_cmd = ['true'] if dryrun else cmd

   proc = None
   # NOTE(review): |use_signals| is hard-coded to False and never changed in
   # this function, so the SIGINT/SIGTERM handler installation below (and with
   # it |ignore_sigint|) is currently inert.  The original intent appears to be
   # a check that the signals module is usable -- confirm before enabling.
   use_signals = False
   try:
     proc = _Popen(popen_cmd, cwd=cwd, stdin=stdin, stdout=popen_stdout,
                   stderr=popen_stderr, shell=False, env=env,
                   close_fds=True)

     if use_signals:
       if ignore_sigint:
         old_sigint = signal.signal(signal.SIGINT, signal.SIG_IGN)
       else:
         old_sigint = signal.getsignal(signal.SIGINT)
         signal.signal(signal.SIGINT,
                       functools.partial(_KillChildProcess, proc, int_timeout,
                                         kill_timeout, cmd, old_sigint))

       old_sigterm = signal.getsignal(signal.SIGTERM)
       signal.signal(signal.SIGTERM,
                     functools.partial(_KillChildProcess, proc, int_timeout,
                                       kill_timeout, cmd, old_sigterm))

     try:
       (cmd_result.stdout, cmd_result.stderr) = proc.communicate(input)
     finally:
       if use_signals:
         signal.signal(signal.SIGINT, old_sigint)
         signal.signal(signal.SIGTERM, old_sigterm)

       # Tempfile captures are read back and closed; files we were handed (or
       # opened by path) are only closed.
       if (popen_stdout and not isinstance(popen_stdout, int) and
           not log_stdout_to_file):
         popen_stdout.seek(0)
         cmd_result.stdout = popen_stdout.read()
         popen_stdout.close()
       elif log_stdout_to_file:
         popen_stdout.close()

       if (popen_stderr and not isinstance(popen_stderr, int) and
           not log_stderr_to_file):
         popen_stderr.seek(0)
         cmd_result.stderr = popen_stderr.read()
         popen_stderr.close()

     cmd_result.returncode = proc.returncode

     # The try/finally block is a bit hairy.  We normally want the logged
     # output to be what gets passed back up.  But if there's a decode error,
     # we don't want it to break logging entirely.  If the output had a lot of
     # newlines, always logging it as bytes wouldn't be human readable.
     try:
       if encoding:
         if cmd_result.stdout is not None:
           cmd_result.stdout = cmd_result.stdout.decode(encoding, errors)
         if cmd_result.stderr is not None:
           cmd_result.stderr = cmd_result.stderr.decode(encoding, errors)
     finally:
       if log_output:
         if cmd_result.stdout:
           logging.log(debug_level, '(stdout):\n%s', cmd_result.stdout)
         if cmd_result.stderr:
           logging.log(debug_level, '(stderr):\n%s', cmd_result.stderr)

     if check and proc.returncode:
       # Wrap the operands in 1-tuples: |cmd| may itself be a tuple, which the
       # % operator would otherwise try to unpack as multiple arguments.
       msg = 'cmd=%s' % (cmd,)
       if cwd:
         msg += ', cwd=%s' % (cwd,)
       if extra_env:
         msg += ', extra env=%s' % (extra_env,)
       raise RunCommandError(msg, cmd_result)
   except OSError as e:
     estr = str(e)
     if e.errno == errno.EACCES:
       estr += '; does the program need `chmod a+x`?'
     raise RunCommandError(estr, CommandResult(args=cmd), exception=e)
   finally:
     if proc is not None:
       # Ensure the process is dead.
       _KillChildProcess(proc, int_timeout, kill_timeout, cmd, None, None, None)

   # We might capture stdout/stderr for internal reasons (like logging), but we
   # don't want to let it leak back out to the callers.  They only get output if
   # they explicitly requested it.
   if stdout is None:
     cmd_result.stdout = None
   if stderr is None:
     cmd_result.stderr = None

   return cmd_result
 # pylint: enable=redefined-builtin


 # Convenience run methods.

 COMP_NONE = 0
 COMP_GZIP = 1
 COMP_BZIP2 = 2
 COMP_XZ = 3


 def FindCompressor(compression, chroot=None):
   """Locate a compressor utility program (possibly in a chroot).

   Since we compress/decompress a lot, make it easy to locate a
   suitable utility program in a variety of locations.  The parallel
   implementation is searched for first (in the chroot, then in /), and only
   then the single threaded one (again chroot first).  Each root is probed at
   bin/ and usr/bin/.

   Args:
     compression: The type of compression desired (one of the COMP_*
       constants).
     chroot: Optional path to a chroot to search.

   Returns:
     Path to a compressor.  'cat' for COMP_NONE, and the bare name of the
     single threaded program if nothing was found on disk.

   Raises:
     ValueError: If compression is unknown.
   """
   if compression == COMP_GZIP:
     std = 'gzip'
     para = 'pigz'
   elif compression == COMP_BZIP2:
     std = 'bzip2'
     para = 'pbzip2'
   elif compression == COMP_XZ:
     std = 'xz'
     para = 'pixz'
   elif compression == COMP_NONE:
     return 'cat'
   else:
     # Format the message ourselves; ValueError does not interpolate its args.
     raise ValueError('unknown compression %s' % (compression,))

   roots = []
   if chroot:
     roots.append(chroot)
   roots.append('/')

   for prog in [para, std]:
     for root in roots:
       for subdir in ['', 'usr']:
         path = os.path.join(root, subdir, 'bin', prog)
         if os.path.exists(path):
           return path

   # Nothing found on disk; let the caller's $PATH lookup resolve it.
   return std


 class TarballError(RunCommandError):
   """Error while running tar.

   Raised by CreateTarball once tar has failed for good.  We may run tar
   multiple times because of "soft" errors.  The result is from the last run
   instance.
   """


 def CreateTarball(
     tarball_path, cwd, compression=COMP_BZIP2, chroot=None,
     inputs=None, timeout=300, extra_args=None, **kwargs):
   """Create a tarball by executing 'tar' on the commandline.

   Args:
     tarball_path: The path of the tar file to generate.  May instead be an
       integer file descriptor, in which case tar's stdout is sent to it.
     cwd: The directory to run the tar command.
     compression: The type of compression desired.  See the FindCompressor
       function for details.
     chroot: See FindCompressor().
     inputs: A list of files or directories to add to the tarball.  If unset,
       defaults to ".".  Lists longer than _THRESHOLD_TO_USE_T_FOR_TAR are fed
       to tar NUL-separated on stdin rather than on the command line.
     timeout: The number of seconds to wait on soft failure (exit status 1)
       before the first retry; the second retry waits twice as long.
     extra_args: A list of extra args to pass to "tar".
     kwargs: Any run options/overrides to use.  |check|, |input| and |stdout|
       are always overridden by this function.

   Returns:
     The cmd_result object returned by the run invocation.

   Raises:
     TarballError: if the tar command failed, possibly after retry.
     RunCommandError: if run could not execute tar at all.
   """
   file_list = ['.'] if inputs is None else inputs
   tar_extra = [] if extra_args is None else extra_args
   kwargs.setdefault('debug_level', logging.INFO)

   # Use a separate compression program - this enables parallel compression
   # in some cases.
   # Using 'raw' hole detection instead of 'seek' isn't that much slower, but
   # will provide much better results when archiving large disk images that are
   # not fully sparse.
   compressor = FindCompressor(compression, chroot=chroot)
   cmd = ['tar'] + tar_extra
   cmd += ['--sparse', '--hole-detection=raw',
           '--use-compress-program', compressor, '-c']

   # An integer |tarball_path| is a file descriptor to receive the archive.
   to_fd = isinstance(tarball_path, int)
   cmd += ['--to-stdout'] if to_fd else ['-f', tarball_path]
   tar_stdout = tarball_path if to_fd else None

   # Long file lists are passed NUL-separated on stdin.
   if len(file_list) > _THRESHOLD_TO_USE_T_FOR_TAR:
     cmd += ['--null', '-T', '/dev/stdin']
     tar_stdin = b'\0'.join(name.encode('utf-8') for name in file_list)
   else:
     cmd += list(file_list)
     tar_stdin = None

   run_kwargs = dict(kwargs, check=False, input=tar_stdin, stdout=tar_stdout)

   # If tar fails with status 1, retry twice. Once after timeout seconds and
   # again 2*timeout seconds after that.
   for try_count in range(3):
     try:
       result = run(cmd, cwd=cwd, **run_kwargs)
     except RunCommandError as err:
       # There are cases where run never executes the command (cannot find tar,
       # cannot execute tar, such as when cwd does not exist). Although the run
       # command will show low-level problems, we also want to log the context
       # of what CreateTarball was trying to do.
       logging.error('CreateTarball unable to run tar for %s in %s. cmd={%s}',
                     tarball_path, cwd, cmd)
       raise err

     if result.returncode == 0:
       return result

     out_of_retries = try_count >= 2
     if out_of_retries or result.returncode != 1:
       # Only status 1 is treated as a retryable "soft" failure.  The error
       # will carry the low-level tar output, so log the context of the tar
       # command (tarball_path file, current working dir).
       logging.error('CreateTarball failed creating %s in %s. cmd={%s}',
                     tarball_path, cwd, cmd)
       raise TarballError('CreateTarball', result)

     assert result.returncode == 1
     # Back off for timeout, then 2*timeout, before the next attempt.
     time.sleep(timeout * (try_count + 1))
     logging.warning('CreateTarball: tar: source modification time changed '
                     '(see crbug.com/547055), retrying')


 def UnbufferedTemporaryFile(**kwargs):
   """Create an unbuffered tempfile.TemporaryFile on Python 2 or 3.

   The keyword that disables buffering is named |bufsize| on Python 2 and
   |buffering| on Python 3; this picks the right one.

   Args:
     **kwargs: Passed through to tempfile.TemporaryFile.  Must not contain
       |bufsize| or |buffering|.

   Returns:
     The open temporary file object.
   """
   for reserved in ('bufsize', 'buffering'):
     assert reserved not in kwargs

   unbuffered_arg = 'bufsize' if sys.version_info.major < 3 else 'buffering'
   kwargs[unbuffered_arg] = 0
   return tempfile.TemporaryFile(**kwargs)