blob: dee042070932de682e5305b509967e1c3243caae [file] [log] [blame]
# Copyright 2020 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Run xz from PATH with a thread for each core in the system."""
import functools
import getopt
import os
import subprocess
import sys
from chromite.lib import commandline
from chromite.lib import osutils
# Name of an environment variable. When it is present in the environment,
# HasPixz() reports pixz as unavailable, so plain `xz` is used instead.
# NOTE(review): the FOR_TEST_ prefix suggests this is only meant to be set by
# tests -- confirm.
PIXZ_DISABLE_VAR = "FOR_TEST_XZ_AUTO_NO_PIXZ"
# Name of an environment variable. When it is present in the environment,
# ExecDecompressCommand() skips its "small files are decompressed with xz"
# fallback and always hands existing input files to pixz.
XZ_DISABLE_VAR = "FOR_TEST_XZ_AUTO_NO_XZ_DECOMPRESSION"
@functools.lru_cache
def HasPixz():
    """Report whether pixz may be used.

    The result is memoized, so the environment and PATH are consulted only
    on the first call.

    Returns:
        False if PIXZ_DISABLE_VAR is set in the environment; otherwise the
        result of `osutils.Which("pixz")` (presumably the path to pixz, or
        None when it is not on PATH).
    """
    if PIXZ_DISABLE_VAR in os.environ:
        return False
    return osutils.Which("pixz")
def ParsePixzArgs(argv):
    """Split argv into pixz flags and the positional input/output files.

    Args:
        argv: Command-line arguments (without the program name).

    Returns:
        A tuple containing:
        - A flat list of flags (and their values) to pass to pixz.
        - The input file, or None if no positional argument was given.
        - The output file, or None if fewer than two positional arguments
          were given.
    """
    # These are the short options pixz documents. -i and -o are rejected in
    # `main` before we get here, but they are still listed so that getopt
    # knows they take a value.
    parsed_opts, positional = getopt.gnu_getopt(
        args=argv,
        shortopts="dlxi:o:0123456789p:tkch",
    )

    # Pad with None so that the first two slots always exist; any positional
    # arguments beyond the second are ignored.
    file_to_compress, target = (positional + [None, None])[:2]

    raw_flag_list = []
    for opt, value in parsed_opts:
        # Options without an argument come back with an empty-string value.
        raw_flag_list.extend([opt, value] if value else [opt])

    return raw_flag_list, file_to_compress, target
def Execvp(argv) -> None:
    """Exec the program named by argv[0], passing argv as its arguments.

    Args:
        argv: Full argument vector, including the program name at index 0.
    """
    program = argv[0]
    os.execvp(program, argv)
def ExecCompressCommand(stdout, argv) -> None:
    """Execs compression command.

    Uses pixz when HasPixz() says it is available, and `xz` otherwise.

    Args:
        stdout: If truthy, compressed data is written to stdout.
        argv: Remaining command-line arguments: compressor flags plus an
            optional input file and optional output file.
    """
    # It appears that in order for pixz to do parallel decompression,
    # compression needs to be done with pixz. xz itself is only capable of
    # parallel compression.
    if not HasPixz():
        Execvp(["xz", "-zc" if stdout else "-z", *argv])

    cmd = ["pixz"]
    raw_flag_list, compressed_file_name, output_file = ParsePixzArgs(argv)

    # Pixz treats tarballs specially: if it detects that a tarball has been
    # passed to it, it'll also write a small index in the output file that
    # makes operations like listing the tar faster. If this tar autodetection
    # is enabled and pixz is asked to compress an empty file, it breaks. In
    # addition, these indices have no apparent impact on decompression
    # parallelism, so they're not super useful to us. Disable the feature
    # wholesale.
    if "-t" not in raw_flag_list:
        raw_flag_list.append("-t")

    autodelete_input_file = False
    if not compressed_file_name:
        assert not output_file
        compressed_file_name = "/dev/stdin"
        output_file = "/dev/stdout"
    elif stdout:
        output_file = "/dev/stdout"
    elif not output_file:
        # Pixz defaults to a `.pxz` suffix (or `.tpxz` if it's compressing a
        # tar file). We need the suffix to be consistent, so force it here.
        output_file = f"{compressed_file_name}.xz"
        # Since we name the output ourselves, removing the input on success
        # is done here. Honor `-k` (keep input) so that the input survives
        # exactly as it does on the xz fallback path above.
        autodelete_input_file = "-k" not in raw_flag_list

    cmd += raw_flag_list
    # Every branch above leaves both file names set.
    cmd.append(compressed_file_name)
    cmd.append(output_file)

    if not autodelete_input_file:
        Execvp(cmd)

    # We must outlive pixz to remove the input, so run it as a child instead
    # of exec'ing it, and only delete the input if compression succeeded.
    return_code = subprocess.call(cmd)
    if not return_code:
        os.unlink(compressed_file_name)
    sys.exit(return_code)
def ExecXzDecompressCommand(stdout, argv) -> None:
    """Exec `xz` in decompression mode with the given arguments.

    Args:
        stdout: If truthy, pass `-dc` (decompress to stdout) instead of `-d`.
        argv: Additional arguments appended to the xz command line.
    """
    mode_flag = "-dc" if stdout else "-d"
    Execvp(["xz", mode_flag, *argv])
def ExecDecompressCommand(stdout, argv) -> None:
    """Exec the decompressor: pixz when available, `xz` otherwise.

    Args:
        stdout: If truthy, decompressed data is written to stdout.
        argv: Remaining command-line arguments: decompressor flags plus an
            optional input file and optional output file.
    """
    if not HasPixz():
        ExecXzDecompressCommand(stdout, argv)

    pixz_flags, src, dst = ParsePixzArgs(argv)
    # An output file without an input file makes no sense.
    assert src or not dst
    cmd = ["pixz", "-d", *pixz_flags]

    # No input file: hand over to pixz with only the flags.
    if not src:
        Execvp(cmd)

    # HACK: Given a file, pixz seeks around in it looking for the file's
    # index. If the originally-compressed file was empty and no index was
    # requested, lzma reports no entries and pixz errors out, since it does
    # not cope with that.
    #
    # Files both with and without indices must be supported, so `-t` is not
    # an option here: passing it makes pixz fail occasionally on tar files
    # that do carry an index. :(
    #
    # When this was last checked, an empty xz file was 32 bytes, so anything
    # up to 4KB is simply given to xz. pixz archives are xz-compatible
    # anyway.
    small_file_fallback = (
        XZ_DISABLE_VAR not in os.environ
        and os.path.isfile(src)
        and os.path.getsize(src) <= 4 * 1024
    )
    if small_file_fallback:
        ExecXzDecompressCommand(stdout, argv)

    cmd.append(src)
    # Name the output explicitly when stdout was requested, so pixz writes
    # there instead of picking an output file name on its own.
    dst = "/dev/stdout" if stdout else dst
    if dst:
        cmd.append(dst)
    Execvp(cmd)
def GetParser():
    """Build the parser for the flags this script interprets itself.

    Returns:
        A commandline.ArgumentParser that recognizes the decompress flag
        (-d / --decompress / --uncompress) and the -c (stdout) flag.
    """
    parser = commandline.ArgumentParser(description=__doc__)

    decompress_opts = ("-d", "--decompress", "--uncompress")
    parser.add_argument(
        *decompress_opts,
        action="store_true",
        help="Decompress rather than compress.",
    )
    parser.add_argument(
        "-c",
        action="store_true",
        dest="stdout",
        help="Write to standard output and don't delete input files.",
    )
    return parser
def main(argv) -> None:
    """Parse the command line and exec the matching (de)compressor.

    Args:
        argv: Command-line arguments, without the program name.
    """
    parser = GetParser()
    # Parse the arguments we were handed rather than implicitly falling back
    # to sys.argv. Anything the parser does not recognize is kept and
    # forwarded to the underlying tool.
    known_args, argv = parser.parse_known_args(argv)
    if "-i" in argv or "-o" in argv:
        parser.error("It is invalid to use -i or -o with xz_auto")

    if known_args.decompress:
        ExecDecompressCommand(known_args.stdout, argv)
    else:
        ExecCompressCommand(known_args.stdout, argv)