blob: e8255b81c90bf0e87ddc448f0a210bcb757d3bca [file] [log] [blame]
#!/usr/bin/env python3
# Copyright 2021 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Packs a tarball from a git source tree."""
# pylint: disable=cros-logging-import
import argparse
import errno
import logging
import os
from pathlib import Path
import shutil
import subprocess
import sys
import tempfile
from typing import List, Iterable
def enumerate_untracked_files(in_dir: Path) -> List[str]:
    """Returns a list of files untracked by git in |in_dir|.

    Args:
        in_dir: Directory inside a git checkout to run `git ls-files` in.

    Returns:
        The untracked paths as strings, as reported by git when run from
        |in_dir|, spelled exactly as they exist on disk.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    # Without -z, git C-quotes names containing unusual characters (double
    # quotes, control characters and, with the default core.quotePath,
    # non-ASCII bytes), so the reported names would not match the files on
    # disk. With -z, names are emitted verbatim and terminated by NUL.
    output = subprocess.check_output(
        ['git', 'ls-files', '--others', '-z'],
        encoding='utf-8',
        cwd=in_dir,
    )
    # Split on NUL only (no strip()), so that names with leading or trailing
    # whitespace, or embedded newlines, survive intact.
    return [name for name in output.split('\0') if name]
def get_git_sha(in_dir: Path) -> str:
    """Returns the SHA that HEAD resolves to, as seen by git in |in_dir|."""
    rev_parse = subprocess.run(
        ['git', 'rev-parse', 'HEAD'],
        check=True,
        cwd=in_dir,
        encoding='utf-8',
        stdout=subprocess.PIPE,
    )
    # git terminates the SHA with a newline; callers want the bare value.
    return rev_parse.stdout.strip()
def copy_git_tree_ignoring(from_path: Path, to_dir: Path,
                           ignore: Iterable[str]):
    """Copies the contents of |from_path| into |to_dir|, minus |ignore|.

    This also leaves out all .git directories and other .git* files, and
    removes any directories below |to_dir| that end up empty. |to_dir| itself
    is always left in place, even if nothing was copied into it.

    Args:
        from_path: Directory whose contents should be copied.
        to_dir: Destination directory. It is created here (along with any
            missing parents) and must not already exist.
        ignore: Files or directories to delete from the copy. All paths
            should be relative to |from_path|. Entries that are not present
            in the copy are skipped.

    Raises:
        FileExistsError: If |to_dir| already exists.
        subprocess.CalledProcessError: If rsync exits with a non-zero status.
        OSError: If removing an ignored path or an empty directory fails.
    """
    # shutil.copytree is incredibly slow (it took many minutes to copy my Rust
    # tree from SSD -> memfs; |rsync| took 20secs). Prefer to use something
    # faster, then go clean up afterward. This is theoretically problematic in
    # some cases, but for the specific task of "copy some sources from this
    # git directory into a tarball," seems to work fine.

    # If we just hand rsync the directory, it'll copy it into
    # |to_dir / from_path.name|; we want the contents to go into |to_dir|
    # directly.
    to_dir.mkdir(parents=True)
    rsync_command = [
        'rsync',
        '-a',
        '--exclude=.git*',
    ]
    rsync_command += (str(x) for x in from_path.iterdir() if x.name != '.git')
    rsync_command.append(str(to_dir))
    subprocess.check_call(rsync_command)

    for ignored in (to_dir / x for x in ignore):
        # exists() and is_dir() read through symlinks, so symlinks (dangling
        # or not) have to be detected first and unlinked rather than followed.
        if ignored.is_symlink():
            os.unlink(ignored)
        elif ignored.is_dir():
            shutil.rmtree(ignored)
        elif ignored.exists():
            os.unlink(ignored)

    # Now, since this is all meant to be version controlled by git, empty
    # directories shouldn't exist. Clean those. Walking bottom-up lets a
    # parent be removed once all of its (empty) children are gone.
    top = os.fspath(to_dir)
    for root_dir, _, _ in os.walk(to_dir, topdown=False):
        # The caller asked for |to_dir| to exist; never remove it, even if the
        # copy turned out to be completely empty.
        if root_dir == top:
            continue
        try:
            os.rmdir(root_dir)
        except OSError as e:
            # POSIX allows rmdir() to report a non-empty directory as either
            # ENOTEMPTY or EEXIST; both just mean "keep this directory".
            if e.errno not in (errno.ENOTEMPTY, errno.EEXIST):
                raise
        else:
            logging.debug('Removing empty directory %s', root_dir)
def get_parser() -> argparse.ArgumentParser:
    """Creates a parser for commandline args.

    Returns:
        A parser accepting --debug, --git-dir (required), --output-prefix
        (required) and --post-copy-command. The two path options are parsed
        into pathlib.Path objects.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--debug', action='store_true')
    parser.add_argument(
        '--git-dir',
        required=True,
        type=Path,
        help="Path to the root of your git directory. If applicable, don't "
        'forget to sync submodules!')
    # The example below abbreviates the SHA to 12 characters so that it
    # matches the file name this script actually produces.
    parser.add_argument(
        '--output-prefix',
        required=True,
        type=Path,
        help='Prefix of the path at which to place the tarball. This has a '
        'few components. For example, given a prefix of |/foo/bar/baz/rust| '
        'and packing a repo at SHA abcdef1234567890, the output '
        "tarball's name will be /foo/bar/baz/rust-abcdef123456-src.tar.xz, "
        'which will contain a single directory, rust-abcdef123456-src/, '
        'which contains the desired bits of |--git-dir|.')
    parser.add_argument(
        '--post-copy-command',
        help='Command to run after copying sources to a tempdir, in the root '
        'of said tempdir. Passed directly to `bash -c`.')
    return parser
def main(argv: List[str]) -> None:
    """Packs the sources of a git checkout into a .tar.xz file.

    The tree at --git-dir is copied (minus untracked and .git* files) into a
    temporary directory, the SHA of HEAD is recorded in a `packed_git_sha`
    file at the top of the copy, the optional --post-copy-command is run
    there, and the result is archived as
    `<output-prefix>-<first 12 chars of SHA>-src.tar.xz`.

    Args:
        argv: Commandline arguments, without the program name.

    Raises:
        RuntimeError: If the copied tree already contains `packed_git_sha`.
        subprocess.CalledProcessError: If any external command (git, rsync,
            bash, tar, xz) exits with a non-zero status.
    """
    parser = get_parser()
    opts = parser.parse_args(argv)

    logging.basicConfig(level=logging.DEBUG if opts.debug else logging.INFO)

    full_output_prefix = opts.output_prefix
    output_dir = full_output_prefix.parent.resolve()
    output_prefix = full_output_prefix.name
    post_copy_command = opts.post_copy_command
    git_dir = opts.git_dir.resolve()

    head_sha = get_git_sha(git_dir)
    # 12 is arbitrary, but should be enough for anyone(tm).
    sha_shorthand = head_sha[:12]
    output_file_name_no_ext = f'{output_prefix}-{sha_shorthand}-src'
    output_file = output_dir / f'{output_file_name_no_ext}.tar.xz'
    logging.info('Will pack %s at SHA %s into %s', git_dir, head_sha,
                 output_file)

    # Make sure the destination exists before doing any of the expensive
    # work; otherwise a missing directory is only noticed by the final move,
    # after the copy and compression have already been paid for.
    output_dir.mkdir(parents=True, exist_ok=True)

    logging.info('Enumerating untracked files...')
    untracked_files = enumerate_untracked_files(git_dir)

    with tempfile.TemporaryDirectory(prefix='pack_git_tarball_') as temp_dir:
        tar_dir = Path(temp_dir) / output_file_name_no_ext

        logging.info('Copying git tree to %s...', tar_dir)
        copy_git_tree_ignoring(
            git_dir,
            tar_dir,
            ignore=untracked_files,
        )

        # Stash the SHA for HEAD here, so it's easier for people to figure out
        # where the sources came from.
        sha_file = tar_dir / 'packed_git_sha'
        if sha_file.exists():
            raise RuntimeError(
                f"SHA file at {sha_file} already exists; it shouldn't")
        sha_file.write_text(head_sha, encoding='utf-8')

        if post_copy_command:
            logging.info('Running %r', post_copy_command)
            # Since we say bash in the `help` string, require the use of bash
            # here instead of using shell=True.
            subprocess.check_call(['bash', '-c', post_copy_command],
                                  cwd=tar_dir)

        logging.info('Tarring and compressing result')
        tar_file = f'{tar_dir}.tar'
        # Run tar from |temp_dir| with a relative member name, so the archive
        # holds a single top-level directory rather than the tempdir path.
        subprocess.check_call(
            [
                'tar',
                'cf',
                tar_file,
                output_file_name_no_ext,
            ],
            cwd=temp_dir,
        )
        # xz leaves its output next to the input, as |tar_file|.xz.
        subprocess.check_call(['xz', '-T0', '-9', tar_file])
        # Move the result out before the tempdir is cleaned up.
        shutil.move(f'{tar_file}.xz', output_file)

    logging.info('Result is available at %s', output_file)
if __name__ == '__main__':
    # Hand main()'s return value to sys.exit() so that it becomes the
    # process's exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)