llvm_tools: save intermediate state even if a step fails
When uploading multiple crashes, a network error may happen (or anything
else, really).
This isn't going to save us from SIGKILLs or power-offs (it's impossible
to save us 100% from either of those, since there's always going to be a
race between writing the JSON file and submitting the test-case), but it
should keep us from submitting duplicate crash reports in most cases.
This also has us sort results from `gsutil ls`, since that may not
always print things in a deterministic order.
BUG=None
TEST=Ran the script; ^C'ed it in the middle.
Change-Id: I9695d83db6fb8161dc6fce16b13980c8eacf219d
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/2412482
Reviewed-by: Jian Cai <jiancai@google.com>
Tested-by: George Burgess <gbiv@chromium.org>
diff --git a/llvm_tools/bisect_clang_crashes.py b/llvm_tools/bisect_clang_crashes.py
index 9ea1035..4d47c4c 100755
--- a/llvm_tools/bisect_clang_crashes.py
+++ b/llvm_tools/bisect_clang_crashes.py
@@ -26,7 +26,7 @@
results = subprocess.check_output(['gsutil', 'ls', pattern],
stderr=subprocess.STDOUT,
encoding='utf-8')
- return [l.strip() for l in results.splitlines()]
+ return sorted(l.strip() for l in results.splitlines())
def get_crash_reproducers(working_dir):
@@ -43,6 +43,21 @@
return results
+def submit_crash_to_forcey(forcey: str, temporary_directory: str,
+ buildbucket_id: str, url: str) -> None:
+ dest_dir = os.path.join(temporary_directory, buildbucket_id)
+ dest_file = os.path.join(dest_dir, os.path.basename(url))
+ logging.info('Downloading and submitting %r...', url)
+ subprocess.check_output(['gsutil', 'cp', url, dest_file],
+ stderr=subprocess.STDOUT)
+ subprocess.check_output(['tar', '-xJf', dest_file], cwd=dest_dir)
+ for src, script in get_crash_reproducers(dest_dir):
+ subprocess.check_output([
+ forcey, 'reduce', '-wait=false', '-note',
+ '%s:%s' % (url, src), '-sh_file', script, '-src_file', src
+ ])
+
+
def main(argv):
chroot.VerifyOutsideChroot()
logging.basicConfig(
@@ -81,33 +96,41 @@
logging.info('Successfully loaded %d previously-submitted crashes',
len(visited))
- for url in urls:
- splits = url.split('/')
- buildbucket_id = splits[-2]
- # Skip the builds that has been processed
- if buildbucket_id in visited:
- continue
- visited[buildbucket_id] = '%s' % url
- dest_dir = os.path.join(temporary_directory, buildbucket_id)
- dest_file = os.path.join(dest_dir, splits[-1])
- logging.info('Downloading and submitting %r...', url)
- subprocess.check_output(['gsutil', 'cp', url, dest_file],
- stderr=subprocess.STDOUT)
- subprocess.check_output(['tar', '-xJf', dest_file], cwd=dest_dir)
-
- for src, script in get_crash_reproducers(dest_dir):
- subprocess.check_output(
- [
- opts.forcey, 'reduce', '-wait=false', '-note',
- '%s:%s' % (url, src), '-sh_file', script, '-src_file', src
- ],
- encoding='utf-8',
+ try:
+ for url in urls:
+ splits = url.split('/')
+ buildbucket_id = splits[-2]
+      # Skip the builds that have been processed
+ if buildbucket_id in visited:
+ continue
+ submit_crash_to_forcey(
+ forcey=opts.forcey,
+ temporary_directory=temporary_directory,
+ buildbucket_id=buildbucket_id,
+ url=url,
)
+ visited[buildbucket_id] = url
- tmp_state_file = state_file + '.tmp'
- with open(tmp_state_file, 'w', encoding='utf-8') as f:
- json.dump(visited, f, indent=2)
- os.rename(tmp_state_file, state_file)
+ exception_in_flight = False
+ except:
+ exception_in_flight = True
+ raise
+ finally:
+ if exception_in_flight:
+ # This is best-effort. If the machine powers off or similar, we'll just
+ # resubmit the same crashes, which is suboptimal, but otherwise
+ # acceptable.
+ logging.error('Something went wrong; attempting to save our work...')
+ else:
+ logging.info('Persisting state...')
+
+ tmp_state_file = state_file + '.tmp'
+ with open(tmp_state_file, 'w', encoding='utf-8') as f:
+ json.dump(visited, f, indent=2)
+ os.rename(tmp_state_file, state_file)
+
+ logging.info('State successfully persisted')
+
if opts.cleanup:
shutil.rmtree(temporary_directory)