compiler_wrapper: add autocrash logic
As outlined in the attached bug, we want to be able to crash the
compiler based on a handful of heuristics. Crashing Clang helps get us
self-contained reproducers fairly easily.
This CL provides (off-by-default) functionality to do the above. The
expectation is that a SWE will hack at it to make it work as they need
to.
BUG=b:236736327
TEST=Installed the new wrapper; observed autocrashes.
Change-Id: I76ec753ec37baa5e9b6dab92668081fa7c605725
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/third_party/toolchain-utils/+/3714885
Reviewed-by: Manoj Gupta <manojgupta@chromium.org>
Reviewed-by: Jordan Abrahams-Whitehead <ajordanr@google.com>
Commit-Queue: George Burgess <gbiv@chromium.org>
Tested-by: George Burgess <gbiv@chromium.org>
diff --git a/compiler_wrapper/README.md b/compiler_wrapper/README.md
index 0228e27..bb63798 100644
--- a/compiler_wrapper/README.md
+++ b/compiler_wrapper/README.md
@@ -73,3 +73,31 @@
`/usr/bin/clang_host_wrapper`
- Gcc host wrapper:
`/usr/x86_64-pc-linux-gnu/gcc-bin/10.2.0/host_wrapper`
+
+## Using the compiler wrapper to crash arbitrary compilations
+
+When Clang crashes, its output can be extremely useful. Often, it will provide
+the user with a stack trace, and messages like:
+
+```
+clang-15: unable to execute command: Illegal instruction
+clang-15: note: diagnostic msg: /tmp/clang_crash_diagnostics/foo-5420d2.c
+clang-15: note: diagnostic msg: /tmp/clang_crash_diagnostics/foo-5420d2.sh
+```
+
+The artifacts at `/tmp/clang_crash_diagnostics/foo-*` form a full,
+self-contained reproducer of the inputs that caused the crash in question.
+Often, such a reproducer is very valuable to have even for cases where a crash
+_doesn't_ happen (e.g., maybe Clang is now emitting an error where it used to
+not do so, and we want to bisect upstream LLVM with that info). Normally,
+collecting and crafting such a reproducer is a multi-step process, and can be
+error-prone; compile commands may rely on env vars, they may be done within
+`chroot`s, they may rely on being executed in a particular directory, they may
+rely on intermediate state, etc.
+
+Because of the usefulness of these crash reports, our wrapper supports crashing
+Clang even on files that ordinarily don't cause Clang to crash. For various
+reasons (b/236736327), this support is off by default; enabling it requires
+editing `crash_builds.go`, then rebuilding and redeploying the wrapper. Even so,
+it can be a valuable tool for devs who want a self-contained reproducer without
+manually recreating the environment in which a particular build was performed.
diff --git a/compiler_wrapper/ccache_flag.go b/compiler_wrapper/ccache_flag.go
index 7d19da8..2c966fd 100644
--- a/compiler_wrapper/ccache_flag.go
+++ b/compiler_wrapper/ccache_flag.go
@@ -4,6 +4,11 @@
package main
+// isInConfigureStage reports whether the EBUILD_PHASE environment variable is set to
+// "configure".
+func isInConfigureStage(env env) bool {
+	if phase, ok := env.getenv("EBUILD_PHASE"); ok {
+		return phase == "configure"
+	}
+	return false
+}
+
func processCCacheFlag(builder *commandBuilder) {
// We should be able to share the objects across compilers as
// the pre-processed output will differ. This allows boards
@@ -22,7 +27,7 @@
// Disable ccache during portage's src_configure phase. Using ccache here is generally a
// waste of time, since these files are very small. Experimentally, this speeds up
// configuring by ~13%.
- if val, present := builder.env.getenv("EBUILD_PHASE"); present && val == "configure" {
+ if isInConfigureStage(builder.env) {
useCCache = false
}
diff --git a/compiler_wrapper/compiler_wrapper.go b/compiler_wrapper/compiler_wrapper.go
index 1fe3eb7..28d2247 100644
--- a/compiler_wrapper/compiler_wrapper.go
+++ b/compiler_wrapper/compiler_wrapper.go
@@ -201,6 +201,12 @@
}
}
+ // If builds matching some heuristic should crash, crash them. Since this is purely a
+ // debugging tool, don't offer any nice features with it (e.g., rusage, ...).
+ if shouldUseCrashBuildsHeuristic && mainBuilder.target.compilerType == clangType {
+ return buildWithAutocrash(env, cfg, compilerCmd)
+ }
+
bisectStage := getBisectStage(env)
if rusageEnabled {
diff --git a/compiler_wrapper/crash_builds.go b/compiler_wrapper/crash_builds.go
new file mode 100644
index 0000000..147fb36
--- /dev/null
+++ b/compiler_wrapper/crash_builds.go
@@ -0,0 +1,154 @@
+// Copyright 2022 The ChromiumOS Authors.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "regexp"
+)
+
+// ** HEY YOU, PERSON READING THIS! **
+//
+// Are you a dev who wants to make this work locally? Awesome! Please note that this **only** works
+// for Clang. If that's OK, here's a checklist for you:
+// [ ] Set `shouldUseCrashBuildsHeuristic = true` below.
+// [ ] If you want this heuristic to operate during `src_configure` (rare), also set
+// `allowAutoCrashInConfigure` to true.
+// [ ] Modify `shouldAutocrashPostExec` to return `true` when the compiler's output/flags match what
+// you want to crash on, and `false` otherwise.
+// [ ] Run `./install_compiler_wrapper.sh` to install the updated wrapper.
+// [ ] Run whatever command reproduces the error.
+//
+// If you need to make changes to your heuristic, repeat the above steps starting at
+// `./install_compiler_wrapper.sh` until things seem to do what you want.
+// Compile-time switches for the autocrash feature. Both are off by default; changing either
+// requires rebuilding and reinstalling the wrapper.
+const (
+ // Set this to true to use autocrashing logic.
+ shouldUseCrashBuildsHeuristic = false
+ // Set this to true to allow `shouldAutocrashPostExec` to check+crash configure steps.
+ // When false, compilations run while EBUILD_PHASE is "configure" are never autocrashed.
+ allowAutoCrashInConfigure = false
+)
+
+// shouldAutocrashPostExec returns true if we should automatically crash the compiler. This is
+// called after the compiler is run. If it returns true, we'll re-execute the compiler with the bit
+// of extra code necessary to crash it.
+//
+// originalCmd is the compiler invocation that was just run, and runInfo carries that run's exit
+// code and captured stdout/stderr. env and cfg are passed through so that a heuristic can consult
+// them if it needs to.
+//
+// As shipped, this is a template: every example branch is disabled, so calling it panics until a
+// developer fills in real logic.
+func shouldAutocrashPostExec(env env, cfg *config, originalCmd *command, runInfo compilerExecInfo) bool {
+ // ** TODO, DEAR READER: ** Fill this in. Below are a few `if false {` blocks that should
+ // work for common use-cases. You're encouraged to change them to `if true {` if they suit
+ // your needs.
+
+ // Return true if `error: some error message` is contained in the run's stderr.
+ if false {
+ return bytes.Contains(runInfo.stderr, []byte("error: some error message"))
+ }
+
+ // Return true if `foo.c:${line_number}: error: some error message` appears in the run's
+ // stderr. Otherwise, return false.
+ if false {
+ r := regexp.MustCompile(`foo\.c:\d+: error: some error message`)
+ return r.Match(runInfo.stderr)
+ }
+
+ // Return true if there's a `-fjust-give-up` flag in the compiler's invocation.
+ if false {
+ for _, flag := range originalCmd.Args {
+ if flag == "-fjust-give-up" {
+ return true
+ }
+ }
+
+ return false
+ }
+
+ // Reaching this point means none of the examples above were enabled.
+ panic("Please fill in `shouldAutocrashPostExec` with meaningful logic.")
+}
+
+// compilerExecInfo describes the outcome of a single compiler run, for use by autocrash
+// heuristics.
+type compilerExecInfo struct {
+ // exitCode is the exit code the compiler run produced.
+ exitCode int
+ // stdout and stderr hold everything the compiler wrote to the respective stream.
+ stdout, stderr []byte
+}
+
+// ** Below here are implementation details. If all you want is autocrashing behavior, you don't
+// need to keep reading. **
+const (
+ // autocrashProgramLine is the source text fed to Clang on stdin to make it crash. The
+ // leading newline presumably keeps the pragma on its own line when it is appended to
+ // source text that was already read from stdin.
+ autocrashProgramLine = "\n#pragma clang __debug parser_crash"
+)
+
+// buildWithAutocrashPredicates bundles the knobs that control buildWithAutocrashImpl, so tests
+// can substitute their own values for the package-level defaults.
+type buildWithAutocrashPredicates struct {
+ // allowInConfigure, when false, disables autocrashing while in the configure stage.
+ allowInConfigure bool
+ // shouldAutocrash is called after the compiler has run and decides whether to crash it.
+ shouldAutocrash func(env, *config, *command, compilerExecInfo) bool
+}
+
+// buildWithAutocrash runs originalCmd through the autocrash machinery, using the package-level
+// defaults (allowAutoCrashInConfigure and shouldAutocrashPostExec) as its predicates.
+func buildWithAutocrash(env env, cfg *config, originalCmd *command) (exitCode int, err error) {
+	defaultPreds := buildWithAutocrashPredicates{
+		allowInConfigure: allowAutoCrashInConfigure,
+		shouldAutocrash:  shouldAutocrashPostExec,
+	}
+	return buildWithAutocrashImpl(env, cfg, originalCmd, defaultPreds)
+}
+
+// buildWithAutocrashImpl runs originalCmd with its stdout and stderr captured, then consults
+// preds to decide whether that run should be turned into a compiler crash.
+//
+// If no crash is requested, or if we're in the configure stage (per isInConfigureStage) and
+// preds.allowInConfigure is false, the captured output is forwarded to env's stdout/stderr and
+// the first run's exit code is returned. Otherwise the compiler is run a second time with
+// autocrashProgramLine appended to its stdin, writing directly to env's stdout/stderr, and that
+// run's result is returned.
+//
+// In the crash case, originalCmd.Args is rewritten in place when the original invocation was not
+// already reading a source file from stdin.
+func buildWithAutocrashImpl(env env, cfg *config, originalCmd *command, preds buildWithAutocrashPredicates) (exitCode int, err error) {
+	stdinBuffer := (*bytes.Buffer)(nil)
+	subprocStdin := io.Reader(nil)
+	invocationUsesStdinAsAFile := needStdinTee(originalCmd)
+	if invocationUsesStdinAsAFile {
+		stdinBuffer = &bytes.Buffer{}
+		if _, err := stdinBuffer.ReadFrom(env.stdin()); err != nil {
+			return 0, wrapErrorwithSourceLocf(err, "prebuffering stdin")
+		}
+		// Give the first run its own copy of the input. Reading from a bytes.Buffer consumes
+		// it, so handing over stdinBuffer itself would leave nothing but the crash pragma for
+		// the rerun, and the resulting reproducer would be missing the original source. A
+		// *bytes.Buffer is used (rather than a bytes.Reader) so the reader's concrete type is
+		// unchanged.
+		subprocStdin = bytes.NewBuffer(append([]byte(nil), stdinBuffer.Bytes()...))
+	} else {
+		subprocStdin = env.stdin()
+	}
+
+	// Capture the output rather than streaming it: the predicate needs to inspect it, and if we
+	// do crash, only the rerun's output should be shown.
+	stdoutBuffer := &bytes.Buffer{}
+	stderrBuffer := &bytes.Buffer{}
+	exitCode, err = wrapSubprocessErrorWithSourceLoc(originalCmd,
+		env.run(originalCmd, subprocStdin, stdoutBuffer, stderrBuffer))
+	if err != nil {
+		return 0, err
+	}
+
+	// The predicate is only consulted when autocrashing is permitted in the current stage.
+	autocrashAllowed := preds.allowInConfigure || !isInConfigureStage(env)
+	crash := autocrashAllowed && preds.shouldAutocrash(env, cfg, originalCmd, compilerExecInfo{
+		exitCode: exitCode,
+		stdout:   stdoutBuffer.Bytes(),
+		stderr:   stderrBuffer.Bytes(),
+	})
+	if !crash {
+		// Replay the captured output so this looks like an ordinary run. Write errors are not
+		// checked here.
+		stdoutBuffer.WriteTo(env.stdout())
+		stderrBuffer.WriteTo(env.stderr())
+		return exitCode, nil
+	}
+
+	fmt.Fprintln(env.stderr(), "** Autocrash requested; crashing the compiler...**")
+
+	// `stdinBuffer == nil` implies that `-` wasn't used as a flag. If `-` isn't used as a
+	// flag, clang will ignore stdin. We want to write our #pragma to stdin, since we can't
+	// reasonably modify the files we're currently compiling.
+	if stdinBuffer == nil {
+		// Room for every original arg plus the three appended below.
+		newArgs := make([]string, 0, len(originalCmd.Args)+3)
+		// Clang can't handle `-o ${target}` when handed multiple input files. Since
+		// we expect to crash before emitting anything, remove `-o ${file}` entirely.
+		for i, e := 0, len(originalCmd.Args); i < e; i++ {
+			a := originalCmd.Args[i]
+			if a == "-o" {
+				// Skip the -o here, then skip the following arg in the loop header.
+				i++
+				continue
+			}
+			newArgs = append(newArgs, a)
+		}
+		// And now add args that instruct clang to read from stdin. In this case, we also
+		// need to tell Clang what language the file is written in; C is as good as anything
+		// for this.
+		originalCmd.Args = append(newArgs, "-x", "c", "-")
+		stdinBuffer = &bytes.Buffer{}
+	}
+
+	stdinBuffer.WriteString(autocrashProgramLine)
+	return wrapSubprocessErrorWithSourceLoc(originalCmd,
+		env.run(originalCmd, stdinBuffer, env.stdout(), env.stderr()))
+}
diff --git a/compiler_wrapper/crash_builds_test.go b/compiler_wrapper/crash_builds_test.go
new file mode 100644
index 0000000..a4b2b99
--- /dev/null
+++ b/compiler_wrapper/crash_builds_test.go
@@ -0,0 +1,260 @@
+// Copyright 2022 The ChromiumOS Authors.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package main
+
+import (
+ "bytes"
+ "io"
+ "strings"
+ "testing"
+)
+
+// TestBuildWithAutoCrashDoesNothingIfCrashIsNotRequested checks that the compiler is run exactly
+// once when the autocrash predicate declines to crash.
+func TestBuildWithAutoCrashDoesNothingIfCrashIsNotRequested(t *testing.T) {
+	withTestContext(t, func(ctx *testContext) {
+		declineCrash := func(env, *config, *command, compilerExecInfo) bool { return false }
+		preds := buildWithAutocrashPredicates{
+			allowInConfigure: true,
+			shouldAutocrash:  declineCrash,
+		}
+		cmd := ctx.newCommand(clangX86_64, mainCc)
+
+		exitCode, err := buildWithAutocrashImpl(ctx, ctx.cfg, cmd, preds)
+		if err != nil {
+			t.Fatalf("unexpectedly failed with %v", err)
+		}
+		ctx.must(exitCode)
+
+		if got := ctx.cmdCount; got != 1 {
+			t.Errorf("expected 1 call. Got: %d", got)
+		}
+	})
+}
+
+// TestBuildWithAutoCrashSkipsAutocrashLogicIfInConfigureAndConfigureChecksDisabled checks that,
+// with EBUILD_PHASE=configure and allowInConfigure unset, the compiler is run only once even
+// though the predicate always asks for a crash.
+func TestBuildWithAutoCrashSkipsAutocrashLogicIfInConfigureAndConfigureChecksDisabled(t *testing.T) {
+	withTestContext(t, func(ctx *testContext) {
+		requestCrash := func(env, *config, *command, compilerExecInfo) bool { return true }
+		preds := buildWithAutocrashPredicates{
+			allowInConfigure: false,
+			shouldAutocrash:  requestCrash,
+		}
+
+		ctx.env = append(ctx.env, "EBUILD_PHASE=configure")
+		cmd := ctx.newCommand(clangX86_64, mainCc)
+
+		exitCode, err := buildWithAutocrashImpl(ctx, ctx.cfg, cmd, preds)
+		if err != nil {
+			t.Fatalf("unexpectedly failed with %v", err)
+		}
+		ctx.must(exitCode)
+
+		if got := ctx.cmdCount; got != 1 {
+			t.Errorf("expected 1 call. Got: %d", got)
+		}
+	})
+}
+
+// TestBuildWithAutoCrashRerunsIfPredicateRequestsCrash checks that a crash request leads to a
+// second compiler run, that only the second run is passed `-`, and that the second run's stdin
+// contains the autocrash line.
+func TestBuildWithAutoCrashRerunsIfPredicateRequestsCrash(t *testing.T) {
+	withTestContext(t, func(ctx *testContext) {
+		preds := buildWithAutocrashPredicates{
+			allowInConfigure: true,
+			shouldAutocrash: func(env, *config, *command, compilerExecInfo) bool {
+				return true
+			},
+		}
+
+		containsDash := func(args []string) bool {
+			for _, a := range args {
+				if a == "-" {
+					return true
+				}
+			}
+			return false
+		}
+
+		ctx.cmdMock = func(cmd *command, stdin io.Reader, stdout io.Writer, stderr io.Writer) error {
+			sawDash := containsDash(cmd.Args)
+
+			switch n := ctx.cmdCount; {
+			case n == 1:
+				if sawDash {
+					t.Error("Got `-` on command 1; didn't want that.")
+				}
+			case n == 2 && !sawDash:
+				t.Error("Didn't get `-` on command 2; wanted that.")
+			case n == 2:
+				// Only inspect stdin once we know the rerun was told to read from it.
+				input := stdin.(*bytes.Buffer)
+				if s := input.String(); !strings.Contains(s, autocrashProgramLine) {
+					t.Errorf("Input was %q; expected %q to be in it", s, autocrashProgramLine)
+				}
+			default:
+				t.Fatalf("Unexpected command count: %d", n)
+				panic("Unreachable")
+			}
+			return nil
+		}
+
+		cmd := ctx.newCommand(clangX86_64, mainCc)
+		exitCode, err := buildWithAutocrashImpl(ctx, ctx.cfg, cmd, preds)
+		if err != nil {
+			t.Fatalf("unexpectedly failed with %v", err)
+		}
+		ctx.must(exitCode)
+
+		if got := ctx.cmdCount; got != 2 {
+			t.Errorf("expected 2 calls. Got: %d", got)
+		}
+	})
+}
+
+// TestBuildWithAutoCrashAddsDashAndWritesToStdinIfInputFileIsNotStdin checks that, when the
+// original command compiles a regular file, the rerun gains exactly one `-` argument and receives
+// the autocrash line on stdin.
+func TestBuildWithAutoCrashAddsDashAndWritesToStdinIfInputFileIsNotStdin(t *testing.T) {
+	withTestContext(t, func(ctx *testContext) {
+		preds := buildWithAutocrashPredicates{
+			allowInConfigure: true,
+			shouldAutocrash: func(env, *config, *command, compilerExecInfo) bool {
+				return true
+			},
+		}
+
+		ctx.cmdMock = func(cmd *command, stdin io.Reader, stdout io.Writer, stderr io.Writer) error {
+			dashCount := 0
+			for i := range cmd.Args {
+				if cmd.Args[i] == "-" {
+					dashCount++
+				}
+			}
+
+			if ctx.cmdCount == 1 {
+				if dashCount != 0 {
+					t.Errorf("Got %d dashes on command 1; want 0", dashCount)
+				}
+				return nil
+			}
+
+			if ctx.cmdCount == 2 {
+				if dashCount != 1 {
+					t.Errorf("Got %d dashes on command 2; want 1", dashCount)
+				}
+
+				fed := stdin.(*bytes.Buffer).String()
+				if !strings.Contains(fed, autocrashProgramLine) {
+					t.Error("Got no autocrash line on the second command; wanted that")
+				}
+				return nil
+			}
+
+			t.Fatalf("Unexpected command count: %d", ctx.cmdCount)
+			panic("Unreachable")
+		}
+
+		cmd := ctx.newCommand(clangX86_64, mainCc)
+		exitCode, err := buildWithAutocrashImpl(ctx, ctx.cfg, cmd, preds)
+		if err != nil {
+			t.Fatalf("unexpectedly failed with %v", err)
+		}
+		ctx.must(exitCode)
+
+		if got := ctx.cmdCount; got != 2 {
+			t.Errorf("expected 2 calls. Got: %d", got)
+		}
+	})
+}
+
+// TestBuildWithAutoCrashAppendsToStdinIfStdinIsTheOnlyInputFile checks that, when the original
+// command already reads its source from stdin, both runs carry exactly one `-` argument and only
+// the rerun's stdin contains the autocrash line.
+func TestBuildWithAutoCrashAppendsToStdinIfStdinIsTheOnlyInputFile(t *testing.T) {
+	withTestContext(t, func(ctx *testContext) {
+		preds := buildWithAutocrashPredicates{
+			allowInConfigure: true,
+			shouldAutocrash: func(env, *config, *command, compilerExecInfo) bool {
+				return true
+			},
+		}
+
+		ctx.cmdMock = func(cmd *command, stdin io.Reader, stdout io.Writer, stderr io.Writer) error {
+			dashCount := 0
+			for i := range cmd.Args {
+				if cmd.Args[i] == "-" {
+					dashCount++
+				}
+			}
+			if dashCount != 1 {
+				t.Errorf("Got %d dashes on command %d (args: %#v); want 1", dashCount, ctx.cmdCount, cmd.Args)
+			}
+
+			sawCrashLine := strings.Contains(stdin.(*bytes.Buffer).String(), autocrashProgramLine)
+
+			if ctx.cmdCount == 1 {
+				if sawCrashLine {
+					t.Error("Got autocrash line on the first command; did not want that")
+				}
+				return nil
+			}
+
+			if ctx.cmdCount == 2 {
+				if !sawCrashLine {
+					t.Error("Got no autocrash line on the second command; wanted that")
+				}
+				return nil
+			}
+
+			t.Fatalf("Unexpected command count: %d", ctx.cmdCount)
+			panic("Unreachable")
+		}
+
+		cmd := ctx.newCommand(clangX86_64, "-x", "c", "-")
+		exitCode, err := buildWithAutocrashImpl(ctx, ctx.cfg, cmd, preds)
+		if err != nil {
+			t.Fatalf("unexpectedly failed with %v", err)
+		}
+		ctx.must(exitCode)
+
+		if got := ctx.cmdCount; got != 2 {
+			t.Errorf("expected 2 calls. Got: %d", got)
+		}
+	})
+}
+
+// TestCrashBuildFiltersObjectFileOptionOnCrashes checks that `-o <file>` is passed through on the
+// first compiler run and is absent from the crashing rerun.
+func TestCrashBuildFiltersObjectFileOptionOnCrashes(t *testing.T) {
+	withTestContext(t, func(ctx *testContext) {
+		const outputFileName = "/path/to/foo.o"
+
+		preds := buildWithAutocrashPredicates{
+			allowInConfigure: true,
+			shouldAutocrash: func(env, *config, *command, compilerExecInfo) bool {
+				return true
+			},
+		}
+
+		ctx.cmdMock = func(cmd *command, stdin io.Reader, stdout io.Writer, stderr io.Writer) error {
+			outArg, hasOut := "", false
+			for i, a := range cmd.Args {
+				if a != "-o" {
+					continue
+				}
+				// Assume something follows. If not, we'll crash and the
+				// test will fail.
+				outArg, hasOut = cmd.Args[i+1], true
+			}
+
+			if ctx.cmdCount == 1 {
+				if !hasOut || outArg != outputFileName {
+					t.Errorf("Got command args %q; want `-o %q` in them", cmd.Args, outputFileName)
+				}
+				return nil
+			}
+
+			if ctx.cmdCount == 2 {
+				if hasOut {
+					t.Errorf("Got command args %q; want no mention of `-o %q` in them", cmd.Args, outputFileName)
+				}
+				return nil
+			}
+
+			t.Fatalf("Unexpected command count: %d", ctx.cmdCount)
+			panic("Unreachable")
+		}
+
+		cmd := ctx.newCommand(clangX86_64, "-o", outputFileName, mainCc)
+		exitCode, err := buildWithAutocrashImpl(ctx, ctx.cfg, cmd, preds)
+		if err != nil {
+			t.Fatalf("unexpectedly failed with %v", err)
+		}
+		ctx.must(exitCode)
+
+		if got := ctx.cmdCount; got != 2 {
+			t.Errorf("expected 2 calls. Got: %d", got)
+		}
+	})
+}