Introduce cos_kdump_debugger in cos/tools

BUG=b/183723779

Change-Id: I810be221ff7b643c417fe9bfde0e48ac299eefd3
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/18190
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
Tested-by: Arnav Kansal <rnv@google.com>
diff --git a/cloudbuild.yaml b/cloudbuild.yaml
index db54dbc..29afca5 100644
--- a/cloudbuild.yaml
+++ b/cloudbuild.yaml
@@ -36,4 +36,4 @@
     fi
     echo "All builds succeeded."
 
-timeout: 3600s
+timeout: 7200s
diff --git a/src/cmd/cos_kdump_debugger/Dockerfile b/src/cmd/cos_kdump_debugger/Dockerfile
new file mode 100644
index 0000000..62c8fe9
--- /dev/null
+++ b/src/cmd/cos_kdump_debugger/Dockerfile
@@ -0,0 +1,37 @@
+# Pinned so the package names below stay installable when `latest` moves.
+# NOTE(review): 20.04 is assumed to be the release this was tested on; confirm.
+FROM ubuntu:20.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+# This is required because of https://github.com/crash-utility/crash/issues/74
+RUN apt-get update && BUILD_DEPS="binutils binutils-dev bison gawk \
+    flex zlib1g-dev libncurses5-dev liblzo2-dev libsnappy-dev \
+    make wget git" \
+    && apt-get install -y --ignore-missing $BUILD_DEPS \
+    && git clone -b 7.2.9 https://github.com/crash-utility/crash.git /tmp/crash \
+    && git config --global user.email "foo@bar.local" \
+    && git config --global user.name "Foo Bar" \
+    && git -C /tmp/crash cherry-pick a5531b2 71e159c \
+    && make -C /tmp/crash extensions lzo snappy \
+    && mkdir -p /usr/lib/crash/extensions/ \
+    && cp -f /tmp/crash/crash /usr/bin \
+    && cp -f /tmp/crash/extensions/*.so /usr/lib/crash/extensions/ \
+    && strip /usr/bin/crash /usr/lib/crash/extensions/*.so \
+    && rm -rf /tmp/crash \
+    && apt-get purge -y --auto-remove $BUILD_DEPS
+# Update and install in one layer so a cached layer never supplies stale lists.
+RUN apt-get update && apt-get install -y -qq --no-install-recommends gdb xz-utils python curl vim \
+      liblzo2-2 libsnappy1v5 zlib1g libncurses5 binutils apt-transport-https ca-certificates gnupg
+
+# Installs gsutil, following:
+# https://cloud.google.com/storage/docs/gsutil_install#sdk-install
+RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] \
+    https://packages.cloud.google.com/apt cloud-sdk main" | \
+    tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
+    curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | \
+    apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - \
+    && apt-get update && apt-get -y -qq install google-cloud-sdk && apt-get clean
+
+COPY ./kdump_debugger.sh /kdump_debugger.sh
+
+ENTRYPOINT ["/kdump_debugger.sh"]
diff --git a/src/cmd/cos_kdump_debugger/README.md b/src/cmd/cos_kdump_debugger/README.md
new file mode 100644
index 0000000..fd24080
--- /dev/null
+++ b/src/cmd/cos_kdump_debugger/README.md
@@ -0,0 +1,66 @@
+# COS Kdump Debugger container used in COS kdump
+
+## Overview
+
+This is a Docker image used for COS kdump debugging. It includes the
+scripts and dependencies needed to inspect a kernel crash dump from an
+instance running a COS image.
+
+## Building COS Kdump Debugger Docker image
+
+### Locally (for testing the image)
+
+For testing, you can simply build and test this docker container locally on your
+workstation:
+
+```shell
+  $ docker build -t cos-kdump-debugger:dev .
+```
+
+## Using cos-kdump-debugger Image
+
+This container requires two mount points from the host:
+
+1.  It needs the gcloud credential from your host machine. (Not required if
+    using a GCE VM, because GCE VMs get their credential from metadata server.)
+2.  It needs a sosreport tarball containing the kdump.
+
+Let's say you have the sosreport tarball located at
+`~/debug/sosreport-kdump-next-20190130220549.tar.xz`, then you should run:
+
+```shell
+  $ docker run --rm -it \
+      -v ~/.config/gcloud:/root/.config/gcloud \
+      -v ~/debug/:/sos \
+      cos-kdump-debugger:dev \
+      --sos sosreport-kdump-next-20190130220549.tar.xz
+```
+
+If you only want to use the container to run some simple crash commands
+(useful for testing), you can run:
+
+```shell
+  $ docker run --rm -it \
+      -v ~/.config/gcloud:/root/.config/gcloud \
+      -v ~/debug/:/sos \
+      cos-kdump-debugger:dev \
+      --crash_command "bt" \
+      --sos sosreport-kdump-next-20190130220549.tar.xz
+```
+
+The `kdump_debugger.sh` script requires the matching vmlinux for the COS kernel
+being used. By default, the script will fetch the vmlinux for you by looking in
+the GCS bucket `gs://cos-tools`. You can also explicitly set the path to the
+matching vmlinux in GCS by setting the `--vmlinux_path` flag. 
+
+For example:
+
+```shell
+  $ docker run --rm -it \
+      -v ~/.config/gcloud:/root/.config/gcloud \
+      -v ~/debug/:/sos \
+      cos-kdump-debugger:dev \
+      --crash_command "bt" \
+      --sos sosreport-kdump-next-20190130220549.tar.xz \
+      --vmlinux_path gs://<path-to-vmlinux-in-storage-bucket>
+```
diff --git a/src/cmd/cos_kdump_debugger/cloudbuild.yaml b/src/cmd/cos_kdump_debugger/cloudbuild.yaml
new file mode 100644
index 0000000..be1ceb3
--- /dev/null
+++ b/src/cmd/cos_kdump_debugger/cloudbuild.yaml
@@ -0,0 +1,26 @@
+options:
+  env:
+  - 'DOCKER_CLI_EXPERIMENTAL=enabled'
+  machineType: 'N1_HIGHCPU_8'
+steps:
+# Build the cos_kdump_debugger image
+# This step is needed to add a new entry to /proc/sys/fs/binfmt_misc. Docker
+# uses QEMU user emulation to run arm64 programs on x86 hosts. A QEMU
+# interpreter needs to be added to /proc/sys/fs/binfmt_misc to run arm64
+# programs.
+- name: 'gcr.io/cloud-builders/docker'
+  args: ['run', '--privileged', 'linuxkit/binfmt:v0.7']
+# The default builder (which appears to be the Docker daemon that implements
+# the old, familiar `docker build` behavior) doesn't support the --platform
+# flag, so we need to create a new builder.
+- name: 'gcr.io/cloud-builders/docker'
+  args: ['buildx', 'create', '--name', 'builder']
+- name: 'gcr.io/cloud-builders/docker'
+  args: ['buildx', 'use', 'builder']
+# Images produced in this way do not appear in the Docker image registry shown
+# by `docker images`, at least by default. We use the --push flag to push the
+# image after building it, because a subsequent `docker push` won't find the
+# image locally.
+- name: 'gcr.io/cloud-builders/docker'
+  args: ['buildx', 'build', '--platform', 'linux/amd64,linux/arm64', '-f', 'src/cmd/cos_kdump_debugger/Dockerfile', '-t', 'gcr.io/${_OUTPUT_PROJECT}/cos_kdump_debugger:latest', '-t', 'gcr.io/${_OUTPUT_PROJECT}/cos_kdump_debugger:${TAG_NAME}', '--push', 'src/cmd/cos_kdump_debugger']
+timeout: 7200s
diff --git a/src/cmd/cos_kdump_debugger/kdump_debugger.sh b/src/cmd/cos_kdump_debugger/kdump_debugger.sh
new file mode 100755
index 0000000..1ab57ff
--- /dev/null
+++ b/src/cmd/cos_kdump_debugger/kdump_debugger.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+# This script sets up the environment for kernel crash dump debugging.
+
+set -eu  # Exit on a failing command and on use of an unset variable.
+set -o pipefail  # A pipeline fails if any of its stages fails.
+
+
+readonly PROG_NAME=$(basename "$0")
+readonly REPORT_TARBALL_DIR="/sos"  # Directory setup() reads the tarball from.
+readonly REPORT_DIR="sos"  # Name setup() gives the extracted sosreport directory.
+CRASH_COMMAND=""  # Filled in by -c/--crash_command; empty means none was given.
+VMLINUX_PATH=""  # Filled in by -p/--vmlinux_path; empty means use gs://cos-tools.
+
+#
+# usage <exit_code>
+#
+# Print the help text to stdout, then exit with <exit_code> (0 if omitted).
+#
+usage() {
+        local exit_code="${1:-0}"
+
+        cat <<EOF
+Usage:
+        ${PROG_NAME} -s <sos> [-c <crash_command>] [-p <vmlinux_path>]
+        -s, --sos            The filename of the sosreport tarball. Required.
+        -c, --crash_command  The crash command to run. Will open a shell for interactive debugging after running this command.
+        -p, --vmlinux_path   The gsutil path to the matching vmlinux. If not set, the vmlinux will be fetched from gs://cos-tools.
+
+Examples:
+        $ ${PROG_NAME} --sos sosreport-kdump-next-20190130220549.tar.xz
+        $ ${PROG_NAME} -s sosreport-kdump-next-20190130220549.tar.xz -c bt -p gs://cos-tools/15978.0.0/vmlinux
+
+Note:
+        Expecting the sosreport tarball to be located at ${REPORT_TARBALL_DIR}.
+EOF
+        exit "${exit_code}"
+}
+
+
+#
+# parse_args <args...>
+#
+# Parse flags into CRASH_COMMAND, REPORT_TARBALL and VMLINUX_PATH; print usage
+# and exit 1 on an unknown option or when the required --sos is missing.
+#
+parse_args() {
+  local args arg
+
+  # getopt(1) reorders the flags and shell-quotes them for the eval below.
+  args=$(getopt --options "c:s:p:" \
+          --longoptions "crash_command:,sos:,vmlinux_path:" -- "$@") || usage 1
+  eval set -- "${args}"
+  while :; do
+    arg="${1}"
+    shift
+    case "${arg}" in
+    -c|--crash_command) CRASH_COMMAND="${1}"; shift ;;
+    -s|--sos) REPORT_TARBALL="${1}"; shift ;;
+    -p|--vmlinux_path) VMLINUX_PATH="${1}"; shift ;;
+    --) break ;;
+    *) echo "internal error parsing arguments!"; usage 1 ;;
+    esac
+  done
+  # setup() needs the tarball name; without this check a missing --sos only
+  # surfaces there as an "unbound variable" abort under `set -u`.
+  [[ -n "${REPORT_TARBALL:-}" ]] || { echo "--sos is required" >&2; usage 1; }
+}
+
+
+#
+# setup
+#
+# Setup the work directory for debugging, containing:
+# 1. Uncompressed sosreport tarball.
+# 2. vmlinux for the COS kernel.
+# 3. Latest kernel crash dump.
+#
+setup() {
+  local sosreport_dir
+  local buildnumber
+
+  # We want all data get automatically removed after container exit.
+  WORKDIR=$(mktemp -d)
+  cd "${WORKDIR}"
+
+  # Uncompress sosreport tarball, and rename the folder into $REPORT_TARBALL
+  echo "Uncompressing sosreport tarball."
+  tar -xf "${REPORT_TARBALL_DIR}/${REPORT_TARBALL}"
+  sosreport_dir=$(find ./sosreport-* -maxdepth 0 | sed -n 1p)
+  mv "${sosreport_dir}" "${REPORT_DIR}"
+
+  # Copy the latest kernel crash dump to the workdir for easy access
+  cp "${REPORT_DIR}/var/kdump" .
+
+  # If VMLINUX_PATH is not set, fetch the vmlinux from gs://cos-tools
+  if [[ -z "${VMLINUX_PATH}" ]]; then
+    echo "--vmlinux_path not set, fetching vmlinux from gs://cos-tools"
+    buildnumber=$(grep BUILD_ID "${REPORT_DIR}/etc/os-release" | cut -d "=" -f 2)
+    VMLINUX_PATH="gs://cos-tools/${buildnumber}/vmlinux"
+  fi
+
+  echo "Downloading ${VMLINUX_PATH}."
+  gsutil -q cp "${VMLINUX_PATH}" .
+}
+
+
+#
+# run
+#
+# This steps will start inspect the kernel crash dump:
+# If CRASH_COMMAND is set, this step will execute the given command.
+# Otherwise, this step will open a shell for interactive debugging.
+#
+run() {
+  if [[ -n "${CRASH_COMMAND}" ]]; then
+    echo "Running crash command: ${CRASH_COMMAND}."
+    echo -e "${CRASH_COMMAND}\nq" | crash vmlinux kdump
+  fi
+
+  # Give control to user, if no debugging command is specified.
+  exec /bin/bash
+}
+
+
+main() {
+  parse_args "$@"  # Fills CRASH_COMMAND, REPORT_TARBALL and VMLINUX_PATH.
+
+  setup  # Creates the work directory, cds into it and populates it.
+
+  run  # Runs CRASH_COMMAND through crash if set, then execs /bin/bash.
+}
+
+main "$@"  # Forward the script's own arguments unchanged.