cos_gpu_installer: Add a protobuf for required GPU drivers

This CL added a protobuf to specify the required GPU drivers
for COS customers. It also added a test to read the protobuf
and check whether the GPU drivers exist on GCS bucket.

See go/cos-gpu-multi-version for design details.

BUG=b/155192122

Change-Id: Idbee1ef9d5ca1e45ae65fab2575cc0093906eb59
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/18450
Reviewed-by: Arnav Kansal <rnv@google.com>
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
Tested-by: Ke Wu <mikewu@google.com>
diff --git a/go.mod b/go.mod
index 32fe52f..e9dbc57 100644
--- a/go.mod
+++ b/go.mod
@@ -9,6 +9,7 @@
 	github.com/andygrunwald/go-gerrit v0.0.0-20201231163137-46815e48bfe0
 	github.com/beevik/etree v1.1.0
 	github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
+	github.com/golang/protobuf v1.4.3
 	github.com/google/go-cmp v0.5.4
 	github.com/google/subcommands v1.2.0
 	github.com/gorilla/sessions v1.2.0
@@ -19,6 +20,7 @@
 	github.com/smartystreets/goconvey v1.6.4 // indirect
 	github.com/urfave/cli/v2 v2.2.0
 	go.chromium.org/luci v0.0.0-20200722211809-bab0c30be68b
+	golang.org/x/net v0.0.0-20201224014010-6772e930b67b
 	golang.org/x/oauth2 v0.0.0-20210201163806-010130855d6c
 	golang.org/x/sys v0.0.0-20210503173754-0981d6026fa6
 	google.golang.org/api v0.39.0
diff --git a/src/cmd/cos_gpu_installer/README.md b/src/cmd/cos_gpu_installer/README.md
index 0672495..ac56467 100644
--- a/src/cmd/cos_gpu_installer/README.md
+++ b/src/cmd/cos_gpu_installer/README.md
@@ -34,4 +34,10 @@
 
 ## Test
 
+### Source code
 Currently only unittest is available. Use `go test` to run unittest.
+
+### GPU drivers availability
+The test `test/check_drivers_test.go` is available for checking GPU drivers
+availability. It checks which drivers are available for live COS images.
+Use `test/run_test.sh` to run the test.
diff --git a/src/cmd/cos_gpu_installer/test/check_drivers_test.go b/src/cmd/cos_gpu_installer/test/check_drivers_test.go
new file mode 100644
index 0000000..dc5e82c
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/test/check_drivers_test.go
@@ -0,0 +1,159 @@
+package test
+
+import (
+	"bytes"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"path/filepath"
+	"regexp"
+	"testing"
+	"text/template"
+
+	"cloud.google.com/go/storage"
+	pb_version "cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/versions"
+	"golang.org/x/net/context"
+	"golang.org/x/oauth2/google"
+	"google.golang.org/api/compute/v1"
+	"google.golang.org/api/option"
+	"google.golang.org/protobuf/encoding/prototext"
+)
+
+const (
+	protoConfigPath       = "../versions/config/versions.textproto" // textproto read by readVersionMap; resolved via filepath.Abs
+	driverPublicGcsBucket = "nvidia-drivers-us-public"              // GCS bucket probed for precompiled drivers
+)
+
+// TestCheckDrivers checks that the precompiled drivers listed in the versions textproto exist in the public GCS bucket.
+//
+// Note: The Compute API client authenticates with Application Default Credentials
+//       (the storage client is unauthenticated). If not already done, install the
+//       gcloud CLI from https://cloud.google.com/sdk/ and run
+//       `gcloud auth application-default login`. For more information, see
+//       https://developers.google.com/identity/protocols/application-default-credentials
+func TestCheckDrivers(t *testing.T) {
+	ctx := context.Background()
+
+	httpClient, err := google.DefaultClient(ctx, compute.CloudPlatformScope)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	gce, err := compute.New(httpClient)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	gcs, err := storage.NewClient(ctx, option.WithoutAuthentication())
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer gcs.Close()
+
+	versions, err := readVersionMap()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	for _, e := range versions.GetEntry() {
+		family, driver := e.GetCosImageFamily(), e.GetGpuDriverVersion()
+		t.Run(fmt.Sprintf("[cos=%s,driver=%s]", family, driver), testCheckDriver(family, driver, gce, gcs, ctx))
+	}
+}
+
+// Reads GpuVersionMap from protocal buffer.
+// The definition and data of protobuf should be found at cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/versions
+func readVersionMap() (*pb_version.GpuVersionMap, error) {
+	configPath, err := filepath.Abs(protoConfigPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get abspath of proto config file: %v", err)
+	}
+
+	versionMap := &pb_version.GpuVersionMap{}
+	in, err := ioutil.ReadFile(configPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read proto config file: %v", err)
+	}
+	if err := prototext.Unmarshal(in, versionMap); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal proto config: %v", err)
+	}
+	return versionMap, nil
+}
+
+// getImageFromFamily asks the Compute API which image the family currently points to in project "cos-cloud" and returns its name.
+func getImageFromFamily(imageFamily string, computeService *compute.Service, ctx context.Context) (string, error) {
+	image, err := computeService.Images.GetFromFamily("cos-cloud", imageFamily).Context(ctx).Do()
+	if err != nil {
+		return "", err
+	}
+	return image.Name, nil
+}
+
+// gcsObjectExist reports whether object exists in bucket; any lookup failure other than "not found" is returned as an error.
+func gcsObjectExist(bucket string, object string, storageClient *storage.Client, ctx context.Context) (bool, error) {
+	_, err := storageClient.Bucket(bucket).Object(object).Attrs(ctx)
+	if err == storage.ErrObjectNotExist {
+		return false, nil // a missing object is a valid answer, not a failure
+	}
+	if err != nil {
+		return false, fmt.Errorf("failed to get GCS object %s from bucket %s: %v", object, bucket, err)
+	}
+	return true, nil
+}
+
+// getPrecompiledDriverGcsPath composes the GCS object path of an Nvidia precompiled driver.
+//
+// cosImage is a COS image name of the form "cos-<milestone>-<version>", where an
+// optional "dev-", "beta-" or "stable-" may follow "cos-" and <version> consists of
+// digits and dashes. gpuDriverVersion is a dotted version such as "450.119.04".
+// An error is returned when either argument does not have the expected shape.
+func getPrecompiledDriverGcsPath(cosImage string, gpuDriverVersion string) (string, error) {
+	const temp = `nvidia-cos-project/{{.milestone}}/tesla/{{.driverBranch}}_00/{{.driverVersion}}/NVIDIA-Linux-x86_64-{{.driverVersion}}_{{.cosVersion}}.cos`
+
+	// Both patterns are valid constants, so MustCompile cannot panic here.
+	// Groups: 1 = optional track prefix, 2 = milestone, 3 = rest of the version.
+	cosMatch := regexp.MustCompile(`^cos-(dev-|beta-|stable-)?([\d]+)-([\d-]+)$`).FindStringSubmatch(cosImage)
+	if cosMatch == nil {
+		return "", fmt.Errorf("failed to parse COS image name %s", cosImage)
+	}
+
+	// Group 1 is the leading numeric component, used as the driver branch.
+	driverMatch := regexp.MustCompile(`^([\d]+)\.[\d\.]+$`).FindStringSubmatch(gpuDriverVersion)
+	if driverMatch == nil {
+		return "", fmt.Errorf("failed to parse GPU driver version %s", gpuDriverVersion)
+	}
+
+	m := map[string]string{
+		"milestone":     cosMatch[2],
+		"cosVersion":    cosMatch[2] + "-" + cosMatch[3],
+		"driverBranch":  driverMatch[1],
+		"driverVersion": gpuDriverVersion,
+	}
+	var buffer bytes.Buffer
+	if err := template.Must(template.New("").Parse(temp)).Execute(&buffer, m); err != nil {
+		return "", fmt.Errorf("failed to generate GCS object path from template: %v", err)
+	}
+	return buffer.String(), nil
+}
+
+// testCheckDriver returns a subtest that checks whether the precompiled driver of a [cosImageFamily, gpuDriverVersion] combination exists.
+func testCheckDriver(cosImageFamily string, gpuDriverVersion string, computeService *compute.Service, storageClient *storage.Client, ctx context.Context) func(*testing.T) {
+	return func(t *testing.T) {
+		// Each step feeds the next, so stop at the first failure (Fatalf) rather than report follow-on errors for empty values.
+		imageName, err := getImageFromFamily(cosImageFamily, computeService, ctx)
+		if err != nil {
+			t.Fatalf("failed to get image from image family %s: %v", cosImageFamily, err)
+		}
+		driverObject, err := getPrecompiledDriverGcsPath(imageName, gpuDriverVersion)
+		if err != nil {
+			t.Fatalf("failed to get GCS path of precompiled driver [image=%s,driver=%s]: %v", imageName, gpuDriverVersion, err)
+		}
+		exist, err := gcsObjectExist(driverPublicGcsBucket, driverObject, storageClient, ctx)
+		if err != nil {
+			t.Fatalf("failed to check existence: %v", err)
+		}
+		if !exist {
+			t.Errorf("Precompiled drivers gs://%s/%s doesn't exist", driverPublicGcsBucket, driverObject)
+		}
+	}
+}
diff --git a/src/cmd/cos_gpu_installer/test/run_test.sh b/src/cmd/cos_gpu_installer/test/run_test.sh
new file mode 100755
index 0000000..39d0c0f
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/test/run_test.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+set -o errexit
+set -o pipefail
+set -o nounset
+
+readonly PROG_NAME="$(basename "$0")"  # script name shown in the usage text
+readonly SCRIPT_DIR="$(dirname "$0")"  # directory of this script; base for the ../versions paths
+readonly PROTOC_BIN="protoc"  # protobuf compiler command checked by check_prerequisites
+readonly GCLOUD_BIN="gcloud"  # gcloud CLI command used for the credentials check
+
+usage() {  # Prints the help text below, then exits the script with the status passed as $1.
+  cat <<EOF
+
+${PROG_NAME}: Run check_drivers_test.go to check COS precompiled drivers availability.
+
+Prerequisites:
+    The following commands have to be installed and be able to found in \$PATH:
+        \`gcloud\`: https://cloud.google.com/sdk/
+        \`protoc\`: https://github.com/protocolbuffers/protobuf
+
+    Besides, the test uses Application Default Credentials for authentication. So you need to run \`gcloud auth application-default login\` to set up ADC.
+EOF
+  exit "${1}"  # does not return to the caller
+}
+
+check_command_exist() {  # Succeeds when the command named by $1 can be resolved by the shell.
+    local cmd="$1"  # local so the name does not leak into the global scope
+    command -v "${cmd}" &> /dev/null
+}
+
+check_application_default_credentials() {  # Succeeds when gcloud can print an ADC access token; the token is discarded.
+    "${GCLOUD_BIN}" auth application-default print-access-token > /dev/null
+}
+
+check_prerequisites() {  # Succeeds only when protoc, gcloud and usable ADC are all available.
+    check_command_exist "${PROTOC_BIN}" || return
+    check_command_exist "${GCLOUD_BIN}" || return
+    check_application_default_credentials
+}
+
+compile_proto() {  # Generates Go code for versions.proto next to it, using the protoc verified by check_prerequisites.
+    "${PROTOC_BIN}" -I "${SCRIPT_DIR}"/../versions --go_out=paths=source_relative:"${SCRIPT_DIR}"/../versions "${SCRIPT_DIR}"/../versions/versions.proto
+    trap cleanup_proto EXIT  # remove the generated file again when the script exits
+}
+
+cleanup_proto() {  # Deletes the Go file generated by compile_proto.
+    rm "${SCRIPT_DIR}/../versions/versions.pb.go"
+}
+
+run_test() {  # Runs the driver availability test in verbose mode.
+    go test -v "${SCRIPT_DIR}/check_drivers_test.go"
+}
+
+main() {
+    echo "Checking prerequisites..."
+    set +e
+    if ! check_prerequisites; then
+      usage 1
+    fi
+    set -e
+
+    echo "Compiling protobuf..."
+    compile_proto
+
+    echo "Running test..."
+    run_test
+}
+
+main
\ No newline at end of file
diff --git a/src/cmd/cos_gpu_installer/versions/config/versions.textproto b/src/cmd/cos_gpu_installer/versions/config/versions.textproto
new file mode 100644
index 0000000..bd47d5e
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/versions/config/versions.textproto
@@ -0,0 +1,24 @@
+entry {
+    cos_image_family: "cos-89-lts"
+    gpu_driver_version: "450.119.04"
+}
+
+entry {
+    cos_image_family: "cos-85-lts"
+    gpu_driver_version: "450.119.04"
+}
+
+entry {
+    cos_image_family: "cos-85-lts"
+    gpu_driver_version: "450.80.02"
+}
+
+entry {
+    cos_image_family: "cos-85-lts"
+    gpu_driver_version: "450.51.06"
+}
+
+entry {
+    cos_image_family: "cos-81-lts"
+    gpu_driver_version: "418.67"
+}
\ No newline at end of file
diff --git a/src/cmd/cos_gpu_installer/versions/versions.proto b/src/cmd/cos_gpu_installer/versions/versions.proto
new file mode 100644
index 0000000..1d6eb1e
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/versions/versions.proto
@@ -0,0 +1,20 @@
+syntax = "proto3";
+
+package versions;
+
+option go_package = "cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/versions";
+
+// GpuVersionMap is the list of Nvidia precompiled drivers that are required for COS customers.
+message GpuVersionMap {
+  repeated GpuVersionMapEntry entry = 1;
+}
+
+message GpuVersionMapEntry {  // One (COS image family, GPU driver version) pair.
+  // Name of a COS image family, e.g. "cos-89-lts".
+  // Required.
+  optional string cos_image_family = 1;
+
+  // Version of a GPU driver, e.g. "450.119.04".
+  // Required.
+  optional string gpu_driver_version = 2;
+}
\ No newline at end of file