cos_gpu_installer: Add a protobuf for required GPU drivers
This CL added a protobuf to specify the required GPU drivers
for COS customers. It also added a test to read the protobuf
and check whether the GPU drivers exist on GCS bucket.
See go/cos-gpu-multi-version for design details.
BUG=b/155192122
Change-Id: Idbee1ef9d5ca1e45ae65fab2575cc0093906eb59
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/18450
Reviewed-by: Arnav Kansal <rnv@google.com>
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
Tested-by: Ke Wu <mikewu@google.com>
diff --git a/go.mod b/go.mod
index 32fe52f..e9dbc57 100644
--- a/go.mod
+++ b/go.mod
@@ -9,6 +9,7 @@
github.com/andygrunwald/go-gerrit v0.0.0-20201231163137-46815e48bfe0
github.com/beevik/etree v1.1.0
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b
+ github.com/golang/protobuf v1.4.3
github.com/google/go-cmp v0.5.4
github.com/google/subcommands v1.2.0
github.com/gorilla/sessions v1.2.0
@@ -19,6 +20,7 @@
github.com/smartystreets/goconvey v1.6.4 // indirect
github.com/urfave/cli/v2 v2.2.0
go.chromium.org/luci v0.0.0-20200722211809-bab0c30be68b
+ golang.org/x/net v0.0.0-20201224014010-6772e930b67b
golang.org/x/oauth2 v0.0.0-20210201163806-010130855d6c
golang.org/x/sys v0.0.0-20210503173754-0981d6026fa6
google.golang.org/api v0.39.0
diff --git a/src/cmd/cos_gpu_installer/README.md b/src/cmd/cos_gpu_installer/README.md
index 0672495..ac56467 100644
--- a/src/cmd/cos_gpu_installer/README.md
+++ b/src/cmd/cos_gpu_installer/README.md
@@ -34,4 +34,10 @@
## Test
+### Source code
Currently only unittest is available. Use `go test` to run unittest.
+
+### GPU drivers availability
+The test `test/check_drivers_test.go` is available for checking GPU drivers
+availability. It checks which drivers are available for live COS images.
+Use `test/run_test.sh` to run the test.
diff --git a/src/cmd/cos_gpu_installer/test/check_drivers_test.go b/src/cmd/cos_gpu_installer/test/check_drivers_test.go
new file mode 100644
index 0000000..dc5e82c
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/test/check_drivers_test.go
@@ -0,0 +1,159 @@
+package test
+
+import (
+	"bytes"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"path/filepath"
+	"regexp"
+	"testing"
+	"text/template"
+
+	"cloud.google.com/go/storage"
+	pb_version "cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/versions"
+	"golang.org/x/net/context"
+	"golang.org/x/oauth2/google"
+	"google.golang.org/api/compute/v1"
+	"google.golang.org/api/option"
+	"google.golang.org/protobuf/encoding/prototext"
+)
+
+const (
+ // protoConfigPath is the relative path of the text-format proto config that
+ // readVersionMap loads; it is resolved with filepath.Abs before reading.
+ protoConfigPath = "../versions/config/versions.textproto"
+ // driverPublicGcsBucket is the GCS bucket in which testCheckDriver looks for
+ // precompiled drivers.
+ driverPublicGcsBucket = "nvidia-drivers-us-public"
+)
+
+// TestCheckDrivers checks whether the precompiled driver of every
+// [COS image family, GPU driver version] entry in the versions proto config
+// exists in the public GCS bucket. Each entry runs as its own subtest.
+//
+// Note: The test uses Application Default Credentials for authentication.
+// If not already done, install the gcloud CLI from
+// https://cloud.google.com/sdk/ and run
+// `gcloud auth application-default login`. For more information, see
+// https://developers.google.com/identity/protocols/application-default-credentials
+func TestCheckDrivers(t *testing.T) {
+	ctx := context.Background()
+
+	// Setup failures are reported through t.Fatalf instead of log.Fatal:
+	// log.Fatal exits the process, which skips deferred cleanup and bypasses
+	// the testing framework's failure reporting.
+	c, err := google.DefaultClient(ctx, compute.CloudPlatformScope)
+	if err != nil {
+		t.Fatalf("failed to create default client: %v", err)
+	}
+
+	computeService, err := compute.New(c)
+	if err != nil {
+		t.Fatalf("failed to create compute service: %v", err)
+	}
+
+	// The storage client is created without authentication.
+	storageClient, err := storage.NewClient(ctx, option.WithoutAuthentication())
+	if err != nil {
+		t.Fatalf("failed to create storage client: %v", err)
+	}
+	defer func() {
+		// A failed Close does not invalidate the checks; just record it.
+		if err := storageClient.Close(); err != nil {
+			log.Printf("failed to close storage client: %v", err)
+		}
+	}()
+
+	versionsMap, err := readVersionMap()
+	if err != nil {
+		t.Fatalf("failed to read version map: %v", err)
+	}
+
+	for _, entry := range versionsMap.GetEntry() {
+		testCase := fmt.Sprintf("[cos=%s,driver=%s]", entry.GetCosImageFamily(), entry.GetGpuDriverVersion())
+		t.Run(testCase, testCheckDriver(entry.GetCosImageFamily(), entry.GetGpuDriverVersion(), computeService, storageClient, ctx))
+	}
+}
+
+// readVersionMap loads the GpuVersionMap stored as a text-format protocol
+// buffer at protoConfigPath.
+// The definition and data of protobuf should be found at cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/versions
+//
+// It returns an error if the path cannot be made absolute, the file cannot be
+// read, or its content cannot be unmarshaled.
+func readVersionMap() (*pb_version.GpuVersionMap, error) {
+	absPath, err := filepath.Abs(protoConfigPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get abspath of proto config file: %v", err)
+	}
+
+	raw, err := ioutil.ReadFile(absPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read proto config file: %v", err)
+	}
+
+	parsed := new(pb_version.GpuVersionMap)
+	err = prototext.Unmarshal(raw, parsed)
+	if err != nil {
+		return nil, fmt.Errorf("failed to unmarshal proto config: %v", err)
+	}
+	return parsed, nil
+}
+
+// getImageFromFamily looks up imageFamily in the "cos-cloud" project and
+// returns the name of the image the Compute API reports for that family.
+// Any API error is returned unchanged.
+func getImageFromFamily(imageFamily string, computeService *compute.Service, ctx context.Context) (string, error) {
+	call := computeService.Images.GetFromFamily("cos-cloud", imageFamily).Context(ctx)
+	image, err := call.Do()
+	if err != nil {
+		return "", err
+	}
+	return image.Name, nil
+}
+
+// gcsObjectExist reports whether the given object exists in the given GCS bucket.
+//
+// It returns (false, nil) when the object does not exist, and a non-nil error
+// for any other failure to fetch the object's attributes.
+func gcsObjectExist(bucket string, object string, storageClient *storage.Client, ctx context.Context) (bool, error) {
+	_, err := storageClient.Bucket(bucket).Object(object).Attrs(ctx)
+	switch {
+	case err == nil:
+		return true, nil
+	case errors.Is(err, storage.ErrObjectNotExist):
+		// errors.Is also matches when the sentinel error has been wrapped.
+		return false, nil
+	default:
+		return false, fmt.Errorf("failed to get GCS object %s from bucket %s: %w", object, bucket, err)
+	}
+}
+
+// Patterns and template used by getPrecompiledDriverGcsPath. They are
+// constant, so they are built once at package initialization instead of on
+// every call.
+var (
+	// cosImageNameRe matches COS image names such as "cos-89-16108-403-47" or
+	// "cos-stable-85-13310-1209-17". Submatch 2 is the milestone and submatch 3
+	// is the remainder of the version.
+	cosImageNameRe = regexp.MustCompile(`^cos-(dev-|beta-|stable-)?([\d]+)-([\d-]+)$`)
+
+	// gpuDriverVersionRe matches GPU driver versions such as "450.119.04".
+	// Submatch 1 is the driver branch (the leading number).
+	gpuDriverVersionRe = regexp.MustCompile(`^([\d]+)\.[\d\.]+$`)
+
+	// precompiledDriverPathTmpl renders the GCS object path of a precompiled driver.
+	precompiledDriverPathTmpl = template.Must(template.New("").Parse(
+		`nvidia-cos-project/{{.milestone}}/tesla/{{.driverBranch}}_00/{{.driverVersion}}/NVIDIA-Linux-x86_64-{{.driverVersion}}_{{.cosVersion}}.cos`))
+)
+
+// getPrecompiledDriverGcsPath composes the GCS object path of an Nvidia
+// precompiled driver from a COS image name and a GPU driver version.
+//
+// For example, image "cos-89-16108-403-47" with driver "450.119.04" yields
+// "nvidia-cos-project/89/tesla/450_00/450.119.04/NVIDIA-Linux-x86_64-450.119.04_89-16108-403-47.cos".
+//
+// It returns an error if cosImage or gpuDriverVersion does not have the
+// expected format.
+func getPrecompiledDriverGcsPath(cosImage string, gpuDriverVersion string) (string, error) {
+	// A nil submatch slice means the pattern did not match, so one
+	// FindStringSubmatch call both validates and extracts.
+	image := cosImageNameRe.FindStringSubmatch(cosImage)
+	if image == nil {
+		return "", fmt.Errorf("failed to parse COS image name %s", cosImage)
+	}
+	driver := gpuDriverVersionRe.FindStringSubmatch(gpuDriverVersion)
+	if driver == nil {
+		return "", fmt.Errorf("failed to parse GPU driver version %s", gpuDriverVersion)
+	}
+
+	milestone := image[2]
+	fields := map[string]string{
+		"milestone":     milestone,
+		"cosVersion":    milestone + "-" + image[3],
+		"driverBranch":  driver[1],
+		"driverVersion": gpuDriverVersion,
+	}
+	var buffer bytes.Buffer
+	if err := precompiledDriverPathTmpl.Execute(&buffer, fields); err != nil {
+		return "", fmt.Errorf("failed to generate GCS object path from template: %v", err)
+	}
+	return buffer.String(), nil
+}
+
+// testCheckDriver returns a subtest that checks whether the precompiled driver
+// of a [cosImageFamily, gpuDriverVersion] combination exists.
+//
+// The subtest resolves the image family to an image name, derives the driver's
+// GCS object path from that name and the driver version, and fails if the
+// object is missing from driverPublicGcsBucket.
+func testCheckDriver(cosImageFamily string, gpuDriverVersion string, computeService *compute.Service, storageClient *storage.Client, ctx context.Context) func(*testing.T) {
+	return func(t *testing.T) {
+		// Each step consumes the previous step's result, so stop at the first
+		// failure instead of reporting follow-on errors built from empty values.
+		imageName, err := getImageFromFamily(cosImageFamily, computeService, ctx)
+		if err != nil {
+			t.Fatalf("failed to get image from image family %s: %v", cosImageFamily, err)
+		}
+		driverObject, err := getPrecompiledDriverGcsPath(imageName, gpuDriverVersion)
+		if err != nil {
+			t.Fatalf("failed to get GCS path of precompiled driver [image=%s,driver=%s]: %v", imageName, gpuDriverVersion, err)
+		}
+
+		exist, err := gcsObjectExist(driverPublicGcsBucket, driverObject, storageClient, ctx)
+		if err != nil {
+			// A failed lookup says nothing about whether the driver exists.
+			t.Fatalf("failed to check existence: %v", err)
+		}
+		if !exist {
+			t.Errorf("Precompiled drivers gs://%s/%s doesn't exist", driverPublicGcsBucket, driverObject)
+		}
+	}
+}
diff --git a/src/cmd/cos_gpu_installer/test/run_test.sh b/src/cmd/cos_gpu_installer/test/run_test.sh
new file mode 100755
index 0000000..39d0c0f
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/test/run_test.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+set -o errexit
+set -o pipefail
+set -o nounset
+
+readonly PROG_NAME="$(basename "$0")"
+readonly SCRIPT_DIR="$(dirname "$0")"
+readonly PROTOC_BIN="protoc"
+readonly GCLOUD_BIN="gcloud"
+
+# Prints the help text and terminates the script.
+# Arguments:
+#   $1: exit status to terminate with. Defaults to 0 so that calling usage
+#       without an argument does not trip `nounset`.
+usage() {
+  cat <<EOF
+
+${PROG_NAME}: Run check_drivers_test.go to check COS precompiled drivers availability.
+
+Prerequisites:
+  The following commands have to be installed and available in \$PATH:
+    \`gcloud\`: https://cloud.google.com/sdk/
+    \`protoc\`: https://github.com/protocolbuffers/protobuf
+
+  Besides, the test uses Application Default Credentials for authentication. So you need to run \`gcloud auth application-default login\` to set up ADC.
+EOF
+  exit "${1:-0}"
+}
+
+# Checks whether a command can be found by the shell.
+# Arguments:
+#   $1: name of the command to look up.
+# Returns:
+#   0 if the command is found, non-zero otherwise. All output is discarded.
+check_command_exist() {
+  # `local` keeps the variable from leaking into the script's global scope.
+  local cmd="$1"
+  command -v "${cmd}" &> /dev/null
+}
+
+# Checks that Application Default Credentials are usable by asking gcloud to
+# print an access token. The token itself is discarded; gcloud's exit status
+# is the result, and its error output is left visible.
+check_application_default_credentials() {
+  "${GCLOUD_BIN}" auth application-default print-access-token > /dev/null
+}
+
+# Verifies that protoc and gcloud can be found and that Application Default
+# Credentials work. Stops at the first failing check and returns its status.
+check_prerequisites() {
+  check_command_exist "${PROTOC_BIN}" || return
+  check_command_exist "${GCLOUD_BIN}" || return
+  check_application_default_credentials
+}
+
+# Generates the Go code for versions.proto into the versions directory, then
+# registers cleanup_proto to run when the script exits.
+compile_proto() {
+  # Run the protoc that check_prerequisites looked up in $PATH, not a
+  # hard-coded binary under the user's home directory.
+  "${PROTOC_BIN}" -I "${SCRIPT_DIR}"/../versions --go_out=paths=source_relative:"${SCRIPT_DIR}"/../versions "${SCRIPT_DIR}"/../versions/versions.proto
+  trap cleanup_proto EXIT
+}
+
+# Deletes the generated versions.pb.go from the versions directory.
+cleanup_proto() {
+  rm "${SCRIPT_DIR}/../versions/versions.pb.go"
+}
+
+# Runs check_drivers_test.go with verbose output.
+run_test() {
+  go test -v "${SCRIPT_DIR}/check_drivers_test.go"
+}
+
+main() {
+ echo "Checking prerequisites..."
+ set +e
+ if ! check_prerequisites; then
+ usage 1
+ fi
+ set -e
+
+ echo "Compiling protobuf..."
+ compile_proto
+
+ echo "Running test..."
+ run_test
+}
+
+main
\ No newline at end of file
diff --git a/src/cmd/cos_gpu_installer/versions/config/versions.textproto b/src/cmd/cos_gpu_installer/versions/config/versions.textproto
new file mode 100644
index 0000000..bd47d5e
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/versions/config/versions.textproto
@@ -0,0 +1,24 @@
+entry {
+ cos_image_family: "cos-89-lts"
+ gpu_driver_version: "450.119.04"
+}
+
+entry {
+ cos_image_family: "cos-85-lts"
+ gpu_driver_version: "450.119.04"
+}
+
+entry {
+ cos_image_family: "cos-85-lts"
+ gpu_driver_version: "450.80.02"
+}
+
+entry {
+ cos_image_family: "cos-85-lts"
+ gpu_driver_version: "450.51.06"
+}
+
+entry {
+ cos_image_family: "cos-81-lts"
+ gpu_driver_version: "418.67"
+}
\ No newline at end of file
diff --git a/src/cmd/cos_gpu_installer/versions/versions.proto b/src/cmd/cos_gpu_installer/versions/versions.proto
new file mode 100644
index 0000000..1d6eb1e
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/versions/versions.proto
@@ -0,0 +1,20 @@
+syntax = "proto3";
+
+package versions;
+
+option go_package = "cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/versions";
+
+// This proto defines a list of Nvidia precompiled drivers that are required for COS customers.
+// Each entry pairs one COS image family with one GPU driver version; the same
+// image family may appear in several entries.
+message GpuVersionMap {
+ // The required [COS image family, GPU driver version] combinations.
+ repeated GpuVersionMapEntry entry = 1;
+}
+
+// A single required [COS image family, GPU driver version] combination.
+message GpuVersionMapEntry {
+ // Name of a COS image family, e.g. "cos-89-lts".
+ // Required. Note that the field is declared `optional`, so the schema itself
+ // does not enforce that it is set.
+ optional string cos_image_family = 1;
+
+ // Version of a GPU driver, e.g. "450.119.04".
+ // Required. Note that the field is declared `optional`, so the schema itself
+ // does not enforce that it is set.
+ optional string gpu_driver_version = 2;
+}
\ No newline at end of file