cos-gpu-installer: Switch precompiled driver and signature location to COS build artifacts for M109

Precompiled drivers for M105 and below are fetched from the NVIDIA
locations under gs://nvidia-drivers-us-public/nvidia-cos-project/..

Example:

gs://nvidia-drivers-us-public/nvidia-cos-project/105/tesla/470_00/470.182.03/NVIDIA-Linux-x86_64-470.182.03_105-17412.1.75.cos

Signatures for those drivers are available at gs://cos-tools/build_id/extensions/gpu/

With the COS precompiled drivers, the locations for both the drivers and
the signatures can be changed to gs://cos-tools/build_id/.

BUG=b/292160336
TEST=manual
RELEASE_NOTE=Switch precompiled driver and signature location to COS
build artifacts for M109.

Change-Id: Icd1a96ac45e9a356fb60477b99834b9e0ee5e66c
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/52452
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
Tested-by: Arnav Kansal <rnv@google.com>
Reviewed-by: Oleksandr Tymoshenko <ovt@google.com>
diff --git a/src/cmd/cos_gpu_installer/internal/commands/install.go b/src/cmd/cos_gpu_installer/internal/commands/install.go
index a8f639f..fc9e3b4 100644
--- a/src/cmd/cos_gpu_installer/internal/commands/install.go
+++ b/src/cmd/cos_gpu_installer/internal/commands/install.go
@@ -116,6 +116,7 @@
 	kernelOpen         bool
 	noVerify           bool
 	kernelModuleParams modules.ModuleParameters
+	selfPrecompiled    bool
 }
 
 // Name implements subcommands.Command.Name.
@@ -207,7 +208,6 @@
 	}
 
 	var gpuType GPUType = NO_GPU
-
 	if !c.prepareBuildTools {
 		if gpuType, err = c.getGPUTypeInfo(); err != nil {
 			if !c.noVerify {
@@ -218,23 +218,17 @@
 		}
 	}
 
+	if milestone := envReader.Milestone(); selfPrecompiledCandidate(milestone) {
+		c.selfPrecompiled = true
+	}
+
 	downloader := cos.NewGCSDownloader(envReader, c.gcsDownloadBucket, c.gcsDownloadPrefix)
 	if c.nvidiaInstallerURL == "" {
 		versionInput := c.driverVersion
-		milestone, err := strconv.Atoi(envReader.Milestone())
-		if err != nil {
-			c.logError(errors.Wrap(err, "failed to parse milestone number"))
-			return subcommands.ExitFailure
-		}
 		c.driverVersion, err = getDriverVersion(downloader, c.driverVersion)
 		if err != nil {
-			if versionInput == "latest" && milestone < 93 {
-				c.logError(errors.Wrap(err, "'--version=latest' is only supported on COS M93 and onwards, please unset this flag"))
-				return subcommands.ExitFailure
-			} else {
-				c.logError(errors.Wrap(err, "failed to get default driver version"))
-				return subcommands.ExitFailure
-			}
+			c.logError(errors.Wrap(err, fmt.Sprintf("failed to get %s driver version", versionInput)))
+			return subcommands.ExitFailure
 		}
 		if err := c.checkDriverCompatibility(downloader, gpuType); err != nil {
 			c.logError(errors.Wrap(err, "failed to check driver compatibility"))
@@ -267,7 +261,7 @@
 		cacher = installer.NewCacher(hostInstallDir, envReader.BuildNumber(), c.driverVersion)
 		if isCached, isOpen, err := cacher.IsCached(); isCached && err == nil {
 			log.V(2).Info("Found cached version, NOT building the drivers.")
-			if err := installer.ConfigureCachedInstalltion(hostInstallDir, !c.unsignedDriver, c.test, isOpen, c.noVerify, c.kernelModuleParams); err != nil {
+			if err := installer.ConfigureCachedInstallation(hostInstallDir, !c.unsignedDriver, c.test, isOpen, c.noVerify, c.selfPrecompiled, c.kernelModuleParams); err != nil {
 				c.logError(errors.Wrap(err, "failed to configure cached installation"))
 				return subcommands.ExitFailure
 			}
@@ -362,8 +356,12 @@
 
 	var installerFile string
 	if c.nvidiaInstallerURL == "" {
-		installerFile, err = installer.DownloadDriverInstaller(
-			c.driverVersion, envReader.Milestone(), envReader.BuildNumber())
+		if c.selfPrecompiled {
+			installerFile, err = installer.DownloadDriverInstallerV2(downloader, c.driverVersion)
+		} else {
+			installerFile, err = installer.DownloadDriverInstaller(
+				c.driverVersion, envReader.Milestone(), envReader.BuildNumber())
+		}
 		if err != nil {
 			return errors.Wrap(err, "failed to download GPU driver installer")
 		}
@@ -379,19 +377,27 @@
 			if err := signing.DownloadDriverSignaturesFromURL(c.signatureURL); err != nil {
 				return errors.Wrap(err, "failed to download driver signature")
 			}
-		} else if err := signing.DownloadDriverSignatures(downloader, c.driverVersion); err != nil {
-			if strings.Contains(err.Error(), "404 Not Found") {
-				return fmt.Errorf("The GPU driver is not available for the COS version. Please wait for half a day and retry.")
+		} else {
+			if c.selfPrecompiled {
+				if err = signing.DownloadDriverSignaturesV2(downloader, c.driverVersion); err != nil {
+					return errors.Wrap(err, "failed to download driver signature")
+				}
+			} else {
+				if err := signing.DownloadDriverSignatures(downloader, c.driverVersion); err != nil {
+					if strings.Contains(err.Error(), "404 Not Found") {
+						return fmt.Errorf("The GPU driver is not available for the COS version. Please wait for half a day and retry.")
+					}
+					return errors.Wrap(err, "failed to download driver signature")
+				}
 			}
-			return errors.Wrap(err, "failed to download driver signature")
 		}
 	}
 
-	if err := installer.RunDriverInstaller(toolchainPkgDir, installerFile, c.driverVersion, !c.unsignedDriver, c.test, false, c.noVerify, c.kernelModuleParams); err != nil {
+	if err := installer.RunDriverInstaller(toolchainPkgDir, installerFile, c.driverVersion, !c.unsignedDriver, c.test, false, c.noVerify, c.selfPrecompiled, c.kernelModuleParams); err != nil {
 		if errors.Is(err, installer.ErrDriverLoad) {
 			// Drivers were linked, but couldn't load; try again with legacy linking
 			log.Infof("Failed to load kernel module, err: %v. Retrying driver installation with legacy linking", err)
-			if err := installer.RunDriverInstaller(toolchainPkgDir, installerFile, c.driverVersion, !c.unsignedDriver, c.test, true, c.noVerify, c.kernelModuleParams); err != nil {
+			if err := installer.RunDriverInstaller(toolchainPkgDir, installerFile, c.driverVersion, !c.unsignedDriver, c.test, true, c.noVerify, c.selfPrecompiled, c.kernelModuleParams); err != nil {
 				return fmt.Errorf("failed to run GPU driver installer: %v", err)
 			}
 		} else {
@@ -495,3 +501,12 @@
 	}
 	return nil
 }
+
+func selfPrecompiledCandidate(milestone string) bool {
+	for _, v := range []string{"93", "97", "101", "105"} {
+		if v == milestone {
+			return false
+		}
+	}
+	return true
+}
diff --git a/src/cmd/cos_gpu_installer/internal/installer/installer.go b/src/cmd/cos_gpu_installer/internal/installer/installer.go
index ef2b8b8..497cfd0 100644
--- a/src/cmd/cos_gpu_installer/internal/installer/installer.go
+++ b/src/cmd/cos_gpu_installer/internal/installer/installer.go
@@ -29,6 +29,7 @@
 	gpuFirmwareDirContainer       = "/usr/local/nvidia/firmware/nvidia"
 	templateGPUDriverFile         = "gpu_%s_version"
 	precompiledInstallerURLFormat = "https://storage.googleapis.com/nvidia-drivers-%s-public/nvidia-cos-project/%s/tesla/%s_00/%s/NVIDIA-Linux-x86_64-%s_%s-%s.cos"
+	precompiledDriverTemplate     = "NVIDIA-Linux-x86_64-%s-custom.run"
 	defaultFilePermission         = 0755
 	signedURLKey                  = "Expires"
 	prebuiltModuleTemplate        = "nvidia-drivers-%s.tgz"
@@ -73,8 +74,8 @@
 	return nil
 }
 
-// ConfigureCachedInstalltion updates ldconfig and installs the cached GPU driver kernel modules.
-func ConfigureCachedInstalltion(gpuInstallDirHost string, needSigned, test, kernelOpen, noVerify bool, moduleParameters modules.ModuleParameters) error {
+// ConfigureCachedInstallation updates ldconfig and installs the cached GPU driver kernel modules.
+func ConfigureCachedInstallation(gpuInstallDirHost string, needSigned, test, kernelOpen, noVerify, selfPrecompiled bool, moduleParameters modules.ModuleParameters) error {
 	log.V(2).Info("Configuring cached driver installation")
 
 	if err := createHostDirBindMount(gpuInstallDirHost, gpuInstallDirContainer); err != nil {
@@ -83,7 +84,7 @@
 	if err := updateContainerLdCache(); err != nil {
 		return errors.Wrap(err, "failed to configure cached driver installation")
 	}
-	if err := loadGPUDrivers(moduleParameters, needSigned, test, kernelOpen, noVerify); err != nil {
+	if err := loadGPUDrivers(moduleParameters, needSigned, test, kernelOpen, noVerify, selfPrecompiled); err != nil {
 		return errors.Wrap(err, "failed to configure cached driver installation")
 	}
 
@@ -111,6 +112,17 @@
 	return DownloadToInstallDir(downloadURL, "GPU driver installer")
 }
 
+// DownloadDriverInstallerV2 downloads GPU driver installer given driver version from COS build artifacts.
+func DownloadDriverInstallerV2(downloader *cos.GCSDownloader, driverVersion string) (string, error) {
+	log.Infof("Downloading GPU driver installer version %s", driverVersion)
+	installerFilename := fmt.Sprintf(precompiledDriverTemplate, driverVersion)
+	err := downloader.DownloadArtifact(gpuInstallDirContainer, installerFilename)
+	if err != nil {
+		return "", errors.Wrap(err, "failed to download installer")
+	}
+	return installerFilename, nil
+}
+
 // ConfigureDriverInstallationDirs configures GPU driver installation directories by creating mounts.
 func ConfigureDriverInstallationDirs(gpuInstallDirHost string, kernelRelease string) (chan<- int, error) {
 	log.Info("Configuring driver installation directories")
@@ -309,7 +321,7 @@
 
 // RunDriverInstaller runs GPU driver installer. Only works if the provided
 // installer includes precompiled drivers.
-func RunDriverInstaller(toolchainDir, installerFilename, driverVersion string, needSigned, test, legacyLink, noVerify bool, moduleParameters modules.ModuleParameters) error {
+func RunDriverInstaller(toolchainDir, installerFilename, driverVersion string, needSigned, test, legacyLink, noVerify, selfPrecompiled bool, moduleParameters modules.ModuleParameters) error {
 	log.Info("Running GPU driver installer")
 
 	// Extract files to a fixed path first to make sure md5sum of generated gpu drivers are consistent.
@@ -368,8 +380,10 @@
 			}
 		}
 		// Copy public key.
-		if err := utils.CopyFile(signing.GetPublicKeyDer(), filepath.Join(gpuInstallDirContainer, "pubkey.der")); err != nil {
-			return errors.Wrapf(err, "failed to copy file %s", signing.GetPublicKeyDer())
+		if !selfPrecompiled {
+			if err := utils.CopyFile(signing.GetPublicKeyDer(), filepath.Join(gpuInstallDirContainer, "pubkey.der")); err != nil {
+				return errors.Wrapf(err, "failed to copy file %s", signing.GetPublicKeyDer())
+			}
 		}
 	} else if !legacyLink {
 		// Copy drivers to the desired end directory. This is done as part of
@@ -394,7 +408,7 @@
 	// The legacy linking method does this when the installer doesn't fail (i.e.
 	// module signature verification isn't enforced).
 	if (legacyLink && legacyInstallerFailed) || !legacyLink {
-		if err := loadGPUDrivers(moduleParameters, needSigned, test, false, noVerify); err != nil {
+		if err := loadGPUDrivers(moduleParameters, needSigned, test, false, noVerify, selfPrecompiled); err != nil {
 			return fmt.Errorf("%w: %v", ErrDriverLoad, err)
 		}
 	}
@@ -523,9 +537,9 @@
 	return nil
 }
 
-func loadGPUDrivers(moduleParams modules.ModuleParameters, needSigned, test, kernelOpen, noVerify bool) error {
+func loadGPUDrivers(moduleParams modules.ModuleParameters, needSigned, test, kernelOpen, noVerify, selfPrecompiled bool) error {
 	// Don't need to load public key in test mode. Platform key is used.
-	if needSigned && !test && !kernelOpen {
+	if needSigned && !test && !kernelOpen && !selfPrecompiled {
 		if err := modules.LoadPublicKey("gpu-key", filepath.Join(gpuInstallDirContainer, "pubkey.der"), modules.SecondaryKeyring); err != nil {
 			return errors.Wrap(err, "failed to load public key")
 		}
@@ -643,7 +657,7 @@
 	}
 
 	// load the prebuilt kernel modules
-	if err := loadGPUDrivers(moduleParameters, false, false, true, noVerify); err != nil {
+	if err := loadGPUDrivers(moduleParameters, false, false, true, noVerify, true); err != nil {
 		return fmt.Errorf("%w: %v", ErrDriverLoad, err)
 	}
 
diff --git a/src/cmd/cos_gpu_installer/internal/signing/signature.go b/src/cmd/cos_gpu_installer/internal/signing/signature.go
index 68126eb..e67570b 100644
--- a/src/cmd/cos_gpu_installer/internal/signing/signature.go
+++ b/src/cmd/cos_gpu_installer/internal/signing/signature.go
@@ -2,6 +2,7 @@
 package signing
 
 import (
+	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -16,12 +17,31 @@
 	gpuDriverPubKeyPem = "gpu-driver-cert.pem"
 	gpuDriverPubKeyDer = "gpu-driver-cert.der"
 	gpuDriverDummyKey  = "dummy-key"
+	signatureTemplate  = "nvidia-drivers-%s-signature.tar.gz"
 )
 
 var (
 	gpuDriverSigningDir = "/build/sign-gpu-driver"
 )
 
+// DownloadDriverSignaturesV2 downloads GPU driver signatures from COS build artifacts.
+func DownloadDriverSignaturesV2(downloader *cos.GCSDownloader, driverVersion string) error {
+	if err := os.MkdirAll(gpuDriverSigningDir, 0755); err != nil {
+		return errors.Wrapf(err, "failed to create signing dir %s", gpuDriverSigningDir)
+	}
+	log.Infof("Downloading driver signature for version %s", driverVersion)
+	signatureName := fmt.Sprintf(signatureTemplate, driverVersion)
+	if err := downloader.DownloadArtifact(gpuDriverSigningDir, signatureName); err != nil {
+		return errors.Wrapf(err, "failed to download driver signature for version %s", driverVersion)
+	}
+
+	if err := decompressSignature(signatureName); err != nil {
+		return errors.Wrapf(err, "failed to decompress driver signature for version %s.", driverVersion)
+	}
+
+	return nil
+}
+
 // DownloadDriverSignatures downloads GPU driver signatures.
 func DownloadDriverSignatures(downloader cos.ExtensionsDownloader, driverVersion string) error {
 	if err := os.MkdirAll(gpuDriverSigningDir, 0755); err != nil {