cos-tools/cos_customizer: Add the `-use-scratch-disk` flag in cos-customizer.
This change is required for the migration from GPU Installer V1 to GPU Installer V2 in the derived image build process. Installing proprietary GPU drivers with GPU Installer V2 requires downloading and preparing the toolchain to link the precompiled GPU drivers, which consumes approximately 5GB of additional disk space.
In the driverVM environment, where the vmm size limit is 10GB, increasing the boot disk size is not a viable option. To address this, a new flag is added to enable cos-customizer to provision a scratch disk. This scratch disk will be used to store the toolchain during the GPU driver installation process.
BUG=b/380479799
TEST=cloudbuild
RELEASE_NOTE=Add `-use-scratch-disk` flag to the cos-customizer finish-image-build step.
Change-Id: Ic1f555e848a36a85b225ab5b00ed77237ec0b7ce
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/87220
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Tested-by: Shuo Yang <gshuoy@google.com>
Reviewed-by: Arnav Kansal <rnv@google.com>
Cloud-Build: GCB Service account <228075978874@cloudbuild.gserviceaccount.com>
diff --git a/src/cmd/cos_customizer/README.md b/src/cmd/cos_customizer/README.md
index 02dc39f..3bbaf25 100644
--- a/src/cmd/cos_customizer/README.md
+++ b/src/cmd/cos_customizer/README.md
@@ -298,6 +298,9 @@
`-enable-cleanup`: If this flag is set, COS-Customizer will automatically delete old
VMs created by previous invocations in the project and zone set by `-project` and `-zone`.
+`-use-scratch-disk`: If this flag is set, the preloading VM will have access to a 10GB scratch disk mounted at `/mnt/disks/scratch`.
+ The disk type will be determined by the `-disk-type` flag, with the default being `pd-standard`.
+
`-sbom-input-path`: Path to the input JSON file for SBOM generation. This path is relative to
`-build-context` set in step `start-image-build`. Schema for the input:
diff --git a/src/cmd/cos_customizer/finish_image_build.go b/src/cmd/cos_customizer/finish_image_build.go
index fc087ea..41f9680 100644
--- a/src/cmd/cos_customizer/finish_image_build.go
+++ b/src/cmd/cos_customizer/finish_image_build.go
@@ -68,6 +68,7 @@
enableCleanup bool
sbomOutputPath string
sbomInputPath string
+ useScratchDisk bool
}
// Name implements subcommands.Command.Name.
@@ -137,6 +138,9 @@
flags.BoolVar(&f.enableCleanup, "enable-cleanup", false, "Enable cleanup of old VM instances created by COS-Customizer.")
flags.StringVar(&f.sbomInputPath, "sbom-input-path", "", "The path to the SBOM input file.")
flags.StringVar(&f.sbomOutputPath, "sbom-output-path", "", "The GCS path to store the output SBOM file.")
+ flags.BoolVar(&f.useScratchDisk, "use-scratch-disk", false, "Specifies whether to use a scratch disk during the image build."+
+ " When this flag is enabled, the preloading VM will have access to a 10GB scratch disk mounted at /mnt/disks/scratch."+
+ " The disk type will be determined by the --disk-type flag, with the default being pd-standard.")
}
func (f *FinishImageBuild) validate() error {
@@ -201,6 +205,7 @@
return nil, nil, nil, nil, err
}
provConfig.BootDisk.OEMSize = f.oemSize
+ provConfig.UseScratchDisk = f.useScratchDisk
outputImageConfig := config.NewImage(imageName, f.imageProject)
outputImageConfig.Labels = f.labels.m
outputImageConfig.Licenses = f.licenses.l
diff --git a/src/pkg/preloader/preload.go b/src/pkg/preloader/preload.go
index dba5e9e..0af203b 100644
--- a/src/pkg/preloader/preload.go
+++ b/src/pkg/preloader/preload.go
@@ -65,6 +65,9 @@
}
func needScratchDisk(provConfig *provisioner.Config) bool {
+ if provConfig.UseScratchDisk {
+ return true
+ }
for _, step := range provConfig.Steps {
if step.Type == "InstallGPU" {
return true
diff --git a/src/pkg/preloader/preload_test.go b/src/pkg/preloader/preload_test.go
index bd14663..ce9df6f 100644
--- a/src/pkg/preloader/preload_test.go
+++ b/src/pkg/preloader/preload_test.go
@@ -224,6 +224,16 @@
workflow: []byte("{{.ScratchDisks}} {{.ScratchDiskSource}}"),
want: `{"Name":"scratch-disk","SourceImage":"scratch","Type":"${disk_type}","SizeGb":"10"},{"Source":"scratch-disk"},`,
},
+ {
+ testName: "UseScratchDisk",
+ outputImage: config.NewImage("", ""),
+ buildConfig: &config.Build{GCSBucket: "bucket", GPUType: "nvidia-tesla-k80", Project: "p", Zone: "z"},
+ provConfig: &provisioner.Config{
+ UseScratchDisk: true,
+ },
+ workflow: []byte("{{.ScratchDisks}} {{.ScratchDiskSource}}"),
+ want: `{"Name":"scratch-disk","SourceImage":"scratch","Type":"${disk_type}","SizeGb":"10"},{"Source":"scratch-disk"},`,
+ },
}
gcs := fakes.GCSForTest(t)
defer gcs.Close()
diff --git a/src/pkg/provisioner/config.go b/src/pkg/provisioner/config.go
index adb1191..b231365 100644
--- a/src/pkg/provisioner/config.go
+++ b/src/pkg/provisioner/config.go
@@ -88,6 +88,9 @@
// - AnthosInstallerReleaseBucket: the path to download the AnthosInstaller binary.
Steps []StepConfig
+
+ // UseScratchDisk indicates whether a scratch disk should be used.
+ UseScratchDisk bool
}
// stepDeps contains "step" dependencies