// blob: 791fc509dc22cc9e201f9ac35ecea9b2d6561712 [file] [log] [blame]

package commands
import (
"cos-extensions/extensions/gpu"
"fmt"
"net/http"
"github.com/spf13/cobra"
)
var (
// installGPU is the "gpu" subcommand registered under installCmd. It builds a
// COS GPU installer and hands every argument given after "--" to gpu.Install
// unchanged; the options described in Long are interpreted there, not by
// cobra.
installGPU = &cobra.Command{
	Use:                   "gpu -- [options]",
	DisableFlagsInUseLine: true,
	Short:                 "Installs a gpu driver.",
	// NOTE: this is a raw string, so nothing in it is expanded at runtime;
	// the command name must be spelled out literally.
	Long: `Installs a gpu driver.
Additional description:
cos-extensions install gpu -- [options]
--version
The gpu extension can be invoked with --version. The possible values are
'latest', 'default', 'R<major-version>' eg. 'R470', 'R535'. or
precise driver versions retrievable by running cos-extensions list.
If unspecified, the default driver version is installed.
--force-fallback
This flag indicates whether to use fallback mechanism when specified
GPU driver is not compatible with GPU devices. In case unspecified,
the fallback behavior of the installer is not applicable for
--version=R<major-version> eg. 'R470', 'R525' or
--version=<precise-version> eg. '535.129.03', '525.147.05'.
The fallback behavior of the installer is active for --version is unset
or --version=default or --version=latest.
When fallback behavior is active, the installer will find a compatible
driver to install for the detected GPU on the VM.
--prepare-build-tools
The gpu extension can be invoked with a --prepare-build-tools optional
argument that can be used to cache the toolchain for the installer.
Caching the toolchain carries the overhead of ~1GB disk space on the
stateful partition.
Using this command only populates the cache and does NOT install the GPU
drivers thus may save time on downloading the toolchain during subsequent
installations.
--clean-build-tools
Use this optional command to delete the cache for the toolchain present on
the stateful partition.
-test
The gpu extension can be used to install drivers on a dev channel image with
this.
-no-verify
Skip kernel module loading, installation verification,
and the enabling of NVIDIA persistence mode.
Useful for preloading drivers without attached GPU.
-debug
The granularity of logging of the gpu extension can be increased by
specifying this.`,
	// RunE resolves the installer and runs the installation. Any failure is
	// returned wrapped with %w so callers can still inspect the cause.
	RunE: func(cmd *cobra.Command, args []string) error {
		// NOTE(review): the HTTP client is created without a timeout; confirm
		// whether gpu.GetCosInstaller applies its own deadline.
		installers, err := gpu.GetCosInstaller(&http.Client{})
		if err != nil {
			return fmt.Errorf("failed to set gpu installer: %w", err)
		}
		if err := gpu.Install(installers, args); err != nil {
			return fmt.Errorf("failed to install GPU driver: %w", err)
		}
		return nil
	},
}
// listGPU is the "gpu" subcommand registered under listCmd. It reads the
// boolean "gpu-installer" flag, builds a COS GPU installer, and passes both,
// together with the remaining arguments, to gpu.List.
listGPU = &cobra.Command{
	Use:   "gpu",
	Short: "Lists the available gpu driver versions.",
	Long: `Lists the available gpu driver versions.
Additional description:
cos-extensions list [flags] -- [options]
--target-gpu
This flag specifies the GPU device to display its compatible drivers.
If specified, it must be one of: NVIDIA_TESLA_K80, NVIDIA_TESLA_P4,
NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_L4, NVIDIA_H100_80GB,
NVIDIA_TESLA_A100, NVIDIA_A100_80GB, NVIDIA_TESLA_T4.
If not specified, the GPU device will be auto-detected by the installer.
--gpu-proto-cache-dir
The GPU proto cache directory that GPU driver versions proto file is stored into.
If unspecified, the GPU driver versions proto file will not be cached.`,
	// RunE returns an error if the flag cannot be read, the installer cannot
	// be resolved, or the listing itself fails.
	RunE: func(cmd *cobra.Command, args []string) error {
		// Surface a lookup failure (e.g. the flag was never registered or has
		// the wrong type) instead of silently falling back to false.
		printInstaller, err := cmd.Flags().GetBool("gpu-installer")
		if err != nil {
			return fmt.Errorf("failed to read gpu-installer flag: %w", err)
		}
		// NOTE(review): the HTTP client is created without a timeout; confirm
		// whether gpu.GetCosInstaller applies its own deadline.
		installers, err := gpu.GetCosInstaller(&http.Client{})
		if err != nil {
			return fmt.Errorf("failed to set gpu installer: %w", err)
		}
		if err := gpu.List(installers, printInstaller, args); err != nil {
			return fmt.Errorf("failed to list GPU drivers: %w", err)
		}
		return nil
	},
}
)
func init() {
// Registering subcommands for gpu extension
installCmd.AddCommand(installGPU)
listCmd.AddCommand(listGPU)
// Flag definitions for cos-extensions list
listGPU.Flags().Bool("gpu-installer", false, "lists the default cos-gpu-installer version in use.")
}