| // Package installer provides functionality to install GPU drivers. |
| package installer |
| |
| import ( |
| "context" |
| stderrors "errors" |
| "fmt" |
| "io/fs" |
| "io/ioutil" |
| "os" |
| "os/exec" |
| "path" |
| "path/filepath" |
| "regexp" |
| "sort" |
| "strings" |
| "syscall" |
| |
| "cos.googlesource.com/cos/tools.git/src/cmd/cos_gpu_installer/internal/signing" |
| "cos.googlesource.com/cos/tools.git/src/pkg/cos" |
| "cos.googlesource.com/cos/tools.git/src/pkg/modules" |
| "cos.googlesource.com/cos/tools.git/src/pkg/utils" |
| |
| log "github.com/golang/glog" |
| "github.com/pkg/errors" |
| "golang.org/x/sys/unix" |
| ) |
| |
// Internal paths, artifact file names and name templates.
const (
	// gpuInstallDirContainer is the driver installation directory as seen
	// from inside the installer container.
	gpuInstallDirContainer = "/usr/local/nvidia"
	// gpuFirmwareDirContainer is the directory under which GSP firmware is
	// placed, in a per-driver-version subdirectory.
	gpuFirmwareDirContainer = "/usr/local/nvidia/firmware/nvidia"
	// gpuDriverProtoBin is the file name of the GPU driver versions proto.
	gpuDriverProtoBin = "gpu_driver_versions.bin"
	// templateGPUDriverFile is formatted with a version alias to get the
	// artifact that holds the matching driver version.
	templateGPUDriverFile = "gpu_%s_version"
	// precompiledDriverTemplate is formatted with a driver version to get the
	// precompiled installer file name.
	precompiledDriverTemplate = "NVIDIA-Linux-x86_64-%s-custom.run"
	// prebuiltModuleTemplate is formatted with a driver version to get the
	// prebuilt kernel modules tarball name.
	prebuiltModuleTemplate = "nvidia-drivers-%s.tgz"
	// defaultFilePermission is the mode used for directories created here.
	defaultFilePermission = 0755
	// signedURLKey is the query key that marks the start of the signed-URL
	// suffix stripped from downloaded file names.
	signedURLKey = "Expires"
)

// Exported version aliases and GPU driver version artifact naming.
const (
	// DefaultVersion is the "default" driver version alias.
	DefaultVersion = "default"
	// LatestVersion is the "latest" driver version alias.
	LatestVersion = "latest"
	// MajorGPUDriverArtifactPrefix is the prefix shared by the gpu_xx_version
	// artifacts.
	MajorGPUDriverArtifactPrefix = "gpu_"
	// MajorGPUDriverArtifactSuffix is the suffix shared by the gpu_xx_version
	// artifacts.
	MajorGPUDriverArtifactSuffix = "_version"
)
| |
var (
	// gspFileNames lists the GSP firmware file names that are looked for in
	// an extracted installer.
	gspFileNames = []string{
		"gsp.bin",
		"gsp_tu10x.bin",
		"gsp_ad10x.bin",
		"gsp_ga10x.bin",
	}

	// ErrDriverLoad indicates that installed GPU drivers could not be loaded
	// into the kernel.
	ErrDriverLoad = stderrors.New("failed to load GPU drivers")

	// errInstallerFailed marks a failed run of nvidia-installer during legacy
	// linking.
	errInstallerFailed = stderrors.New("failed to run GPU driver installer")
)
| |
| // VerifyDriverInstallation runs some commands to verify the driver installation. |
| func VerifyDriverInstallation(noVerify, debug bool) error { |
| if noVerify { |
| log.Infof("Flag --no-verify is set, skip driver installation verification.") |
| return nil |
| } |
| log.Info("Verifying GPU driver installation") |
| |
| newPathEnv := fmt.Sprintf("%s/bin:%s", gpuInstallDirContainer, os.Getenv("PATH")) |
| os.Setenv("PATH", newPathEnv) |
| // Run nvidia-smi to check whether nvidia GPU driver is installed. |
| if err := utils.RunCommandAndLogOutput(exec.Command("nvidia-smi"), false); err != nil { |
| return errors.Wrap(err, "failed to verify GPU driver installation") |
| } |
| |
| // Create unified memory device file. |
| if err := utils.RunCommandAndLogOutput(exec.Command("nvidia-modprobe", "-c0", "-u", "-m"), false); err != nil { |
| return errors.Wrap(err, "failed to create unified memory device file") |
| } |
| |
| // Create symlinks in /dev/char for all possible NVIDIA device nodes |
| var err error |
| if debug { |
| err = utils.RunCommandAndLogOutput(exec.Command("nvidia-ctk", "system", "create-dev-char-symlinks", "--create-all"), false) |
| } else { |
| err = utils.RunCommandAndLogOutput(exec.Command("nvidia-ctk", "--quiet", "system", "create-dev-char-symlinks", "--create-all"), false) |
| } |
| if err != nil { |
| return errors.Wrap(err, "failed to create symlinks") |
| } |
| return nil |
| } |
| |
| // ConfigureCachedInstallation updates ldconfig and installs the cached GPU driver kernel modules. |
| func ConfigureCachedInstallation(gpuInstallDirHost, kernelDriversPath string, needSigned, test, kernelOpen, noVerify bool, moduleParameters modules.ModuleParameters) error { |
| log.V(2).Info("Configuring cached driver installation") |
| |
| if err := createHostDirBindMount(gpuInstallDirHost, gpuInstallDirContainer); err != nil { |
| return errors.Wrap(err, "failed to create driver installation dir") |
| } |
| if err := updateContainerLdCache(); err != nil { |
| return errors.Wrap(err, "failed to configure cached driver installation") |
| } |
| if err := loadGPUDrivers(moduleParameters, needSigned, test, kernelOpen, noVerify, kernelDriversPath); err != nil { |
| return errors.Wrap(err, "failed to configure cached driver installation") |
| } |
| |
| return nil |
| } |
| |
| // DownloadToInstallDir downloads data from the provided URL to the GPU |
| // installation directory. It returns the basename of the locally written file. |
| func DownloadToInstallDir(url, infoStr string) (string, error) { |
| outputPath := filepath.Join(gpuInstallDirContainer, strings.Split(path.Base(url), "?"+signedURLKey+"=")[0]) |
| if err := utils.DownloadContentFromURL(url, outputPath, infoStr); err != nil { |
| return "", fmt.Errorf("failed to download file with description %q from %q and install into %q: %v", infoStr, url, gpuInstallDirContainer, err) |
| } |
| return filepath.Base(outputPath), nil |
| |
| } |
| |
| // DownloadDriverInstallerV2 downloads GPU driver installer given driver version from COS build artifacts. |
| func DownloadDriverInstallerV2(ctx context.Context, downloader cos.ExtensionsDownloader, driverVersion string) (string, error) { |
| log.Infof("Downloading GPU driver installer version %s", driverVersion) |
| installerFilename := fmt.Sprintf(precompiledDriverTemplate, driverVersion) |
| err := downloader.DownloadExtensionArtifact(ctx, gpuInstallDirContainer, cos.GPUExtension, installerFilename) |
| if err != nil { |
| return "", errors.Wrap(err, "failed to download installer") |
| } |
| return installerFilename, nil |
| } |
| |
| // ConfigureDriverInstallationDirs configures GPU driver installation directories by creating mounts. |
| func ConfigureDriverInstallationDirs(gpuInstallDirHost, kernelRelease, arch string) (func(), error) { |
| log.Info("Configuring driver installation directories") |
| |
| if err := createHostDirBindMount(gpuInstallDirHost, gpuInstallDirContainer); err != nil { |
| return nil, errors.Wrap(err, "failed to create dirver installation dir") |
| } |
| |
| if err := createOverlayFS( |
| "/usr/bin", gpuInstallDirContainer+"/bin", gpuInstallDirContainer+"/bin-workdir"); err != nil { |
| return nil, errors.Wrap(err, "failed to create bin overlay") |
| } |
| |
| ldLibraryPath := "/usr/lib/" + arch + "-linux-gnu" |
| if err := createOverlayFS( |
| ldLibraryPath, gpuInstallDirContainer+"/lib64", gpuInstallDirContainer+"/lib64-workdir"); err != nil { |
| return nil, errors.Wrap(err, "failed to create lib64 overlay") |
| } |
| modulePath := filepath.Join("/lib/modules", kernelRelease, "video") |
| if err := createOverlayFS( |
| modulePath, gpuInstallDirContainer+"/drivers", gpuInstallDirContainer+"/drivers-workdir"); err != nil { |
| return nil, errors.Wrap(err, "failed to create drivers overlay") |
| } |
| |
| if err := updateContainerLdCache(); err != nil { |
| return nil, errors.Wrap(err, "failed to update container ld cache") |
| } |
| return cleanupMounts( |
| "/usr/bin", |
| ldLibraryPath, |
| modulePath, |
| gpuInstallDirContainer), nil |
| } |
| |
| func cleanupMounts(paths ...string) func() { |
| return func() { |
| log.Info("Start to clean up mounts...") |
| for _, path := range paths { |
| if err := syscall.Unmount(path, 0); err != nil { |
| log.Errorf("Failed to unmount %s: %v", path, err) |
| } |
| } |
| log.Info("Cleanup finished!") |
| } |
| } |
| |
| func extractPrecompiled(nvidiaDir string) error { |
| log.Info("Extracting precompiled artifacts...") |
| precompiledDir := filepath.Join(nvidiaDir, "kernel", "precompiled") |
| files, err := os.ReadDir(precompiledDir) |
| if err != nil { |
| return fmt.Errorf("failed to read %q: %v", precompiledDir, err) |
| } |
| var precompiledArchive string |
| if len(files) == 0 { |
| return stderrors.New("failed to find precompiled artifacts in this nvidia installer") |
| } |
| if len(files) == 1 { |
| precompiledArchive = filepath.Join(precompiledDir, files[0].Name()) |
| } |
| if len(files) > 1 { |
| var fileNames []string |
| for _, f := range files { |
| fileNames = append(fileNames, f.Name()) |
| } |
| sort.Strings(fileNames) |
| log.Warningf("Found multiple precompiled archives in this nvidia installer: %q", strings.Join(fileNames, ",")) |
| log.Warningf("Using precompiled archive named %q", fileNames[len(fileNames)-1]) |
| precompiledArchive = filepath.Join(precompiledDir, fileNames[len(fileNames)-1]) |
| } |
| cmd := exec.Command(filepath.Join(nvidiaDir, "mkprecompiled"), "--unpack", precompiledArchive, "-o", precompiledDir) |
| if err := utils.RunCommandAndLogOutput(cmd, false); err != nil { |
| return fmt.Errorf("failed to unpack precompiled artifacts: %v", err) |
| } |
| log.Info("Done extracting precompiled artifacts") |
| return nil |
| } |
| |
| func linkDrivers(toolchainDir, nvidiaDir string) error { |
| log.Info("Linking drivers...") |
| var kernelInfo unix.Utsname |
| if err := unix.Uname(&kernelInfo); err != nil { |
| return fmt.Errorf("failed to find kernel release info using uname: %v", err) |
| } |
| kernelRelease := strings.Trim(string(kernelInfo.Release[:]), "\x00") |
| // COS 85+ kernels use lld as their linker |
| linker := filepath.Join(toolchainDir, "bin", "ld.lld") |
| linkerScript := filepath.Join(toolchainDir, "usr", "src", "linux-headers-"+kernelRelease, "scripts", "module.lds") |
| linkerScriptExists, err := utils.CheckFileExists(linkerScript) |
| if err != nil { |
| return fmt.Errorf("failed to check if %s exists, err: %v", linkerScript, err) |
| } |
| if !linkerScriptExists { |
| // Fallback to module-common.lds, which is used in the 5.4 kernel |
| linkerScript = filepath.Join(toolchainDir, "usr", "src", "linux-headers-"+kernelRelease, "scripts", "module-common.lds") |
| } |
| nvidiaKernelDir := filepath.Join(nvidiaDir, "kernel") |
| // Link nvidia.ko |
| nvidiaObjs := []string{ |
| filepath.Join(nvidiaKernelDir, "precompiled", "nv-linux.o"), |
| filepath.Join(nvidiaKernelDir, "nvidia", "nv-kernel.o_binary"), |
| } |
| args := append([]string{"-T", linkerScript, "-r", "-o", filepath.Join(nvidiaKernelDir, "precompiled", "nvidia.ko")}, nvidiaObjs...) |
| cmd := exec.Command(linker, args...) |
| log.Infof("Running link command: %v", cmd.Args) |
| if err := utils.RunCommandAndLogOutput(cmd, false); err != nil { |
| return fmt.Errorf("failed to link nvidia.ko: %v", err) |
| } |
| // Link nvidia-modeset.ko |
| modesetObjs := []string{ |
| filepath.Join(nvidiaKernelDir, "precompiled", "nv-modeset-linux.o"), |
| filepath.Join(nvidiaKernelDir, "nvidia-modeset", "nv-modeset-kernel.o_binary"), |
| } |
| args = append([]string{"-T", linkerScript, "-r", "-o", filepath.Join(nvidiaKernelDir, "precompiled", "nvidia-modeset.ko")}, modesetObjs...) |
| cmd = exec.Command(linker, args...) |
| log.Infof("Running link command: %v", cmd.Args) |
| if err := utils.RunCommandAndLogOutput(cmd, false); err != nil { |
| return fmt.Errorf("failed to link nvidia-modeset.ko: %v", err) |
| } |
| // Move all modules to kernel dir (includes some pre-linked modules, in |
| // addition to the above linked ones) |
| if err := filepath.WalkDir(filepath.Join(nvidiaKernelDir, "precompiled"), func(path string, d fs.DirEntry, err error) error { |
| if err != nil { |
| return err |
| } |
| if d.IsDir() { |
| return nil |
| } |
| if filepath.Ext(path) == ".ko" { |
| newPath := filepath.Join(nvidiaKernelDir, filepath.Base(path)) |
| if err := unix.Rename(path, newPath); err != nil { |
| return fmt.Errorf("failed to move %q to %q: %v", path, newPath, err) |
| } |
| } |
| return nil |
| }); err != nil { |
| return fmt.Errorf("failed to copy kernel modules: %v", err) |
| } |
| log.Info("Done linking drivers") |
| return nil |
| } |
| |
| func linkDriversLegacy(toolchainDir, nvidiaDir string) error { |
| log.Info("Linking drivers using legacy method...") |
| // The legacy linking method needs to use "/usr/bin/ld" as the linker to |
| // maintain bit-for-bit compatibility with driver signatures. The legacy |
| // linking method also finds the linker by searching the PATH for "ld". If |
| // bin/ld is present in the toolchain, rename it temporarily so the legacy |
| // linking method doesn't use it. |
| ld := filepath.Join(toolchainDir, "bin", "ld") |
| if _, err := os.Lstat(ld); !os.IsNotExist(err) { |
| dst := filepath.Join(toolchainDir, "bin", "ld.orig") |
| if err := unix.Rename(ld, dst); err != nil { |
| return fmt.Errorf("failed to rename %q to %q: %v", ld, dst, err) |
| } |
| defer func() { |
| if err := unix.Rename(dst, ld); err != nil { |
| // At this point, this error is non-fatal. It will become fatal when |
| // something tries to use bin/ld in the toolchain. At time of writing, |
| // nothing uses bin/ld after this point. |
| log.Warningf("Could not restore %q", ld) |
| } |
| }() |
| } |
| cmd := exec.Command(filepath.Join(nvidiaDir, "nvidia-installer"), |
| "--utility-prefix="+gpuInstallDirContainer, |
| "--opengl-prefix="+gpuInstallDirContainer, |
| "--x-prefix="+gpuInstallDirContainer, |
| "--install-libglvnd", |
| "--no-install-compat32-libs", |
| "--log-file-name="+filepath.Join(gpuInstallDirContainer, "nvidia-installer.log"), |
| "--silent", |
| "--accept-license", |
| ) |
| log.Infof("Installer arguments:\n%v", cmd.Args) |
| err := utils.RunCommandAndLogOutput(cmd, false) |
| log.Info("Done linking drivers") |
| if err != nil { |
| return fmt.Errorf("%w: %v", errInstallerFailed, err) |
| } |
| return nil |
| } |
| |
| func installUserLibs(nvidiaDir, arch string) error { |
| log.Info("Installing userspace libraries...") |
| cmdArgs := []string{ |
| "--utility-prefix=" + gpuInstallDirContainer, |
| "--opengl-prefix=" + gpuInstallDirContainer, |
| "--x-prefix=" + gpuInstallDirContainer, |
| "--install-libglvnd", |
| "--log-file-name=" + filepath.Join(gpuInstallDirContainer, "nvidia-installer.log"), |
| "--silent", |
| "--accept-license", |
| "--no-kernel-module", |
| } |
| // Arm64 installer does not recognize the 32 bit compatibility libraries command line option. |
| if arch == "x86_64" { |
| cmdArgs = append(cmdArgs, "--no-install-compat32-libs") |
| } |
| cmd := exec.Command(filepath.Join(nvidiaDir, "nvidia-installer"), cmdArgs...) |
| log.Infof("Installer arguments:\n%v", cmd.Args) |
| if err := utils.RunCommandAndLogOutput(cmd, false); err != nil { |
| return fmt.Errorf("failed to run GPU driver installer: %v", err) |
| } |
| log.Info("Done installing userspace libraries") |
| return nil |
| } |
| |
| // RunDriverInstaller runs GPU driver installer. Only works if the provided |
| // installer includes precompiled drivers. |
| func RunDriverInstaller(toolchainDir, installerFilename, driverVersion, arch, kernelDriversPath string, needSigned, test, legacyLink, noVerify bool, moduleParameters modules.ModuleParameters) error { |
| log.Info("Running GPU driver installer") |
| |
| // Extract files to a fixed path first to make sure md5sum of generated gpu drivers are consistent. |
| extractDir := "/tmp/extract" |
| if err := os.RemoveAll(extractDir); err != nil { |
| return fmt.Errorf("failed to clean %q: %v", extractDir, err) |
| } |
| cmd := exec.Command("sh", installerFilename, "-x", "--target", extractDir) |
| cmd.Dir = gpuInstallDirContainer |
| if err := cmd.Run(); err != nil { |
| return errors.Wrap(err, "failed to extract installer files") |
| } |
| |
| // Extract precompiled artifacts. |
| if err := extractPrecompiled(extractDir); err != nil { |
| return fmt.Errorf("failed to extract precompiled artifacts: %v", err) |
| } |
| |
| // Link drivers. |
| var legacyInstallerFailed bool |
| if legacyLink { |
| if err := linkDriversLegacy(toolchainDir, extractDir); err != nil { |
| if stderrors.Is(err, errInstallerFailed) { |
| // This case is expected when module signature enforcement is enabled. |
| // Since the installer terminated early, we need to re-run it after |
| // signing modules. |
| // |
| // If we don't sign modules (i.e. needSigned is false), then we'll see |
| // an error when we load the modules, and that will be fatal. |
| legacyInstallerFailed = true |
| } else { |
| return fmt.Errorf("failed to link drivers: %v", err) |
| } |
| } |
| } else { |
| if err := linkDrivers(toolchainDir, extractDir); err != nil { |
| return fmt.Errorf("failed to link drivers: %v", err) |
| } |
| } |
| |
| kernelFiles, err := ioutil.ReadDir(filepath.Join(extractDir, "kernel")) |
| if err != nil { |
| return errors.Wrapf(err, "failed to list files in directory %s", filepath.Join(extractDir, "kernel")) |
| } |
| if needSigned { |
| // sign GPU drivers. |
| for _, kernelFile := range kernelFiles { |
| if strings.HasSuffix(kernelFile.Name(), ".ko") { |
| module := kernelFile.Name() |
| signaturePath := signing.GetModuleSignature(module) |
| modulePath := filepath.Join(extractDir, "kernel", module) |
| signedModulePath := filepath.Join(gpuInstallDirContainer, "drivers", module) |
| if err := modules.AppendSignature(signedModulePath, modulePath, signaturePath); err != nil { |
| return errors.Wrapf(err, "failed to sign kernel module %s", module) |
| } |
| } |
| } |
| } else if !legacyLink { |
| // Copy drivers to the desired end directory. This is done as part of |
| // `modules.AppendSignature` in the above signing block, but we need to do |
| // it for unsigned modules as well. Legacy linking already does this copy |
| // in the unsigned case (we expect that legacy linking also does this when |
| // the installer fails); we skip this block in the legacy link case to avoid |
| // redundancy. |
| for _, kernelFile := range kernelFiles { |
| if strings.HasSuffix(kernelFile.Name(), ".ko") { |
| module := kernelFile.Name() |
| src := filepath.Join(extractDir, "kernel", module) |
| dst := filepath.Join(gpuInstallDirContainer, "drivers", module) |
| if err := utils.CopyFile(src, dst); err != nil { |
| return fmt.Errorf("failed to copy kernel module %q: %v", module, err) |
| } |
| } |
| } |
| } |
| |
| // Load GPU drivers. |
| // The legacy linking method does this when the installer doesn't fail (i.e. |
| // module signature verification isn't enforced). |
| if (legacyLink && legacyInstallerFailed) || !legacyLink { |
| if err := loadGPUDrivers(moduleParameters, needSigned, test, false, noVerify, kernelDriversPath); err != nil { |
| return fmt.Errorf("%w: %v", ErrDriverLoad, err) |
| } |
| } |
| |
| // Install libs. |
| // The legacy linking method does this when the installer doesn't fail (i.e. |
| // module signature verification isn't enforced). |
| if (legacyLink && legacyInstallerFailed) || !legacyLink { |
| if err := installUserLibs(extractDir, arch); err != nil { |
| return fmt.Errorf("failed to install userspace libraries: %v", err) |
| } |
| |
| // Driver version may be empty if custom nvidia-installer-url is used |
| // read from manifest file |
| if driverVersion == "" { |
| |
| driverVersion = findDriverVersionManifestFile(filepath.Join(extractDir, ".manifest")) |
| log.Info("found driver version from nvidia-installer pkg ", driverVersion) |
| } |
| |
| if err := prepareGSPFirmware(extractDir, driverVersion, needSigned); err != nil { |
| return fmt.Errorf("failed to prepare GSP firmware, err: %v", err) |
| } |
| } |
| |
| return nil |
| } |
| |
| // DownloadGPUDriverVersionsProto will download gpuDriverProtoBin from GCS bucket to /var/lib/nvidia if it does not exist. |
| func DownloadGPUDriverVersionsProto(ctx context.Context, downloader cos.ArtifactsDownloader, gpuInstallDir string) ([]byte, error) { |
| destFullPath := filepath.Join(gpuInstallDir, gpuDriverProtoBin) |
| _, err := os.Stat(destFullPath) |
| if err == nil { |
| log.Infof("Found GPU driver version proto: %s, skip downloading", destFullPath) |
| } else if os.IsNotExist(err) { |
| if err := os.MkdirAll(gpuInstallDir, defaultFilePermission); err != nil { |
| return nil, fmt.Errorf("failed to create %s with error: %v", gpuInstallDir, err) |
| } |
| if err := downloader.DownloadArtifact(ctx, gpuInstallDir, gpuDriverProtoBin); err != nil { |
| return nil, fmt.Errorf("failed to download %s from GCS bucket with error: %v", gpuDriverProtoBin, err) |
| } |
| log.Infof("Succesfully download %s from GCS bucket.", gpuDriverProtoBin) |
| } else { |
| return nil, fmt.Errorf("error checking %s : %v", destFullPath, err) |
| } |
| return os.ReadFile(destFullPath) |
| } |
| |
| // GeGGPUDriverVersion gets the supplied GPU driver version. |
| // Supports "default", "latest", "R470", "R525" aliases |
| func GetGPUDriverVersion(ctx context.Context, downloader cos.ArtifactsDownloader, alias string) (string, error) { |
| log.Infof("Getting the %s GPU driver version", alias) |
| content, err := downloader.GetArtifact(ctx, fmt.Sprintf(templateGPUDriverFile, alias)) |
| if err != nil { |
| return "", errors.Wrapf(err, "failed to get %s GPU driver version", alias) |
| } |
| return strings.Trim(string(content), "\n "), nil |
| } |
| |
| // DownloadGPUDriverVersionArtifacts fetch all the gpu_xx_version files and the key is the file name and the value is the content. |
| // E.g. |
| // gpu_default_version -> 535.129.03, |
| // gpu_R470_version -> 470.223.02, |
| // gpu_R535_version -> 535.129.03, |
| func DownloadGPUDriverVersionArtifacts(ctx context.Context, downloader cos.ArtifactsDownloader) (map[string]string, error) { |
| gpuArtifacts, err := downloader.ListArtifacts(ctx, MajorGPUDriverArtifactPrefix) |
| if err != nil { |
| return nil, fmt.Errorf("error happens when listing artifacts with prefix: %s: %w", MajorGPUDriverArtifactPrefix, err) |
| } |
| var GPUDriverMajorVersionArtifactsContentMap = map[string]string{} |
| for _, gpuArtifact := range gpuArtifacts { |
| if strings.HasSuffix(gpuArtifact, MajorGPUDriverArtifactSuffix) { |
| gpuArtifactName := path.Base(gpuArtifact) |
| content, err := downloader.GetArtifact(ctx, gpuArtifactName) |
| if err != nil { |
| return nil, errors.Wrapf(err, "failed to download artifact: %s", gpuArtifact) |
| } |
| GPUDriverMajorVersionArtifactsContentMap[gpuArtifactName] = strings.Trim(string(content), "\n ") |
| } |
| } |
| return GPUDriverMajorVersionArtifactsContentMap, nil |
| } |
| |
| func updateContainerLdCache() error { |
| log.V(2).Info("Updating container's ld cache") |
| |
| f, err := os.Create("/etc/ld.so.conf.d/nvidia.conf") |
| if err != nil { |
| f.Close() |
| return errors.Wrap(err, "failed to update ld cache") |
| } |
| f.WriteString(gpuInstallDirContainer + "/lib64") |
| f.Close() |
| |
| err = exec.Command("ldconfig").Run() |
| if err != nil { |
| return errors.Wrap(err, "failed to update ld cache") |
| } |
| return nil |
| } |
| |
| func createHostDirBindMount(hostDir, bindMountPath string) error { |
| if err := os.MkdirAll(hostDir, defaultFilePermission); err != nil { |
| return errors.Wrapf(err, "failed to create dir %s", hostDir) |
| } |
| if err := os.MkdirAll(bindMountPath, defaultFilePermission); err != nil { |
| return errors.Wrapf(err, "failed to create dir %s", bindMountPath) |
| } |
| if err := syscall.Mount(hostDir, bindMountPath, "", syscall.MS_BIND, ""); err != nil { |
| return errors.Wrapf(err, "failed to create bind mount %s", bindMountPath) |
| } |
| // Remount to clear noexec flag. |
| if err := syscall.Mount("", bindMountPath, "", |
| syscall.MS_REMOUNT|syscall.MS_NOSUID|syscall.MS_NODEV|syscall.MS_RELATIME, ""); err != nil { |
| return errors.Wrapf(err, "failed to remount %s", bindMountPath) |
| } |
| return nil |
| } |
| |
| func createOverlayFS(lowerDir, upperDir, workDir string) error { |
| if err := os.MkdirAll(lowerDir, defaultFilePermission); err != nil { |
| return errors.Wrapf(err, "failed to create dir %s", lowerDir) |
| } |
| if err := os.MkdirAll(upperDir, defaultFilePermission); err != nil { |
| return errors.Wrapf(err, "failed to create dir %s", upperDir) |
| } |
| if err := os.MkdirAll(workDir, defaultFilePermission); err != nil { |
| return errors.Wrapf(err, "failed to create dir %s", workDir) |
| } |
| |
| if err := syscall.Mount("none", lowerDir, "overlay", 0, |
| fmt.Sprintf("lowerdir=%s,upperdir=%s,workdir=%s", lowerDir, upperDir, workDir)); err != nil { |
| return errors.Wrapf(err, "failed to create overlayfs (lowerdir=%s, upperdir=%s)", lowerDir, upperDir) |
| } |
| return nil |
| } |
| |
| func loadGPUDrivers(moduleParams modules.ModuleParameters, needSigned, test, kernelOpen, noVerify bool, kernelDriversPath string) error { |
| if noVerify { |
| log.Infof("Flag --no-verify is set, skip kernel module loading.") |
| return nil |
| } |
| kernelModulePath := filepath.Join(gpuInstallDirContainer, "drivers") |
| drmPanel := &modules.Module{ |
| Name: "drm_panel_orientation_quirks", |
| Path: filepath.Join(kernelDriversPath, "gpu/drm/drm_panel_orientation_quirks.ko"), |
| SkipNotFound: true, |
| } |
| i2c := &modules.Module{ |
| Name: "i2c_core", |
| Path: filepath.Join(kernelDriversPath, "i2c/i2c-core.ko"), |
| Deps: []*modules.Module{drmPanel}, |
| SkipNotFound: true, |
| } |
| drm := &modules.Module{ |
| Name: "drm", |
| Path: filepath.Join(kernelDriversPath, "gpu/drm/drm.ko"), |
| Deps: []*modules.Module{i2c, drmPanel}, |
| SkipNotFound: true, |
| } |
| drmKms := &modules.Module{ |
| Name: "drm_kms_helper", |
| Path: filepath.Join(kernelDriversPath, "gpu/drm/drm_kms_helper.ko"), |
| Deps: []*modules.Module{drm, i2c}, |
| SkipNotFound: true, |
| } |
| nvidia := &modules.Module{ |
| Name: "nvidia", |
| Path: filepath.Join(kernelModulePath, "nvidia.ko"), |
| Deps: []*modules.Module{drm, i2c}, |
| } |
| nvidiaUvm := &modules.Module{ |
| Name: "nvidia_uvm", |
| Path: filepath.Join(kernelModulePath, "nvidia-uvm.ko"), |
| Deps: []*modules.Module{nvidia}, |
| } |
| nvidiaModeset := &modules.Module{ |
| Name: "nvidia_modeset", |
| Path: filepath.Join(kernelModulePath, "nvidia-modeset.ko"), |
| Deps: []*modules.Module{nvidia}, |
| } |
| nvidiaDrm := &modules.Module{ |
| Name: "nvidia_drm", |
| Path: filepath.Join(kernelModulePath, "nvidia-drm.ko"), |
| Deps: []*modules.Module{nvidiaModeset, drmKms}, |
| } |
| |
| // Need to load modules in order due to module dependency. |
| gpuModules := []*modules.Module{nvidia, nvidiaUvm, nvidiaModeset, nvidiaDrm} |
| for _, module := range gpuModules { |
| if err := modules.LoadModule(module, moduleParams); err != nil { |
| return errors.Wrapf(err, "failed to load module %s", module.Path) |
| } |
| } |
| return nil |
| } |
| |
| func GetLoadedNVIDIAKernelModuleVersion(versionFilePath string) string { |
| log.V(2).Infof("Attempting to read version from: %s", versionFilePath) |
| content, err := os.ReadFile(versionFilePath) |
| if err != nil { |
| log.V(2).Infof("Failed to read version file: %v", err) |
| return "" |
| } |
| contentStr := string(content) |
| kernelModuleVersionPattern := regexp.MustCompile(`\d+\.\d+\.\d+`) |
| kernelModuleVersion := kernelModuleVersionPattern.FindString(contentStr) |
| log.V(2).Infof("NVIDIA kernel module version: %s", kernelModuleVersion) |
| return kernelModuleVersion |
| } |
| |
| func prepareGSPFirmware(extractDir, driverVersion string, needSigned bool) error { |
| for _, gspFileName := range gspFileNames { |
| signaturePath := signing.GetModuleSignature(gspFileName) |
| installerGSPPath := filepath.Join(extractDir, "firmware", gspFileName) |
| containerGSPPath := filepath.Join(gpuFirmwareDirContainer, driverVersion, gspFileName) |
| haveSignature, err := utils.CheckFileExists(signaturePath) |
| if err != nil { |
| return fmt.Errorf("failed to check if %s exists, err: %v", signaturePath, err) |
| } |
| haveFirmware, err := utils.CheckFileExists(installerGSPPath) |
| if err != nil { |
| return fmt.Errorf("failed to check if %s exists, err: %v", installerGSPPath, err) |
| } |
| switch { |
| case haveSignature && !haveFirmware: |
| return fmt.Errorf("firmware doesn't exist but its signature does.") |
| case !haveFirmware: |
| log.Infof("GSP firmware for %s doesn't exist. Skipping firmware preparation for %s.", gspFileName, gspFileName) |
| case !needSigned: |
| // No signature needed, copy firmware only. |
| if err := copyFirmware(installerGSPPath, containerGSPPath, driverVersion); err != nil { |
| return fmt.Errorf("failed to copy firmware, err: %v.", err) |
| } |
| case !haveSignature: |
| log.Infof("GSP firmware signature for %s doesn't exist. Skipping firmware preparation for %s.", gspFileName, gspFileName) |
| default: |
| // Both firmware and signature exist. |
| if err := copyFirmware(installerGSPPath, containerGSPPath, driverVersion); err != nil { |
| return fmt.Errorf("failed to copy firmware, err: %v.", err) |
| } |
| if err := setIMAXattr(signaturePath, containerGSPPath); err != nil { |
| return err |
| } |
| } |
| } |
| return nil |
| } |
| |
| func copyFirmware(installerGSPPath, containerGSPPath, gspFileName string) error { |
| if err := os.MkdirAll(filepath.Dir(containerGSPPath), defaultFilePermission); err != nil { |
| return fmt.Errorf("Falied to create firmware directory, err: %v", err) |
| } |
| if err := utils.CopyFile(installerGSPPath, containerGSPPath); err != nil { |
| return fmt.Errorf("Falied to copy %s, err: %v", gspFileName, err) |
| } |
| return nil |
| } |
| |
// setIMAXattr reads the file at signaturePath and stores its contents in the
// "security.ima" extended attribute of containerGSPPath. Returned errors wrap
// the underlying cause, so callers can inspect them with errors.Is /
// errors.As.
func setIMAXattr(signaturePath, containerGSPPath string) error {
	signature, err := os.ReadFile(signaturePath)
	if err != nil {
		return fmt.Errorf("failed to read signature err: %w", err)
	}
	if err := syscall.Setxattr(containerGSPPath, "security.ima", signature, 0); err != nil {
		return fmt.Errorf("failed to set xattr for security.ima, err: %w", err)
	}
	return nil
}
| |
// findDriverVersionManifestFile returns the whitespace-trimmed second line of
// the manifest file at manifestFilePath, which is where the driver version is
// expected. It returns an empty string if the file cannot be read or has
// fewer than two lines.
func findDriverVersionManifestFile(manifestFilePath string) string {
	raw, err := os.ReadFile(manifestFilePath)
	if err != nil {
		return ""
	}
	// Only the second line matters; everything after it stays unsplit.
	parts := strings.SplitN(string(raw), "\n", 3)
	if len(parts) >= 2 {
		return strings.TrimSpace(parts[1])
	}
	return ""
}
| |
| func RunDriverInstallerPrebuiltModules(ctx context.Context, downloader *cos.GCSDownloader, installerFilename, driverVersion, arch, kernelDriversPath string, noVerify bool, moduleParameters modules.ModuleParameters) error { |
| // fetch the prebuilt modules |
| if err := downloader.DownloadArtifact(ctx, gpuInstallDirContainer, fmt.Sprintf(prebuiltModuleTemplate, driverVersion)); err != nil { |
| return fmt.Errorf("failed to download prebuilt modules: %v", err) |
| } |
| |
| tarballPath := filepath.Join(gpuInstallDirContainer, fmt.Sprintf(prebuiltModuleTemplate, driverVersion)) |
| // extract the prebuilt modules and firmware to the installation dirs |
| if err := exec.Command("tar", "--overwrite", "--xattrs", "--xattrs-include=*", "-xf", tarballPath, "-C", gpuInstallDirContainer).Run(); err != nil { |
| return fmt.Errorf("failed to extract prebuilt modules: %v", err) |
| } |
| if err := os.Chmod(gpuInstallDirContainer, defaultFilePermission); err != nil { |
| return fmt.Errorf("failed to change permission of install dir: %v", err) |
| } |
| |
| // load the prebuilt kernel modules |
| if err := loadGPUDrivers(moduleParameters, false, false, true, noVerify, kernelDriversPath); err != nil { |
| return fmt.Errorf("%w: %v", ErrDriverLoad, err) |
| } |
| |
| // Extract files to a fixed path first to make sure md5sum of generated gpu drivers are consistent. |
| extractDir := "/tmp/extract" |
| if err := os.RemoveAll(extractDir); err != nil { |
| return fmt.Errorf("failed to clean %q: %v", extractDir, err) |
| } |
| cmd := exec.Command("sh", installerFilename, "-x", "--target", extractDir) |
| cmd.Dir = gpuInstallDirContainer |
| if err := cmd.Run(); err != nil { |
| return errors.Wrap(err, "failed to extract installer files") |
| } |
| if err := installUserLibs(extractDir, arch); err != nil { |
| return fmt.Errorf("failed to install userspace libraries: %v", err) |
| } |
| |
| return nil |
| } |
| |
| func PrebuiltModulesAvailable(ctx context.Context, downloader *cos.GCSDownloader, driverVersion string, kernelOpen bool) (bool, error) { |
| if !kernelOpen { |
| return false, nil |
| } |
| |
| prebuiltModulesArtifactPath := fmt.Sprintf(prebuiltModuleTemplate, driverVersion) |
| return downloader.ArtifactExists(ctx, prebuiltModulesArtifactPath) |
| } |
| |
| // DownloadGenericDriverInstaller downloads the generic GPU driver installer given driver version. |
| func DownloadGenericDriverInstaller(ctx context.Context, downloader *cos.GCSDownloader, driverVersion string) (string, error) { |
| log.Infof("Downloading GPU driver installer version %s", driverVersion) |
| return downloader.DownloadGenericNvidiaDriver(ctx, gpuInstallDirContainer, driverVersion) |
| } |