Trigger daemons from cos-gpu-installer (instead of cos-extensions)

BUG=b/502284908
TEST=Run custom cos-extensions and custom cos-gpu-installer on VMs, installing grid and non-grid GPU drivers. On grid: test license and CUDA workload.
Run `sudo systemctl list-units --type=path | grep nvidia` and verify the right services are running.
RELEASE_NOTE=None

Change-Id: I6778d527d76b90654b56cc778f76a5401ea165eb
Reviewed-on: https://cos-review.googlesource.com/c/cos/tools/+/149184
Reviewed-by: Robert Kolchmeyer <rkolchmeyer@google.com>
Cloud-Build: 228075978874@cloudbuild.gserviceaccount.com <228075978874@cloudbuild.gserviceaccount.com>
Tested-by: Miri Amarilio <mirilio@google.com>
diff --git a/src/cmd/cos_gpu_installer/internal/commands/install.go b/src/cmd/cos_gpu_installer/internal/commands/install.go
index 8d9bd90..68191d4 100644
--- a/src/cmd/cos_gpu_installer/internal/commands/install.go
+++ b/src/cmd/cos_gpu_installer/internal/commands/install.go
@@ -317,6 +317,14 @@
 		c.hostInstallDir = os.Getenv("NVIDIA_INSTALL_DIR_HOST")
 	}
 	hostInstallDir := filepath.Join(hostRootPath, c.hostInstallDir)
+	if err := os.MkdirAll(hostInstallDir, 0755); err != nil {
+		c.logError(errors.Wrap(err, "failed to create host install dir"))
+		return subcommands.ExitFailure
+	}
+	if err := installer.EnsureExecutable(c.hostInstallDir, hostRootPath); err != nil {
+		c.logError(errors.Wrap(err, "failed to ensure installation directory is executable"))
+		return subcommands.ExitFailure
+	}
 
 	var downloader *cos.GCSDownloader
 	if c.localArtifactsDir != "" {
@@ -416,6 +424,10 @@
 				c.logError(errors.Wrap(err, "failed to update host ld cache"))
 				return subcommands.ExitFailure
 			}
+			if err := installer.SetupDaemons(c.hostInstallDir, hostRootPath); err != nil {
+				c.logError(errors.Wrap(err, "failed to setup daemons"))
+				return subcommands.ExitFailure
+			}
 			return subcommands.ExitSuccess
 		}
 	}
@@ -440,6 +452,10 @@
 			c.logError(err)
 			return subcommands.ExitFailure
 		}
+		if err := installer.SetupDaemons(c.hostInstallDir, hostRootPath); err != nil {
+			c.logError(errors.Wrap(err, "failed to setup daemons"))
+			return subcommands.ExitFailure
+		}
 		return subcommands.ExitSuccess
 	}
 
@@ -457,6 +473,11 @@
 		return subcommands.ExitFailure
 	}
 
+	if err := installer.SetupDaemons(c.hostInstallDir, hostRootPath); err != nil {
+		c.logError(errors.Wrap(err, "failed to setup daemons"))
+		return subcommands.ExitFailure
+	}
+
 	return subcommands.ExitSuccess
 }
 
diff --git a/src/cmd/cos_gpu_installer/internal/installer/daemon.go b/src/cmd/cos_gpu_installer/internal/installer/daemon.go
new file mode 100644
index 0000000..fabf8ea
--- /dev/null
+++ b/src/cmd/cos_gpu_installer/internal/installer/daemon.go
@@ -0,0 +1,211 @@
+package installer
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"text/template"
+
+	log "github.com/golang/glog"
+	"golang.org/x/sys/unix"
+)
+
+// Templates for the systemd units written by SetupDaemons. Each is a
+// text/template in which {{.InstallDir}} is sysdTemplateData.InstallDir.
+const (
+	// griddPathTmpl is the nvidia-gridd.path unit; it activates
+	// nvidia-gridd.service when the nvidia-gridd binary exists.
+	// NOTE(review): the Description mentions changes, but only PathExists=
+	// is configured, so the unit does not watch for modifications.
+	griddPathTmpl = `[Unit]
+Description=Trigger the nvidia-gridd binary if it exists/changes
+
+[Path]
+PathExists={{.InstallDir}}/bin/nvidia-gridd
+Unit=nvidia-gridd.service
+
+[Install]
+WantedBy=multi-user.target
+`
+
+	// griddServiceTmpl is the nvidia-gridd.service unit; it runs the
+	// nvidia-gridd binary with LD_LIBRARY_PATH pointing at gridd-libs.
+	griddServiceTmpl = `[Unit]
+Description=Launches nvidia-gridd
+ConditionPathExists={{.InstallDir}}/bin/nvidia-gridd
+
+[Service]
+Type=forking
+Environment="LD_LIBRARY_PATH={{.InstallDir}}/gridd-libs"
+ExecStart={{.InstallDir}}/bin/nvidia-gridd
+Restart=on-failure
+RestartSec=6s
+
+[Install]
+WantedBy=multi-user.target
+`
+
+	// persistencedPathTmpl is the nvidia-persistenced.path unit; it activates
+	// nvidia-persistenced.service when the nvidia-persistenced binary exists.
+	persistencedPathTmpl = `[Unit]
+Description=Trigger the nvidia-persistenced binary if it exists/changes
+
+[Path]
+PathExists={{.InstallDir}}/bin/nvidia-persistenced
+Unit=nvidia-persistenced.service
+
+[Install]
+WantedBy=multi-user.target
+`
+
+	// persistencedServiceTmpl is the nvidia-persistenced.service unit; it
+	// runs the nvidia-persistenced binary from the install directory.
+	persistencedServiceTmpl = `[Unit]
+Description=Launches nvidia-persistenced
+ConditionPathExists={{.InstallDir}}/bin/nvidia-persistenced
+
+[Service]
+Type=forking
+ExecStart={{.InstallDir}}/bin/nvidia-persistenced
+Restart=on-failure
+RestartSec=6s
+
+[Install]
+WantedBy=multi-user.target
+`
+)
+
+// sysdTemplateData holds the values substituted into the systemd unit
+// templates.
+type sysdTemplateData struct {
+	// InstallDir is the driver installation directory as seen on the host.
+	InstallDir string
+}
+
+// writeTemplate parses tmplStr as a text/template, executes it with data, and
+// writes the rendered output to path with mode 0644. It returns any parse,
+// execution, or write error unchanged.
+func writeTemplate(path string, tmplStr string, data sysdTemplateData) error {
+	tmpl, err := template.New(filepath.Base(path)).Parse(tmplStr)
+	if err != nil {
+		return err
+	}
+	var buf bytes.Buffer
+	if err := tmpl.Execute(&buf, data); err != nil {
+		return err
+	}
+	return os.WriteFile(path, buf.Bytes(), 0644)
+}
+
+// EnsureExecutable ensures that the installation directory on the host
+// is executable by ensuring any noexec mounts are remounted with exec.
+//
+// hostInstallDir is the directory path as seen on the host, and hostRootPath
+// is the path under which the host root filesystem is visible to this
+// process; the mount flags are read from their concatenation. If the
+// filesystem is mounted noexec, the directory is bind-mounted onto itself and
+// that mount is remounted with exec, both via nsenter in the mount namespace
+// of PID 1. An error is returned if statfs or either mount command fails; the
+// underlying error is wrapped so callers can inspect it with errors.Is/As.
+func EnsureExecutable(hostInstallDir, hostRootPath string) error {
+	containerViewOfHostDir := filepath.Join(hostRootPath, hostInstallDir)
+	var stat unix.Statfs_t
+	if err := unix.Statfs(containerViewOfHostDir, &stat); err != nil {
+		return fmt.Errorf("failed to statfs %s: %w", containerViewOfHostDir, err)
+	}
+
+	if stat.Flags&unix.ST_NOEXEC == 0 {
+		log.V(2).Infof("Target installation directory %s is already executable.", hostInstallDir)
+		return nil
+	}
+
+	log.Infof("Target installation directory %s is mounted with noexec. Remounting as executable on the host.", hostInstallDir)
+
+	// Both commands run in PID 1's mount namespace, so the paths passed to
+	// mount are host paths rather than paths under hostRootPath.
+	cmd := exec.Command("nsenter", "-t", "1", "-m", "mount", "--bind", hostInstallDir, hostInstallDir)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to bind mount %s on host: %w, out: %s", hostInstallDir, err, string(out))
+	}
+
+	cmd = exec.Command("nsenter", "-t", "1", "-m", "mount", "-o", "remount,exec", hostInstallDir)
+	if out, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to remount %s as executable on host: %w, out: %s", hostInstallDir, err, string(out))
+	}
+
+	return nil
+}
+
+// SetupDaemons deploys systemd units for nvidia-persistenced and conditionally nvidia-gridd.
+//
+// Unit files are written to etc/systemd/system under hostRootPath. The
+// nvidia-gridd units are written only if bin/nvidia-gridd exists in the
+// install directory. Afterwards systemd is reloaded and the .path units are
+// started through chroot into hostRootPath.
+//
+// An error is returned only when the unit directory or a unit file cannot be
+// written. systemctl failures are best-effort: they are logged as warnings and
+// do not fail the call.
+func SetupDaemons(hostInstallDir, hostRootPath string) error {
+	sysdDir := filepath.Join(hostRootPath, "etc/systemd/system")
+	if err := os.MkdirAll(sysdDir, 0755); err != nil {
+		return fmt.Errorf("failed to create systemd directory: %w", err)
+	}
+
+	type unitDef struct {
+		name    string
+		content string
+	}
+	units := []unitDef{
+		{"nvidia-persistenced.path", persistencedPathTmpl},
+		{"nvidia-persistenced.service", persistencedServiceTmpl},
+	}
+	startUnits := []string{"nvidia-persistenced.path"}
+
+	// The presence of the nvidia-gridd binary decides whether the gridd units
+	// are deployed. Any stat failure is treated as "not present", but an
+	// unexpected one is logged so it is not silently mistaken for a non-GRID
+	// installation.
+	griddBin := filepath.Join(hostRootPath, hostInstallDir, "bin", "nvidia-gridd")
+	if _, err := os.Stat(griddBin); err == nil {
+		units = append(units,
+			unitDef{"nvidia-gridd.path", griddPathTmpl},
+			unitDef{"nvidia-gridd.service", griddServiceTmpl},
+		)
+		startUnits = append(startUnits, "nvidia-gridd.path")
+	} else if !os.IsNotExist(err) {
+		log.Warningf("failed to stat %s, skipping nvidia-gridd units: %v", griddBin, err)
+	}
+
+	data := sysdTemplateData{InstallDir: hostInstallDir}
+	for _, u := range units {
+		path := filepath.Join(sysdDir, u.name)
+		if err := writeTemplate(path, u.content, data); err != nil {
+			return fmt.Errorf("failed to write %s: %w", u.name, err)
+		}
+	}
+
+	cmd := exec.Command("chroot", hostRootPath, "systemctl", "daemon-reload")
+	if out, err := cmd.CombinedOutput(); err != nil {
+		log.Warningf("failed to daemon-reload: %v, out: %s", err, string(out))
+		return nil
+	}
+
+	allStarted := true
+	for _, unit := range startUnits {
+		cmd = exec.Command("chroot", hostRootPath, "systemctl", "start", unit)
+		if out, err := cmd.CombinedOutput(); err != nil {
+			log.Warningf("failed to start %s: %v, out: %s", unit, err, string(out))
+			allStarted = false
+		}
+	}
+	// Only report success when every unit actually started; otherwise the
+	// warnings above would be contradicted by the log.
+	if allStarted {
+		log.Infof("Successfully setup NVIDIA systemd daemons")
+	}
+
+	return nil
+}