#cloud-config
#
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
write_files:
# GPU installer test script. It is written without the executable bit and is
# run explicitly through /bin/bash by gpu-install-test.service.
- path: /tmp/gpu-installer-test/test.sh
  # Quoted so the mode stays the string "0644" instead of being read as an
  # octal integer by YAML 1.1 parsers.
  permissions: "0644"
  owner: root
  content: |
    #!/bin/bash
    #
    # Pulls the COS GPU installer image, installs the GPU driver with it, and
    # then checks that the NVIDIA kernel modules are loaded and nvidia-smi runs.
    #
    # Output written to stdout:
    #   TestStatus: <line>  every line printed while the steps run (this
    #                       includes any "TestFail: ..." line)
    #   TestPass: ...       printed once, only when every step succeeded

    set -o errexit
    set -o pipefail

    readonly METADATA_ATTRIBUTES_URL="http://metadata.google.internal/computeMetadata/v1/instance/attributes"

    # Prints "TestFail: <message>" and exits the current shell with status 1.
    # Arguments:
    #   $@ - words of the failure message.
    fail() {
      # "$*" joins all arguments into a single word for echo.
      echo "TestFail: $*"
      exit 1
    }

    # Prints the value of one instance metadata attribute on stdout.
    # Arguments:
    #   $1 - attribute name.
    # Returns:
    #   curl's exit status. --fail makes an HTTP error (e.g. a missing
    #   attribute) a non-zero status instead of printing the error page as if
    #   it were the value.
    fetchMetadataAttribute() {
      curl --silent --show-error --fail \
        -H "Metadata-Flavor: Google" \
        "${METADATA_ATTRIBUTES_URL}/$1"
    }

    # Sets up the Docker environment and fetches metadata values necessary for
    # configuring the COS GPU installer.
    # Globals written:
    #   DOCKER_CONFIG (exported), BUILD_GCR, TAG
    setUp() {
      export DOCKER_CONFIG=/mnt/stateful_partition/docker-config
      mkdir -p "${DOCKER_CONFIG}" \
        || fail "Creating ${DOCKER_CONFIG} failed."
      if ! docker-credential-gcr configure-docker; then
        fail "Docker credentials configuration failed."
      fi

      # Fetching metadata values. Each failure is reported through fail() so
      # that it produces a TestFail line rather than a silent errexit abort.
      BUILD_GCR=$(fetchMetadataAttribute build-gcr) \
        || fail "Fetching metadata attribute 'build-gcr' failed."
      TAG=$(fetchMetadataAttribute tag) \
        || fail "Fetching metadata attribute 'tag' failed."
      if [[ -z "${BUILD_GCR}" || -z "${TAG}" ]]; then
        fail "Metadata attributes 'build-gcr' and 'tag' must not be empty."
      fi

      if ! docker pull "${BUILD_GCR}/cos-gpu-installer:${TAG}"; then
        fail "Pulling ${BUILD_GCR}/cos-gpu-installer:${TAG} failed."
      fi
    }

    # Installation of GPU driver, using the installer image pulled by setUp.
    # Globals read:
    #   BUILD_GCR, TAG
    installGPUDriver() {
      if ! COS_GPU_INSTALLER="${BUILD_GCR}/cos-gpu-installer:${TAG}" cos-extensions install gpu; then
        fail "GPU driver installation failed."
      fi
      echo "GPU driver installation succeeded."
    }

    # Test if NVIDIA kernel modules are loaded.
    testNvidiaKernelModules() {
      # grep must read all of lsmod's output: with `grep -q` it exits on the
      # first match, lsmod can then die from SIGPIPE, and pipefail would turn
      # a successful match into a failure.
      if ! lsmod | grep "nvidia" > /dev/null; then
        fail "NVIDIA kernel modules are not loaded."
      fi
      echo "NVIDIA kernel modules test passed."
    }

    # Test NVIDIA SMI.
    testNvidiaSMI() {
      if ! /var/lib/nvidia/bin/nvidia-smi; then
        fail "NVIDIA SMI failed"
      fi
      echo "NVIDIA SMI test passed"
    }

    # Runs every step in order; the first failing step ends the run.
    main() {
      setUp
      installGPUDriver
      testNvidiaKernelModules
      testNvidiaSMI
    }

    # main runs in the pipeline's subshell, so fail()'s exit ends that subshell
    # only. pipefail makes the pipeline report main's status and errexit then
    # stops the script here, so the TestPass line below is reached only when
    # main succeeded.
    main 2>&1 | sed "s/^/TestStatus: /"
    echo "TestPass: all tests passed"
# One-shot systemd unit that runs the GPU installer test script as root and
# sends its output to the serial console.
- path: /etc/systemd/system/gpu-install-test.service
  # Quoted so the mode stays the string "0644" instead of being read as an
  # octal integer by YAML 1.1 parsers.
  permissions: "0644"
  owner: root
  content: |
    [Unit]
    Description=GPU installer test
    Wants=network-online.target gcr-online.target docker.service
    After=network-online.target gcr-online.target docker.service

    [Service]
    Type=oneshot
    RemainAfterExit=yes
    User=root
    ExecStart=/bin/bash /tmp/gpu-installer-test/test.sh
    # stdout and stderr of the script both go to the serial port.
    StandardOutput=tty
    StandardError=tty
    TTYPath=/dev/ttyS0
# Reload systemd's unit files, then queue the start of
# gpu-install-test.service; --no-block makes systemctl return without waiting
# for the start job to finish.
runcmd:
- [systemctl, daemon-reload]
- [systemctl, --no-block, start, gpu-install-test.service]