blob: 74491bb0326b283d4ccb7cf829fe7d846845caf9 [file] [log] [blame]
// Package deviceinfo provides device information for cos-gpu-installer.
package deviceinfo
import (
"fmt"
"os/exec"
"slices"
"strconv"
"strings"
)
// GPUType identifies a GPU model, or the absence of a recognized one, as an
// integer enum.
type GPUType int
// The values below are assigned by iota in declaration order, so inserting or
// reordering entries changes the numeric value of every entry that follows.
const (
K80 GPUType = iota
P4
P100
V100
L4
T4
H100
H200
B200
GB200
// A100_40GB is spelled "NVIDIA_TESLA_A100" by String and ParseGPUType.
A100_40GB
A100_80GB
RTX_PRO_6000
// NO_GPU is what GetGPUTypeInfo returns (together with an error) when its
// lspci probe command fails.
NO_GPU
// Others is what GetGPUTypeInfo returns when the probe output matches none of
// the models it knows about.
Others
)
// AvailableGPUTypesList returns the GPU types supported on the given milestone.
//
// milestone must be a base-10 integer string; it is parsed with strconv.Atoi.
// P4, P100, V100, L4, T4, H100, H200, A100_40GB and A100_80GB are always
// returned. B200 and RTX_PRO_6000 are appended for milestones above 105, and
// GB200 for milestones above 113. A freshly allocated slice is returned on
// every call, so callers may modify it.
//
// If milestone is not a valid integer, the result is a nil slice and an error
// that wraps the underlying strconv error.
func AvailableGPUTypesList(milestone string) ([]GPUType, error) {
	milestoneInt, err := strconv.Atoi(milestone)
	if err != nil {
		// %w keeps the *strconv.NumError reachable through errors.Is / errors.As.
		return nil, fmt.Errorf("invalid milestone input: %w", err)
	}

	available := []GPUType{P4, P100, V100, L4, T4, H100, H200, A100_40GB, A100_80GB}
	if milestoneInt > 105 {
		available = append(available, B200, RTX_PRO_6000)
	}
	if milestoneInt > 113 {
		available = append(available, GB200)
	}
	return available, nil
}
// AllGPUTypeStrings lists the GPU type names that ParseGPUType accepts.
// ParseGPUType joins this slice, in this order, into the error message it
// returns for an unrecognized name.
var AllGPUTypeStrings = []string{
"NVIDIA_TESLA_K80",
"NVIDIA_TESLA_P4",
"NVIDIA_TESLA_P100",
"NVIDIA_TESLA_V100",
"NVIDIA_L4",
"NVIDIA_H100_80GB",
"NVIDIA_H200",
"NVIDIA_B200",
"NVIDIA_TESLA_A100",
"NVIDIA_A100_80GB",
"NVIDIA_TESLA_T4",
"NVIDIA_GB200",
"NVIDIA_RTX_PRO_6000",
}
// ParseGPUType maps a GPU type name onto its GPUType value.
//
// Surrounding whitespace and letter case are ignored when matching. A name
// that is not recognized yields an error whose message lists
// AllGPUTypeStrings; the accompanying GPUType is then 0 (numerically equal to
// K80), so callers must check the error before using the value.
func ParseGPUType(gpu string) (GPUType, error) {
	switch name := strings.ToUpper(strings.TrimSpace(gpu)); name {
	case "NVIDIA_TESLA_K80":
		return K80, nil
	case "NVIDIA_TESLA_P4":
		return P4, nil
	case "NVIDIA_TESLA_P100":
		return P100, nil
	case "NVIDIA_TESLA_V100":
		return V100, nil
	case "NVIDIA_L4":
		return L4, nil
	case "NVIDIA_TESLA_T4":
		return T4, nil
	case "NVIDIA_H100_80GB":
		return H100, nil
	case "NVIDIA_H200":
		return H200, nil
	case "NVIDIA_B200":
		return B200, nil
	case "NVIDIA_GB200":
		return GB200, nil
	case "NVIDIA_TESLA_A100":
		return A100_40GB, nil
	case "NVIDIA_A100_80GB":
		return A100_80GB, nil
	case "NVIDIA_RTX_PRO_6000":
		return RTX_PRO_6000, nil
	}
	// Nothing matched: report the full list of accepted names.
	return 0, fmt.Errorf("invalid GPU type string. Available GPU types are: %s", strings.Join(AllGPUTypeStrings, ", "))
}
// String returns the textual name of g. For the GPU models this is the same
// spelling ParseGPUType accepts; NO_GPU and Others render as "NO_GPU" and
// "OTHERS". Any value outside the declared constants renders as "UNKNOWN".
func (g GPUType) String() string {
	// Lookup table indexed by the enum value itself.
	names := [...]string{
		K80:          "NVIDIA_TESLA_K80",
		P4:           "NVIDIA_TESLA_P4",
		P100:         "NVIDIA_TESLA_P100",
		V100:         "NVIDIA_TESLA_V100",
		L4:           "NVIDIA_L4",
		T4:           "NVIDIA_TESLA_T4",
		H100:         "NVIDIA_H100_80GB",
		H200:         "NVIDIA_H200",
		B200:         "NVIDIA_B200",
		GB200:        "NVIDIA_GB200",
		A100_40GB:    "NVIDIA_TESLA_A100",
		A100_80GB:    "NVIDIA_A100_80GB",
		RTX_PRO_6000: "NVIDIA_RTX_PRO_6000",
		NO_GPU:       "NO_GPU",
		Others:       "OTHERS",
	}
	// Out-of-range values, and any slot left unnamed in the table, fall back
	// to "UNKNOWN".
	if g < 0 || int(g) >= len(names) || names[g] == "" {
		return "UNKNOWN"
	}
	return names[g]
}
// OpenSupported reports whether g has "open" support (presumably the
// open-source GPU driver; see the TODO below). It is false for NO_GPU, K80,
// P4, P100 and V100, and true for every other value, including Others.
//
// TODO(gshuoy): b/331317222 - Add the open source supported in the proto file.
func (g GPUType) OpenSupported() bool {
	unsupported := g == NO_GPU || g == K80 || g == P4 || g == P100 || g == V100
	return !unsupported
}
// SupportedArches returns the CPU architectures on which g is supported:
// only "aarch64" for GB200, both "x86_64" and "aarch64" for NO_GPU and
// Others, and only "x86_64" for everything else. A new slice is returned on
// each call.
//
// TODO(gshuoy): b/331317222 - Add the arch support in the proto file.
func (g GPUType) SupportedArches() []string {
	if g == GB200 {
		return []string{"aarch64"}
	}
	if g == NO_GPU || g == Others {
		return []string{"x86_64", "aarch64"}
	}
	return []string{"x86_64"}
}
// SupportsArch reports whether arch is one of the architectures returned by
// g.SupportedArches. The comparison ignores surrounding whitespace and letter
// case in arch.
func (g GPUType) SupportsArch(arch string) bool {
	want := strings.ToLower(strings.TrimSpace(arch))
	arches := g.SupportedArches()
	return slices.Index(arches, want) >= 0
}
// GetGPUTypeInfo detects the NVIDIA GPU model attached to this machine by
// running `lspci -nn | grep -i "nvidia"` through /bin/bash and classifying
// the output.
//
// If the command fails, NO_GPU is returned together with the command's error.
// NOTE(review): grep exits non-zero when nothing matches, so a machine with
// no NVIDIA device appears to be reported through this error path rather than
// as (NO_GPU, nil) - confirm callers expect that.
//
// If the command succeeds but no known model is recognized, Others is
// returned with a nil error.
//
// TODO(gshuoy): b/331317222 - add the pci id in the proto file.
func GetGPUTypeInfo() (GPUType, error) {
	const cmd = `lspci -nn | grep -i "nvidia"`
	outBytes, err := exec.Command("/bin/bash", "-c", cmd).Output()
	if err != nil {
		return NO_GPU, err
	}
	return gpuTypeFromLspci(string(outBytes)), nil
}

// gpuTypeFromLspci classifies `lspci -nn` output by searching it for known
// device names and PCI IDs, returning Others when nothing matches.
func gpuTypeFromLspci(out string) GPUType {
	// has reports whether out contains at least one of the given markers.
	has := func(markers ...string) bool {
		for _, m := range markers {
			if strings.Contains(out, m) {
				return true
			}
		}
		return false
	}

	// Cases are evaluated top to bottom and the first match wins, so the
	// order below is significant when the output lists more than one model.
	switch {
	case has("[Tesla K80]"):
		return K80
	case has("NVIDIA Corporation Device 15f8", "NVIDIA Corporation GP100GL", "[Tesla P100", "[10de:15f8]"):
		return P100
	case has("NVIDIA Corporation Device 1db1", "NVIDIA Corporation GV100GL", "[Tesla V100", "[10de:1db1]"):
		return V100
	case has("NVIDIA Corporation Device 1bb3", "NVIDIA Corporation GP104GL", "[Tesla P4", "[10de:1bb3]"):
		return P4
	case has("NVIDIA Corporation Device 27b8", "NVIDIA Corporation AD104GL [L4]", "[10de:27b8]"):
		return L4
	// The name marker needs the space before "[H100", matching the H200 and
	// B200 markers; without it the marker cannot match the "GH100 [H100 ...]"
	// device name.
	case has("NVIDIA Corporation Device 2330", "NVIDIA Corporation GH100 [H100", "[10de:2330]"):
		return H100
	case has("NVIDIA Corporation Device 20b0", "NVIDIA Corporation GA100 [A100 SXM4 40GB", "[10de:20b0]"):
		return A100_40GB
	case has("NVIDIA Corporation Device 20b2", "NVIDIA Corporation GA100 [A100 SXM4 80GB", "[10de:20b2]"):
		return A100_80GB
	case has("NVIDIA Corporation Device 1eb8", "[Tesla T4]", "[10de:1eb8]"):
		return T4
	case has("NVIDIA Corporation Device 2335", "NVIDIA Corporation GH100 [H200", "[10de:2335]"):
		return H200
	case has("NVIDIA Corporation Device 2901", "NVIDIA Corporation GB100 [B200", "[10de:2901]"):
		return B200
	case has("NVIDIA Corporation Device 2941", "NVIDIA Corporation GB200", "[10de:2941]"):
		return GB200
	case has("NVIDIA Corporation Device 2bb5", "[10de:2bb5]"):
		return RTX_PRO_6000
	default:
		return Others
	}
}