| /* |
| # Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| */ |
| |
| package nvcaps |
| |
| import ( |
| "bufio" |
| "fmt" |
| "io" |
| "log" |
| "os" |
| "path/filepath" |
| "strconv" |
| "strings" |
| ) |
| |
// Paths below the nvidia driver's proc directory.
const (
	// nvidiaProcDriverPath is the proc directory of the nvidia driver.
	nvidiaProcDriverPath = "/proc/driver/nvidia"
	// nvidiaCapabilitiesPath is the capabilities directory under
	// nvidiaProcDriverPath; MigCap.ProcPath resolves caps relative to it.
	nvidiaCapabilitiesPath = nvidiaProcDriverPath + "/capabilities"
)

// Paths related to nvidia-caps.
const (
	// nvcapsProcDriverPath is the proc directory for nvidia-caps.
	nvcapsProcDriverPath = "/proc/driver/nvidia-caps"
	// nvcapsMigMinorsPath is the MIG minors file read by NewMigCaps.
	nvcapsMigMinorsPath = nvcapsProcDriverPath + "/mig-minors"
	// nvcapsDevicePath is the directory holding the nvidia-cap<minor> devices.
	nvcapsDevicePath = "/dev/nvidia-caps"
)
| |
type (
	// MigMinor is the minor number of a MIG device.
	MigMinor int

	// MigCap is the path to a MIG cap file.
	MigCap string

	// MigCaps associates each MIG cap file path with its MIG minor.
	MigCaps map[MigCap]MigMinor
)
| |
| // NewGPUInstanceCap creates a MigCap for the specified MIG GPU instance. |
| // A GPU instance is uniquely defined by the GPU minor number and GI instance ID. |
| func NewGPUInstanceCap(gpu, gi int) MigCap { |
| return MigCap(fmt.Sprintf("gpu%d/gi%d/access", gpu, gi)) |
| } |
| |
| // NewComputeInstanceCap creates a MigCap for the specified MIG Compute instance. |
| // A GPU instance is uniquely defined by the GPU minor number, GI instance ID, and CI instance ID. |
| func NewComputeInstanceCap(gpu, gi, ci int) MigCap { |
| return MigCap(fmt.Sprintf("gpu%d/gi%d/ci%d/access", gpu, gi, ci)) |
| } |
| |
| // GetCapDevicePath returns the path to the cap device for the specified cap. |
| // An error is returned if the cap is invalid. |
| func (m MigCaps) GetCapDevicePath(cap MigCap) (string, error) { |
| minor, exists := m[cap] |
| if !exists { |
| return "", fmt.Errorf("invalid MIG capability path %v", cap) |
| } |
| return minor.DevicePath(), nil |
| } |
| |
| // NewMigCaps creates a MigCaps structure based on the contents of the MIG minors file. |
| func NewMigCaps() (MigCaps, error) { |
| // Open nvcapsMigMinorsPath for walking. |
| // If the nvcapsMigMinorsPath does not exist, then we are not on a MIG |
| // capable machine, so there is nothing to do. |
| // The format of this file is discussed in: |
| // https://docs.nvidia.com/datacenter/tesla/mig-user-guide/index.html#unique_1576522674 |
| minorsFile, err := os.Open(nvcapsMigMinorsPath) |
| if os.IsNotExist(err) { |
| return nil, nil |
| } |
| if err != nil { |
| return nil, fmt.Errorf("error opening MIG minors file: %v", err) |
| } |
| defer minorsFile.Close() |
| |
| return processMinorsFile(minorsFile), nil |
| } |
| |
| func processMinorsFile(minorsFile io.Reader) MigCaps { |
| // Walk each line of nvcapsMigMinorsPath and construct a mapping of nvidia |
| // capabilities path to device minor for that capability |
| migCaps := make(MigCaps) |
| scanner := bufio.NewScanner(minorsFile) |
| for scanner.Scan() { |
| cap, minor, err := processMigMinorsLine(scanner.Text()) |
| if err != nil { |
| log.Printf("Skipping line in MIG minors file: %v", err) |
| continue |
| } |
| migCaps[cap] = minor |
| } |
| return migCaps |
| } |
| |
| func processMigMinorsLine(line string) (MigCap, MigMinor, error) { |
| parts := strings.Split(line, " ") |
| if len(parts) != 2 { |
| return "", 0, fmt.Errorf("error processing line: %v", line) |
| } |
| |
| migCap := MigCap(parts[0]) |
| if !migCap.isValid() { |
| return "", 0, fmt.Errorf("invalid MIG minors line: '%v'", line) |
| } |
| |
| minor, err := strconv.Atoi(parts[1]) |
| if err != nil { |
| return "", 0, fmt.Errorf("error reading MIG minor from '%v': %v", line, err) |
| } |
| |
| return migCap, MigMinor(minor), nil |
| } |
| |
| func (m MigCap) isValid() bool { |
| cap := string(m) |
| switch cap { |
| case "config", "monitor": |
| return true |
| default: |
| var gpu int |
| var gi int |
| var ci int |
| // Look for a CI access file |
| n, _ := fmt.Sscanf(cap, "gpu%d/gi%d/ci%d/access", &gpu, &gi, &ci) |
| if n == 3 { |
| return true |
| } |
| // Look for a GI access file |
| n, _ = fmt.Sscanf(cap, "gpu%d/gi%d/access %d", &gpu, &gi) |
| if n == 2 { |
| return true |
| } |
| } |
| return false |
| } |
| |
| // ProcPath returns the proc path associated with the MIG capability |
| func (m MigCap) ProcPath() string { |
| id := string(m) |
| |
| var path string |
| switch id { |
| case "config", "monitor": |
| path = "mig/" + id |
| default: |
| parts := strings.SplitN(id, "/", 2) |
| path = strings.Join([]string{parts[0], "mig", parts[1]}, "/") |
| } |
| return filepath.Join(nvidiaCapabilitiesPath, path) |
| } |
| |
| // DevicePath returns the path for the nvidia-caps device with the specified |
| // minor number |
| func (m MigMinor) DevicePath() string { |
| return fmt.Sprintf(nvcapsDevicePath+"/nvidia-cap%d", m) |
| } |