| /* |
| Copyright The containerd Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package nvidia |
| |
| import ( |
| "context" |
| "fmt" |
| "os" |
| "os/exec" |
| "strconv" |
| "strings" |
| |
| "github.com/containerd/containerd/v2/core/containers" |
| "github.com/containerd/containerd/v2/pkg/oci" |
| specs "github.com/opencontainers/runtime-spec/specs-go" |
| ) |
| |
| // NvidiaCLI is the path to the Nvidia helper binary |
| const NvidiaCLI = "nvidia-container-cli" |
| |
| // Capability specifies capabilities for the gpu inside the container |
| // Detailed explanation of options can be found: |
| // https://github.com/nvidia/nvidia-container-runtime#supported-driver-capabilities |
| type Capability string |
| |
| const ( |
| // Compute capability |
| Compute Capability = "compute" |
| // Compat32 capability |
| Compat32 Capability = "compat32" |
| // Graphics capability |
| Graphics Capability = "graphics" |
| // Utility capability |
| Utility Capability = "utility" |
| // Video capability |
| Video Capability = "video" |
| // Display capability |
| Display Capability = "display" |
| ) |
| |
| // AllCaps returns the complete list of supported Nvidia capabilities. |
| func AllCaps() []Capability { |
| return []Capability{ |
| Compute, |
| Compat32, |
| Graphics, |
| Utility, |
| Video, |
| Display, |
| } |
| } |
| |
| // WithGPUs adds NVIDIA gpu support to a container |
| func WithGPUs(opts ...Opts) oci.SpecOpts { |
| return func(_ context.Context, _ oci.Client, _ *containers.Container, s *specs.Spec) error { |
| c := &config{} |
| for _, o := range opts { |
| if err := o(c); err != nil { |
| return err |
| } |
| } |
| if c.OCIHookPath == "" { |
| path, err := exec.LookPath("containerd") |
| if err != nil { |
| return err |
| } |
| c.OCIHookPath = path |
| } |
| nvidiaPath, err := exec.LookPath(NvidiaCLI) |
| if err != nil { |
| return err |
| } |
| if s.Hooks == nil { |
| s.Hooks = &specs.Hooks{} |
| } |
| s.Hooks.CreateRuntime = append(s.Hooks.CreateRuntime, specs.Hook{ |
| Path: c.OCIHookPath, |
| Args: append([]string{ |
| "containerd", |
| "oci-hook", |
| "--", |
| nvidiaPath, |
| // ensures the required kernel modules are properly loaded |
| "--load-kmods", |
| }, c.args()...), |
| Env: os.Environ(), |
| }) |
| return nil |
| } |
| } |
| |
| type config struct { |
| Devices []string |
| Capabilities []Capability |
| LoadKmods bool |
| LDCache string |
| LDConfig string |
| Requirements []string |
| OCIHookPath string |
| NoCgroups bool |
| } |
| |
| func (c *config) args() []string { |
| var args []string |
| |
| if c.LoadKmods { |
| args = append(args, "--load-kmods") |
| } |
| if c.LDCache != "" { |
| args = append(args, fmt.Sprintf("--ldcache=%s", c.LDCache)) |
| } |
| args = append(args, |
| "configure", |
| ) |
| if len(c.Devices) > 0 { |
| args = append(args, fmt.Sprintf("--device=%s", strings.Join(c.Devices, ","))) |
| } |
| for _, c := range c.Capabilities { |
| args = append(args, fmt.Sprintf("--%s", c)) |
| } |
| if c.LDConfig != "" { |
| args = append(args, fmt.Sprintf("--ldconfig=%s", c.LDConfig)) |
| } |
| for _, r := range c.Requirements { |
| args = append(args, fmt.Sprintf("--require=%s", r)) |
| } |
| if c.NoCgroups { |
| args = append(args, "--no-cgroups") |
| } |
| args = append(args, "--pid={{pid}}", "{{rootfs}}") |
| return args |
| } |
| |
| // Opts are options for configuring gpu support |
| type Opts func(*config) error |
| |
| // WithDevices adds the provided device indexes to the container |
| func WithDevices(ids ...int) Opts { |
| return func(c *config) error { |
| for _, i := range ids { |
| c.Devices = append(c.Devices, strconv.Itoa(i)) |
| } |
| return nil |
| } |
| } |
| |
| // WithDeviceUUIDs adds the specific device UUID to the container |
| func WithDeviceUUIDs(uuids ...string) Opts { |
| return func(c *config) error { |
| c.Devices = append(c.Devices, uuids...) |
| return nil |
| } |
| } |
| |
| // WithAllDevices adds all gpus to the container |
| func WithAllDevices(c *config) error { |
| c.Devices = []string{"all"} |
| return nil |
| } |
| |
| // WithAllCapabilities adds all capabilities to the container for the gpus |
| func WithAllCapabilities(c *config) error { |
| c.Capabilities = AllCaps() |
| return nil |
| } |
| |
| // WithCapabilities adds the specified capabilities to the container for the gpus |
| func WithCapabilities(caps ...Capability) Opts { |
| return func(c *config) error { |
| c.Capabilities = append(c.Capabilities, caps...) |
| return nil |
| } |
| } |
| |
| // WithRequiredCUDAVersion sets the required cuda version |
| func WithRequiredCUDAVersion(major, minor int) Opts { |
| return func(c *config) error { |
| c.Requirements = append(c.Requirements, fmt.Sprintf("cuda>=%d.%d", major, minor)) |
| return nil |
| } |
| } |
| |
| // WithOCIHookPath sets the hook path for the binary |
| func WithOCIHookPath(path string) Opts { |
| return func(c *config) error { |
| c.OCIHookPath = path |
| return nil |
| } |
| } |
| |
| // WithLookupOCIHookPath sets the hook path for the binary via a binary name |
| func WithLookupOCIHookPath(name string) Opts { |
| return func(c *config) error { |
| path, err := exec.LookPath(name) |
| if err != nil { |
| return err |
| } |
| c.OCIHookPath = path |
| return nil |
| } |
| } |
| |
| // WithNoCgroups passes --no-cgroups option to nvidia-container-cli. |
| func WithNoCgroups(c *config) error { |
| c.NoCgroups = true |
| return nil |
| } |