| /* |
| Copyright 2014 The Kubernetes Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package resource |
| |
| import ( |
| "fmt" |
| "math" |
| "strconv" |
| "strings" |
| |
| v1 "k8s.io/api/core/v1" |
| "k8s.io/apimachinery/pkg/api/resource" |
| |
| podutil "k8s.io/kubernetes/pkg/api/v1/pod" |
| ) |
| |
| // PodResourcesOptions controls the behavior of PodRequests and PodLimits. |
| type PodResourcesOptions struct { |
	// Reuse, if provided, will be reused to accumulate resources and returned by the
	// PodRequests or PodLimits functions. All existing values in Reuse will be lost.
| Reuse v1.ResourceList |
| // InPlacePodVerticalScalingEnabled indicates that the in-place pod vertical scaling feature gate is enabled. |
| InPlacePodVerticalScalingEnabled bool |
| // ExcludeOverhead controls if pod overhead is excluded from the calculation. |
| ExcludeOverhead bool |
| // ContainerFn is called with the effective resources required for each container within the pod. |
| ContainerFn func(res v1.ResourceList, containerType podutil.ContainerType) |
	// NonMissingContainerRequests, if provided, replaces any missing container-level requests
	// for the specified resources with the given values. Requests that are explicitly set for
	// those resources, even to zero, are not modified.
| NonMissingContainerRequests v1.ResourceList |
| } |
| |
// PodRequests computes the pod requests per the PodResourcesOptions supplied.
// With the zero-value PodResourcesOptions, the returned requests include pod
// overhead. The computation is part of the API and must be reviewed as an API
// change.
| func PodRequests(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList { |
| // attempt to reuse the maps if passed, or allocate otherwise |
| reqs := reuseOrClearResourceList(opts.Reuse) |
| |
| var containerStatuses map[string]*v1.ContainerStatus |
| if opts.InPlacePodVerticalScalingEnabled { |
| containerStatuses = make(map[string]*v1.ContainerStatus, len(pod.Status.ContainerStatuses)) |
| for i := range pod.Status.ContainerStatuses { |
| containerStatuses[pod.Status.ContainerStatuses[i].Name] = &pod.Status.ContainerStatuses[i] |
| } |
| } |
| |
| for _, container := range pod.Spec.Containers { |
| containerReqs := container.Resources.Requests |
| if opts.InPlacePodVerticalScalingEnabled { |
| cs, found := containerStatuses[container.Name] |
| if found { |
				if pod.Status.Resize == v1.PodResizeStatusInfeasible {
					// The requested resize is infeasible: the allocated
					// resources are authoritative.
					containerReqs = cs.AllocatedResources.DeepCopy()
				} else {
					// A resize may be in flight: reserve the larger of the
					// desired and currently allocated values.
					containerReqs = max(container.Resources.Requests, cs.AllocatedResources)
				}
| } |
| } |
| |
| if len(opts.NonMissingContainerRequests) > 0 { |
| containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests) |
| } |
| |
| if opts.ContainerFn != nil { |
| opts.ContainerFn(containerReqs, podutil.Containers) |
| } |
| |
| addResourceList(reqs, containerReqs) |
| } |
| |
| restartableInitContainerReqs := v1.ResourceList{} |
| initContainerReqs := v1.ResourceList{} |
	// Init containers define a floor for pod resources: the pod must be able to
	// accommodate its most demanding initialization step.
	// Note: in-place resize is not allowed for init containers, so there is no
	// need to check the resize status here.
	//
	// Let's say `InitContainerUse(i)` is the resource requirement while the i-th
	// init container is initializing; then
	// `InitContainerUse(i) = sum(Resources of restartable init containers with index < i) + Resources of i-th init container`.
	//
	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#exposing-pod-resource-requirements for details.
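	// For example, a restartable init container s1 requesting 100m CPU followed
	// by a regular init container i1 requesting 200m CPU gives
	// InitContainerUse(i1) = 100m + 200m = 300m; maxResourceList below ensures
	// the pod request covers that peak.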
| for _, container := range pod.Spec.InitContainers { |
| containerReqs := container.Resources.Requests |
| if len(opts.NonMissingContainerRequests) > 0 { |
| containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests) |
| } |
| |
		if container.RestartPolicy != nil && *container.RestartPolicy == v1.ContainerRestartPolicyAlways {
			// Restartable init containers (sidecars) run for the life of the
			// pod, so add their requests to the cumulative container requests.
			addResourceList(reqs, containerReqs)

			// Track the cumulative restartable init container resources; they
			// keep running while subsequent init containers execute.
			addResourceList(restartableInitContainerReqs, containerReqs)
			containerReqs = restartableInitContainerReqs
		} else {
			// A regular init container runs alongside every restartable init
			// container started before it.
			tmp := v1.ResourceList{}
			addResourceList(tmp, containerReqs)
			addResourceList(tmp, restartableInitContainerReqs)
			containerReqs = tmp
		}
| |
| if opts.ContainerFn != nil { |
| opts.ContainerFn(containerReqs, podutil.InitContainers) |
| } |
| maxResourceList(initContainerReqs, containerReqs) |
| } |
| |
| maxResourceList(reqs, initContainerReqs) |
| |
| // Add overhead for running a pod to the sum of requests if requested: |
| if !opts.ExcludeOverhead && pod.Spec.Overhead != nil { |
| addResourceList(reqs, pod.Spec.Overhead) |
| } |
| |
| return reqs |
| } |
| |
// applyNonMissing returns a copy of the given resource list with any missing values filled in from nonMissing.
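// For example, reqs {cpu: 100m} with nonMissing {cpu: 1, memory: 1Gi} yields
// {cpu: 100m, memory: 1Gi}: the explicit cpu request is preserved and only the
// missing memory request is filled in.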
| func applyNonMissing(reqs v1.ResourceList, nonMissing v1.ResourceList) v1.ResourceList { |
| cp := v1.ResourceList{} |
| for k, v := range reqs { |
| cp[k] = v.DeepCopy() |
| } |
| |
	for k, v := range nonMissing {
		if _, found := reqs[k]; !found {
			// k is absent from cp as well, so cp[k] is the zero Quantity and
			// adding v effectively copies it in.
			rk := cp[k]
			rk.Add(v)
			cp[k] = rk
		}
	}
| return cp |
| } |
| |
// PodLimits computes the pod limits per the PodResourcesOptions supplied. With
// the zero-value PodResourcesOptions, the returned limits include pod overhead
// for any non-zero limits. The computation is part of the API and must be
// reviewed as an API change.
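//
// Illustrative sketch (assumes a *v1.Pod named pod is in scope):
//
//	limits := PodLimits(pod, PodResourcesOptions{ExcludeOverhead: true})
//	cpuLimit := limits[v1.ResourceCPU] // aggregate CPU limit, overhead excluded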
| func PodLimits(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList { |
| // attempt to reuse the maps if passed, or allocate otherwise |
| limits := reuseOrClearResourceList(opts.Reuse) |
| |
| for _, container := range pod.Spec.Containers { |
| if opts.ContainerFn != nil { |
| opts.ContainerFn(container.Resources.Limits, podutil.Containers) |
| } |
| addResourceList(limits, container.Resources.Limits) |
| } |
| |
| restartableInitContainerLimits := v1.ResourceList{} |
| initContainerLimits := v1.ResourceList{} |
	// Init containers define a floor for pod resources: the pod must be able to
	// accommodate its most demanding initialization step.
	//
	// Let's say `InitContainerUse(i)` is the resource requirement while the i-th
	// init container is initializing; then
	// `InitContainerUse(i) = sum(Resources of restartable init containers with index < i) + Resources of i-th init container`.
	//
	// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#exposing-pod-resource-requirements for details.
| for _, container := range pod.Spec.InitContainers { |
| containerLimits := container.Resources.Limits |
		// Is the init container marked as a restartable init container (sidecar)?
		if container.RestartPolicy != nil && *container.RestartPolicy == v1.ContainerRestartPolicyAlways {
			// Sidecars run for the life of the pod, so add their limits to the
			// cumulative container limits.
			addResourceList(limits, containerLimits)

			// Track the cumulative restartable init container resources; they
			// keep running while subsequent init containers execute.
			addResourceList(restartableInitContainerLimits, containerLimits)
			containerLimits = restartableInitContainerLimits
		} else {
			// A regular init container runs alongside every restartable init
			// container started before it.
			tmp := v1.ResourceList{}
			addResourceList(tmp, containerLimits)
			addResourceList(tmp, restartableInitContainerLimits)
			containerLimits = tmp
		}
| |
| if opts.ContainerFn != nil { |
| opts.ContainerFn(containerLimits, podutil.InitContainers) |
| } |
| maxResourceList(initContainerLimits, containerLimits) |
| } |
| |
| maxResourceList(limits, initContainerLimits) |
| |
| // Add overhead to non-zero limits if requested: |
| if !opts.ExcludeOverhead && pod.Spec.Overhead != nil { |
| for name, quantity := range pod.Spec.Overhead { |
| if value, ok := limits[name]; ok && !value.IsZero() { |
| value.Add(quantity) |
| limits[name] = value |
| } |
| } |
| } |
| |
| return limits |
| } |
| |
| // addResourceList adds the resources in newList to list. |
| func addResourceList(list, newList v1.ResourceList) { |
| for name, quantity := range newList { |
| if value, ok := list[name]; !ok { |
| list[name] = quantity.DeepCopy() |
| } else { |
| value.Add(quantity) |
| list[name] = value |
| } |
| } |
| } |
| |
// maxResourceList sets list[name] to the greater of list[name] and newList[name] for every resource in newList.
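// For example, list {cpu: 100m} with newList {cpu: 200m, memory: 1Gi} leaves
// list as {cpu: 200m, memory: 1Gi}.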
| func maxResourceList(list, newList v1.ResourceList) { |
| for name, quantity := range newList { |
| if value, ok := list[name]; !ok || quantity.Cmp(value) > 0 { |
| list[name] = quantity.DeepCopy() |
| } |
| } |
| } |
| |
| // max returns the result of max(a, b) for each named resource and is only used if we can't |
| // accumulate into an existing resource list |
| func max(a v1.ResourceList, b v1.ResourceList) v1.ResourceList { |
| result := v1.ResourceList{} |
| for key, value := range a { |
| if other, found := b[key]; found { |
| if value.Cmp(other) <= 0 { |
| result[key] = other.DeepCopy() |
| continue |
| } |
| } |
| result[key] = value.DeepCopy() |
| } |
| for key, value := range b { |
| if _, found := result[key]; !found { |
| result[key] = value.DeepCopy() |
| } |
| } |
| return result |
| } |
| |
| // reuseOrClearResourceList is a helper for avoiding excessive allocations of |
| // resource lists within the inner loop of resource calculations. |
| func reuseOrClearResourceList(reuse v1.ResourceList) v1.ResourceList { |
| if reuse == nil { |
| return make(v1.ResourceList, 4) |
| } |
| for k := range reuse { |
| delete(reuse, k) |
| } |
| return reuse |
| } |
| |
| // GetResourceRequestQuantity finds and returns the request quantity for a specific resource. |
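// For example, two containers requesting 200m and 300m of CPU plus one init
// container requesting 400m yield max(200m+300m, 400m) = 500m.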
| func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity { |
	// Memory and storage quantities are conventionally binary (Ki/Mi/Gi);
	// everything else, including CPU, defaults to decimal.
	requestQuantity := resource.Quantity{Format: resource.DecimalSI}
	switch resourceName {
	case v1.ResourceMemory, v1.ResourceStorage, v1.ResourceEphemeralStorage:
		requestQuantity = resource.Quantity{Format: resource.BinarySI}
	}
| |
| for _, container := range pod.Spec.Containers { |
| if rQuantity, ok := container.Resources.Requests[resourceName]; ok { |
| requestQuantity.Add(rQuantity) |
| } |
| } |
| |
| for _, container := range pod.Spec.InitContainers { |
| if rQuantity, ok := container.Resources.Requests[resourceName]; ok { |
| if requestQuantity.Cmp(rQuantity) < 0 { |
| requestQuantity = rQuantity.DeepCopy() |
| } |
| } |
| } |
| |
	// Add the overhead for running a pod to the total requests if the resource
	// total is non-zero.
| if pod.Spec.Overhead != nil { |
| if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() { |
| requestQuantity.Add(podOverhead) |
| } |
| } |
| |
| return requestQuantity |
| } |
| |
// GetResourceRequest finds and returns the request value for a specific
// resource. The CPU value is returned in millicores; all other resources are
// returned as their raw value.
func GetResourceRequest(pod *v1.Pod, resourceName v1.ResourceName) int64 {
	if resourceName == v1.ResourcePods {
		return 1
	}

	requestQuantity := GetResourceRequestQuantity(pod, resourceName)

	if resourceName == v1.ResourceCPU {
		return requestQuantity.MilliValue()
	}
| |
| return requestQuantity.Value() |
| } |
| |
// ExtractResourceValueByContainerName extracts the value of a resource
// for the container with the given name.
| func ExtractResourceValueByContainerName(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string) (string, error) { |
| container, err := findContainerInPod(pod, containerName) |
| if err != nil { |
| return "", err |
| } |
| return ExtractContainerResourceValue(fs, container) |
| } |
| |
// ExtractResourceValueByContainerNameAndNodeAllocatable extracts the value of
// a resource for the container with the given name, defaulting any unset
// limits from the node's allocatable resources.
| func ExtractResourceValueByContainerNameAndNodeAllocatable(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string, nodeAllocatable v1.ResourceList) (string, error) { |
| realContainer, err := findContainerInPod(pod, containerName) |
| if err != nil { |
| return "", err |
| } |
| |
| container := realContainer.DeepCopy() |
| |
| MergeContainerResourceLimits(container, nodeAllocatable) |
| |
| return ExtractContainerResourceValue(fs, container) |
| } |
| |
// ExtractContainerResourceValue extracts the value of a resource
// from an already resolved container.
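// For example, fs.Resource "limits.cpu" with a container CPU limit of 500m and
// fs.Divisor "1m" yields "500".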
| func ExtractContainerResourceValue(fs *v1.ResourceFieldSelector, container *v1.Container) (string, error) { |
	// Default the divisor to 1 when it is unset (the zero Quantity).
	divisor := resource.Quantity{}
	if divisor.Cmp(fs.Divisor) == 0 {
		divisor = resource.MustParse("1")
	} else {
		divisor = fs.Divisor
	}
| |
| switch fs.Resource { |
| case "limits.cpu": |
| return convertResourceCPUToString(container.Resources.Limits.Cpu(), divisor) |
| case "limits.memory": |
| return convertResourceMemoryToString(container.Resources.Limits.Memory(), divisor) |
| case "limits.ephemeral-storage": |
| return convertResourceEphemeralStorageToString(container.Resources.Limits.StorageEphemeral(), divisor) |
| case "requests.cpu": |
| return convertResourceCPUToString(container.Resources.Requests.Cpu(), divisor) |
| case "requests.memory": |
| return convertResourceMemoryToString(container.Resources.Requests.Memory(), divisor) |
| case "requests.ephemeral-storage": |
| return convertResourceEphemeralStorageToString(container.Resources.Requests.StorageEphemeral(), divisor) |
| } |
	// Handle standard resources with dynamic names, for example
	// requests.hugepages-<pageSize> or limits.hugepages-<pageSize>.
| if strings.HasPrefix(fs.Resource, "requests.") { |
| resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "requests.")) |
| if IsHugePageResourceName(resourceName) { |
| return convertResourceHugePagesToString(container.Resources.Requests.Name(resourceName, resource.BinarySI), divisor) |
| } |
| } |
| if strings.HasPrefix(fs.Resource, "limits.") { |
| resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "limits.")) |
| if IsHugePageResourceName(resourceName) { |
| return convertResourceHugePagesToString(container.Resources.Limits.Name(resourceName, resource.BinarySI), divisor) |
| } |
| } |
| return "", fmt.Errorf("unsupported container resource : %v", fs.Resource) |
| } |
| |
// convertResourceCPUToString converts the CPU value into units of the divisor
// and returns the ceiling of the result as a decimal string.
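// For example, a CPU value of 250m with divisor "1" yields "1", since
// ceil(250m/1000m) = 1.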
| func convertResourceCPUToString(cpu *resource.Quantity, divisor resource.Quantity) (string, error) { |
| c := int64(math.Ceil(float64(cpu.MilliValue()) / float64(divisor.MilliValue()))) |
| return strconv.FormatInt(c, 10), nil |
| } |
| |
// convertResourceMemoryToString converts the memory value into units of the
// divisor and returns the ceiling of the result as a decimal string.
| func convertResourceMemoryToString(memory *resource.Quantity, divisor resource.Quantity) (string, error) { |
| m := int64(math.Ceil(float64(memory.Value()) / float64(divisor.Value()))) |
| return strconv.FormatInt(m, 10), nil |
| } |
| |
// convertResourceHugePagesToString converts the hugepages value into units of
// the divisor and returns the ceiling of the result as a decimal string.
| func convertResourceHugePagesToString(hugePages *resource.Quantity, divisor resource.Quantity) (string, error) { |
| m := int64(math.Ceil(float64(hugePages.Value()) / float64(divisor.Value()))) |
| return strconv.FormatInt(m, 10), nil |
| } |
| |
// convertResourceEphemeralStorageToString converts the ephemeral storage value
// into units of the divisor and returns the ceiling of the result as a decimal
// string.
| func convertResourceEphemeralStorageToString(ephemeralStorage *resource.Quantity, divisor resource.Quantity) (string, error) { |
| m := int64(math.Ceil(float64(ephemeralStorage.Value()) / float64(divisor.Value()))) |
| return strconv.FormatInt(m, 10), nil |
| } |
| |
// findContainerInPod finds a container by name among the provided pod's
// containers and init containers.
| func findContainerInPod(pod *v1.Pod, containerName string) (*v1.Container, error) { |
| for _, container := range pod.Spec.Containers { |
| if container.Name == containerName { |
| return &container, nil |
| } |
| } |
| for _, container := range pod.Spec.InitContainers { |
| if container.Name == containerName { |
| return &container, nil |
| } |
| } |
| return nil, fmt.Errorf("container %s not found", containerName) |
| } |
| |
// MergeContainerResourceLimits checks whether a limit is set for the
// container's CPU, memory, and ephemeral storage, and fills in any missing or
// zero limit from the passed allocatable resource list.
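// For example, a container with no memory limit on a node whose allocatable
// memory is 4Gi ends up with a 4Gi memory limit.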
| func MergeContainerResourceLimits(container *v1.Container, |
| allocatable v1.ResourceList) { |
| if container.Resources.Limits == nil { |
| container.Resources.Limits = make(v1.ResourceList) |
| } |
| // NOTE: we exclude hugepages-* resources because hugepages are never overcommitted. |
| // This means that the container always has a limit specified. |
	for _, resourceName := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage} {
		if quantity, exists := container.Resources.Limits[resourceName]; !exists || quantity.IsZero() {
			if alloc, exists := allocatable[resourceName]; exists {
				container.Resources.Limits[resourceName] = alloc.DeepCopy()
			}
		}
	}
| } |
| |
| // IsHugePageResourceName returns true if the resource name has the huge page |
| // resource prefix. |
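// For example, "hugepages-2Mi" has the prefix, while "memory" does not.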
| func IsHugePageResourceName(name v1.ResourceName) bool { |
| return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix) |
| } |