| /* |
| Copyright 2021 The Kubernetes Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package job |
| |
| import ( |
| "fmt" |
| |
| batch "k8s.io/api/batch/v1" |
| v1 "k8s.io/api/core/v1" |
| "k8s.io/apiserver/pkg/util/feature" |
| "k8s.io/kubernetes/pkg/features" |
| ) |
| |
| // matchPodFailurePolicy returns information about matching a given failed pod |
| // against the pod failure policy rules. The information is represented as an |
| // - optional job failure message (present in case the pod matched a 'FailJob' rule), |
| // - a boolean indicating if the failure should be counted towards backoffLimit |
| // (and backoffLimitPerIndex if specified). It should not be counted |
| // if the pod matched an 'Ignore' rule, |
| // - a pointer to the matched pod failure policy action. |
| func matchPodFailurePolicy(podFailurePolicy *batch.PodFailurePolicy, failedPod *v1.Pod) (*string, bool, *batch.PodFailurePolicyAction) { |
| if podFailurePolicy == nil { |
| return nil, true, nil |
| } |
| ignore := batch.PodFailurePolicyActionIgnore |
| failJob := batch.PodFailurePolicyActionFailJob |
| failIndex := batch.PodFailurePolicyActionFailIndex |
| count := batch.PodFailurePolicyActionCount |
| for index, podFailurePolicyRule := range podFailurePolicy.Rules { |
| if podFailurePolicyRule.OnExitCodes != nil { |
| if containerStatus := matchOnExitCodes(&failedPod.Status, podFailurePolicyRule.OnExitCodes); containerStatus != nil { |
| switch podFailurePolicyRule.Action { |
| case batch.PodFailurePolicyActionIgnore: |
| return nil, false, &ignore |
| case batch.PodFailurePolicyActionFailIndex: |
| if feature.DefaultFeatureGate.Enabled(features.JobBackoffLimitPerIndex) { |
| return nil, true, &failIndex |
| } |
| case batch.PodFailurePolicyActionCount: |
| return nil, true, &count |
| case batch.PodFailurePolicyActionFailJob: |
| msg := fmt.Sprintf("Container %s for pod %s/%s failed with exit code %v matching %v rule at index %d", |
| containerStatus.Name, failedPod.Namespace, failedPod.Name, containerStatus.State.Terminated.ExitCode, podFailurePolicyRule.Action, index) |
| return &msg, true, &failJob |
| } |
| } |
| } else if podFailurePolicyRule.OnPodConditions != nil { |
| if podCondition := matchOnPodConditions(&failedPod.Status, podFailurePolicyRule.OnPodConditions); podCondition != nil { |
| switch podFailurePolicyRule.Action { |
| case batch.PodFailurePolicyActionIgnore: |
| return nil, false, &ignore |
| case batch.PodFailurePolicyActionFailIndex: |
| if feature.DefaultFeatureGate.Enabled(features.JobBackoffLimitPerIndex) { |
| return nil, true, &failIndex |
| } |
| case batch.PodFailurePolicyActionCount: |
| return nil, true, &count |
| case batch.PodFailurePolicyActionFailJob: |
| msg := fmt.Sprintf("Pod %s/%s has condition %v matching %v rule at index %d", |
| failedPod.Namespace, failedPod.Name, podCondition.Type, podFailurePolicyRule.Action, index) |
| return &msg, true, &failJob |
| } |
| } |
| } |
| } |
| return nil, true, nil |
| } |
| |
| // matchOnExitCodes returns a terminated container status that matches the error code requirement, if any exists. |
| // If the returned status is non-nil, it has a non-nil Terminated field. |
| func matchOnExitCodes(podStatus *v1.PodStatus, requirement *batch.PodFailurePolicyOnExitCodesRequirement) *v1.ContainerStatus { |
| if containerStatus := getMatchingContainerFromList(podStatus.ContainerStatuses, requirement); containerStatus != nil { |
| return containerStatus |
| } |
| return getMatchingContainerFromList(podStatus.InitContainerStatuses, requirement) |
| } |
| |
| func matchOnPodConditions(podStatus *v1.PodStatus, requirement []batch.PodFailurePolicyOnPodConditionsPattern) *v1.PodCondition { |
| for _, podCondition := range podStatus.Conditions { |
| for _, pattern := range requirement { |
| if podCondition.Type == pattern.Type && podCondition.Status == pattern.Status { |
| return &podCondition |
| } |
| } |
| } |
| return nil |
| } |
| |
| // getMatchingContainerFromList returns the first terminated container status in the list that matches the error code requirement, or nil if none match. |
| // If the returned status is non-nil, it has a non-nil Terminated field |
| func getMatchingContainerFromList(containerStatuses []v1.ContainerStatus, requirement *batch.PodFailurePolicyOnExitCodesRequirement) *v1.ContainerStatus { |
| for _, containerStatus := range containerStatuses { |
| if containerStatus.State.Terminated == nil { |
| // This container is still be terminating. There is no exit code to match. |
| continue |
| } |
| if requirement.ContainerName == nil || *requirement.ContainerName == containerStatus.Name { |
| if containerStatus.State.Terminated.ExitCode != 0 { |
| if isOnExitCodesOperatorMatching(containerStatus.State.Terminated.ExitCode, requirement) { |
| return &containerStatus |
| } |
| } |
| } |
| } |
| return nil |
| } |
| |
| func isOnExitCodesOperatorMatching(exitCode int32, requirement *batch.PodFailurePolicyOnExitCodesRequirement) bool { |
| switch requirement.Operator { |
| case batch.PodFailurePolicyOnExitCodesOpIn: |
| for _, value := range requirement.Values { |
| if value == exitCode { |
| return true |
| } |
| } |
| return false |
| case batch.PodFailurePolicyOnExitCodesOpNotIn: |
| for _, value := range requirement.Values { |
| if value == exitCode { |
| return false |
| } |
| } |
| return true |
| default: |
| return false |
| } |
| } |