| /* |
| Copyright 2016 The Kubernetes Authors. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| */ |
| |
| package remote |
| |
| import ( |
| "context" |
| "errors" |
| "fmt" |
| "io" |
| "strings" |
| "time" |
| |
| "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" |
| "go.opentelemetry.io/otel/trace" |
| "google.golang.org/grpc" |
| "google.golang.org/grpc/backoff" |
| "google.golang.org/grpc/codes" |
| "google.golang.org/grpc/credentials/insecure" |
| "google.golang.org/grpc/status" |
| utilfeature "k8s.io/apiserver/pkg/util/feature" |
| "k8s.io/component-base/logs/logreduction" |
| tracing "k8s.io/component-base/tracing" |
| internalapi "k8s.io/cri-api/pkg/apis" |
| runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" |
| "k8s.io/klog/v2" |
| "k8s.io/kubernetes/pkg/features" |
| "k8s.io/kubernetes/pkg/kubelet/metrics" |
| "k8s.io/kubernetes/pkg/kubelet/util" |
| "k8s.io/kubernetes/pkg/probe/exec" |
| |
| utilexec "k8s.io/utils/exec" |
| ) |
| |
// remoteRuntimeService is a gRPC implementation of internalapi.RuntimeService.
type remoteRuntimeService struct {
	// timeout is the base deadline applied to CRI calls. Some methods extend
	// it, e.g. RunPodSandbox doubles it and StopContainer adds the grace period.
	timeout time.Duration
	// runtimeClient is the CRI v1 gRPC client every method forwards to.
	runtimeClient runtimeapi.RuntimeServiceClient
	// Cache last per-container error message to reduce log spam
	logReduction *logreduction.LogReduction
}
| |
const (
	// identicalErrorDelay is how often an identical error is reported again;
	// it is the interval passed to the log reducer.
	identicalErrorDelay = time.Minute

	// gRPC connection parameters used when dialing the runtime endpoint.
	minConnectionTimeout = 5 * time.Second
	baseBackoffDelay     = 100 * time.Millisecond
	maxBackoffDelay      = 3 * time.Second
)
| |
// CRIVersion is the type for valid Container Runtime Interface (CRI) API
// versions.
type CRIVersion string

// ErrContainerStatusNil indicates that the returned container status is nil.
// It is a sentinel error value, so callers can match it with errors.Is.
var ErrContainerStatusNil = errors.New("container status is nil")

const (
	// CRIVersionV1 references the v1 CRI API.
	CRIVersionV1 CRIVersion = "v1"
)
| |
| // NewRemoteRuntimeService creates a new internalapi.RuntimeService. |
| func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration, tp trace.TracerProvider) (internalapi.RuntimeService, error) { |
| klog.V(3).InfoS("Connecting to runtime service", "endpoint", endpoint) |
| addr, dialer, err := util.GetAddressAndDialer(endpoint) |
| if err != nil { |
| return nil, err |
| } |
| ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout) |
| defer cancel() |
| |
| var dialOpts []grpc.DialOption |
| dialOpts = append(dialOpts, |
| grpc.WithTransportCredentials(insecure.NewCredentials()), |
| grpc.WithContextDialer(dialer), |
| grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize))) |
| if utilfeature.DefaultFeatureGate.Enabled(features.KubeletTracing) { |
| tracingOpts := []otelgrpc.Option{ |
| otelgrpc.WithPropagators(tracing.Propagators()), |
| otelgrpc.WithTracerProvider(tp), |
| } |
| // Even if there is no TracerProvider, the otelgrpc still handles context propagation. |
| // See https://github.com/open-telemetry/opentelemetry-go/tree/main/example/passthrough |
| dialOpts = append(dialOpts, |
| grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor(tracingOpts...)), |
| grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor(tracingOpts...))) |
| } |
| |
| connParams := grpc.ConnectParams{ |
| Backoff: backoff.DefaultConfig, |
| } |
| connParams.MinConnectTimeout = minConnectionTimeout |
| connParams.Backoff.BaseDelay = baseBackoffDelay |
| connParams.Backoff.MaxDelay = maxBackoffDelay |
| dialOpts = append(dialOpts, |
| grpc.WithConnectParams(connParams), |
| ) |
| |
| conn, err := grpc.DialContext(ctx, addr, dialOpts...) |
| if err != nil { |
| klog.ErrorS(err, "Connect remote runtime failed", "address", addr) |
| return nil, err |
| } |
| |
| service := &remoteRuntimeService{ |
| timeout: connectionTimeout, |
| logReduction: logreduction.NewLogReduction(identicalErrorDelay), |
| } |
| |
| if err := service.validateServiceConnection(ctx, conn, endpoint); err != nil { |
| return nil, fmt.Errorf("validate service connection: %w", err) |
| } |
| |
| return service, nil |
| } |
| |
| // validateServiceConnection tries to connect to the remote runtime service by |
| // using the CRI v1 API version and fails if that's not possible. |
| func (r *remoteRuntimeService) validateServiceConnection(ctx context.Context, conn *grpc.ClientConn, endpoint string) error { |
| klog.V(4).InfoS("Validating the CRI v1 API runtime version") |
| r.runtimeClient = runtimeapi.NewRuntimeServiceClient(conn) |
| |
| if _, err := r.runtimeClient.Version(ctx, &runtimeapi.VersionRequest{}); err != nil { |
| return fmt.Errorf("validate CRI v1 runtime API for endpoint %q: %w", endpoint, err) |
| } |
| |
| klog.V(2).InfoS("Validated CRI v1 runtime API") |
| return nil |
| } |
| |
| // Version returns the runtime name, runtime version and runtime API version. |
| func (r *remoteRuntimeService) Version(ctx context.Context, apiVersion string) (*runtimeapi.VersionResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] Version", "apiVersion", apiVersion, "timeout", r.timeout) |
| |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.versionV1(ctx, apiVersion) |
| } |
| |
| func (r *remoteRuntimeService) versionV1(ctx context.Context, apiVersion string) (*runtimeapi.VersionResponse, error) { |
| typedVersion, err := r.runtimeClient.Version(ctx, &runtimeapi.VersionRequest{ |
| Version: apiVersion, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "Version from runtime service failed") |
| return nil, err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] Version Response", "apiVersion", typedVersion) |
| |
| if typedVersion.Version == "" || typedVersion.RuntimeName == "" || typedVersion.RuntimeApiVersion == "" || typedVersion.RuntimeVersion == "" { |
| return nil, fmt.Errorf("not all fields are set in VersionResponse (%q)", *typedVersion) |
| } |
| |
| return typedVersion, err |
| } |
| |
| // RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure |
| // the sandbox is in ready state. |
| func (r *remoteRuntimeService) RunPodSandbox(ctx context.Context, config *runtimeapi.PodSandboxConfig, runtimeHandler string) (string, error) { |
| // Use 2 times longer timeout for sandbox operation (4 mins by default) |
| // TODO: Make the pod sandbox timeout configurable. |
| timeout := r.timeout * 2 |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] RunPodSandbox", "config", config, "runtimeHandler", runtimeHandler, "timeout", timeout) |
| |
| ctx, cancel := context.WithTimeout(ctx, timeout) |
| defer cancel() |
| |
| resp, err := r.runtimeClient.RunPodSandbox(ctx, &runtimeapi.RunPodSandboxRequest{ |
| Config: config, |
| RuntimeHandler: runtimeHandler, |
| }) |
| |
| if err != nil { |
| klog.ErrorS(err, "RunPodSandbox from runtime service failed") |
| return "", err |
| } |
| |
| podSandboxID := resp.PodSandboxId |
| |
| if podSandboxID == "" { |
| errorMessage := fmt.Sprintf("PodSandboxId is not set for sandbox %q", config.Metadata) |
| err := errors.New(errorMessage) |
| klog.ErrorS(err, "RunPodSandbox failed") |
| return "", err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] RunPodSandbox Response", "podSandboxID", podSandboxID) |
| |
| return podSandboxID, nil |
| } |
| |
| // StopPodSandbox stops the sandbox. If there are any running containers in the |
| // sandbox, they should be forced to termination. |
| func (r *remoteRuntimeService) StopPodSandbox(ctx context.Context, podSandBoxID string) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] StopPodSandbox", "podSandboxID", podSandBoxID, "timeout", r.timeout) |
| |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| if _, err := r.runtimeClient.StopPodSandbox(ctx, &runtimeapi.StopPodSandboxRequest{ |
| PodSandboxId: podSandBoxID, |
| }); err != nil { |
| klog.ErrorS(err, "StopPodSandbox from runtime service failed", "podSandboxID", podSandBoxID) |
| return err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] StopPodSandbox Response", "podSandboxID", podSandBoxID) |
| |
| return nil |
| } |
| |
| // RemovePodSandbox removes the sandbox. If there are any containers in the |
| // sandbox, they should be forcibly removed. |
| func (r *remoteRuntimeService) RemovePodSandbox(ctx context.Context, podSandBoxID string) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] RemovePodSandbox", "podSandboxID", podSandBoxID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| if _, err := r.runtimeClient.RemovePodSandbox(ctx, &runtimeapi.RemovePodSandboxRequest{ |
| PodSandboxId: podSandBoxID, |
| }); err != nil { |
| klog.ErrorS(err, "RemovePodSandbox from runtime service failed", "podSandboxID", podSandBoxID) |
| return err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] RemovePodSandbox Response", "podSandboxID", podSandBoxID) |
| |
| return nil |
| } |
| |
| // PodSandboxStatus returns the status of the PodSandbox. |
| func (r *remoteRuntimeService) PodSandboxStatus(ctx context.Context, podSandBoxID string, verbose bool) (*runtimeapi.PodSandboxStatusResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] PodSandboxStatus", "podSandboxID", podSandBoxID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.podSandboxStatusV1(ctx, podSandBoxID, verbose) |
| } |
| |
| func (r *remoteRuntimeService) podSandboxStatusV1(ctx context.Context, podSandBoxID string, verbose bool) (*runtimeapi.PodSandboxStatusResponse, error) { |
| resp, err := r.runtimeClient.PodSandboxStatus(ctx, &runtimeapi.PodSandboxStatusRequest{ |
| PodSandboxId: podSandBoxID, |
| Verbose: verbose, |
| }) |
| if err != nil { |
| return nil, err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] PodSandboxStatus Response", "podSandboxID", podSandBoxID, "status", resp.Status) |
| |
| status := resp.Status |
| if resp.Status != nil { |
| if err := verifySandboxStatus(status); err != nil { |
| return nil, err |
| } |
| } |
| |
| return resp, nil |
| } |
| |
| // ListPodSandbox returns a list of PodSandboxes. |
| func (r *remoteRuntimeService) ListPodSandbox(ctx context.Context, filter *runtimeapi.PodSandboxFilter) ([]*runtimeapi.PodSandbox, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandbox", "filter", filter, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.listPodSandboxV1(ctx, filter) |
| } |
| |
| func (r *remoteRuntimeService) listPodSandboxV1(ctx context.Context, filter *runtimeapi.PodSandboxFilter) ([]*runtimeapi.PodSandbox, error) { |
| resp, err := r.runtimeClient.ListPodSandbox(ctx, &runtimeapi.ListPodSandboxRequest{ |
| Filter: filter, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "ListPodSandbox with filter from runtime service failed", "filter", filter) |
| return nil, err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandbox Response", "filter", filter, "items", resp.Items) |
| |
| return resp.Items, nil |
| } |
| |
| // CreateContainer creates a new container in the specified PodSandbox. |
| func (r *remoteRuntimeService) CreateContainer(ctx context.Context, podSandBoxID string, config *runtimeapi.ContainerConfig, sandboxConfig *runtimeapi.PodSandboxConfig) (string, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] CreateContainer", "podSandboxID", podSandBoxID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.createContainerV1(ctx, podSandBoxID, config, sandboxConfig) |
| } |
| |
| func (r *remoteRuntimeService) createContainerV1(ctx context.Context, podSandBoxID string, config *runtimeapi.ContainerConfig, sandboxConfig *runtimeapi.PodSandboxConfig) (string, error) { |
| resp, err := r.runtimeClient.CreateContainer(ctx, &runtimeapi.CreateContainerRequest{ |
| PodSandboxId: podSandBoxID, |
| Config: config, |
| SandboxConfig: sandboxConfig, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "CreateContainer in sandbox from runtime service failed", "podSandboxID", podSandBoxID) |
| return "", err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] CreateContainer", "podSandboxID", podSandBoxID, "containerID", resp.ContainerId) |
| if resp.ContainerId == "" { |
| errorMessage := fmt.Sprintf("ContainerId is not set for container %q", config.Metadata) |
| err := errors.New(errorMessage) |
| klog.ErrorS(err, "CreateContainer failed") |
| return "", err |
| } |
| |
| return resp.ContainerId, nil |
| } |
| |
| // StartContainer starts the container. |
| func (r *remoteRuntimeService) StartContainer(ctx context.Context, containerID string) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] StartContainer", "containerID", containerID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| if _, err := r.runtimeClient.StartContainer(ctx, &runtimeapi.StartContainerRequest{ |
| ContainerId: containerID, |
| }); err != nil { |
| klog.ErrorS(err, "StartContainer from runtime service failed", "containerID", containerID) |
| return err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] StartContainer Response", "containerID", containerID) |
| |
| return nil |
| } |
| |
| // StopContainer stops a running container with a grace period (i.e., timeout). |
| func (r *remoteRuntimeService) StopContainer(ctx context.Context, containerID string, timeout int64) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] StopContainer", "containerID", containerID, "timeout", timeout) |
| // Use timeout + default timeout (2 minutes) as timeout to leave extra time |
| // for SIGKILL container and request latency. |
| t := r.timeout + time.Duration(timeout)*time.Second |
| ctx, cancel := context.WithTimeout(ctx, t) |
| defer cancel() |
| |
| r.logReduction.ClearID(containerID) |
| |
| if _, err := r.runtimeClient.StopContainer(ctx, &runtimeapi.StopContainerRequest{ |
| ContainerId: containerID, |
| Timeout: timeout, |
| }); err != nil { |
| klog.ErrorS(err, "StopContainer from runtime service failed", "containerID", containerID) |
| return err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] StopContainer Response", "containerID", containerID) |
| |
| return nil |
| } |
| |
| // RemoveContainer removes the container. If the container is running, the container |
| // should be forced to removal. |
| func (r *remoteRuntimeService) RemoveContainer(ctx context.Context, containerID string) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] RemoveContainer", "containerID", containerID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| r.logReduction.ClearID(containerID) |
| if _, err := r.runtimeClient.RemoveContainer(ctx, &runtimeapi.RemoveContainerRequest{ |
| ContainerId: containerID, |
| }); err != nil { |
| klog.ErrorS(err, "RemoveContainer from runtime service failed", "containerID", containerID) |
| return err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] RemoveContainer Response", "containerID", containerID) |
| |
| return nil |
| } |
| |
| // ListContainers lists containers by filters. |
| func (r *remoteRuntimeService) ListContainers(ctx context.Context, filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ListContainers", "filter", filter, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.listContainersV1(ctx, filter) |
| } |
| |
| func (r *remoteRuntimeService) listContainersV1(ctx context.Context, filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) { |
| resp, err := r.runtimeClient.ListContainers(ctx, &runtimeapi.ListContainersRequest{ |
| Filter: filter, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "ListContainers with filter from runtime service failed", "filter", filter) |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] ListContainers Response", "filter", filter, "containers", resp.Containers) |
| |
| return resp.Containers, nil |
| } |
| |
| // ContainerStatus returns the container status. |
| func (r *remoteRuntimeService) ContainerStatus(ctx context.Context, containerID string, verbose bool) (*runtimeapi.ContainerStatusResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ContainerStatus", "containerID", containerID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.containerStatusV1(ctx, containerID, verbose) |
| } |
| |
| func (r *remoteRuntimeService) containerStatusV1(ctx context.Context, containerID string, verbose bool) (*runtimeapi.ContainerStatusResponse, error) { |
| resp, err := r.runtimeClient.ContainerStatus(ctx, &runtimeapi.ContainerStatusRequest{ |
| ContainerId: containerID, |
| Verbose: verbose, |
| }) |
| if err != nil { |
| // Don't spam the log with endless messages about the same failure. |
| if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) { |
| klog.ErrorS(err, "ContainerStatus from runtime service failed", "containerID", containerID) |
| } |
| return nil, err |
| } |
| r.logReduction.ClearID(containerID) |
| klog.V(10).InfoS("[RemoteRuntimeService] ContainerStatus Response", "containerID", containerID, "status", resp.Status) |
| |
| status := resp.Status |
| if resp.Status != nil { |
| if err := verifyContainerStatus(status); err != nil { |
| klog.ErrorS(err, "verify ContainerStatus failed", "containerID", containerID) |
| return nil, err |
| } |
| } |
| |
| return resp, nil |
| } |
| |
| // UpdateContainerResources updates a containers resource config |
| func (r *remoteRuntimeService) UpdateContainerResources(ctx context.Context, containerID string, resources *runtimeapi.ContainerResources) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] UpdateContainerResources", "containerID", containerID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| if _, err := r.runtimeClient.UpdateContainerResources(ctx, &runtimeapi.UpdateContainerResourcesRequest{ |
| ContainerId: containerID, |
| Linux: resources.GetLinux(), |
| Windows: resources.GetWindows(), |
| }); err != nil { |
| klog.ErrorS(err, "UpdateContainerResources from runtime service failed", "containerID", containerID) |
| return err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] UpdateContainerResources Response", "containerID", containerID) |
| |
| return nil |
| } |
| |
| // ExecSync executes a command in the container, and returns the stdout output. |
| // If command exits with a non-zero exit code, an error is returned. |
| func (r *remoteRuntimeService) ExecSync(ctx context.Context, containerID string, cmd []string, timeout time.Duration) (stdout []byte, stderr []byte, err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ExecSync", "containerID", containerID, "timeout", timeout) |
| // Do not set timeout when timeout is 0. |
| var cancel context.CancelFunc |
| if timeout != 0 { |
| // Use timeout + default timeout (2 minutes) as timeout to leave some time for |
| // the runtime to do cleanup. |
| ctx, cancel = context.WithTimeout(ctx, r.timeout+timeout) |
| } else { |
| ctx, cancel = context.WithCancel(ctx) |
| } |
| defer cancel() |
| |
| return r.execSyncV1(ctx, containerID, cmd, timeout) |
| } |
| |
| func (r *remoteRuntimeService) execSyncV1(ctx context.Context, containerID string, cmd []string, timeout time.Duration) (stdout []byte, stderr []byte, err error) { |
| timeoutSeconds := int64(timeout.Seconds()) |
| req := &runtimeapi.ExecSyncRequest{ |
| ContainerId: containerID, |
| Cmd: cmd, |
| Timeout: timeoutSeconds, |
| } |
| resp, err := r.runtimeClient.ExecSync(ctx, req) |
| if err != nil { |
| klog.ErrorS(err, "ExecSync cmd from runtime service failed", "containerID", containerID, "cmd", cmd) |
| |
| // interpret DeadlineExceeded gRPC errors as timedout probes |
| if status.Code(err) == codes.DeadlineExceeded { |
| err = exec.NewTimeoutError(fmt.Errorf("command %q timed out", strings.Join(cmd, " ")), timeout) |
| } |
| |
| return nil, nil, err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] ExecSync Response", "containerID", containerID, "exitCode", resp.ExitCode) |
| err = nil |
| if resp.ExitCode != 0 { |
| err = utilexec.CodeExitError{ |
| Err: fmt.Errorf("command '%s' exited with %d: %s", strings.Join(cmd, " "), resp.ExitCode, resp.Stderr), |
| Code: int(resp.ExitCode), |
| } |
| } |
| |
| return resp.Stdout, resp.Stderr, err |
| } |
| |
| // Exec prepares a streaming endpoint to execute a command in the container, and returns the address. |
| func (r *remoteRuntimeService) Exec(ctx context.Context, req *runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] Exec", "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.execV1(ctx, req) |
| } |
| |
| func (r *remoteRuntimeService) execV1(ctx context.Context, req *runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error) { |
| resp, err := r.runtimeClient.Exec(ctx, req) |
| if err != nil { |
| klog.ErrorS(err, "Exec cmd from runtime service failed", "containerID", req.ContainerId, "cmd", req.Cmd) |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] Exec Response") |
| |
| if resp.Url == "" { |
| errorMessage := "URL is not set" |
| err := errors.New(errorMessage) |
| klog.ErrorS(err, "Exec failed") |
| return nil, err |
| } |
| |
| return resp, nil |
| } |
| |
| // Attach prepares a streaming endpoint to attach to a running container, and returns the address. |
| func (r *remoteRuntimeService) Attach(ctx context.Context, req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] Attach", "containerID", req.ContainerId, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.attachV1(ctx, req) |
| } |
| |
| func (r *remoteRuntimeService) attachV1(ctx context.Context, req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error) { |
| resp, err := r.runtimeClient.Attach(ctx, req) |
| if err != nil { |
| klog.ErrorS(err, "Attach container from runtime service failed", "containerID", req.ContainerId) |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] Attach Response", "containerID", req.ContainerId) |
| |
| if resp.Url == "" { |
| errorMessage := "URL is not set" |
| err := errors.New(errorMessage) |
| klog.ErrorS(err, "Attach failed") |
| return nil, err |
| } |
| return resp, nil |
| } |
| |
| // PortForward prepares a streaming endpoint to forward ports from a PodSandbox, and returns the address. |
| func (r *remoteRuntimeService) PortForward(ctx context.Context, req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] PortForward", "podSandboxID", req.PodSandboxId, "port", req.Port, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.portForwardV1(ctx, req) |
| } |
| |
| func (r *remoteRuntimeService) portForwardV1(ctx context.Context, req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) { |
| resp, err := r.runtimeClient.PortForward(ctx, req) |
| if err != nil { |
| klog.ErrorS(err, "PortForward from runtime service failed", "podSandboxID", req.PodSandboxId) |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] PortForward Response", "podSandboxID", req.PodSandboxId) |
| |
| if resp.Url == "" { |
| errorMessage := "URL is not set" |
| err := errors.New(errorMessage) |
| klog.ErrorS(err, "PortForward failed") |
| return nil, err |
| } |
| |
| return resp, nil |
| } |
| |
| // UpdateRuntimeConfig updates the config of a runtime service. The only |
| // update payload currently supported is the pod CIDR assigned to a node, |
| // and the runtime service just proxies it down to the network plugin. |
| func (r *remoteRuntimeService) UpdateRuntimeConfig(ctx context.Context, runtimeConfig *runtimeapi.RuntimeConfig) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] UpdateRuntimeConfig", "runtimeConfig", runtimeConfig, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| // Response doesn't contain anything of interest. This translates to an |
| // Event notification to the network plugin, which can't fail, so we're |
| // really looking to surface destination unreachable. |
| if _, err := r.runtimeClient.UpdateRuntimeConfig(ctx, &runtimeapi.UpdateRuntimeConfigRequest{ |
| RuntimeConfig: runtimeConfig, |
| }); err != nil { |
| return err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] UpdateRuntimeConfig Response", "runtimeConfig", runtimeConfig) |
| |
| return nil |
| } |
| |
| // Status returns the status of the runtime. |
| func (r *remoteRuntimeService) Status(ctx context.Context, verbose bool) (*runtimeapi.StatusResponse, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] Status", "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.statusV1(ctx, verbose) |
| } |
| |
| func (r *remoteRuntimeService) statusV1(ctx context.Context, verbose bool) (*runtimeapi.StatusResponse, error) { |
| resp, err := r.runtimeClient.Status(ctx, &runtimeapi.StatusRequest{ |
| Verbose: verbose, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "Status from runtime service failed") |
| return nil, err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] Status Response", "status", resp.Status) |
| |
| if resp.Status == nil || len(resp.Status.Conditions) < 2 { |
| errorMessage := "RuntimeReady or NetworkReady condition are not set" |
| err := errors.New(errorMessage) |
| klog.ErrorS(err, "Status failed") |
| return nil, err |
| } |
| |
| return resp, nil |
| } |
| |
| // ContainerStats returns the stats of the container. |
| func (r *remoteRuntimeService) ContainerStats(ctx context.Context, containerID string) (*runtimeapi.ContainerStats, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ContainerStats", "containerID", containerID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.containerStatsV1(ctx, containerID) |
| } |
| |
| func (r *remoteRuntimeService) containerStatsV1(ctx context.Context, containerID string) (*runtimeapi.ContainerStats, error) { |
| resp, err := r.runtimeClient.ContainerStats(ctx, &runtimeapi.ContainerStatsRequest{ |
| ContainerId: containerID, |
| }) |
| if err != nil { |
| if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) { |
| klog.ErrorS(err, "ContainerStats from runtime service failed", "containerID", containerID) |
| } |
| return nil, err |
| } |
| r.logReduction.ClearID(containerID) |
| klog.V(10).InfoS("[RemoteRuntimeService] ContainerStats Response", "containerID", containerID, "stats", resp.GetStats()) |
| |
| return resp.GetStats(), nil |
| } |
| |
| // ListContainerStats returns the list of ContainerStats given the filter. |
| func (r *remoteRuntimeService) ListContainerStats(ctx context.Context, filter *runtimeapi.ContainerStatsFilter) ([]*runtimeapi.ContainerStats, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ListContainerStats", "filter", filter) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.listContainerStatsV1(ctx, filter) |
| } |
| |
| func (r *remoteRuntimeService) listContainerStatsV1(ctx context.Context, filter *runtimeapi.ContainerStatsFilter) ([]*runtimeapi.ContainerStats, error) { |
| resp, err := r.runtimeClient.ListContainerStats(ctx, &runtimeapi.ListContainerStatsRequest{ |
| Filter: filter, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "ListContainerStats with filter from runtime service failed", "filter", filter) |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] ListContainerStats Response", "filter", filter, "stats", resp.GetStats()) |
| |
| return resp.GetStats(), nil |
| } |
| |
| // PodSandboxStats returns the stats of the pod. |
| func (r *remoteRuntimeService) PodSandboxStats(ctx context.Context, podSandboxID string) (*runtimeapi.PodSandboxStats, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] PodSandboxStats", "podSandboxID", podSandboxID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.podSandboxStatsV1(ctx, podSandboxID) |
| } |
| |
| func (r *remoteRuntimeService) podSandboxStatsV1(ctx context.Context, podSandboxID string) (*runtimeapi.PodSandboxStats, error) { |
| resp, err := r.runtimeClient.PodSandboxStats(ctx, &runtimeapi.PodSandboxStatsRequest{ |
| PodSandboxId: podSandboxID, |
| }) |
| if err != nil { |
| if r.logReduction.ShouldMessageBePrinted(err.Error(), podSandboxID) { |
| klog.ErrorS(err, "PodSandbox from runtime service failed", "podSandboxID", podSandboxID) |
| } |
| return nil, err |
| } |
| r.logReduction.ClearID(podSandboxID) |
| klog.V(10).InfoS("[RemoteRuntimeService] PodSandbox Response", "podSandboxID", podSandboxID, "stats", resp.GetStats()) |
| |
| return resp.GetStats(), nil |
| } |
| |
| // ListPodSandboxStats returns the list of pod sandbox stats given the filter |
| func (r *remoteRuntimeService) ListPodSandboxStats(ctx context.Context, filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxStats", "filter", filter) |
| // Set timeout, because runtimes are able to cache disk stats results |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| return r.listPodSandboxStatsV1(ctx, filter) |
| } |
| |
| func (r *remoteRuntimeService) listPodSandboxStatsV1(ctx context.Context, filter *runtimeapi.PodSandboxStatsFilter) ([]*runtimeapi.PodSandboxStats, error) { |
| resp, err := r.runtimeClient.ListPodSandboxStats(ctx, &runtimeapi.ListPodSandboxStatsRequest{ |
| Filter: filter, |
| }) |
| if err != nil { |
| klog.ErrorS(err, "ListPodSandboxStats with filter from runtime service failed", "filter", filter) |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxStats Response", "filter", filter, "stats", resp.GetStats()) |
| |
| return resp.GetStats(), nil |
| } |
| |
| // ReopenContainerLog reopens the container log file. |
| func (r *remoteRuntimeService) ReopenContainerLog(ctx context.Context, containerID string) (err error) { |
| klog.V(10).InfoS("[RemoteRuntimeService] ReopenContainerLog", "containerID", containerID, "timeout", r.timeout) |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| if _, err := r.runtimeClient.ReopenContainerLog(ctx, &runtimeapi.ReopenContainerLogRequest{ContainerId: containerID}); err != nil { |
| klog.ErrorS(err, "ReopenContainerLog from runtime service failed", "containerID", containerID) |
| return err |
| } |
| |
| klog.V(10).InfoS("[RemoteRuntimeService] ReopenContainerLog Response", "containerID", containerID) |
| return nil |
| } |
| |
| // CheckpointContainer triggers a checkpoint of the given CheckpointContainerRequest |
| func (r *remoteRuntimeService) CheckpointContainer(ctx context.Context, options *runtimeapi.CheckpointContainerRequest) error { |
| klog.V(10).InfoS( |
| "[RemoteRuntimeService] CheckpointContainer", |
| "options", |
| options, |
| ) |
| if options == nil { |
| return errors.New("CheckpointContainer requires non-nil CheckpointRestoreOptions parameter") |
| } |
| if options.Timeout < 0 { |
| return errors.New("CheckpointContainer requires the timeout value to be > 0") |
| } |
| |
| ctx, cancel := func(ctx context.Context) (context.Context, context.CancelFunc) { |
| defaultTimeout := int64(r.timeout / time.Second) |
| if options.Timeout > defaultTimeout { |
| // The user requested a specific timeout, let's use that if it |
| // is larger than the CRI default. |
| return context.WithTimeout(ctx, time.Duration(options.Timeout)*time.Second) |
| } |
| // If the user requested a timeout less than the |
| // CRI default, let's use the CRI default. |
| options.Timeout = defaultTimeout |
| return context.WithTimeout(ctx, r.timeout) |
| }(ctx) |
| defer cancel() |
| |
| _, err := r.runtimeClient.CheckpointContainer( |
| ctx, |
| options, |
| ) |
| |
| if err != nil { |
| klog.ErrorS( |
| err, |
| "CheckpointContainer from runtime service failed", |
| "containerID", |
| options.ContainerId, |
| ) |
| return err |
| } |
| klog.V(10).InfoS( |
| "[RemoteRuntimeService] CheckpointContainer Response", |
| "containerID", |
| options.ContainerId, |
| ) |
| |
| return nil |
| } |
| |
| func (r *remoteRuntimeService) GetContainerEvents(containerEventsCh chan *runtimeapi.ContainerEventResponse) error { |
| containerEventsStreamingClient, err := r.runtimeClient.GetContainerEvents(context.Background(), &runtimeapi.GetEventsRequest{}) |
| if err != nil { |
| klog.ErrorS(err, "GetContainerEvents failed to get streaming client") |
| return err |
| } |
| |
| // The connection is successfully established and we have a streaming client ready for use. |
| metrics.EventedPLEGConn.Inc() |
| |
| for { |
| resp, err := containerEventsStreamingClient.Recv() |
| if err == io.EOF { |
| klog.ErrorS(err, "container events stream is closed") |
| return err |
| } |
| if err != nil { |
| klog.ErrorS(err, "failed to receive streaming container event") |
| return err |
| } |
| if resp != nil { |
| containerEventsCh <- resp |
| klog.V(4).InfoS("container event received", "resp", resp) |
| } |
| } |
| } |
| |
| // ListMetricDescriptors gets the descriptors for the metrics that will be returned in ListPodSandboxMetrics. |
| func (r *remoteRuntimeService) ListMetricDescriptors(ctx context.Context) ([]*runtimeapi.MetricDescriptor, error) { |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| resp, err := r.runtimeClient.ListMetricDescriptors(ctx, &runtimeapi.ListMetricDescriptorsRequest{}) |
| if err != nil { |
| klog.ErrorS(err, "ListMetricDescriptors from runtime service failed") |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] ListMetricDescriptors Response", "stats", resp.GetDescriptors()) |
| |
| return resp.GetDescriptors(), nil |
| } |
| |
| // ListPodSandboxMetrics retrieves the metrics for all pod sandboxes. |
| func (r *remoteRuntimeService) ListPodSandboxMetrics(ctx context.Context) ([]*runtimeapi.PodSandboxMetrics, error) { |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| resp, err := r.runtimeClient.ListPodSandboxMetrics(ctx, &runtimeapi.ListPodSandboxMetricsRequest{}) |
| if err != nil { |
| klog.ErrorS(err, "ListPodSandboxMetrics from runtime service failed") |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] ListPodSandboxMetrics Response", "stats", resp.GetPodMetrics()) |
| |
| return resp.GetPodMetrics(), nil |
| } |
| |
| // RuntimeConfig returns the configuration information of the runtime. |
| func (r *remoteRuntimeService) RuntimeConfig(ctx context.Context) (*runtimeapi.RuntimeConfigResponse, error) { |
| ctx, cancel := context.WithTimeout(ctx, r.timeout) |
| defer cancel() |
| |
| resp, err := r.runtimeClient.RuntimeConfig(ctx, &runtimeapi.RuntimeConfigRequest{}) |
| if err != nil { |
| klog.ErrorS(err, "RuntimeConfig from runtime service failed") |
| return nil, err |
| } |
| klog.V(10).InfoS("[RemoteRuntimeService] RuntimeConfigResponse", "linuxConfig", resp.GetLinux()) |
| |
| return resp, nil |
| } |