| - name: version_info |
| namespace: etcd |
| help: Etcd server's binary version |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - binary_version |
| - name: certificate_manager_client_ttl_seconds |
| subsystem: kubelet |
| help: Gauge of the TTL (time-to-live) of the Kubelet's client certificate. The value |
| is in seconds until certificate expiry (negative if already expired). If client |
| certificate is invalid or unused, the value will be +INF. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: addresses_skipped_per_sync |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of addresses skipped on each Endpoints sync due to being invalid or |
| exceeding MaxEndpointsPerSubset |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - name: changes |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of EndpointSlice changes |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: desired_endpoint_slices |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of EndpointSlices that would exist with perfect endpoint allocation |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: endpoints_added_per_sync |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of endpoints added on each Endpoints sync |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - name: endpoints_desired |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of endpoints desired |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: endpoints_removed_per_sync |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of endpoints removed on each Endpoints sync |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - name: endpoints_sync_duration |
| subsystem: endpoint_slice_mirroring_controller |
| help: Duration of syncEndpoints() in seconds |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: endpoints_updated_per_sync |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of endpoints updated on each Endpoints sync |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - name: num_endpoint_slices |
| subsystem: endpoint_slice_mirroring_controller |
| help: Number of EndpointSlices |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: sync_duration_seconds |
| subsystem: root_ca_cert_publisher |
| help: Duration in seconds of namespace syncs in root ca cert publisher. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: sync_total |
| subsystem: root_ca_cert_publisher |
| help: Number of namespace syncs happened in root ca cert publisher. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name: job_creation_skew_duration_seconds |
| subsystem: cronjob_controller |
| help: Time between when a cronjob is scheduled to be run, and when the corresponding |
| job is created |
| type: Histogram |
| stabilityLevel: STABLE |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - name: resources_sync_error_total |
| subsystem: garbagecollector_controller |
| help: Number of garbage collector resources sync errors |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: metric_computation_duration_seconds |
| subsystem: horizontal_pod_autoscaler_controller |
| help: The time(seconds) that the HPA controller takes to calculate one metric. The |
| label 'action' should be either 'scale_down', 'scale_up', or 'none'. The label |
| 'error' should be either 'spec', 'internal', or 'none'. The label 'metric_type' |
| corresponds to HPA.spec.metrics[*].type |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - action |
| - error |
| - metric_type |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: metric_computation_total |
| subsystem: horizontal_pod_autoscaler_controller |
| help: Number of metric computations. The label 'action' should be either 'scale_down', |
| 'scale_up', or 'none'. Also, the label 'error' should be either 'spec', 'internal', |
| or 'none'. The label 'metric_type' corresponds to HPA.spec.metrics[*].type |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - action |
| - error |
| - metric_type |
| - name: reconciliation_duration_seconds |
| subsystem: horizontal_pod_autoscaler_controller |
| help: The time(seconds) that the HPA controller takes to reconcile once. The label |
| 'action' should be either 'scale_down', 'scale_up', or 'none'. Also, the label |
| 'error' should be either 'spec', 'internal', or 'none'. Note that if both spec |
| and internal errors happen during a reconciliation, the first one to occur is |
| reported in `error` label. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - action |
| - error |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: reconciliations_total |
| subsystem: horizontal_pod_autoscaler_controller |
| help: Number of reconciliations of HPA controller. The label 'action' should be |
| either 'scale_down', 'scale_up', or 'none'. Also, the label 'error' should be |
| either 'spec', 'internal', or 'none'. Note that if both spec and internal errors |
| happen during a reconciliation, the first one to occur is reported in `error` |
| label. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - action |
| - error |
| - name: job_finished_indexes_total |
| subsystem: job_controller |
| help: "`The number of finished indexes. Possible values for the\n\t\t\tstatus label |
| are: \"succeeded\", \"failed\". Possible values for the\n\t\t\tbackoffLimit label |
| are: \"perIndex\" and \"global\"`" |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - backoffLimit |
| - status |
| - name: job_pods_creation_total |
| subsystem: job_controller |
| help: |- |
| `The number of Pods created by the Job controller labelled with a reason for the Pod creation. |
| This metric also distinguishes between Pods created using different PodReplacementPolicy settings. |
| Possible values of the "reason" label are: |
| "new", "recreate_terminating_or_failed", "recreate_failed". |
| Possible values of the "status" label are: |
| "succeeded", "failed".` |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - reason |
| - status |
| - name: pod_failures_handled_by_failure_policy_total |
| subsystem: job_controller |
| help: "`The number of failed Pods handled by failure policy with\n\t\t\trespect |
| to the failure policy action applied based on the matched\n\t\t\trule. Possible |
| values of the action label correspond to the\n\t\t\tpossible values for the failure |
| policy rule action, which are:\n\t\t\t\"FailJob\", \"Ignore\" and \"Count\".`" |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - action |
| - name: terminated_pods_tracking_finalizer_total |
| subsystem: job_controller |
| help: |- |
| `The number of terminated pods (phase=Failed|Succeeded) |
| that have the finalizer batch.kubernetes.io/job-tracking |
| The event label can be "add" or "delete".` |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - event |
| - name: unhealthy_nodes_in_zone |
| subsystem: node_collector |
| help: Gauge measuring number of not Ready Nodes per zone. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - zone |
| - name: update_all_nodes_health_duration_seconds |
| subsystem: node_collector |
| help: Duration in seconds for NodeController to update the health of all nodes. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.01 |
| - 0.04 |
| - 0.16 |
| - 0.64 |
| - 2.56 |
| - 10.24 |
| - 40.96 |
| - 163.84 |
| - name: update_node_health_duration_seconds |
| subsystem: node_collector |
| help: Duration in seconds for NodeController to update the health of a single node. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.001 |
| - 0.004 |
| - 0.016 |
| - 0.064 |
| - 0.256 |
| - 1.024 |
| - 4.096 |
| - 16.384 |
| - name: zone_health |
| subsystem: node_collector |
| help: Gauge measuring percentage of healthy nodes per zone. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - zone |
| - name: zone_size |
| subsystem: node_collector |
| help: Gauge measuring number of registered Nodes per zone. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - zone |
| - name: cidrset_allocation_tries_per_request |
| subsystem: node_ipam_controller |
| help: Number of CIDR allocation tries per request |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - clusterCIDR |
| buckets: |
| - 1 |
| - 5 |
| - 25 |
| - 125 |
| - 625 |
| - name: cidrset_cidrs_allocations_total |
| subsystem: node_ipam_controller |
| help: Counter measuring total number of CIDR allocations. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - clusterCIDR |
| - name: cidrset_cidrs_releases_total |
| subsystem: node_ipam_controller |
| help: Counter measuring total number of CIDR releases. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - clusterCIDR |
| - name: cidrset_usage_cidrs |
| subsystem: node_ipam_controller |
| help: Gauge measuring percentage of allocated CIDRs. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - clusterCIDR |
| - name: cirdset_max_cidrs |
| subsystem: node_ipam_controller |
| help: Maximum number of CIDRs that can be allocated. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - clusterCIDR |
| - name: force_delete_pod_errors_total |
| subsystem: pod_gc_collector |
| help: Number of errors encountered when forcefully deleting the pods since the Pod |
| GC Controller started. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - reason |
| - name: force_delete_pods_total |
| subsystem: pod_gc_collector |
| help: Number of pods that are being forcefully deleted since the Pod GC Controller |
| started. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - reason |
| - name: sorting_deletion_age_ratio |
| subsystem: replicaset_controller |
| help: The ratio of chosen deleted pod's ages to the current youngest pod's age (at |
| the time). Should be <2. The intent of this metric is to measure the rough efficacy |
| of the LogarithmicScaleDown feature gate's effect on the sorting (and deletion) |
| of pods when a replicaset scales down. This only considers Ready pods when calculating |
| and reporting. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - name: create_attempts_total |
| subsystem: resourceclaim_controller |
| help: Number of ResourceClaims creation requests |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: create_failures_total |
| subsystem: resourceclaim_controller |
| help: Number of ResourceClaims creation request failures |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: pod_deletion_duration_seconds |
| subsystem: taint_eviction_controller |
| help: Latency, in seconds, between the time when a taint effect has been activated |
| for the Pod and its deletion via TaintEvictionController. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 10 |
| - 30 |
| - 60 |
| - 120 |
| - 180 |
| - 240 |
| - name: pod_deletions_total |
| subsystem: taint_eviction_controller |
| help: Total number of Pods deleted by TaintEvictionController since its start. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: job_pods_finished_total |
| subsystem: job_controller |
| help: The number of finished Pods that are fully tracked |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - completion_mode |
| - result |
| - name: job_sync_duration_seconds |
| subsystem: job_controller |
| help: The time it took to sync a job |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - action |
| - completion_mode |
| - result |
| buckets: |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - 32.768 |
| - 65.536 |
| - name: job_syncs_total |
| subsystem: job_controller |
| help: The number of job syncs |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - action |
| - completion_mode |
| - result |
| - name: jobs_finished_total |
| subsystem: job_controller |
| help: The number of finished jobs |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - completion_mode |
| - reason |
| - result |
| - name: evictions_total |
| subsystem: node_collector |
| help: Number of Node evictions that happened since current instance of NodeController |
| started. |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - zone |
| - name: attachdetach_controller_forced_detaches |
| subsystem: attach_detach_controller |
| help: Number of times the A/D Controller performed a forced detach |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - reason |
| - name: attachdetach_controller_total_volumes |
| help: Number of volumes in A/D Controller |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - plugin_name |
| - state |
| - name: create_failures_total |
| subsystem: ephemeral_volume_controller |
| help: Number of PersistentVolumeClaims creation request failures |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: create_total |
| subsystem: ephemeral_volume_controller |
| help: Number of PersistentVolumeClaims creation requests |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: client_expiration_renew_errors |
| subsystem: certificate_manager |
| namespace: kubelet |
| help: Counter of certificate renewal errors. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: certificate_manager_server_rotation_seconds |
| subsystem: kubelet |
| help: Histogram of the number of seconds the previous certificate lived before being |
| rotated. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 60 |
| - 3600 |
| - 14400 |
| - 86400 |
| - 604800 |
| - 2.592e+06 |
| - 7.776e+06 |
| - 1.5552e+07 |
| - 3.1104e+07 |
| - 1.24416e+08 |
| - name: certificate_manager_server_ttl_seconds |
| subsystem: kubelet |
| help: Gauge of the shortest TTL (time-to-live) of the Kubelet's serving certificate. |
| The value is in seconds until certificate expiry (negative if already expired). |
| If serving certificate is invalid or unused, the value will be +INF. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: credential_provider_plugin_duration |
| subsystem: kubelet |
| help: Duration of execution in seconds for credential provider plugin |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - plugin_name |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: credential_provider_plugin_errors |
| subsystem: kubelet |
| help: Number of errors from credential provider plugin |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - plugin_name |
| - name: server_expiration_renew_errors |
| subsystem: kubelet |
| help: Counter of certificate renewal errors. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: pv_collector_bound_pv_count |
| help: Gauge measuring number of persistent volume currently bound |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - storage_class |
| - name: pv_collector_bound_pvc_count |
| help: Gauge measuring number of persistent volume claim currently bound |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - name: pv_collector_total_pv_count |
| help: Gauge measuring total number of persistent volumes |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - plugin_name |
| - volume_mode |
| - name: pv_collector_unbound_pv_count |
| help: Gauge measuring number of persistent volume currently unbound |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - storage_class |
| - name: pv_collector_unbound_pvc_count |
| help: Gauge measuring number of persistent volume claim currently unbound |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - name: retroactive_storageclass_errors_total |
| help: Total number of failed retroactive StorageClass assignments to persistent |
| volume claim |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: retroactive_storageclass_total |
| help: Total number of retroactive StorageClass assignments to persistent volume |
| claim |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: storage_count_attachable_volumes_in_use |
| help: Measure number of volumes in use |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - node |
| - volume_plugin |
| - name: job_deletion_duration_seconds |
| subsystem: ttl_after_finished_controller |
| help: The time it took to delete the job since it became eligible for deletion |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.8 |
| - 1.6 |
| - 3.2 |
| - 6.4 |
| - 12.8 |
| - 25.6 |
| - 51.2 |
| - 102.4 |
| - 204.8 |
| - 409.6 |
| - 819.2 |
| - name: volume_operation_total_errors |
| help: Total volume operation errors |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation_name |
| - plugin_name |
| - name: container_swap_usage_bytes |
| help: Current amount of the container swap usage in bytes. Reported only on non-windows |
| systems |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - container |
| - pod |
| - namespace |
| - name: active_pods |
| subsystem: kubelet |
| help: The number of pods the kubelet considers active and which are being considered |
| when admitting new pods. static is true if the pod is not from the apiserver. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - static |
| - name: cgroup_manager_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds for cgroup manager operations. Broken down by method. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation_type |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: kubelet_container_log_filesystem_used_bytes |
| help: Bytes used by the container's logs on the filesystem. |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - uid |
| - namespace |
| - pod |
| - container |
| - name: containers_per_pod_count |
| subsystem: kubelet |
| help: The number of containers per pod. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - name: cpu_manager_pinning_errors_total |
| subsystem: kubelet |
| help: The number of cpu core allocations which required pinning and failed. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: cpu_manager_pinning_requests_total |
| subsystem: kubelet |
| help: The number of cpu core allocations which required pinning. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: desired_pods |
| subsystem: kubelet |
| help: The number of pods the kubelet is being instructed to run. static is true |
| if the pod is not from the apiserver. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - static |
| - name: device_plugin_alloc_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds to serve a device plugin Allocation request. Broken down |
| by resource name. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - resource_name |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: device_plugin_registration_total |
| subsystem: kubelet |
| help: Cumulative number of device plugin registrations. Broken down by resource |
| name. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource_name |
| - name: evented_pleg_connection_error_count |
| subsystem: kubelet |
| help: The number of errors encountered during the establishment of streaming connection |
| with the CRI runtime. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: evented_pleg_connection_latency_seconds |
| subsystem: kubelet |
| help: The latency of streaming connection with the CRI runtime, measured in seconds. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: evented_pleg_connection_success_count |
| subsystem: kubelet |
| help: The number of times a streaming client was obtained to receive CRI Events. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: eviction_stats_age_seconds |
| subsystem: kubelet |
| help: Time between when stats are collected, and when pod is evicted based on those |
| stats by eviction signal |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - eviction_signal |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: evictions |
| subsystem: kubelet |
| help: Cumulative number of pod evictions by eviction signal |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - eviction_signal |
| - name: graceful_shutdown_end_time_seconds |
| subsystem: kubelet |
| help: Last graceful shutdown end time since unix epoch in seconds |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: graceful_shutdown_start_time_seconds |
| subsystem: kubelet |
| help: Last graceful shutdown start time since unix epoch in seconds |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: image_garbage_collected_total |
| subsystem: kubelet |
| help: Total number of images garbage collected by the kubelet, whether through disk |
| usage or image age. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: lifecycle_handler_http_fallbacks_total |
| subsystem: kubelet |
| help: The number of times lifecycle handlers successfully fell back to http from |
| https. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: managed_ephemeral_containers |
| subsystem: kubelet |
| help: Current number of ephemeral containers in pods managed by this kubelet. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: mirror_pods |
| subsystem: kubelet |
| help: The number of mirror pods the kubelet will try to create (one per admitted |
| static pod) |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: node_name |
| subsystem: kubelet |
| help: The node's name. The count is always 1. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - node |
| - name: node_startup_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of node startup in total. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: node_startup_post_registration_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of node startup after registration. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: node_startup_pre_kubelet_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of node startup before kubelet starts. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: node_startup_pre_registration_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of node startup before registration. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: node_startup_registration_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of node startup during registration. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: orphan_pod_cleaned_volumes |
| subsystem: kubelet |
| help: The total number of orphaned Pods whose volumes were cleaned in the last periodic |
| sweep. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: orphan_pod_cleaned_volumes_errors |
| subsystem: kubelet |
| help: The number of orphaned Pods whose volumes failed to be cleaned in the last |
| periodic sweep. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: orphaned_runtime_pods_total |
| subsystem: kubelet |
| help: Number of pods that have been detected in the container runtime without being |
| already known to the pod worker. This typically indicates the kubelet was restarted |
| while a pod was force deleted in the API or in the local configuration, which |
| is unusual. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: pleg_discard_events |
| subsystem: kubelet |
| help: The number of discard events in PLEG. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: pleg_last_seen_seconds |
| subsystem: kubelet |
| help: Timestamp in seconds when PLEG was last seen active. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: pleg_relist_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds for relisting pods in PLEG. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: pleg_relist_interval_seconds |
| subsystem: kubelet |
| help: Interval in seconds between relisting in PLEG. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: pod_resources_endpoint_errors_get |
| subsystem: kubelet |
| help: Number of requests to the PodResource Get endpoint which returned error. Broken |
| down by server api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_resources_endpoint_errors_get_allocatable |
| subsystem: kubelet |
| help: Number of requests to the PodResource GetAllocatableResources endpoint which |
| returned error. Broken down by server api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_resources_endpoint_errors_list |
| subsystem: kubelet |
| help: Number of requests to the PodResource List endpoint which returned error. |
| Broken down by server api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_resources_endpoint_requests_get |
| subsystem: kubelet |
| help: Number of requests to the PodResource Get endpoint. Broken down by server |
| api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_resources_endpoint_requests_get_allocatable |
| subsystem: kubelet |
| help: Number of requests to the PodResource GetAllocatableResources endpoint. Broken |
| down by server api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_resources_endpoint_requests_list |
| subsystem: kubelet |
| help: Number of requests to the PodResource List endpoint. Broken down by server |
| api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_resources_endpoint_requests_total |
| subsystem: kubelet |
| help: Cumulative number of requests to the PodResource endpoint. Broken down by |
| server api version. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - server_api_version |
| - name: pod_start_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds from kubelet seeing a pod for the first time to the pod |
| starting to run |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.5 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - 120 |
| - 180 |
| - 240 |
| - 300 |
| - 360 |
| - 480 |
| - 600 |
| - 900 |
| - 1200 |
| - 1800 |
| - 2700 |
| - 3600 |
| - name: pod_start_sli_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds to start a pod, excluding time to pull images and run |
| init containers, measured from pod creation timestamp to when all its containers |
| are reported as started and observed via watch |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.5 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - 120 |
| - 180 |
| - 240 |
| - 300 |
| - 360 |
| - 480 |
| - 600 |
| - 900 |
| - 1200 |
| - 1800 |
| - 2700 |
| - 3600 |
| - name: pod_start_total_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds to start a pod since creation, including time to pull |
| images and run init containers, measured from pod creation timestamp to when all |
| its containers are reported as started and observed via watch |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.5 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - 120 |
| - 180 |
| - 240 |
| - 300 |
| - 360 |
| - 480 |
| - 600 |
| - 900 |
| - 1200 |
| - 1800 |
| - 2700 |
| - 3600 |
| - name: pod_status_sync_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds to sync a pod status update. Measures time from detection |
| of a change to pod status until the API is successfully updated for that pod, |
| even if multiple intervening changes to pod status occur. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.01 |
| - 0.05 |
| - 0.1 |
| - 0.5 |
| - 1 |
| - 5 |
| - 10 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: pod_worker_duration_seconds |
| subsystem: kubelet |
| help: 'Duration in seconds to sync a single pod. Broken down by operation type: |
| create, update, or sync' |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation_type |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: pod_worker_start_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds from kubelet seeing a pod to starting a worker. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: preemptions |
| subsystem: kubelet |
| help: Cumulative number of pod preemptions by preemption resource |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - preemption_signal |
| - name: restarted_pods_total |
| subsystem: kubelet |
| help: Number of pods that have been restarted because they were deleted and recreated |
| with the same UID while the kubelet was watching them (common for static pods, |
| extremely uncommon for API pods) |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - static |
| - name: run_podsandbox_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of the run_podsandbox operations. Broken down by RuntimeClass.Handler. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - runtime_handler |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: run_podsandbox_errors_total |
| subsystem: kubelet |
| help: Cumulative number of the run_podsandbox operation errors by RuntimeClass.Handler. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - runtime_handler |
| - name: running_containers |
| subsystem: kubelet |
| help: Number of containers currently running |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - container_state |
| - name: running_pods |
| subsystem: kubelet |
| help: Number of pods that have a running pod sandbox |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: runtime_operations_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds of runtime operations. Broken down by operation type. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation_type |
| buckets: |
| - 0.005 |
| - 0.0125 |
| - 0.03125 |
| - 0.078125 |
| - 0.1953125 |
| - 0.48828125 |
| - 1.220703125 |
| - 3.0517578125 |
| - 7.62939453125 |
| - 19.073486328125 |
| - 47.6837158203125 |
| - 119.20928955078125 |
| - 298.0232238769531 |
| - 745.0580596923828 |
| - name: runtime_operations_errors_total |
| subsystem: kubelet |
| help: Cumulative number of runtime operation errors by operation type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation_type |
| - name: runtime_operations_total |
| subsystem: kubelet |
| help: Cumulative number of runtime operations by operation type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation_type |
| - name: started_containers_errors_total |
| subsystem: kubelet |
| help: Cumulative number of errors when starting containers |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - container_type |
| - name: started_containers_total |
| subsystem: kubelet |
| help: Cumulative number of containers started |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - container_type |
| - name: started_host_process_containers_errors_total |
| subsystem: kubelet |
| help: Cumulative number of errors when starting hostprocess containers. This metric |
| will only be collected on Windows. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - container_type |
| - name: started_host_process_containers_total |
| subsystem: kubelet |
| help: Cumulative number of hostprocess containers started. This metric will only |
| be collected on Windows. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - container_type |
| - name: started_pods_errors_total |
| subsystem: kubelet |
| help: Cumulative number of errors when starting pods |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: started_pods_total |
| subsystem: kubelet |
| help: Cumulative number of pods started |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: topology_manager_admission_duration_ms |
| subsystem: kubelet |
| help: Duration in milliseconds to serve a pod admission request. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.8 |
| - 1.6 |
| - 3.2 |
| - 6.4 |
| - 12.8 |
| - 25.6 |
| - 51.2 |
| - 102.4 |
| - 204.8 |
| - 409.6 |
| - 819.2 |
| - name: topology_manager_admission_errors_total |
| subsystem: kubelet |
| help: The number of admission request failures where resources could not be aligned. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: topology_manager_admission_requests_total |
| subsystem: kubelet |
| help: The number of admission requests where resources have to be aligned. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: kubelet_volume_stats_available_bytes |
| help: Number of available bytes in the volume |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: kubelet_volume_stats_capacity_bytes |
| help: Capacity in bytes of the volume |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: kubelet_volume_stats_health_status_abnormal |
| help: Abnormal volume health status. The count is either 1 or 0. 1 indicates the |
| volume is unhealthy, 0 indicates volume is healthy |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: kubelet_volume_stats_inodes |
| help: Maximum number of inodes in the volume |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: kubelet_volume_stats_inodes_free |
| help: Number of free inodes in the volume |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: kubelet_volume_stats_inodes_used |
| help: Number of used inodes in the volume |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: kubelet_volume_stats_used_bytes |
| help: Number of used bytes in the volume |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - namespace |
| - persistentvolumeclaim |
| - name: working_pods |
| subsystem: kubelet |
| help: Number of pods the kubelet is actually running, broken down by lifecycle phase, |
| whether the pod is desired, orphaned, or runtime only (also orphaned), and whether |
| the pod is static. An orphaned pod has been removed from local configuration or |
| force deleted in the API and consumes resources that are not otherwise visible. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - config |
| - lifecycle |
| - static |
| - name: node_swap_usage_bytes |
| help: Current swap usage of the node in bytes. Reported only on non-windows systems |
| type: Custom |
| stabilityLevel: ALPHA |
| - name: pod_swap_usage_bytes |
| help: Current amount of the pod swap usage in bytes. Reported only on non-windows |
| systems |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - pod |
| - namespace |
| - name: scrape_error |
| help: 1 if there was an error while getting container metrics, 0 otherwise |
| type: Custom |
| deprecatedVersion: 1.29.0 |
| stabilityLevel: ALPHA |
| - name: container_cpu_usage_seconds_total |
| help: Cumulative cpu time consumed by the container in core-seconds |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - container |
| - pod |
| - namespace |
| - name: container_memory_working_set_bytes |
| help: Current working set of the container in bytes |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - container |
| - pod |
| - namespace |
| - name: container_start_time_seconds |
| help: Start time of the container since unix epoch in seconds |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - container |
| - pod |
| - namespace |
| - name: node_cpu_usage_seconds_total |
| help: Cumulative cpu time consumed by the node in core-seconds |
| type: Custom |
| stabilityLevel: STABLE |
| - name: node_memory_working_set_bytes |
| help: Current working set of the node in bytes |
| type: Custom |
| stabilityLevel: STABLE |
| - name: pod_cpu_usage_seconds_total |
| help: Cumulative cpu time consumed by the pod in core-seconds |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - pod |
| - namespace |
| - name: pod_memory_working_set_bytes |
| help: Current working set of the pod in bytes |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - pod |
| - namespace |
| - name: resource_scrape_error |
| help: 1 if there was an error while getting container metrics, 0 otherwise |
| type: Custom |
| stabilityLevel: STABLE |
| - name: force_cleaned_failed_volume_operation_errors_total |
| help: The number of volumes that failed force cleanup after their reconstruction |
| failed during kubelet startup. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: force_cleaned_failed_volume_operations_total |
| help: The number of volumes that were force cleaned after their reconstruction failed |
| during kubelet startup. This includes both successful and failed cleanups. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: http_inflight_requests |
| subsystem: kubelet |
| help: Number of the inflight http requests |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - long_running |
| - method |
| - path |
| - server_type |
| - name: http_requests_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds to serve http requests |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - long_running |
| - method |
| - path |
| - server_type |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: http_requests_total |
| subsystem: kubelet |
| help: Number of the http requests received since the server started |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - long_running |
| - method |
| - path |
| - server_type |
| - name: volume_metric_collection_duration_seconds |
| subsystem: kubelet |
| help: Duration in seconds to calculate volume stats |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - metric_source |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - name: network_programming_duration_seconds |
| subsystem: kubeproxy |
| help: In Cluster Network Programming Latency in seconds |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 7 |
| - 8 |
| - 9 |
| - 10 |
| - 11 |
| - 12 |
| - 13 |
| - 14 |
| - 15 |
| - 16 |
| - 17 |
| - 18 |
| - 19 |
| - 20 |
| - 21 |
| - 22 |
| - 23 |
| - 24 |
| - 25 |
| - 26 |
| - 27 |
| - 28 |
| - 29 |
| - 30 |
| - 31 |
| - 32 |
| - 33 |
| - 34 |
| - 35 |
| - 36 |
| - 37 |
| - 38 |
| - 39 |
| - 40 |
| - 41 |
| - 42 |
| - 43 |
| - 44 |
| - 45 |
| - 46 |
| - 47 |
| - 48 |
| - 49 |
| - 50 |
| - 51 |
| - 52 |
| - 53 |
| - 54 |
| - 55 |
| - 56 |
| - 57 |
| - 58 |
| - 59 |
| - 60 |
| - 65 |
| - 70 |
| - 75 |
| - 80 |
| - 85 |
| - 90 |
| - 95 |
| - 100 |
| - 105 |
| - 110 |
| - 115 |
| - 120 |
| - 150 |
| - 180 |
| - 210 |
| - 240 |
| - 270 |
| - 300 |
| - name: proxy_healthz_total |
| subsystem: kubeproxy |
| help: Cumulative proxy healthz HTTP status |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name: proxy_livez_total |
| subsystem: kubeproxy |
| help: Cumulative proxy livez HTTP status |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name: sync_full_proxy_rules_duration_seconds |
| subsystem: kubeproxy |
| help: SyncProxyRules latency in seconds for full resyncs |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: sync_partial_proxy_rules_duration_seconds |
| subsystem: kubeproxy |
| help: SyncProxyRules latency in seconds for partial resyncs |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: sync_proxy_rules_duration_seconds |
| subsystem: kubeproxy |
| help: SyncProxyRules latency in seconds |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: sync_proxy_rules_endpoint_changes_pending |
| subsystem: kubeproxy |
| help: Pending proxy rules Endpoint changes |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_endpoint_changes_total |
| subsystem: kubeproxy |
| help: Cumulative proxy rules Endpoint changes |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_iptables_last |
| subsystem: kubeproxy |
| help: Number of iptables rules written by kube-proxy in last sync |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - table |
| - name: sync_proxy_rules_iptables_partial_restore_failures_total |
| subsystem: kubeproxy |
| help: Cumulative proxy iptables partial restore failures |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_iptables_restore_failures_total |
| subsystem: kubeproxy |
| help: Cumulative proxy iptables restore failures |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_iptables_total |
| subsystem: kubeproxy |
| help: Total number of iptables rules owned by kube-proxy |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - table |
| - name: sync_proxy_rules_last_queued_timestamp_seconds |
| subsystem: kubeproxy |
| help: The last time a sync of proxy rules was queued |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_last_timestamp_seconds |
| subsystem: kubeproxy |
| help: The last time proxy rules were successfully synced |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_no_local_endpoints_total |
| subsystem: kubeproxy |
| help: Number of services with a Local traffic policy and no endpoints |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - traffic_policy |
| - name: sync_proxy_rules_service_changes_pending |
| subsystem: kubeproxy |
| help: Pending proxy rules Service changes |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: sync_proxy_rules_service_changes_total |
| subsystem: kubeproxy |
| help: Cumulative proxy rules Service changes |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: plugin_manager_total_plugins |
| help: Number of plugins in Plugin Manager |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - socket_path |
| - state |
| - name: probe_duration_seconds |
| subsystem: prober |
| help: Duration in seconds for a probe response. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - container |
| - namespace |
| - pod |
| - probe_type |
| - name: probe_total |
| subsystem: prober |
| help: Cumulative number of a liveness, readiness or startup probe for a container |
| by result. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - container |
| - namespace |
| - pod |
| - pod_uid |
| - probe_type |
| - result |
| - name: reconstruct_volume_operations_errors_total |
| help: The number of volumes that failed reconstruction from the operating system |
| during kubelet startup. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: reconstruct_volume_operations_total |
| help: The number of volumes that were attempted to be reconstructed from the operating |
| system during kubelet startup. This includes both successful and failed reconstruction. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: volume_manager_selinux_container_errors_total |
| help: Number of errors when kubelet cannot compute SELinux context for a container. |
| Kubelet can't start such a Pod then and it will retry, therefore value of this |
| metric may not represent the actual nr. of containers. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: volume_manager_selinux_container_warnings_total |
| help: Number of errors when kubelet cannot compute SELinux context for a container |
| that are ignored. They will become real errors when SELinuxMountReadWriteOncePod |
| feature is expanded to all volume access modes. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: volume_manager_selinux_pod_context_mismatch_errors_total |
| help: Number of errors when a Pod defines different SELinux contexts for its containers |
| that use the same volume. Kubelet can't start such a Pod then and it will retry, |
| therefore value of this metric may not represent the actual nr. of Pods. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: volume_manager_selinux_pod_context_mismatch_warnings_total |
| help: Number of errors when a Pod defines different SELinux contexts for its containers |
| that use the same volume. They are not errors yet, but they will become real errors |
| when SELinuxMountReadWriteOncePod feature is expanded to all volume access modes. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: volume_manager_selinux_volume_context_mismatch_errors_total |
| help: Number of errors when a Pod uses a volume that is already mounted with a different |
| SELinux context than the Pod needs. Kubelet can't start such a Pod then and it |
| will retry, therefore value of this metric may not represent the actual nr. of |
| Pods. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - volume_plugin |
| - name: volume_manager_selinux_volume_context_mismatch_warnings_total |
| help: Number of errors when a Pod uses a volume that is already mounted with a different |
| SELinux context than the Pod needs. They are not errors yet, but they will become |
| real errors when SELinuxMountReadWriteOncePod feature is expanded to all volume |
| access modes. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - volume_plugin |
| - name: volume_manager_selinux_volumes_admitted_total |
| help: Number of volumes whose SELinux context was fine and will be mounted with |
| mount -o context option. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - volume_plugin |
| - name: volume_manager_total_volumes |
| help: Number of volumes in Volume Manager |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - plugin_name |
| - state |
| - name: csr_honored_duration_total |
| subsystem: certificates_registry |
| namespace: apiserver |
| help: Total number of issued CSRs with a requested duration that was honored, sliced |
| by signer (only kubernetes.io signer names are specifically identified) |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - signerName |
| - name: csr_requested_duration_total |
| subsystem: certificates_registry |
| namespace: apiserver |
| help: Total number of issued CSRs with a requested duration, sliced by signer (only |
| kubernetes.io signer names are specifically identified) |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - signerName |
| - name: ip_errors_total |
| subsystem: clusterip_repair |
| namespace: apiserver |
| help: 'Number of errors detected on clusterips by the repair loop broken down by |
| type of error: leak, repair, full, outOfRange, duplicate, unknown, invalid' |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - type |
| - name: reconcile_errors_total |
| subsystem: clusterip_repair |
| namespace: apiserver |
| help: Number of reconciliation failures on the clusterip repair reconcile loop |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: port_errors_total |
| subsystem: nodeport_repair |
| namespace: apiserver |
| help: 'Number of errors detected on ports by the repair loop broken down by type |
| of error: leak, repair, full, outOfRange, duplicate, unknown' |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - type |
| - name: allocated_ips |
| subsystem: clusterip_allocator |
| namespace: kube_apiserver |
| help: Gauge measuring the number of allocated IPs for Services |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - cidr |
| - name: allocation_errors_total |
| subsystem: clusterip_allocator |
| namespace: kube_apiserver |
| help: Number of errors trying to allocate Cluster IPs |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - cidr |
| - scope |
| - name: allocation_total |
| subsystem: clusterip_allocator |
| namespace: kube_apiserver |
| help: Number of Cluster IPs allocations |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - cidr |
| - scope |
| - name: available_ips |
| subsystem: clusterip_allocator |
| namespace: kube_apiserver |
| help: Gauge measuring the number of available IPs for Services |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - cidr |
| - name: allocated_ports |
| subsystem: nodeport_allocator |
| namespace: kube_apiserver |
| help: Gauge measuring the number of allocated NodePorts for Services |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: available_ports |
| subsystem: nodeport_allocator |
| namespace: kube_apiserver |
| help: Gauge measuring the number of available NodePorts for Services |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: backend_tls_failure_total |
| subsystem: pod_logs |
| namespace: kube_apiserver |
| help: Total number of requests for pods/logs that failed due to kubelet server TLS |
| verification |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: insecure_backend_total |
| subsystem: pod_logs |
| namespace: kube_apiserver |
| help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls, |
| skip_tls_allowed, skip_tls_denied' |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - usage |
| - name: pods_logs_backend_tls_failure_total |
| subsystem: pod_logs |
| namespace: kube_apiserver |
| help: Total number of requests for pods/logs that failed due to kubelet server TLS |
| verification |
| type: Counter |
| deprecatedVersion: 1.27.0 |
| stabilityLevel: ALPHA |
| - name: pods_logs_insecure_backend_total |
| subsystem: pod_logs |
| namespace: kube_apiserver |
| help: 'Total number of requests for pods/logs sliced by usage type: enforce_tls, |
| skip_tls_allowed, skip_tls_denied' |
| type: Counter |
| deprecatedVersion: 1.27.0 |
| stabilityLevel: ALPHA |
| labels: |
| - usage |
| - name: goroutines |
| subsystem: scheduler |
| help: Number of running goroutines split by the work they do such as binding. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: permit_wait_duration_seconds |
| subsystem: scheduler |
| help: Duration of waiting on permit. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: plugin_evaluation_total |
| subsystem: scheduler |
| help: Number of attempts to schedule pods by each plugin and the extension point |
| (available only in PreFilter and Filter). |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - extension_point |
| - plugin |
| - profile |
| - name: plugin_execution_duration_seconds |
| subsystem: scheduler |
| help: Duration for running a plugin at a specific extension point. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - extension_point |
| - plugin |
| - status |
| buckets: |
| - 1e-05 |
| - 1.5000000000000002e-05 |
| - 2.2500000000000005e-05 |
| - 3.375000000000001e-05 |
| - 5.062500000000001e-05 |
| - 7.593750000000002e-05 |
| - 0.00011390625000000003 |
| - 0.00017085937500000006 |
| - 0.0002562890625000001 |
| - 0.00038443359375000017 |
| - 0.0005766503906250003 |
| - 0.0008649755859375004 |
| - 0.0012974633789062506 |
| - 0.0019461950683593758 |
| - 0.0029192926025390638 |
| - 0.004378938903808595 |
| - 0.006568408355712893 |
| - 0.009852612533569338 |
| - 0.014778918800354007 |
| - 0.02216837820053101 |
| - name: scheduler_cache_size |
| subsystem: scheduler |
| help: Number of nodes, pods, and assumed (bound) pods in the scheduler cache. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - type |
| - name: scheduling_algorithm_duration_seconds |
| subsystem: scheduler |
| help: Scheduling algorithm latency in seconds |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: unschedulable_pods |
| subsystem: scheduler |
| help: The number of unschedulable pods broken down by plugin name. A pod will increment |
| the gauge for all plugins that caused it to not schedule and so this metric has |
| meaning only when broken down by plugin. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - plugin |
| - profile |
| - name: binder_cache_requests_total |
| subsystem: scheduler_volume |
| help: Total number for request volume binding cache |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: scheduling_stage_error_total |
| subsystem: scheduler_volume |
| help: Volume scheduling stage error count |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: invalid_legacy_auto_token_uses_total |
| subsystem: serviceaccount |
| help: Cumulative invalid auto-generated legacy tokens used |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: legacy_auto_token_uses_total |
| subsystem: serviceaccount |
| help: Cumulative auto-generated legacy tokens used |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: legacy_manual_token_uses_total |
| subsystem: serviceaccount |
| help: Cumulative manually created legacy tokens used |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: legacy_tokens_total |
| subsystem: serviceaccount |
| help: Cumulative legacy service account tokens used |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: stale_tokens_total |
| subsystem: serviceaccount |
| help: Cumulative stale projected service account tokens used |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: valid_tokens_total |
| subsystem: serviceaccount |
| help: Cumulative valid projected service account tokens used |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: pod_scheduling_sli_duration_seconds |
| subsystem: scheduler |
| help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling |
| queue and might involve multiple scheduling attempts. |
| type: Histogram |
| stabilityLevel: BETA |
| labels: |
| - attempts |
| buckets: |
| - 0.01 |
| - 0.02 |
| - 0.04 |
| - 0.08 |
| - 0.16 |
| - 0.32 |
| - 0.64 |
| - 1.28 |
| - 2.56 |
| - 5.12 |
| - 10.24 |
| - 20.48 |
| - 40.96 |
| - 81.92 |
| - 163.84 |
| - 327.68 |
| - 655.36 |
| - 1310.72 |
| - 2621.44 |
| - 5242.88 |
| - name: kube_pod_resource_limit |
| help: Resources limit for workloads on the cluster, broken down by pod. This shows |
| the resource usage the scheduler and kubelet expect per pod for resources along |
| with the unit for the resource if any. |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - namespace |
| - pod |
| - node |
| - scheduler |
| - priority |
| - resource |
| - unit |
| - name: kube_pod_resource_request |
| help: Resources requested by workloads on the cluster, broken down by pod. This |
| shows the resource usage the scheduler and kubelet expect per pod for resources |
| along with the unit for the resource if any. |
| type: Custom |
| stabilityLevel: STABLE |
| labels: |
| - namespace |
| - pod |
| - node |
| - scheduler |
| - priority |
| - resource |
| - unit |
| - name: framework_extension_point_duration_seconds |
| subsystem: scheduler |
| help: Latency for running all plugins of a specific extension point. |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - extension_point |
| - profile |
| - status |
| buckets: |
| - 0.0001 |
| - 0.0002 |
| - 0.0004 |
| - 0.0008 |
| - 0.0016 |
| - 0.0032 |
| - 0.0064 |
| - 0.0128 |
| - 0.0256 |
| - 0.0512 |
| - 0.1024 |
| - 0.2048 |
| - name: pending_pods |
| subsystem: scheduler |
| help: Number of pending pods, by the queue type. 'active' means number of pods in |
| activeQ; 'backoff' means number of pods in backoffQ; 'unschedulable' means number |
| of pods in unschedulablePods that the scheduler attempted to schedule and failed; |
| 'gated' is the number of unschedulable pods that the scheduler never attempted |
| to schedule because they are gated. |
| type: Gauge |
| stabilityLevel: STABLE |
| labels: |
| - queue |
| - name: pod_scheduling_attempts |
| subsystem: scheduler |
| help: Number of attempts to successfully schedule a pod. |
| type: Histogram |
| stabilityLevel: STABLE |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - name: pod_scheduling_duration_seconds |
| subsystem: scheduler |
| help: E2e latency for a pod being scheduled which may include multiple scheduling |
| attempts. |
| type: Histogram |
| deprecatedVersion: 1.28.0 |
| stabilityLevel: STABLE |
| labels: |
| - attempts |
| buckets: |
| - 0.01 |
| - 0.02 |
| - 0.04 |
| - 0.08 |
| - 0.16 |
| - 0.32 |
| - 0.64 |
| - 1.28 |
| - 2.56 |
| - 5.12 |
| - 10.24 |
| - 20.48 |
| - 40.96 |
| - 81.92 |
| - 163.84 |
| - 327.68 |
| - 655.36 |
| - 1310.72 |
| - 2621.44 |
| - 5242.88 |
| - name: preemption_attempts_total |
| subsystem: scheduler |
| help: Total preemption attempts in the cluster till now |
| type: Counter |
| stabilityLevel: STABLE |
| - name: preemption_victims |
| subsystem: scheduler |
| help: Number of selected preemption victims |
| type: Histogram |
| stabilityLevel: STABLE |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - name: queue_incoming_pods_total |
| subsystem: scheduler |
| help: Number of pods added to scheduling queues by event and queue type. |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - event |
| - queue |
| - name: schedule_attempts_total |
| subsystem: scheduler |
| help: Number of attempts to schedule pods, by the result. 'unschedulable' means |
| a pod could not be scheduled, while 'error' means an internal scheduler problem. |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - profile |
| - result |
| - name: scheduling_attempt_duration_seconds |
| subsystem: scheduler |
| help: Scheduling attempt latency in seconds (scheduling algorithm + binding) |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - profile |
| - result |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: operations_seconds |
| subsystem: csi |
| help: Container Storage Interface operation duration with gRPC error code status |
| total |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - driver_name |
| - grpc_status_code |
| - method_name |
| - migrated |
| buckets: |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - 15 |
| - 25 |
| - 50 |
| - 120 |
| - 300 |
| - 600 |
| - name: storage_operation_duration_seconds |
| help: Storage operation duration |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - migrated |
| - operation_name |
| - status |
| - volume_plugin |
| buckets: |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - 15 |
| - 25 |
| - 50 |
| - 120 |
| - 300 |
| - 600 |
| - name: volume_operation_total_seconds |
| help: Storage operation end to end duration in seconds |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation_name |
| - plugin_name |
| buckets: |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - 15 |
| - 25 |
| - 50 |
| - 120 |
| - 300 |
| - 600 |
| - name: graph_actions_duration_seconds |
| subsystem: node_authorizer |
| help: Histogram of duration of graph actions in node authorizer. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| buckets: |
| - 0.0001 |
| - 0.0002 |
| - 0.0004 |
| - 0.0008 |
| - 0.0016 |
| - 0.0032 |
| - 0.0064 |
| - 0.0128 |
| - 0.0256 |
| - 0.0512 |
| - 0.1024 |
| - 0.2048 |
| - name: ratcheting_seconds |
| subsystem: validation |
| namespace: apiextensions_apiserver |
| help: Time for comparison of old to new for the purposes of CRDValidationRatcheting |
| during an UPDATE in seconds. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 1e-05 |
| - 4e-05 |
| - 0.00016 |
| - 0.00064 |
| - 0.00256 |
| - 0.01024 |
| - 0.04096 |
| - 0.16384 |
| - 0.65536 |
| - 2.62144 |
| - name: conversion_webhook_duration_seconds |
| namespace: apiserver |
| help: Conversion webhook request latency |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - failure_type |
| - result |
| buckets: |
| - 0.005 |
| - 0.01 |
| - 0.02 |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.5 |
| - 1 |
| - 2 |
| - 5 |
| - 10 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: conversion_webhook_request_total |
| namespace: apiserver |
| help: Counter for conversion webhook requests with success/failure and failure error |
| type |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - failure_type |
| - result |
| - name: apiserver_crd_conversion_webhook_duration_seconds |
| help: CRD webhook conversion duration in seconds |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - crd_name |
| - from_version |
| - succeeded |
| - to_version |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: apiextensions_openapi_v2_regeneration_count |
| help: Counter of OpenAPI v2 spec regeneration count broken down by causing CRD name |
| and reason. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - crd |
| - reason |
| - name: apiextensions_openapi_v3_regeneration_count |
| help: Counter of OpenAPI v3 spec regeneration count broken down by group, version, |
| causing CRD and reason. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - crd |
| - group |
| - reason |
| - version |
| - name: match_condition_evaluation_errors_total |
| subsystem: admission |
| namespace: apiserver |
| help: Admission match condition evaluation errors count, identified by name of resource |
| containing the match condition and broken out for each kind containing matchConditions |
| (webhook or policy), operation and admission type (validate or admit). |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - kind |
| - name |
| - operation |
| - type |
| - name: match_condition_evaluation_seconds |
| subsystem: admission |
| namespace: apiserver |
| help: Admission match condition evaluation time in seconds, identified by name and |
| broken out for each kind containing matchConditions (webhook or policy), operation |
| and type (validate or admit). |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - kind |
| - name |
| - operation |
| - type |
| buckets: |
| - 0.001 |
| - 0.005 |
| - 0.01 |
| - 0.025 |
| - 0.1 |
| - 0.2 |
| - 0.25 |
| - name: match_condition_exclusions_total |
| subsystem: admission |
| namespace: apiserver |
| help: Admission match condition evaluation exclusions count, identified by name |
| of resource containing the match condition and broken out for each kind containing |
| matchConditions (webhook or policy), operation and admission type (validate or |
| admit). |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - kind |
| - name |
| - operation |
| - type |
| - name: step_admission_duration_seconds_summary |
| subsystem: admission |
| namespace: apiserver |
| help: Admission sub-step latency summary in seconds, broken out for each operation |
| and API resource and step type (validate or admit). |
| type: Summary |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - rejected |
| - type |
| maxAge: 18000000000000 |
| - name: webhook_fail_open_count |
| subsystem: admission |
| namespace: apiserver |
| help: Admission webhook fail open count, identified by name and broken out for each |
| admission type (validating or mutating). |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - type |
| - name: webhook_rejection_count |
| subsystem: admission |
| namespace: apiserver |
| help: Admission webhook rejection count, identified by name and broken out for each |
| admission type (validating or admit) and operation. Additional labels specify |
| an error type (calling_webhook_error or apiserver_internal_error if an error occurred; |
| no_error otherwise) and optionally a non-zero rejection code if the webhook rejects |
| the request with an HTTP status code (honored by the apiserver when the code is |
| greater or equal to 400). Codes greater than 600 are truncated to 600, to keep |
| the metrics cardinality bounded. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - error_type |
| - name |
| - operation |
| - rejection_code |
| - type |
| - name: webhook_request_total |
| subsystem: admission |
| namespace: apiserver |
| help: Admission webhook request total, identified by name and broken out for each |
| admission type (validating or mutating) and operation. Additional labels specify |
| whether the request was rejected or not and an HTTP status code. Codes greater |
| than 600 are truncated to 600, to keep the metrics cardinality bounded. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name |
| - operation |
| - rejected |
| - type |
| - name: check_duration_seconds |
| subsystem: validating_admission_policy |
| namespace: apiserver |
| help: Validation admission latency for individual validation expressions in seconds, |
| labeled by policy and further including binding, state and enforcement action |
| taken. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - enforcement_action |
| - policy |
| - policy_binding |
| - state |
| buckets: |
| - 5e-07 |
| - 0.001 |
| - 0.01 |
| - 0.1 |
| - 1 |
| - name: check_total |
| subsystem: validating_admission_policy |
| namespace: apiserver |
| help: Validation admission policy check total, labeled by policy and further identified |
| by binding, enforcement action taken, and state. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - enforcement_action |
| - policy |
| - policy_binding |
| - state |
| - name: definition_total |
| subsystem: validating_admission_policy |
| namespace: apiserver |
| help: Validation admission policy count total, labeled by state and enforcement |
| action. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - enforcement_action |
| - state |
| - name: controller_admission_duration_seconds |
| subsystem: admission |
| namespace: apiserver |
| help: Admission controller latency histogram in seconds, identified by name and |
| broken out for each operation and API resource and type (validate or admit). |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - name |
| - operation |
| - rejected |
| - type |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - name: step_admission_duration_seconds |
| subsystem: admission |
| namespace: apiserver |
| help: Admission sub-step latency histogram in seconds, broken out for each operation |
| and API resource and step type (validate or admit). |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - operation |
| - rejected |
| - type |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - name: webhook_admission_duration_seconds |
| subsystem: admission |
| namespace: apiserver |
| help: Admission webhook latency histogram in seconds, identified by name and broken |
| out for each operation and API resource and type (validate or admit). |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - name |
| - operation |
| - rejected |
| - type |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 10 |
| - 25 |
| - name: aggregator_discovery_aggregation_count_total |
| help: Counter of number of times discovery was aggregated |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: error_total |
| subsystem: apiserver_audit |
| help: Counter of audit events that failed to be audited properly. Plugin identifies |
| the plugin affected by the error. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - plugin |
| - name: event_total |
| subsystem: apiserver_audit |
| help: Counter of audit events generated and sent to the audit backend. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: level_total |
| subsystem: apiserver_audit |
| help: Counter of policy levels for audit events (1 per request). |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - level |
| - name: requests_rejected_total |
| subsystem: apiserver_audit |
| help: Counter of apiserver requests rejected due to an error in audit logging backend. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: compilation_duration_seconds |
| subsystem: cel |
| namespace: apiserver |
| help: CEL compilation time in seconds. |
| type: Histogram |
| stabilityLevel: ALPHA |
| - name: evaluation_duration_seconds |
| subsystem: cel |
| namespace: apiserver |
| help: CEL evaluation time in seconds. |
| type: Histogram |
| stabilityLevel: ALPHA |
| - name: certificate_expiration_seconds |
| subsystem: client |
| namespace: apiserver |
| help: Distribution of the remaining lifetime on the certificate used to authenticate |
| a request. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 0 |
| - 1800 |
| - 3600 |
| - 7200 |
| - 21600 |
| - 43200 |
| - 86400 |
| - 172800 |
| - 345600 |
| - 604800 |
| - 2.592e+06 |
| - 7.776e+06 |
| - 1.5552e+07 |
| - 3.1104e+07 |
| - name: apiserver_delegated_authn_request_duration_seconds |
| help: Request latency in seconds. Broken down by status code. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| buckets: |
| - 0.25 |
| - 0.5 |
| - 0.7 |
| - 1 |
| - 1.5 |
| - 3 |
| - 5 |
| - 10 |
| - name: apiserver_delegated_authn_request_total |
| help: Number of HTTP requests partitioned by status code. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name: apiserver_delegated_authz_request_duration_seconds |
| help: Request latency in seconds. Broken down by status code. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| buckets: |
| - 0.25 |
| - 0.5 |
| - 0.7 |
| - 1 |
| - 1.5 |
| - 3 |
| - 5 |
| - 10 |
| - name: apiserver_delegated_authz_request_total |
| help: Number of HTTP requests partitioned by status code. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name: active_fetch_count |
| subsystem: token_cache |
| namespace: authentication |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - status |
| - name: fetch_total |
| subsystem: token_cache |
| namespace: authentication |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - status |
| - name: request_duration_seconds |
| subsystem: token_cache |
| namespace: authentication |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - status |
| - name: request_total |
| subsystem: token_cache |
| namespace: authentication |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - status |
| - name: cache_list_fetched_objects_total |
| namespace: apiserver |
| help: Number of objects read from watch cache in the course of serving a LIST request |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - index |
| - resource_prefix |
| - name: cache_list_returned_objects_total |
| namespace: apiserver |
| help: Number of objects returned for a LIST request from watch cache |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource_prefix |
| - name: cache_list_total |
| namespace: apiserver |
| help: Number of LIST requests served from watch cache |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - index |
| - resource_prefix |
| - name: current_inqueue_requests |
| subsystem: apiserver |
| help: Maximal number of queued requests in this apiserver per request kind in last |
| second. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - request_kind |
| - name: dial_duration_seconds |
| subsystem: egress_dialer |
| namespace: apiserver |
| help: Dial latency histogram in seconds, labeled by the protocol (http-connect or |
| grpc), transport (tcp or uds) |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - protocol |
| - transport |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.5 |
| - 2.5 |
| - 12.5 |
| - name: dial_failure_count |
| subsystem: egress_dialer |
| namespace: apiserver |
| help: Dial failure count, labeled by the protocol (http-connect or grpc), transport |
| (tcp or uds), and stage (connect or proxy). The stage indicates at which stage |
| the dial failed |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - protocol |
| - stage |
| - transport |
| - name: dial_start_total |
| subsystem: egress_dialer |
| namespace: apiserver |
| help: Dial starts, labeled by the protocol (http-connect or grpc) and transport |
| (tcp or uds). |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - protocol |
| - transport |
| - name: automatic_reload_failures_total |
| subsystem: encryption_config_controller |
| namespace: apiserver |
| help: Total number of failed automatic reloads of encryption configuration split |
| by apiserver identity. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - apiserver_id_hash |
| - name: automatic_reload_last_timestamp_seconds |
| subsystem: encryption_config_controller |
| namespace: apiserver |
| help: Timestamp of the last successful or failed automatic reload of encryption |
| configuration split by apiserver identity. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - apiserver_id_hash |
| - status |
| - name: automatic_reload_success_total |
| subsystem: encryption_config_controller |
| namespace: apiserver |
| help: Total number of successful automatic reloads of encryption configuration split |
| by apiserver identity. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - apiserver_id_hash |
| - name: init_events_total |
| namespace: apiserver |
| help: Counter of init events processed in watch cache broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: request_aborts_total |
| subsystem: apiserver |
| help: Number of requests which apiserver aborted possibly due to a timeout, for |
| each group, version, verb, resource, subresource and scope |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| - name: request_body_size_bytes |
| subsystem: apiserver |
| help: Apiserver request body size in bytes broken out by resource and verb. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - verb |
| buckets: |
| - 50000 |
| - 150000 |
| - 250000 |
| - 350000 |
| - 450000 |
| - 550000 |
| - 650000 |
| - 750000 |
| - 850000 |
| - 950000 |
| - 1.05e+06 |
| - 1.15e+06 |
| - 1.25e+06 |
| - 1.35e+06 |
| - 1.45e+06 |
| - 1.55e+06 |
| - 1.65e+06 |
| - 1.75e+06 |
| - 1.85e+06 |
| - 1.95e+06 |
| - 2.05e+06 |
| - 2.15e+06 |
| - 2.25e+06 |
| - 2.35e+06 |
| - 2.45e+06 |
| - 2.55e+06 |
| - 2.65e+06 |
| - 2.75e+06 |
| - 2.85e+06 |
| - 2.95e+06 |
| - 3.05e+06 |
| - name: request_filter_duration_seconds |
| subsystem: apiserver |
| help: Request filter latency distribution in seconds, for each filter type |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - filter |
| buckets: |
| - 0.0001 |
| - 0.0003 |
| - 0.001 |
| - 0.003 |
| - 0.01 |
| - 0.03 |
| - 0.1 |
| - 0.3 |
| - 1 |
| - 5 |
| - 10 |
| - 15 |
| - 30 |
| - name: request_post_timeout_total |
| subsystem: apiserver |
| help: Tracks the activity of the request handlers after the associated requests |
| have been timed out by the apiserver |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - source |
| - status |
| - name: request_sli_duration_seconds |
| subsystem: apiserver |
| help: Response latency distribution (not counting webhook duration and priority |
| & fairness queue wait times) in seconds for each verb, group, version, resource, |
| subresource, scope and component. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - component |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| buckets: |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 1.25 |
| - 1.5 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 15 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: request_slo_duration_seconds |
| subsystem: apiserver |
| help: Response latency distribution (not counting webhook duration and priority |
| & fairness queue wait times) in seconds for each verb, group, version, resource, |
| subresource, scope and component. |
| type: Histogram |
| deprecatedVersion: 1.27.0 |
| stabilityLevel: ALPHA |
| labels: |
| - component |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| buckets: |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 1.25 |
| - 1.5 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 15 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: request_terminations_total |
| subsystem: apiserver |
| help: Number of requests which apiserver terminated in self-defense. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - component |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| - name: request_timestamp_comparison_time |
| subsystem: apiserver |
| help: Time taken for comparison of old vs new objects in UPDATE or PATCH requests |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - code_path |
| buckets: |
| - 0.0001 |
| - 0.0003 |
| - 0.001 |
| - 0.003 |
| - 0.01 |
| - 0.03 |
| - 0.1 |
| - 0.3 |
| - 1 |
| - 5 |
| - name: selfrequest_total |
| subsystem: apiserver |
| help: Counter of apiserver self-requests broken out for each verb, API resource |
| and subresource. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - subresource |
| - verb |
| - name: storage_db_total_size_in_bytes |
| subsystem: apiserver |
| help: Total size of the storage database file physically allocated in bytes. |
| type: Gauge |
| deprecatedVersion: 1.28.0 |
| stabilityLevel: ALPHA |
| labels: |
| - endpoint |
| - name: storage_decode_errors_total |
| namespace: apiserver |
| help: Number of stored object decode errors split by object type |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: storage_events_received_total |
| subsystem: apiserver |
| help: Number of etcd events received split by kind. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: apiserver_storage_list_evaluated_objects_total |
| help: Number of objects tested in the course of serving a LIST request from storage |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: apiserver_storage_list_fetched_objects_total |
| help: Number of objects read from storage in the course of serving a LIST request |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: apiserver_storage_list_returned_objects_total |
| help: Number of objects returned for a LIST request from storage |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: apiserver_storage_list_total |
| help: Number of LIST requests served from storage |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: apiserver_storage_size_bytes |
| help: Size of the storage database file physically allocated in bytes. |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - storage_cluster_id |
| - name: terminated_watchers_total |
| namespace: apiserver |
| help: Counter of watchers closed due to unresponsiveness broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: tls_handshake_errors_total |
| subsystem: apiserver |
| help: Number of requests dropped with 'TLS handshake error from' error |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: events_dispatched_total |
| subsystem: watch_cache |
| namespace: apiserver |
| help: Counter of events dispatched in watch cache broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: events_received_total |
| subsystem: watch_cache |
| namespace: apiserver |
| help: Counter of events received in watch cache broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: initializations_total |
| subsystem: watch_cache |
| namespace: apiserver |
| help: Counter of watch cache initializations broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: watch_events_sizes |
| subsystem: apiserver |
| help: Watch event size distribution in bytes |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - group |
| - kind |
| - version |
| buckets: |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - 65536 |
| - 131072 |
| - name: watch_events_total |
| subsystem: apiserver |
| help: Number of events sent in watch clients |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - group |
| - kind |
| - version |
| - name: watch_list_duration_seconds |
| subsystem: apiserver |
| help: Response latency distribution in seconds for watch list requests broken by |
| group, version, resource and scope. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - group |
| - resource |
| - scope |
| - version |
| buckets: |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 2 |
| - 4 |
| - 6 |
| - 8 |
| - 10 |
| - 15 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: authenticated_user_requests |
| help: Counter of authenticated requests broken out by username. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - username |
| - name: authentication_attempts |
| help: Counter of authenticated attempts. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| - name: authentication_duration_seconds |
| help: Authentication duration in seconds broken out by result. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: authorization_attempts_total |
| help: Counter of authorization attempts broken down by result. It can be either |
| 'allowed', 'denied', 'no-opinion' or 'error'. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| - name: authorization_duration_seconds |
| help: Authorization duration in seconds broken out by result. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| buckets: |
| - 0.001 |
| - 0.002 |
| - 0.004 |
| - 0.008 |
| - 0.016 |
| - 0.032 |
| - 0.064 |
| - 0.128 |
| - 0.256 |
| - 0.512 |
| - 1.024 |
| - 2.048 |
| - 4.096 |
| - 8.192 |
| - 16.384 |
| - name: etcd_bookmark_counts |
| help: Number of etcd bookmarks (progress notify events) split by kind. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: etcd_lease_object_counts |
| help: Number of objects attached to a single etcd lease. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 10 |
| - 50 |
| - 100 |
| - 500 |
| - 1000 |
| - 2500 |
| - 5000 |
| - name: etcd_request_duration_seconds |
| help: Etcd request latency in seconds for each operation and object type. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - type |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 1.25 |
| - 1.5 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 15 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: etcd_request_errors_total |
| help: Etcd failed request counts for each operation and object type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - type |
| - name: etcd_requests_total |
| help: Etcd request counts for each operation and object type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - type |
| - name: field_validation_request_duration_seconds |
| help: Response latency distribution in seconds for each field validation value |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - field_validation |
| buckets: |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 1.25 |
| - 1.5 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 15 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: capacity |
| subsystem: watch_cache |
| help: Total capacity of watch cache broken by resource type. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: capacity_decrease_total |
| subsystem: watch_cache |
| help: Total number of watch cache capacity decrease events broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: capacity_increase_total |
| subsystem: watch_cache |
| help: Total number of watch cache capacity increase events broken by resource type. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - resource |
| - name: current_inflight_requests |
| subsystem: apiserver |
| help: Maximal number of currently used inflight request limit of this apiserver |
| per request kind in last second. |
| type: Gauge |
| stabilityLevel: STABLE |
| labels: |
| - request_kind |
| - name: longrunning_requests |
| subsystem: apiserver |
| help: Gauge of all active long-running apiserver requests broken out by verb, group, |
| version, resource, scope and component. Not all requests are tracked this way. |
| type: Gauge |
| stabilityLevel: STABLE |
| labels: |
| - component |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| - name: request_duration_seconds |
| subsystem: apiserver |
| help: Response latency distribution in seconds for each verb, dry run value, group, |
| version, resource, subresource, scope and component. |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - component |
| - dry_run |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 1.25 |
| - 1.5 |
| - 2 |
| - 3 |
| - 4 |
| - 5 |
| - 6 |
| - 8 |
| - 10 |
| - 15 |
| - 20 |
| - 30 |
| - 45 |
| - 60 |
| - name: request_total |
| subsystem: apiserver |
| help: Counter of apiserver requests broken out for each verb, dry run value, group, |
| version, resource, scope, component, and HTTP response code. |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - code |
| - component |
| - dry_run |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| - name: requested_deprecated_apis |
| subsystem: apiserver |
| help: Gauge of deprecated APIs that have been requested, broken out by API group, |
| version, resource, subresource, and removed_release. |
| type: Gauge |
| stabilityLevel: STABLE |
| labels: |
| - group |
| - removed_release |
| - resource |
| - subresource |
| - version |
| - name: response_sizes |
| subsystem: apiserver |
| help: Response size distribution in bytes for each group, version, verb, resource, |
| subresource, scope and component. |
| type: Histogram |
| stabilityLevel: STABLE |
| labels: |
| - component |
| - group |
| - resource |
| - scope |
| - subresource |
| - verb |
| - version |
| buckets: |
| - 1000 |
| - 10000 |
| - 100000 |
| - 1e+06 |
| - 1e+07 |
| - 1e+08 |
| - 1e+09 |
| - name: apiserver_storage_objects |
| help: Number of stored objects at the time of last check split by kind. In case |
| of a fetching error, the value will be -1. |
| type: Gauge |
| stabilityLevel: STABLE |
| labels: |
| - resource |
| - name: dek_cache_fill_percent |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: Percent of the cache slots currently occupied by cached DEKs. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: dek_cache_inter_arrival_time_seconds |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: Time (in seconds) of inter arrival of transformation requests. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - transformation_type |
| buckets: |
| - 60 |
| - 120 |
| - 240 |
| - 480 |
| - 960 |
| - 1920 |
| - 3840 |
| - 7680 |
| - 15360 |
| - 30720 |
| - name: dek_source_cache_size |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: Number of records in data encryption key (DEK) source cache. On a restart, |
| this value is an approximation of the number of decrypt RPC calls the server will |
| make to the KMS plugin. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - provider_name |
| - name: invalid_key_id_from_status_total |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: Number of times an invalid keyID is returned by the Status RPC call split |
| by error. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - error |
| - provider_name |
| - name: key_id_hash_last_timestamp_seconds |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: The last time in seconds when a keyID was used. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - apiserver_id_hash |
| - key_id_hash |
| - provider_name |
| - transformation_type |
| - name: key_id_hash_status_last_timestamp_seconds |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: The last time in seconds when a keyID was returned by the Status RPC call. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - apiserver_id_hash |
| - key_id_hash |
| - provider_name |
| - name: key_id_hash_total |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: Number of times a keyID is used split by transformation type, provider, and |
| apiserver identity. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - apiserver_id_hash |
| - key_id_hash |
| - provider_name |
| - transformation_type |
| - name: kms_operations_latency_seconds |
| subsystem: envelope_encryption |
| namespace: apiserver |
| help: KMS operation duration with gRPC error code status total. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - grpc_status_code |
| - method_name |
| - provider_name |
| buckets: |
| - 0.0001 |
| - 0.0002 |
| - 0.0004 |
| - 0.0008 |
| - 0.0016 |
| - 0.0032 |
| - 0.0064 |
| - 0.0128 |
| - 0.0256 |
| - 0.0512 |
| - 0.1024 |
| - 0.2048 |
| - 0.4096 |
| - 0.8192 |
| - 1.6384 |
| - 3.2768 |
| - 6.5536 |
| - 13.1072 |
| - 26.2144 |
| - 52.4288 |
| - name: current_inqueue_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of seats currently pending in queues of the API Priority and Fairness |
| subsystem |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: current_limit_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: current derived number of execution seats available to each priority level |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: current_r |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: R(time of last change) |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: demand_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Observations, at the end of every nanosecond, of (the number of seats each |
| priority level could use) / (nominal number of seats for that level) |
| type: TimingRatioHistogram |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| buckets: |
| - 0.2 |
| - 0.4 |
| - 0.6 |
| - 0.8 |
| - 1 |
| - 1.2 |
| - 1.4 |
| - 1.7 |
| - 2 |
| - 2.8 |
| - 4 |
| - 6 |
| - name: demand_seats_average |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Time-weighted average, over last adjustment period, of demand_seats |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: demand_seats_high_watermark |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: High watermark, over last adjustment period, of demand_seats |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: demand_seats_smoothed |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Smoothed seat demands |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: demand_seats_stdev |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Time-weighted standard deviation, over last adjustment period, of demand_seats |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: dispatch_r |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: R(time of last dispatch) |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: epoch_advance_total |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of times the queueset's progress meter jumped backward |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - success |
| - name: latest_s |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: S(most recently dispatched request) |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: lower_limit_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Configured lower bound on number of execution seats available to each priority |
| level |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: next_discounted_s_bounds |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: min and max, over queues, of S(oldest waiting request in queue) - estimated |
| work in progress |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - bound |
| - priority_level |
| - name: next_s_bounds |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: min and max, over queues, of S(oldest waiting request in queue) |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - bound |
| - priority_level |
| - name: priority_level_request_utilization |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Observations, at the end of every nanosecond, of number of requests (as a |
| fraction of the relevant limit) waiting or in any stage of execution (but only |
| initial stage for WATCHes) |
| type: TimingRatioHistogram |
| stabilityLevel: ALPHA |
| labels: |
| - phase |
| - priority_level |
| buckets: |
| - 0 |
| - 0.001 |
| - 0.003 |
| - 0.01 |
| - 0.03 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 0.75 |
| - 1 |
| - name: priority_level_seat_utilization |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Observations, at the end of every nanosecond, of utilization of seats for |
| any stage of execution (but only initial stage for WATCHes) |
| type: TimingRatioHistogram |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| buckets: |
| - 0 |
| - 0.1 |
| - 0.2 |
| - 0.3 |
| - 0.4 |
| - 0.5 |
| - 0.6 |
| - 0.7 |
| - 0.8 |
| - 0.9 |
| - 0.95 |
| - 0.99 |
| - 1 |
| constLabels: |
| phase: executing |
| - name: read_vs_write_current_requests |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Observations, at the end of every nanosecond, of the number of requests (as |
| a fraction of the relevant limit) waiting or in regular stage of execution |
| type: TimingRatioHistogram |
| stabilityLevel: ALPHA |
| labels: |
| - phase |
| - request_kind |
| buckets: |
| - 0 |
| - 0.001 |
| - 0.01 |
| - 0.1 |
| - 0.2 |
| - 0.3 |
| - 0.4 |
| - 0.5 |
| - 0.6 |
| - 0.7 |
| - 0.8 |
| - 0.9 |
| - 0.95 |
| - 0.99 |
| - 1 |
| - name: request_concurrency_in_use |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Concurrency (number of seats) occupied by the currently executing (initial |
| stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness |
| subsystem |
| type: Gauge |
| deprecatedVersion: 1.31.0 |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: request_concurrency_limit |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Nominal number of execution seats configured for each priority level |
| type: Gauge |
| deprecatedVersion: 1.30.0 |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: request_dispatch_no_accommodation_total |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of times a dispatch attempt resulted in a non accommodation due to |
| lack of available seats |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: request_execution_seconds |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Duration of initial stage (for a WATCH) or any (for a non-WATCH) stage of |
| request execution in the API Priority and Fairness subsystem |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| - type |
| buckets: |
| - 0 |
| - 0.005 |
| - 0.02 |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.5 |
| - 1 |
| - 2 |
| - 5 |
| - 10 |
| - 15 |
| - 30 |
| - name: request_queue_length_after_enqueue |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Length of queue in the API Priority and Fairness subsystem, as seen by each |
| request after it is enqueued |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| buckets: |
| - 0 |
| - 10 |
| - 25 |
| - 50 |
| - 100 |
| - 250 |
| - 500 |
| - 1000 |
| - name: seat_fair_frac |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Fair fraction of server's concurrency to allocate to each priority level that |
| can use it |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: target_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Seat allocation targets |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: upper_limit_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Configured upper bound on number of execution seats available to each priority |
| level |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - priority_level |
| - name: watch_count_samples |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: count of watchers for mutating requests in API Priority and Fairness |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| buckets: |
| - 0 |
| - 1 |
| - 10 |
| - 100 |
| - 1000 |
| - 10000 |
| - name: work_estimated_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of estimated seats (maximum of initial and final seats) associated |
| with requests in API Priority and Fairness |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - flow_schema |
| - priority_level |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 10 |
| - name: rerouted_request_total |
| subsystem: apiserver |
| help: Total number of requests that were proxied to a peer kube apiserver because |
| the local apiserver was not capable of serving it |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - name: data_key_generation_duration_seconds |
| subsystem: storage |
| namespace: apiserver |
| help: Latencies in seconds of data encryption key(DEK) generation operations. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 5e-06 |
| - 1e-05 |
| - 2e-05 |
| - 4e-05 |
| - 8e-05 |
| - 0.00016 |
| - 0.00032 |
| - 0.00064 |
| - 0.00128 |
| - 0.00256 |
| - 0.00512 |
| - 0.01024 |
| - 0.02048 |
| - 0.04096 |
| - name: data_key_generation_failures_total |
| subsystem: storage |
| namespace: apiserver |
| help: Total number of failed data encryption key(DEK) generation operations. |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: envelope_transformation_cache_misses_total |
| subsystem: storage |
| namespace: apiserver |
| help: Total number of cache misses while accessing key encryption key (KEK). |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: transformation_duration_seconds |
| subsystem: storage |
| namespace: apiserver |
| help: Latencies in seconds of value transformation operations. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - transformation_type |
| - transformer_prefix |
| buckets: |
| - 5e-06 |
| - 1e-05 |
| - 2e-05 |
| - 4e-05 |
| - 8e-05 |
| - 0.00016 |
| - 0.00032 |
| - 0.00064 |
| - 0.00128 |
| - 0.00256 |
| - 0.00512 |
| - 0.01024 |
| - 0.02048 |
| - 0.04096 |
| - 0.08192 |
| - 0.16384 |
| - 0.32768 |
| - 0.65536 |
| - 1.31072 |
| - 2.62144 |
| - 5.24288 |
| - 10.48576 |
| - 20.97152 |
| - 41.94304 |
| - 83.88608 |
| - name: transformation_operations_total |
| subsystem: storage |
| namespace: apiserver |
| help: Total number of transformations. Successful transformation will have a status |
| 'OK' and a varied status string when the transformation fails. This status and |
| transformation_type fields may be used for alerting on encryption/decryption failure |
| using transformation_type from_storage for decryption and to_storage for encryption |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - status |
| - transformation_type |
| - transformer_prefix |
| - name: x509_insecure_sha1_total |
| subsystem: webhooks |
| namespace: apiserver |
| help: Counts the number of requests to servers with insecure SHA1 signatures in |
| their serving certificate OR the number of connection failures due to the insecure |
| SHA1 signatures (either/or, based on the runtime environment) |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: x509_missing_san_total |
| subsystem: webhooks |
| namespace: apiserver |
| help: Counts the number of requests to servers missing SAN extension in their serving |
| certificate OR the number of connection failures due to the lack of x509 certificate |
| SAN extension (either/or, based on the runtime environment) |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: current_executing_requests |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of requests in initial (for a WATCH) or any (for a non-WATCH) execution |
| stage in the API Priority and Fairness subsystem |
| type: Gauge |
| stabilityLevel: BETA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: current_executing_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Concurrency (number of seats) occupied by the currently executing (initial |
| stage for a WATCH, any stage otherwise) requests in the API Priority and Fairness |
| subsystem |
| type: Gauge |
| stabilityLevel: BETA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: current_inqueue_requests |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of requests currently pending in queues of the API Priority and Fairness |
| subsystem |
| type: Gauge |
| stabilityLevel: BETA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: dispatched_requests_total |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of requests executed by API Priority and Fairness subsystem |
| type: Counter |
| stabilityLevel: BETA |
| labels: |
| - flow_schema |
| - priority_level |
| - name: nominal_limit_seats |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Nominal number of execution seats configured for each priority level |
| type: Gauge |
| stabilityLevel: BETA |
| labels: |
| - priority_level |
| - name: rejected_requests_total |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Number of requests rejected by API Priority and Fairness subsystem |
| type: Counter |
| stabilityLevel: BETA |
| labels: |
| - flow_schema |
| - priority_level |
| - reason |
| - name: request_wait_duration_seconds |
| subsystem: flowcontrol |
| namespace: apiserver |
| help: Length of time a request spent waiting in its queue |
| type: Histogram |
| stabilityLevel: BETA |
| labels: |
| - execute |
| - flow_schema |
| - priority_level |
| buckets: |
| - 0 |
| - 0.005 |
| - 0.02 |
| - 0.05 |
| - 0.1 |
| - 0.2 |
| - 0.5 |
| - 1 |
| - 2 |
| - 5 |
| - 10 |
| - 15 |
| - 30 |
| - name: request_duration_seconds |
| subsystem: cloud_provider_webhook |
| help: Request latency in seconds. Broken down by status code. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - webhook |
| buckets: |
| - 0.25 |
| - 0.5 |
| - 0.7 |
| - 1 |
| - 1.5 |
| - 3 |
| - 5 |
| - 10 |
| - name: request_total |
| subsystem: cloud_provider_webhook |
| help: Number of HTTP requests partitioned by status code. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - webhook |
| - name: cloud_provider_taint_removal_delay_seconds |
| subsystem: node_controller |
| help: Number of seconds after node creation when NodeController removed the cloud-provider |
| taint of a single node. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 1 |
| - 4 |
| - 16 |
| - 64 |
| - 256 |
| - 1024 |
| - name: initial_node_sync_delay_seconds |
| subsystem: node_controller |
| help: Number of seconds after node creation when NodeController finished the initial |
| synchronization of a single node. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 1 |
| - 4 |
| - 16 |
| - 64 |
| - 256 |
| - 1024 |
| - name: loadbalancer_sync_total |
| subsystem: service_controller |
| help: A metric counting the amount of times any load balancer has been configured, |
| as an effect of service/node changes on the cluster |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: nodesync_error_total |
| subsystem: service_controller |
| help: A metric counting the amount of times any load balancer has been configured |
| and errored, as an effect of node changes on the cluster |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: nodesync_latency_seconds |
| subsystem: service_controller |
| help: A metric measuring the latency for nodesync which updates loadbalancer hosts |
| on cluster node updates. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - name: update_loadbalancer_host_latency_seconds |
| subsystem: service_controller |
| help: A metric measuring the latency for updating each load balancer's hosts. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - name: changes |
| subsystem: endpoint_slice_controller |
| help: Number of EndpointSlice changes |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: desired_endpoint_slices |
| subsystem: endpoint_slice_controller |
| help: Number of EndpointSlices that would exist with perfect endpoint allocation |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: endpoints_added_per_sync |
| subsystem: endpoint_slice_controller |
| help: Number of endpoints added on each Service sync |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - name: endpoints_desired |
| subsystem: endpoint_slice_controller |
| help: Number of endpoints desired |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: endpoints_removed_per_sync |
| subsystem: endpoint_slice_controller |
| help: Number of endpoints removed on each Service sync |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 2 |
| - 4 |
| - 8 |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| - 256 |
| - 512 |
| - 1024 |
| - 2048 |
| - 4096 |
| - 8192 |
| - 16384 |
| - 32768 |
| - name: endpointslices_changed_per_sync |
| subsystem: endpoint_slice_controller |
| help: Number of EndpointSlices changed on each Service sync |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - topology |
| - name: num_endpoint_slices |
| subsystem: endpoint_slice_controller |
| help: Number of EndpointSlices |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: syncs |
| subsystem: endpoint_slice_controller |
| help: Number of EndpointSlice syncs |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| - name: kubernetes_build_info |
| help: A metric with a constant '1' value labeled by major, minor, git version, git |
| commit, git tree state, build date, Go version, and compiler from which Kubernetes |
| was built, and platform on which it is running. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - build_date |
| - compiler |
| - git_commit |
| - git_tree_state |
| - git_version |
| - go_version |
| - major |
| - minor |
| - platform |
| - name: leader_election_master_status |
| help: Gauge of whether the reporting system is master of the relevant lease, 0 indicates |
| backup, 1 indicates master. 'name' is the string used to identify the lease. Please |
| make sure to group by name. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: leader_election_slowpath_total |
| help: Total number of slow path exercised in renewing leader leases. 'name' is the |
| string used to identify the lease. Please make sure to group by name. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: rest_client_dns_resolution_duration_seconds |
| help: DNS resolver latency in seconds. Broken down by host. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - host |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 15 |
| - 30 |
| - name: rest_client_exec_plugin_call_total |
| help: Number of calls to an exec plugin, partitioned by the type of event encountered |
| (no_error, plugin_execution_error, plugin_not_found_error, client_internal_error) |
| and an optional exit code. The exit code will be set to 0 if and only if the plugin |
| call was successful. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - call_status |
| - code |
| - name: rest_client_exec_plugin_certificate_rotation_age |
| help: Histogram of the number of seconds the last auth exec plugin client certificate |
| lived before being rotated. If auth exec plugin client certificates are unused, |
| histogram will contain no data. |
| type: Histogram |
| stabilityLevel: ALPHA |
| buckets: |
| - 600 |
| - 1800 |
| - 3600 |
| - 14400 |
| - 86400 |
| - 604800 |
| - 2.592e+06 |
| - 7.776e+06 |
| - 1.5552e+07 |
| - 3.1104e+07 |
| - 1.24416e+08 |
| - name: rest_client_exec_plugin_ttl_seconds |
| help: Gauge of the shortest TTL (time-to-live) of the client certificate(s) managed |
| by the auth exec plugin. The value is in seconds until certificate expiry (negative |
| if already expired). If auth exec plugins are unused or manage no TLS certificates, |
| the value will be +INF. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: rest_client_rate_limiter_duration_seconds |
| help: Client side rate limiter latency in seconds. Broken down by verb, and host. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - host |
| - verb |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 15 |
| - 30 |
| - 60 |
| - name: rest_client_request_duration_seconds |
| help: Request latency in seconds. Broken down by verb, and host. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - host |
| - verb |
| buckets: |
| - 0.005 |
| - 0.025 |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| - 15 |
| - 30 |
| - 60 |
| - name: rest_client_request_retries_total |
| help: Number of request retries, partitioned by status code, verb, and host. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - host |
| - verb |
| - name: rest_client_request_size_bytes |
| help: Request size in bytes. Broken down by verb and host. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - host |
| - verb |
| buckets: |
| - 64 |
| - 256 |
| - 512 |
| - 1024 |
| - 4096 |
| - 16384 |
| - 65536 |
| - 262144 |
| - 1.048576e+06 |
| - 4.194304e+06 |
| - 1.6777216e+07 |
| - name: rest_client_requests_total |
| help: Number of HTTP requests, partitioned by status code, method, and host. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - code |
| - host |
| - method |
| - name: rest_client_response_size_bytes |
| help: Response size in bytes. Broken down by verb and host. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - host |
| - verb |
| buckets: |
| - 64 |
| - 256 |
| - 512 |
| - 1024 |
| - 4096 |
| - 16384 |
| - 65536 |
| - 262144 |
| - 1.048576e+06 |
| - 4.194304e+06 |
| - 1.6777216e+07 |
| - name: rest_client_transport_cache_entries |
| help: Number of transport entries in the internal cache. |
| type: Gauge |
| stabilityLevel: ALPHA |
| - name: rest_client_transport_create_calls_total |
| help: 'Number of calls to get a new transport, partitioned by the result of the |
| operation (hit: obtained from the cache, miss: created and added to the cache, |
| uncacheable: created and not cached)' |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - result |
| - name: running_managed_controllers |
| help: Indicates where instances of a controller are currently running |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - manager |
| - name |
| - name: adds_total |
| subsystem: workqueue |
| help: Total number of adds handled by workqueue |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: depth |
| subsystem: workqueue |
| help: Current depth of workqueue |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: longest_running_processor_seconds |
| subsystem: workqueue |
| help: How many seconds has the longest running processor for workqueue been running. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: queue_duration_seconds |
| subsystem: workqueue |
| help: How long in seconds an item stays in workqueue before being requested. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| buckets: |
| - 1e-08 |
| - 1e-07 |
| - 1e-06 |
| - 9.999999999999999e-06 |
| - 9.999999999999999e-05 |
| - 0.001 |
| - 0.01 |
| - 0.1 |
| - 1 |
| - 10 |
| - name: retries_total |
| subsystem: workqueue |
| help: Total number of retries handled by workqueue |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: unfinished_work_seconds |
| subsystem: workqueue |
| help: How many seconds of work has been done that is in progress and hasn't been observed |
| by work_duration. Large values indicate stuck threads. One can deduce the number |
| of stuck threads by observing the rate at which this increases. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: work_duration_seconds |
| subsystem: workqueue |
| help: How long in seconds processing an item from workqueue takes. |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| buckets: |
| - 1e-08 |
| - 1e-07 |
| - 1e-06 |
| - 9.999999999999999e-06 |
| - 9.999999999999999e-05 |
| - 0.001 |
| - 0.01 |
| - 0.1 |
| - 1 |
| - 10 |
| - name: disabled_metrics_total |
| help: The count of disabled metrics. |
| type: Counter |
| stabilityLevel: BETA |
| - name: hidden_metrics_total |
| help: The count of hidden metrics. |
| type: Counter |
| stabilityLevel: BETA |
| - name: feature_enabled |
| namespace: kubernetes |
| help: This metric records the data about the stage and enablement of a k8s feature. |
| type: Gauge |
| stabilityLevel: BETA |
| labels: |
| - name |
| - stage |
| - name: registered_metrics_total |
| help: The count of registered metrics broken down by stability level and deprecation |
| version. |
| type: Counter |
| stabilityLevel: BETA |
| labels: |
| - deprecated_version |
| - stability_level |
| - name: healthcheck |
| namespace: kubernetes |
| help: This metric records the result of a single healthcheck. |
| type: Gauge |
| stabilityLevel: STABLE |
| labels: |
| - name |
| - type |
| - name: healthchecks_total |
| namespace: kubernetes |
| help: This metric records the results of all healthchecks. |
| type: Counter |
| stabilityLevel: STABLE |
| labels: |
| - name |
| - status |
| - type |
| - name: aggregator_openapi_v2_regeneration_count |
| help: Counter of OpenAPI v2 spec regeneration count broken down by causing APIService |
| name and reason. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - apiservice |
| - reason |
| - name: aggregator_openapi_v2_regeneration_duration |
| help: Gauge of OpenAPI v2 spec regeneration duration in seconds. |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - reason |
| - name: aggregator_unavailable_apiservice |
| help: Gauge of APIServices which are marked as unavailable broken down by APIService |
| name. |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - name: aggregator_unavailable_apiservice_total |
| help: Counter of APIServices which are marked as unavailable broken down by APIService |
| name and reason. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - name |
| - reason |
| - name: x509_insecure_sha1_total |
| subsystem: kube_aggregator |
| namespace: apiserver |
| help: Counts the number of requests to servers with insecure SHA1 signatures in |
| their serving certificate OR the number of connection failures due to the insecure |
| SHA1 signatures (either/or, based on the runtime environment) |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: x509_missing_san_total |
| subsystem: kube_aggregator |
| namespace: apiserver |
| help: Counts the number of requests to servers missing SAN extension in their serving |
| certificate OR the number of connection failures due to the lack of x509 certificate |
| SAN extension (either/or, based on the runtime environment) |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: api_request_duration_seconds |
| namespace: cloudprovider_azure |
| help: Latency of an Azure API call |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - resource_group |
| - source |
| - subscription_id |
| buckets: |
| - 0.1 |
| - 0.25 |
| - 0.5 |
| - 1 |
| - 2.5 |
| - 5 |
| - 10 |
| - 15 |
| - 25 |
| - 50 |
| - 120 |
| - 300 |
| - 600 |
| - 1200 |
| - name: api_request_errors |
| namespace: cloudprovider_azure |
| help: Number of errors for an Azure API call |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - resource_group |
| - source |
| - subscription_id |
| - name: api_request_ratelimited_count |
| namespace: cloudprovider_azure |
| help: Number of rate limited Azure API calls |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - resource_group |
| - source |
| - subscription_id |
| - name: api_request_throttled_count |
| namespace: cloudprovider_azure |
| help: Number of throttled Azure API calls |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - resource_group |
| - source |
| - subscription_id |
| - name: op_duration_seconds |
| namespace: cloudprovider_azure |
| help: Latency of an Azure service operation |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - resource_group |
| - source |
| - subscription_id |
| buckets: |
| - 0.1 |
| - 0.2 |
| - 0.5 |
| - 1 |
| - 10 |
| - 20 |
| - 30 |
| - 40 |
| - 50 |
| - 60 |
| - 100 |
| - 200 |
| - 300 |
| - name: op_failure_count |
| namespace: cloudprovider_azure |
| help: Number of failed Azure service operations |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - resource_group |
| - source |
| - subscription_id |
| - name: cloudprovider_gce_api_request_duration_seconds |
| help: Latency of a GCE API call |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - region |
| - request |
| - version |
| - zone |
| - name: cloudprovider_gce_api_request_errors |
| help: Number of errors for an API call |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - region |
| - request |
| - version |
| - zone |
| - name: cloudprovider_vsphere_api_request_duration_seconds |
| help: Latency of vsphere api call |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - name: cloudprovider_vsphere_api_request_errors |
| help: vsphere Api errors |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - request |
| - name: cloudprovider_vsphere_operation_duration_seconds |
| help: Latency of vsphere operation call |
| type: Histogram |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: cloudprovider_vsphere_operation_errors |
| help: vsphere operation errors |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - operation |
| - name: cloudprovider_vsphere_vcenter_versions |
| help: Versions for connected vSphere vCenters |
| type: Custom |
| stabilityLevel: ALPHA |
| labels: |
| - hostname |
| - version |
| - build |
| - name: get_token_count |
| help: Counter of total Token() requests to the alternate token source |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: get_token_fail_count |
| help: Counter of failed Token() requests to the alternate token source |
| type: Counter |
| stabilityLevel: ALPHA |
| - name: number_of_l4_ilbs |
| help: Number of L4 ILBs |
| type: Gauge |
| stabilityLevel: ALPHA |
| labels: |
| - feature |
| - name: pod_security_errors_total |
| help: Number of errors preventing normal evaluation. Non-fatal errors may result |
| in the latest restricted profile being used for evaluation. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - fatal |
| - request_operation |
| - resource |
| - subresource |
| - name: pod_security_evaluations_total |
| help: Number of policy evaluations that occurred, not counting ignored or exempt |
| requests. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - decision |
| - mode |
| - policy_level |
| - policy_version |
| - request_operation |
| - resource |
| - subresource |
| - name: pod_security_exemptions_total |
| help: Number of exempt requests, not counting ignored or out of scope requests. |
| type: Counter |
| stabilityLevel: ALPHA |
| labels: |
| - request_operation |
| - resource |
| - subresource |