From 6e020336a3c286f10bda3ed4703229e9a88a2bf7 Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Tue, 24 Sep 2024 03:01:10 +0530 Subject: [PATCH] Prefer kube-scheduler's resource metrics to kube-state-metrics' Since they are more accurate. --- alerts/resource_alerts.libsonnet | 8 +-- dashboards/resources/cluster.libsonnet | 12 ++-- dashboards/resources/multi-cluster.libsonnet | 24 +++---- dashboards/resources/namespace.libsonnet | 24 +++---- dashboards/resources/node.libsonnet | 16 ++--- dashboards/resources/pod.libsonnet | 14 ++-- .../resources/workload-namespace.libsonnet | 2 +- dashboards/resources/workload.libsonnet | 2 +- rules/apps.libsonnet | 72 +++++++++++++++++++ rules/windows.libsonnet | 8 +-- tests/tests.yaml | 68 +++++++----------- 11 files changed, 151 insertions(+), 99 deletions(-) diff --git a/alerts/resource_alerts.libsonnet b/alerts/resource_alerts.libsonnet index fdb82e0da..7cd73891f 100644 --- a/alerts/resource_alerts.libsonnet +++ b/alerts/resource_alerts.libsonnet @@ -6,7 +6,7 @@ local utils = import '../lib/utils.libsonnet'; # Non-HA clusters. ( ( - sum by(%(clusterLabel)s) (namespace_%(resource)s:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) + sum by(%(clusterLabel)s) (namespace_%(resource)s:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - sum by(%(clusterLabel)s) (kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="%(resource)s"}) > 0 ) @@ -16,7 +16,7 @@ local utils = import '../lib/utils.libsonnet'; or # HA clusters. ( - sum by(%(clusterLabel)s) (namespace_%(resource)s:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) + sum by(%(clusterLabel)s) (namespace_%(resource)s:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - ( # Skip clusters with only one allocatable node. 
@@ -33,7 +33,7 @@ local utils = import '../lib/utils.libsonnet'; # Non-HA clusters. ( ( - sum(namespace_%(resource)s:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) + sum(namespace_%(resource)s:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - sum(kube_node_status_allocatable{resource="%(resource)s", %(kubeStateMetricsSelector)s}) > 0 ) @@ -43,7 +43,7 @@ local utils = import '../lib/utils.libsonnet'; or # HA clusters. ( - sum(namespace_%(resource)s:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) + sum(namespace_%(resource)s:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - ( # Skip clusters with only one allocatable node. diff --git a/dashboards/resources/cluster.libsonnet b/dashboards/resources/cluster.libsonnet index e06522ad2..21b08b90d 100644 --- a/dashboards/resources/cluster.libsonnet +++ b/dashboards/resources/cluster.libsonnet @@ -88,7 +88,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Requests Commitment', 'percentunit', - 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config + 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -96,7 +96,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Limits Commitment', 'percentunit', - 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config + 
'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s,resource="cpu",%(clusterLabel)s="$cluster"})' % $._config ) + stat.gridPos.withW(4) + stat.gridPos.withH(3), @@ -148,19 +148,19 @@ local var = g.dashboard.variable; + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by 
(namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster"}) by (namespace) / sum(namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/multi-cluster.libsonnet b/dashboards/resources/multi-cluster.libsonnet index 9807327ba..f1091f2ac 100644 --- a/dashboards/resources/multi-cluster.libsonnet +++ b/dashboards/resources/multi-cluster.libsonnet @@ -68,13 +68,13 @@ local var = g.dashboard.variable; statPanel( 'CPU Requests Commitment', 'percentunit', - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config ), statPanel( 'CPU Limits Commitment', 'percentunit', - 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="cpu"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="cpu"})' % $._config ), statPanel( @@ -86,13 +86,13 @@ local var = 
g.dashboard.variable; statPanel( 'Memory Requests Commitment', 'percentunit', - 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config + 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config ), statPanel( 'Memory Limits Commitment', 'percentunit', - 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config + 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) / sum(kube_node_status_allocatable{%(kubeSchedulerSelector)s, resource="memory"} or kube_node_status_allocatable{%(kubeStateMetricsSelector)s, resource="memory"})' % $._config ), ], @@ -110,16 +110,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="cpu"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="cpu"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -209,16 +209,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - 
prometheus.new('${datasource}', 'sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, container!=""}) by (%(clusterLabel)s) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, 
resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, resource="memory"}) by (%(clusterLabel)s)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) diff --git a/dashboards/resources/namespace.libsonnet b/dashboards/resources/namespace.libsonnet index 4423f8968..c390848f4 100644 --- a/dashboards/resources/namespace.libsonnet +++ b/dashboards/resources/namespace.libsonnet @@ -91,7 +91,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from requests)', 'percentunit', - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -99,7 +99,7 @@ local var = g.dashboard.variable; statPanel( 'CPU Utilisation (from limits)', 'percentunit', - 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config + 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or 
kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -107,7 +107,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from requests)', 'percentunit', - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ -115,7 +115,7 @@ local var = g.dashboard.variable; statPanel( 'Memory Utilisation (from limits)', 'percentunit', - 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config + 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) / sum(kube_pod_resource_limit{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"} or kube_pod_container_resource_limits{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="memory"})' % $._config ) + stat.gridPos.withW(6) + stat.gridPos.withH(3), @@ 
-200,16 +200,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - 
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -363,16 +363,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, 
%(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!="", image!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace",container!=""}) by (pod)' % $._config) diff --git a/dashboards/resources/node.libsonnet b/dashboards/resources/node.libsonnet index e03fe29c4..73647a6b3 100644 --- a/dashboards/resources/node.libsonnet +++ b/dashboards/resources/node.libsonnet @@ -108,16 +108,16 @@ local var = 
g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -235,16 +235,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / 
sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config) diff --git a/dashboards/resources/pod.libsonnet b/dashboards/resources/pod.libsonnet index ae623fc62..9fa471767 100644 --- a/dashboards/resources/pod.libsonnet +++ b/dashboards/resources/pod.libsonnet @@ -80,7 +80,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", 
pod="$pod", resource="cpu"} + kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", resource="cpu"} ) ||| % $._config; @@ -217,10 +217,10 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), ]) @@ -356,16 +356,16 @@ local var = g.dashboard.variable; prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 
'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), - prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.new('${datasource}', 'sum(container_memory_working_set_bytes{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container!="", image!=""}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + prometheus.withInstant(true) + prometheus.withFormat('table'), prometheus.new('${datasource}', 'sum(container_memory_rss{%(cadvisorSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod", container != "", container != "POD"}) by (container)' % $._config) diff --git a/dashboards/resources/workload-namespace.libsonnet b/dashboards/resources/workload-namespace.libsonnet index 2f781fb7c..412ff0ef8 100644 --- a/dashboards/resources/workload-namespace.libsonnet +++ b/dashboards/resources/workload-namespace.libsonnet @@ -99,7 +99,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", 
resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload_type=~"$type"} ) by (workload, workload_type) diff --git a/dashboards/resources/workload.libsonnet b/dashboards/resources/workload.libsonnet index 996851c8a..f669ab69a 100644 --- a/dashboards/resources/workload.libsonnet +++ b/dashboards/resources/workload.libsonnet @@ -111,7 +111,7 @@ local var = g.dashboard.variable; local cpuRequestsQuery = ||| sum( - kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} + (kube_pod_resource_request{%(kubeSchedulerSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"} or kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"}) * on(namespace,pod) group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"} ) by (pod) diff --git a/rules/apps.libsonnet b/rules/apps.libsonnet index d333702d6..e54852d68 100644 --- a/rules/apps.libsonnet +++ b/rules/apps.libsonnet @@ -116,6 +116,24 @@ ) ||| % $._config, }, + { + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', + expr: ||| + (kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s}) + ||| % $._config, + }, + { + record: 
'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', + expr: ||| + sum by (namespace, %(clusterLabel)s) ( + sum by (namespace, pod, %(clusterLabel)s) ( + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + ) + ) + ) + ||| % $._config, + }, ], }, { @@ -144,6 +162,24 @@ ) ||| % $._config, }, + { + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_request_or_kube_pod_container_resource_requests', + expr: ||| + (kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s}) + ||| % $._config, + }, + { + record: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum', + expr: ||| + sum by (namespace, %(clusterLabel)s) ( + sum by (namespace, pod, %(clusterLabel)s) ( + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + ) + ) + ) + ||| % $._config, + }, ], }, { @@ -172,6 +208,24 @@ ) ||| % $._config, }, + { + record: 'cluster:namespace:pod_memory:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', + expr: ||| + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) + ||| % $._config, + }, + { + record: 'namespace_memory:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', + expr: ||| + sum by (namespace, %(clusterLabel)s) ( + sum by (namespace, pod, %(clusterLabel)s) ( + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or 
kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} + ) + ) + ) + ||| % $._config, + }, ], }, { @@ -200,6 +254,24 @@ ) ||| % $._config, }, + { + record: 'cluster:namespace:pod_cpu:active:kube_pod_resource_limit_or_kube_pod_container_resource_limits', + expr: ||| + (kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) + ||| % $._config, + }, + { + record: 'namespace_cpu:kube_pod_resource_limit_or_kube_pod_container_resource_limits:sum', + expr: ||| + sum by (namespace, %(clusterLabel)s) ( + sum by (namespace, pod, %(clusterLabel)s) ( + max by (namespace, pod, container, %(clusterLabel)s) ( + kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} + ) + ) + ) + ||| % $._config, + }, ], }, { diff --git a/rules/windows.libsonnet b/rules/windows.libsonnet index f4bc1215b..d7b75f70f 100644 --- a/rules/windows.libsonnet +++ b/rules/windows.libsonnet @@ -218,28 +218,28 @@ record: 'kube_pod_windows_container_resource_memory_request', expr: ||| max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="memory",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_memory_limit', expr: ||| - kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="memory",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="memory",%(kubeStateMetricsSelector)s}) * 
on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_request', expr: ||| max by (%(clusterLabel)s, namespace, pod, container) ( - kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} + kube_pod_resource_request{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_requests{resource="cpu",%(kubeStateMetricsSelector)s} ) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { record: 'kube_pod_windows_container_resource_cpu_cores_limit', expr: ||| - kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s} * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) + (kube_pod_resource_limit{resource="cpu",%(kubeSchedulerSelector)s} or kube_pod_container_resource_limits{resource="cpu",%(kubeStateMetricsSelector)s}) * on(container,pod,namespace,%(clusterLabel)s) (windows_pod_container_available) ||| % $._config, }, { diff --git a/tests/tests.yaml b/tests/tests.yaml index 01f3f0048..5f9ae39c8 100644 --- a/tests/tests.yaml +++ b/tests/tests.yaml @@ -484,25 +484,15 @@ tests: values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - - eval_time: 0m - expr: namespace_memory:kube_pod_container_resource_requests:sum - exp_samples: - - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum - exp_samples: - - value: 0.15 - labels: 
'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + - value: 0.3 + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + - value: 0.3 + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: @@ -529,25 +519,15 @@ tests: values: '1+0x10' promql_expr_test: - eval_time: 0m - expr: namespace_cpu:kube_pod_container_resource_requests:sum - exp_samples: - - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - - eval_time: 0m - expr: namespace_memory:kube_pod_container_resource_requests:sum - exp_samples: - - value: 1.0e+9 - labels: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - - eval_time: 1m - expr: namespace_cpu:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - - value: 0.15 - labels: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + - value: 0.3 + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - eval_time: 1m - expr: namespace_memory:kube_pod_container_resource_requests:sum + expr: namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum exp_samples: - - value: 1.0e+9 - labels: 
'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' + - value: 0.3 + labels: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes",namespace="kube-apiserver"}' - interval: 1m input_series: @@ -1427,9 +1407,9 @@ tests: - name: KubeCPUOvercommit alert (single-node) interval: 1m input_series: - - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' + - series: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' values: '1x10' - - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' + - series: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' values: '1x10' - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}' values: '1.9x10' # This value was seen on a 2x vCPU node @@ -1451,9 +1431,9 @@ tests: - name: KubeCPUOvercommit alert (multi-node; non-HA) interval: 1m input_series: - - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' + - series: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' values: '2x10' - - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' + - series: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' values: '2x10' - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}' values: '1.9x10' # This value was seen on a 2x vCPU node @@ -1479,9 +1459,9 @@ tests: - name: KubeCPUOvercommit 
alert (multi-node; HA) interval: 1m input_series: - - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' + - series: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' values: '2x10' - - series: 'namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' + - series: 'namespace_cpu:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' values: '2x10' - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}' values: '1.9x10' # This value was seen on a 2x vCPU node @@ -1511,9 +1491,9 @@ tests: - name: KubeMemoryOvercommit alert (single-node) interval: 1m input_series: - - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' + - series: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' values: '1000000000x10' # 1 GB - - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' + - series: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' values: '1000000000x10' - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}' values: '1000000000x10' @@ -1535,9 +1515,9 @@ tests: - name: KubeMemoryOvercommit alert (multi-node; non-HA) interval: 1m input_series: - - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' + - series: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' values: '2000000000x10' # 2 GB - 
- series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' + - series: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' values: '2000000000x10' - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}' values: '1000000000x10' @@ -1563,9 +1543,9 @@ tests: - name: KubeMemoryOvercommit alert (multi-node; HA) interval: 1m input_series: - - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' + - series: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}' values: '2000000000x10' # 2 GB - - series: 'namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' + - series: 'namespace_memory:kube_pod_resource_request_or_kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}' values: '2000000000x10' - series: 'kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}' values: '1000000000x10'