mirror of
https://github.com/prometheus-operator/kube-prometheus.git
synced 2025-10-25 14:11:02 +02:00
Sync with kubernetes-mixin
This commit is contained in:
parent
466eb7953f
commit
3a4e292aab
@ -48,8 +48,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "8524aa43d49914b170b84816fc182319da04a167",
|
||||
"sum": "J06UiBvcfpRzLM5VbLRAhP39Zaz+EKguJ5sSTBDeygs="
|
||||
"version": "e0dc3563dcbf2e54e0ffe8e83f3f51b237ef33be",
|
||||
"sum": "egi2xHFco6VkCxettVvAju/yrsGnB3AFoPpCGKfWhtU="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
|
||||
@ -5602,7 +5602,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
|
||||
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"cpu\",cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -5854,7 +5854,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
|
||||
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{resource=\"memory\",cluster=\"$cluster\"})",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6258,7 +6258,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6267,7 +6267,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
|
||||
"expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6276,7 +6276,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"cpu\"}) by (namespace)",
|
||||
"expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6676,7 +6676,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||
"expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6685,7 +6685,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_requests{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6694,7 +6694,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||
"expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
@ -6703,7 +6703,7 @@ items:
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(kube_pod_container_resource_limits{cluster=\"$cluster\", resource=\"memory\"}) by (namespace)",
|
||||
"expr": "sum(container_memory_rss{cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)",
|
||||
"format": "table",
|
||||
"instant": true,
|
||||
"intervalFactor": 2,
|
||||
|
||||
@ -19,6 +19,8 @@ spec:
|
||||
summary: Pod is crash looping.
|
||||
expr: |
|
||||
increase(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) > 0
|
||||
and
|
||||
sum without (phase) (kube_pod_status_phase{phase!="Running",job="kube-state-metrics"} == 1)
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -214,19 +216,19 @@ spec:
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpareplicasmismatch
|
||||
summary: HPA has not matched desired number of replicas.
|
||||
expr: |
|
||||
(kube_hpa_status_desired_replicas{job="kube-state-metrics"}
|
||||
(kube_horizontalpodautoscaler_status_desired_replicas{job="kube-state-metrics"}
|
||||
!=
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"})
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}
|
||||
>
|
||||
kube_hpa_spec_min_replicas{job="kube-state-metrics"})
|
||||
kube_horizontalpodautoscaler_spec_min_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
(kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}
|
||||
<
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"})
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics"})
|
||||
and
|
||||
changes(kube_hpa_status_current_replicas{job="kube-state-metrics"}[15m]) == 0
|
||||
changes(kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}[15m]) == 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -236,9 +238,9 @@ spec:
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubehpamaxedout
|
||||
summary: HPA is running at max replicas
|
||||
expr: |
|
||||
kube_hpa_status_current_replicas{job="kube-state-metrics"}
|
||||
kube_horizontalpodautoscaler_status_current_replicas{job="kube-state-metrics"}
|
||||
==
|
||||
kube_hpa_spec_max_replicas{job="kube-state-metrics"}
|
||||
kube_horizontalpodautoscaler_spec_max_replicas{job="kube-state-metrics"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -1250,6 +1252,28 @@ spec:
|
||||
)
|
||||
)
|
||||
record: namespace_cpu:kube_pod_container_resource_requests:sum
|
||||
- expr: |
|
||||
sum by (namespace, cluster) (
|
||||
sum by (namespace, pod, cluster) (
|
||||
max by (namespace, pod, container, cluster) (
|
||||
kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}
|
||||
) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
|
||||
kube_pod_status_phase{phase=~"Pending|Running"} == 1
|
||||
)
|
||||
)
|
||||
)
|
||||
record: namespace_memory:kube_pod_container_resource_limits:sum
|
||||
- expr: |
|
||||
sum by (namespace, cluster) (
|
||||
sum by (namespace, pod, cluster) (
|
||||
max by (namespace, pod, container, cluster) (
|
||||
kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}
|
||||
) * on(namespace, pod, cluster) group_left() max by (namespace, pod) (
|
||||
kube_pod_status_phase{phase=~"Pending|Running"} == 1
|
||||
)
|
||||
)
|
||||
)
|
||||
record: namespace_cpu:kube_pod_container_resource_limits:sum
|
||||
- expr: |
|
||||
max by (cluster, namespace, workload, pod) (
|
||||
label_replace(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user