Merge pull request #1113 from paulfantom/unpin

Unpin jsonnet dependencies
This commit is contained in:
Paweł Krupa 2021-05-26 11:18:46 +02:00 committed by GitHub
commit a89da4adb6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 1874 additions and 1106 deletions

View File

@ -53,8 +53,10 @@ function(params) {
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/alerts/add-runbook-links.libsonnet') + ( (import 'github.com/kubernetes-monitoring/kubernetes-mixin/alerts/add-runbook-links.libsonnet') + (
if p._config.thanos != {} then if p._config.thanos != {} then
(import 'github.com/thanos-io/thanos/mixin/alerts/sidecar.libsonnet') + { (import 'github.com/thanos-io/thanos/mixin/alerts/sidecar.libsonnet') + {
targetGroups: {},
sidecar: { sidecar: {
selector: p._config.mixin._config.thanosSelector, selector: p._config.mixin._config.thanosSelector,
dimensions: std.join(', ', ['job', 'instance']),
}, },
} }
else {} else {}

View File

@ -8,7 +8,7 @@
"subdir": "grafana" "subdir": "grafana"
} }
}, },
"version": "8ea4e7bc04b1bf5e9bd99918ca28c6271b42be0e" "version": "master"
}, },
{ {
"source": { "source": {
@ -17,7 +17,7 @@
"subdir": "contrib/mixin" "subdir": "contrib/mixin"
} }
}, },
"version": "562d645ac923388ff5b8d270b0536764d34b0e0f" "version": "main"
}, },
{ {
"source": { "source": {
@ -26,7 +26,7 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "release-0.47" "version": "master"
}, },
{ {
"source": { "source": {
@ -35,7 +35,7 @@
"subdir": "jsonnet/mixin" "subdir": "jsonnet/mixin"
} }
}, },
"version": "release-0.47", "version": "master",
"name": "prometheus-operator-mixin" "name": "prometheus-operator-mixin"
}, },
{ {
@ -45,7 +45,7 @@
"subdir": "" "subdir": ""
} }
}, },
"version": "release-0.8" "version": "master"
}, },
{ {
"source": { "source": {
@ -54,7 +54,7 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "release-2.0" "version": "master"
}, },
{ {
"source": { "source": {
@ -63,7 +63,7 @@
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
"version": "release-2.0" "version": "master"
}, },
{ {
"source": { "source": {
@ -72,7 +72,7 @@
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
"version": "release-1.1" "version": "master"
}, },
{ {
"source": { "source": {
@ -81,7 +81,7 @@
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },
"version": "release-2.26", "version": "main",
"name": "prometheus" "name": "prometheus"
}, },
{ {
@ -91,7 +91,7 @@
"subdir": "doc/alertmanager-mixin" "subdir": "doc/alertmanager-mixin"
} }
}, },
"version": "99f64e944b1043c790784cf5373c8fb349816fc4", "version": "master",
"name": "alertmanager" "name": "alertmanager"
}, },
{ {
@ -101,7 +101,7 @@
"subdir": "mixin" "subdir": "mixin"
} }
}, },
"version": "release-0.19", "version": "main",
"name": "thanos-mixin" "name": "thanos-mixin"
} }
], ],

View File

@ -18,7 +18,7 @@
"subdir": "contrib/mixin" "subdir": "contrib/mixin"
} }
}, },
"version": "57a092b45d0eae6c9e600e62513ffcd2f1f25a92", "version": "7a7b668edb7ea4087fab00c3b3392202584b5fcf",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc=" "sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
}, },
{ {
@ -28,8 +28,8 @@
"subdir": "grafonnet" "subdir": "grafonnet"
} }
}, },
"version": "daad85cf3fad3580e58029414630e29956aefe21", "version": "3082bfca110166cd69533fa3c0875fdb1b68c329",
"sum": "zkOBVXtNSGlOdbm5TRCbEik7c/Jk+btbJqaE9qW8j3Y=" "sum": "4/sUV0Kk+o8I+wlYxL9R6EPhL/NiLfYHk+NXlU64RUk="
}, },
{ {
"source": { "source": {
@ -38,20 +38,9 @@
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
"version": "89299b1c5e93952622801795353d496fb337f44e", "version": "ab47cabd71f0318fbda8efbbce0b4af074970a4e",
"sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8=" "sum": "GRf2GvwEU4jhXV+JOonXSZ4wdDv8mnHBPCQ6TUVd+g8="
}, },
{
"source": {
"git": {
"remote": "https://github.com/ksonnet/ksonnet-lib.git",
"subdir": ""
}
},
"version": "0d2f82676817bbf9e4acf6495b2090205f323b9f",
"sum": "h28BXZ7+vczxYJ2sCt8JuR9+yznRtU/iA6DCpQUrtEg=",
"name": "ksonnet"
},
{ {
"source": { "source": {
"git": { "git": {
@ -59,8 +48,8 @@
"subdir": "" "subdir": ""
} }
}, },
"version": "ec3e85f45b5691d54a02ab38ed654c3c9f736fe5", "version": "faf65d6fe38b2fd279b60e2f0581b2d0cb878d01",
"sum": "6KgRTpd101espi7a7CDpkqN0yaIPmENxxlAXqGcCWhk=" "sum": "qDZhql8zB/4DNjniC5N8s26YcnYj6oZ6PBWTCUjB2t4="
}, },
{ {
"source": { "source": {
@ -69,7 +58,7 @@
"subdir": "lib/promgrafonnet" "subdir": "lib/promgrafonnet"
} }
}, },
"version": "faa0561a823cbd3b726aaefffcf6ee317547041a", "version": "faf65d6fe38b2fd279b60e2f0581b2d0cb878d01",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps=" "sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
}, },
{ {
@ -79,7 +68,7 @@
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
"version": "93255df07113f87dcdec0726b4f4db4e6344df26", "version": "c36d9c6adca9b61b33398f6bcf06122564da2f95",
"sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE=" "sum": "S5qI+PJUdNeYOv76jH5nxwYS9N6U7CRxvyuB1wI4cTE="
}, },
{ {
@ -89,8 +78,8 @@
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
"version": "93255df07113f87dcdec0726b4f4db4e6344df26", "version": "c36d9c6adca9b61b33398f6bcf06122564da2f95",
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo=" "sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
}, },
{ {
"source": { "source": {
@ -99,7 +88,7 @@
"subdir": "jsonnet/mixin" "subdir": "jsonnet/mixin"
} }
}, },
"version": "a4f5928b074e75addb76a27c5ebfe78314fcd6d1", "version": "e5797e3d1c7fe50b99d9b8e8006ad0416820b5c9",
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=", "sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U=",
"name": "prometheus-operator-mixin" "name": "prometheus-operator-mixin"
}, },
@ -110,8 +99,8 @@
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
"version": "64d466d7730165c0d260f187e2e9742bc0295bf2", "version": "e5797e3d1c7fe50b99d9b8e8006ad0416820b5c9",
"sum": "MRwyChXdKG3anL2OWpbUu3qWc97w9J6YsjUWjLFQyB0=" "sum": "1bEFcCwj8VP9lh1GG76M+uHc0975rolxA4YezwXaiwk="
}, },
{ {
"source": { "source": {
@ -120,7 +109,7 @@
"subdir": "doc/alertmanager-mixin" "subdir": "doc/alertmanager-mixin"
} }
}, },
"version": "99f64e944b1043c790784cf5373c8fb349816fc4", "version": "7301451eb94d2081fb740abf29755861dd122c65",
"sum": "V8jcZQ1Qrlm7AQ6wjbuQQsacPb0NvrcZovKyplmzW5w=", "sum": "V8jcZQ1Qrlm7AQ6wjbuQQsacPb0NvrcZovKyplmzW5w=",
"name": "alertmanager" "name": "alertmanager"
}, },
@ -131,8 +120,8 @@
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
"version": "46cdf618c9419ea62dd1a87331def97a0f18c837", "version": "220aa5b8893c599c624bbdba2f59ac1ed473a4ff",
"sum": "sxI7cBEy34JSbB0gHy9xC/ErtFsRzl9eJPsWVqd+XSY=" "sum": "os3VfjBdFdDaTYzI+A/RahIhQcgQ7KoaLL68s1kiCbA="
}, },
{ {
"source": { "source": {
@ -141,8 +130,8 @@
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },
"version": "3cafc58827d1ebd1a67749f88be4218f0bab3d8d", "version": "ef584a9df6b4d5b360aa99ddee605b9b92f5ad45",
"sum": "VK0c3sQ3ksiM6JQsAVfWmL5NbzGv9llMfXFNXfFdJ+A=", "sum": "Va7tcAcpgjCQjFoChBUlseoPoqQoo4mGoT70Kebep9Q=",
"name": "prometheus" "name": "prometheus"
}, },
{ {
@ -152,8 +141,8 @@
"subdir": "mixin" "subdir": "mixin"
} }
}, },
"version": "ba6c5c4726ff52807c7383c68f2159b1af7980bb", "version": "4e74c4ba0b6c2b4c34b5ca9cabd14e1a70232902",
"sum": "XP3uq7xcfKHsnWsz1v992csZhhZR3jQma6hFOfSViTs=", "sum": "saJz+8pxFRq4oS8H5fckUOBETHYcOdoFzFtgueoRQcU=",
"name": "thanos-mixin" "name": "thanos-mixin"
}, },
{ {

File diff suppressed because it is too large Load Diff

View File

@ -116,9 +116,6 @@ spec:
- mountPath: /grafana-dashboard-definitions/0/scheduler - mountPath: /grafana-dashboard-definitions/0/scheduler
name: grafana-dashboard-scheduler name: grafana-dashboard-scheduler
readOnly: false readOnly: false
- mountPath: /grafana-dashboard-definitions/0/statefulset
name: grafana-dashboard-statefulset
readOnly: false
- mountPath: /grafana-dashboard-definitions/0/workload-total - mountPath: /grafana-dashboard-definitions/0/workload-total
name: grafana-dashboard-workload-total name: grafana-dashboard-workload-total
readOnly: false readOnly: false
@ -201,9 +198,6 @@ spec:
- configMap: - configMap:
name: grafana-dashboard-scheduler name: grafana-dashboard-scheduler
name: grafana-dashboard-scheduler name: grafana-dashboard-scheduler
- configMap:
name: grafana-dashboard-statefulset
name: grafana-dashboard-statefulset
- configMap: - configMap:
name: grafana-dashboard-workload-total name: grafana-dashboard-workload-total
name: grafana-dashboard-workload-total name: grafana-dashboard-workload-total

View File

@ -40,3 +40,26 @@ spec:
for: 15m for: 15m
labels: labels:
severity: critical severity: critical
- alert: KubeStateMetricsShardingMismatch
annotations:
description: kube-state-metrics pods are running with different --total-shards configuration, some Kubernetes objects may be exposed multiple times or not exposed at all.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricsshardingmismatch
summary: kube-state-metrics sharding is misconfigured.
expr: |
stdvar (kube_state_metrics_total_shards{job="kube-state-metrics"}) != 0
for: 15m
labels:
severity: critical
- alert: KubeStateMetricsShardsMissing
annotations:
description: kube-state-metrics shards are missing, some Kubernetes objects are not being exposed.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubestatemetricsshardsmissing
summary: kube-state-metrics shards are missing.
expr: |
2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) - 1
-
sum( 2 ^ max by (shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) )
!= 0
for: 15m
labels:
severity: critical

View File

@ -699,26 +699,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1d]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1d]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1d]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1d]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1d]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate1d record: apiserver_request:burnrate1d
@ -726,26 +726,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[1h]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[1h]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[1h]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[1h]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[1h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[1h]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate1h record: apiserver_request:burnrate1h
@ -753,26 +753,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[2h]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[2h]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[2h]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[2h]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[2h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[2h]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate2h record: apiserver_request:burnrate2h
@ -780,26 +780,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30m]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30m]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30m]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30m]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[30m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[30m]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate30m record: apiserver_request:burnrate30m
@ -807,26 +807,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[3d]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[3d]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[3d]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[3d]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[3d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[3d]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate3d record: apiserver_request:burnrate3d
@ -834,26 +834,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[5m]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[5m]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[5m]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[5m]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[5m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate5m record: apiserver_request:burnrate5m
@ -861,26 +861,26 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[6h]))
- -
( (
( (
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[6h]))
or or
vector(0) vector(0)
) )
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[6h]))
+ +
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[6h]))
) )
) )
+ +
# errors # errors
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET",code=~"5.."}[6h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[6h]))
labels: labels:
verb: read verb: read
record: apiserver_request:burnrate6h record: apiserver_request:burnrate6h
@ -888,15 +888,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1d]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1d]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate1d record: apiserver_request:burnrate1d
@ -904,15 +904,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[1h]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[1h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[1h]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate1h record: apiserver_request:burnrate1h
@ -920,15 +920,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[2h]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[2h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[2h]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate2h record: apiserver_request:burnrate2h
@ -936,15 +936,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[30m]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[30m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[30m]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate30m record: apiserver_request:burnrate30m
@ -952,15 +952,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[3d]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[3d]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[3d]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate3d record: apiserver_request:burnrate3d
@ -968,15 +968,15 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[5m]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[5m]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate5m record: apiserver_request:burnrate5m
@ -984,36 +984,36 @@ spec:
( (
( (
# too slow # too slow
sum(rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_count{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
- -
sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h])) sum by (cluster) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",le="1"}[6h]))
) )
+ +
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE",code=~"5.."}[6h]))
) )
/ /
sum(rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h])) sum by (cluster) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[6h]))
labels: labels:
verb: write verb: write
record: apiserver_request:burnrate6h record: apiserver_request:burnrate6h
- expr: | - expr: |
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m])) sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"LIST|GET"}[5m]))
labels: labels:
verb: read verb: read
record: code_resource:apiserver_request_total:rate5m record: code_resource:apiserver_request_total:rate5m
- expr: | - expr: |
sum by (code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m])) sum by (cluster,code,resource) (rate(apiserver_request_total{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))
labels: labels:
verb: write verb: write
record: code_resource:apiserver_request_total:rate5m record: code_resource:apiserver_request_total:rate5m
- expr: | - expr: |
histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0 histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET"}[5m]))) > 0
labels: labels:
quantile: "0.99" quantile: "0.99"
verb: read verb: read
record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile record: cluster_quantile:apiserver_request_duration_seconds:histogram_quantile
- expr: | - expr: |
histogram_quantile(0.99, sum by (le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0 histogram_quantile(0.99, sum by (cluster, le, resource) (rate(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"POST|PUT|PATCH|DELETE"}[5m]))) > 0
labels: labels:
quantile: "0.99" quantile: "0.99"
verb: write verb: write
@ -1040,56 +1040,56 @@ spec:
1 - ( 1 - (
( (
# write too slow # write too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
) + ) +
( (
# read too slow # read too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"LIST|GET"}[30d]))
- -
( (
( (
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
or or
vector(0) vector(0)
) )
+ +
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
+ +
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
) )
) + ) +
# errors # errors
sum(code:apiserver_request_total:increase30d{code=~"5.."} or vector(0)) sum by (cluster) (code:apiserver_request_total:increase30d{code=~"5.."} or vector(0))
) )
/ /
sum(code:apiserver_request_total:increase30d) sum by (cluster) (code:apiserver_request_total:increase30d)
labels: labels:
verb: all verb: all
record: apiserver_request:availability30d record: apiserver_request:availability30d
- expr: | - expr: |
1 - ( 1 - (
sum(increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{job="apiserver",verb=~"LIST|GET"}[30d]))
- -
( (
# too slow # too slow
( (
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope=~"resource|",le="0.1"}[30d]))
or or
vector(0) vector(0)
) )
+ +
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="namespace",le="0.5"}[30d]))
+ +
sum(increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{job="apiserver",verb=~"LIST|GET",scope="cluster",le="5"}[30d]))
) )
+ +
# errors # errors
sum(code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0)) sum by (cluster) (code:apiserver_request_total:increase30d{verb="read",code=~"5.."} or vector(0))
) )
/ /
sum(code:apiserver_request_total:increase30d{verb="read"}) sum by (cluster) (code:apiserver_request_total:increase30d{verb="read"})
labels: labels:
verb: read verb: read
record: apiserver_request:availability30d record: apiserver_request:availability30d
@ -1097,16 +1097,16 @@ spec:
1 - ( 1 - (
( (
# too slow # too slow
sum(increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_count{verb=~"POST|PUT|PATCH|DELETE"}[30d]))
- -
sum(increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d])) sum by (cluster) (increase(apiserver_request_duration_seconds_bucket{verb=~"POST|PUT|PATCH|DELETE",le="1"}[30d]))
) )
+ +
# errors # errors
sum(code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0)) sum by (cluster) (code:apiserver_request_total:increase30d{verb="write",code=~"5.."} or vector(0))
) )
/ /
sum(code:apiserver_request_total:increase30d{verb="write"}) sum by (cluster) (code:apiserver_request_total:increase30d{verb="write"})
labels: labels:
verb: write verb: write
record: apiserver_request:availability30d record: apiserver_request:availability30d
@ -1114,84 +1114,84 @@ spec:
avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30 avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
record: code_verb:apiserver_request_total:increase30d record: code_verb:apiserver_request_total:increase30d
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h])) sum by (cluster, code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h]))
record: code_verb:apiserver_request_total:increase1h record: code_verb:apiserver_request_total:increase1h
- expr: | - expr: |
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"}) sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
labels: labels:
verb: read verb: read
record: code:apiserver_request_total:increase30d record: code:apiserver_request_total:increase30d
- expr: | - expr: |
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"}) sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
labels: labels:
verb: write verb: write
record: code:apiserver_request_total:increase30d record: code:apiserver_request_total:increase30d

View File

@ -211,6 +211,16 @@ spec:
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
- alert: PrometheusLabelLimitHit
annotations:
description: Prometheus {{$labels.namespace}}/{{$labels.pod}} has dropped {{ printf "%.0f" $value }} targets because some samples exceeded the configured label_limit, label_name_length_limit or label_value_length_limit.
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/prometheuslabellimithit
summary: Prometheus has dropped targets because some scrape configs have exceeded the labels limit.
expr: |
increase(prometheus_target_scrape_pool_exceeded_label_limits_total{job="prometheus-k8s",namespace="monitoring"}[5m]) > 0
for: 15m
labels:
severity: warning
- alert: PrometheusErrorSendingAlertsToAnyAlertmanager - alert: PrometheusErrorSendingAlertsToAnyAlertmanager
annotations: annotations:
description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.' description: '{{ printf "%.1f" $value }}% minimum errors while sending alerts from Prometheus {{$labels.namespace}}/{{$labels.pod}} to any Alertmanager.'

View File

@ -17,6 +17,8 @@ spec:
app.kubernetes.io/part-of: kube-prometheus app.kubernetes.io/part-of: kube-prometheus
template: template:
metadata: metadata:
annotations:
kubectl.kubernetes.io/default-container: prometheus-operator
labels: labels:
app.kubernetes.io/component: controller app.kubernetes.io/component: controller
app.kubernetes.io/name: prometheus-operator app.kubernetes.io/name: prometheus-operator