mirror of
https://github.com/prometheus-operator/kube-prometheus.git
synced 2025-11-01 16:41:02 +01:00
Merge pull request #882 from paulfantom/deps
This commit is contained in:
commit
a89a364ece
@ -80,7 +80,7 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "release-2.23",
|
||||
"version": "release-2.24",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
|
||||
@ -8,8 +8,8 @@
|
||||
"subdir": "grafana"
|
||||
}
|
||||
},
|
||||
"version": "7176a6d54b3b19e0529ce574ab5ed427f1c721e9",
|
||||
"sum": "IrxVMYJrTbDliaVMXX72jUKm8Ju2Za8cAbds7d26wuY="
|
||||
"version": "4204279da8d3d6317116ee161ac706fadbba9193",
|
||||
"sum": "VUavLhri7lTnJ2V7F9lDlL+K96NwIhqqlxMtasYBs3Q="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -18,7 +18,7 @@
|
||||
"subdir": "Documentation/etcd-mixin"
|
||||
}
|
||||
},
|
||||
"version": "ca866c02422ff3f3d1f0876898a30c33dd7bcccf",
|
||||
"version": "5dcd459ae9c7948f5620002f5b0bb9cf0b8f1502",
|
||||
"sum": "bLqTqEr0jky9zz5MV/7ucn6H5mph2NlXas0TVnGNB1Y="
|
||||
},
|
||||
{
|
||||
@ -28,8 +28,8 @@
|
||||
"subdir": "grafonnet"
|
||||
}
|
||||
},
|
||||
"version": "356bd73e4792ffe107725776ca8946895969c191",
|
||||
"sum": "CSMZ3dJrpJpwvffie8BqcfrIVVwiKNqdPEN+1XWRBGU="
|
||||
"version": "b0d72d6ed0e9fcab83fc2dd954b3bd57113e768c",
|
||||
"sum": "g2UC37YmOShdIFThAO99Uw89UO+H3sHt+y0ionv9/sA="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -38,8 +38,8 @@
|
||||
"subdir": "grafana-builder"
|
||||
}
|
||||
},
|
||||
"version": "9c3fb8096e1f80e2f3a84566566906ff187f5a8c",
|
||||
"sum": "9/eJqljTTtJeq9QRjabdKWL6yD8a7VzLmGKBK3ir77k="
|
||||
"version": "2cef89cb717c8b596443ac5de0415d1ffdb42252",
|
||||
"sum": "EmHrmBY8PbnV0BKXmVWvAEmax6eglRinKSyZbTmVWuc="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -59,8 +59,8 @@
|
||||
"subdir": ""
|
||||
}
|
||||
},
|
||||
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
|
||||
"sum": "3i0NkntlBluDS1NRF+iSc2e727Alkv3ziuVjAP12/kE="
|
||||
"version": "4a8e078147dbca51067521e6ac59c7b54d44d3bd",
|
||||
"sum": "D5XwKXhd3c0e+1D5iRgUhStB0qpcT4dSCmytuGQa3+k="
|
||||
},
|
||||
{
|
||||
"source": {
|
||||
@ -69,7 +69,7 @@
|
||||
"subdir": "lib/promgrafonnet"
|
||||
}
|
||||
},
|
||||
"version": "ead45674dba3c8712e422d99223453177aac6bf4",
|
||||
"version": "4a8e078147dbca51067521e6ac59c7b54d44d3bd",
|
||||
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
|
||||
},
|
||||
{
|
||||
@ -89,7 +89,7 @@
|
||||
"subdir": "jsonnet/kube-state-metrics-mixin"
|
||||
}
|
||||
},
|
||||
"version": "7bdd62593c9273b5179cf3c9d2d819e9d997aaa4",
|
||||
"version": "72d6d3106861f992b7d6ecc0a88abe9b12ad5427",
|
||||
"sum": "Yf8mNAHrV1YWzrdV8Ry5dJ8YblepTGw3C0Zp10XIYLo="
|
||||
},
|
||||
{
|
||||
@ -99,7 +99,7 @@
|
||||
"subdir": "jsonnet/mixin"
|
||||
}
|
||||
},
|
||||
"version": "5555f492df250168657b72bb8cb60bec071de71f",
|
||||
"version": "788d4456425eaf8c1d613582995bdf7de02154b0",
|
||||
"sum": "6reUygVmQrLEWQzTKcH8ceDbvM+2ztK3z2VBR2K2l+U="
|
||||
},
|
||||
{
|
||||
@ -119,8 +119,8 @@
|
||||
"subdir": "doc/alertmanager-mixin"
|
||||
}
|
||||
},
|
||||
"version": "193ebba04d1e70d971047e983a0b489112610460",
|
||||
"sum": "QcftU7gjCQyj7B6M4YJeCAeaPd0kwxd4J4rolo7AnLE=",
|
||||
"version": "3f46b62d75da4d68d2098388797e6a61fcc5e043",
|
||||
"sum": "VP1vn/WTGLZaBgGhGMUO81qNTc/fnp5KtzVjcaxad6Q=",
|
||||
"name": "alertmanager"
|
||||
},
|
||||
{
|
||||
@ -130,7 +130,7 @@
|
||||
"subdir": "docs/node-mixin"
|
||||
}
|
||||
},
|
||||
"version": "8b466360a35581e0301bd22918be7011cf4203c3",
|
||||
"version": "cfdd9dd0c983057df5e814e067fadbf8c7781559",
|
||||
"sum": "rvyiD/yCB4BeYAWqYF53bP8c+aCUt2ipLHW2Ea8ELO8="
|
||||
},
|
||||
{
|
||||
@ -140,8 +140,8 @@
|
||||
"subdir": "documentation/prometheus-mixin"
|
||||
}
|
||||
},
|
||||
"version": "26d89b4b0776fe4cd5a3656dfa520f119a375273",
|
||||
"sum": "1VRVMuxAEZ9vdGHFlndmG9iQzDD6AoIXrX80CDpGDaU=",
|
||||
"version": "e4487274853c587717006eeda8804e597d120340",
|
||||
"sum": "6kUzElCBWZ5U/3cxEpHNMmoKKPubG45QxpmLu8PGg08=",
|
||||
"name": "prometheus"
|
||||
},
|
||||
{
|
||||
|
||||
@ -55,17 +55,31 @@ spec:
|
||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||
annotations:
|
||||
description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications.
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications to a critical integration.
|
||||
expr: |
|
||||
min by (namespace,service) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
min by (namespace,service, integration) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring"}[5m])
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration=~`.*`}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: AlertmanagerClusterFailedToSendAlerts
|
||||
annotations:
|
||||
description: The minimum notification failure rate to {{ $labels.integration }} sent from any instance in the {{$labels.job}} cluster is {{ $value | humanizePercentage }}.
|
||||
summary: All Alertmanager instances in a cluster failed to send notifications to a non-critical integration.
|
||||
expr: |
|
||||
min by (namespace,service, integration) (
|
||||
rate(alertmanager_notifications_failed_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
|
||||
/
|
||||
rate(alertmanager_notifications_total{job="alertmanager-main",namespace="monitoring", integration!~`.*`}[5m])
|
||||
)
|
||||
> 0.01
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AlertmanagerConfigInconsistent
|
||||
annotations:
|
||||
description: Alertmanager instances within the {{$labels.job}} cluster have different configurations.
|
||||
|
||||
@ -3,6 +3,11 @@ data:
|
||||
datasources.yaml: ewogICAgImFwaVZlcnNpb24iOiAxLAogICAgImRhdGFzb3VyY2VzIjogWwogICAgICAgIHsKICAgICAgICAgICAgImFjY2VzcyI6ICJwcm94eSIsCiAgICAgICAgICAgICJlZGl0YWJsZSI6IGZhbHNlLAogICAgICAgICAgICAibmFtZSI6ICJwcm9tZXRoZXVzIiwKICAgICAgICAgICAgIm9yZ0lkIjogMSwKICAgICAgICAgICAgInR5cGUiOiAicHJvbWV0aGV1cyIsCiAgICAgICAgICAgICJ1cmwiOiAiaHR0cDovL3Byb21ldGhldXMtazhzLm1vbml0b3Jpbmcuc3ZjOjkwOTAiLAogICAgICAgICAgICAidmVyc2lvbiI6IDEKICAgICAgICB9CiAgICBdCn0=
|
||||
kind: Secret
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-datasources
|
||||
namespace: monitoring
|
||||
type: Opaque
|
||||
|
||||
@ -1729,6 +1729,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-apiserver
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -3595,6 +3600,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-cluster-total
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -4730,6 +4740,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-controller-manager
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -7296,6 +7311,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-k8s-resources-cluster
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -9566,6 +9586,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-k8s-resources-namespace
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -10528,6 +10553,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-k8s-resources-node
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -12284,6 +12314,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-k8s-resources-pod
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -14302,6 +14337,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-k8s-resources-workload
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -16481,6 +16521,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-k8s-resources-workloads-namespace
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -18998,6 +19043,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-kubelet
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -20446,6 +20496,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-namespace-by-pod
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -22166,6 +22221,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-namespace-by-workload
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -23114,6 +23174,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-node-cluster-rsrc-use
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -24089,6 +24154,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-node-rsrc-use
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -25070,6 +25140,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-nodes
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -25262,7 +25337,7 @@ items:
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
|
||||
"expr": "max without(instance,node) (\n(\n kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n -\n kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n)\n/\nkubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
@ -25459,7 +25534,7 @@ items:
|
||||
"tableColumn": "",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100\n",
|
||||
"expr": "max without(instance,node) (\nkubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n/\nkubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", metrics_path=\"/metrics\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"}\n* 100)\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "",
|
||||
@ -25631,6 +25706,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-persistentvolumesusage
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -26843,6 +26923,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-pod-total
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -26868,7 +26953,7 @@ items:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "",
|
||||
"refresh": "60s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -27119,7 +27204,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n",
|
||||
"expr": "rate(\n prometheus_remote_storage_samples_in_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])\n- \n ignoring(remote_name, url) group_right(instance) (rate(prometheus_remote_storage_succeeded_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n- \n (rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]))\n",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
@ -27704,7 +27789,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"expr": "prometheus_remote_storage_pending_samples{cluster=~\"$cluster\", instance=~\"$instance\"} or prometheus_remote_storage_samples_pending{cluster=~\"$cluster\", instance=~\"$instance\"}",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
@ -28009,7 +28094,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"expr": "rate(prometheus_remote_storage_dropped_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
@ -28102,7 +28187,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"expr": "rate(prometheus_remote_storage_failed_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
@ -28195,7 +28280,7 @@ items:
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"expr": "rate(prometheus_remote_storage_retried_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m]) or rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cluster}}:{{instance}} {{remote_name}}:{{url}}",
|
||||
@ -28348,7 +28433,7 @@ items:
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
|
||||
"prometheus-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
@ -28492,11 +28577,16 @@ items:
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "Prometheus Remote Write",
|
||||
"title": "Prometheus / Remote Write",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-prometheus-remote-write
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -28515,7 +28605,7 @@ items:
|
||||
"links": [
|
||||
|
||||
],
|
||||
"refresh": "10s",
|
||||
"refresh": "60s",
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
@ -29594,7 +29684,7 @@ items:
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
|
||||
"prometheus-mixin"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
@ -29702,12 +29792,17 @@ items:
|
||||
]
|
||||
},
|
||||
"timezone": "utc",
|
||||
"title": "Prometheus Overview",
|
||||
"title": "Prometheus / Overview",
|
||||
"uid": "",
|
||||
"version": 0
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-prometheus
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -30923,6 +31018,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-proxy
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -31981,6 +32081,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-scheduler
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -32893,6 +32998,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-statefulset
|
||||
namespace: monitoring
|
||||
- apiVersion: v1
|
||||
@ -34315,6 +34425,11 @@ items:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboard-workload-total
|
||||
namespace: monitoring
|
||||
kind: ConfigMapList
|
||||
|
||||
@ -17,5 +17,10 @@ data:
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana-dashboards
|
||||
namespace: monitoring
|
||||
|
||||
@ -2,21 +2,28 @@ apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
checksum/grafana-dashboards: b02ae450c84445cbaca8c685eefaec6c
|
||||
checksum/grafana-datasources: 48faab41f579fc8efde6034391496f6a
|
||||
checksum/grafana-datasources: a77789e5440a1e51e204e99e2f0f480a
|
||||
labels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
spec:
|
||||
containers:
|
||||
- env: []
|
||||
|
||||
@ -2,7 +2,10 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
app.kubernetes.io/version: 7.3.5
|
||||
name: grafana
|
||||
namespace: monitoring
|
||||
spec:
|
||||
@ -11,5 +14,7 @@ spec:
|
||||
port: 3000
|
||||
targetPort: http
|
||||
selector:
|
||||
app: grafana
|
||||
app.kubernetes.io/component: grafana
|
||||
app.kubernetes.io/name: grafana
|
||||
app.kubernetes.io/part-of: kube-prometheus
|
||||
type: NodePort
|
||||
|
||||
@ -15,11 +15,11 @@ spec:
|
||||
rules:
|
||||
- alert: KubePodCrashLooping
|
||||
annotations:
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 5 minutes.
|
||||
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} ({{ $labels.container }}) is restarting {{ printf "%.2f" $value }} times / 10 minutes.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubepodcrashlooping
|
||||
summary: Pod is crash looping.
|
||||
expr: |
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[5m]) * 60 * 5 > 0
|
||||
rate(kube_pod_container_status_restarts_total{job="kube-state-metrics"}[10m]) * 60 * 5 > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@ -499,11 +499,11 @@ spec:
|
||||
severity: critical
|
||||
- alert: AggregatedAPIErrors
|
||||
annotations:
|
||||
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. The number of errors have increased for it in the past five minutes. High values indicate that the availability of the service changes too often.
|
||||
description: An aggregated API {{ $labels.name }}/{{ $labels.namespace }} has reported errors. It has appeared unavailable {{ $value | humanize }} times averaged over the past 10m.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/aggregatedapierrors
|
||||
summary: An aggregated API has reported errors.
|
||||
expr: |
|
||||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[5m])) > 2
|
||||
sum by(name, namespace)(increase(aggregator_unavailable_apiservice_count[10m])) > 4
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: AggregatedAPIDown
|
||||
@ -526,6 +526,16 @@ spec:
|
||||
for: 15m
|
||||
labels:
|
||||
severity: critical
|
||||
- alert: KubeAPITerminatedRequests
|
||||
annotations:
|
||||
description: The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
|
||||
runbook_url: https://github.com/prometheus-operator/kube-prometheus/wiki/kubeapiterminatedrequests
|
||||
summary: The apiserver has terminated {{ $value | humanizePercentage }} of its incoming requests.
|
||||
expr: |
|
||||
sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) / ( sum(rate(apiserver_request_total{job="apiserver"}[10m])) + sum(rate(apiserver_request_terminations_total{job="apiserver"}[10m])) ) > 0.20
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
- name: kubernetes-system-kubelet
|
||||
rules:
|
||||
- alert: KubeNodeNotReady
|
||||
@ -1102,77 +1112,80 @@ spec:
|
||||
verb: write
|
||||
record: apiserver_request:availability30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[30d]))
|
||||
avg_over_time(code_verb:apiserver_request_total:increase1h[30d]) * 24 * 30
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"2.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"2.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"2.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"2.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"2.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"2.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"3.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"3.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"3.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"3.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"3.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"3.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"4.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"4.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"4.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"4.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"4.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"4.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="LIST",code=~"5.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="GET",code=~"5.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="POST",code=~"5.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PUT",code=~"5.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[30d]))
|
||||
record: code_verb:apiserver_request_total:increase30d
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="PATCH",code=~"5.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code, verb) (increase(apiserver_request_total{job="apiserver",verb="DELETE",code=~"5.."}[1h]))
|
||||
record: code_verb:apiserver_request_total:increase1h
|
||||
- expr: |
|
||||
sum by (code) (code_verb:apiserver_request_total:increase30d{verb=~"LIST|GET"})
|
||||
labels:
|
||||
|
||||
@ -202,9 +202,9 @@ spec:
|
||||
summary: Prometheus encounters more than 3% errors sending alerts to any Alertmanager.
|
||||
expr: |
|
||||
min without (alertmanager) (
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
rate(prometheus_notifications_errors_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
|
||||
/
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring"}[5m])
|
||||
rate(prometheus_notifications_sent_total{job="prometheus-k8s",namespace="monitoring",alertmanager!~``}[5m])
|
||||
)
|
||||
* 100
|
||||
> 3
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user