mirror of
https://github.com/prometheus-operator/kube-prometheus.git
synced 2025-11-01 16:41:02 +01:00
Merge pull request #610 from lilic/add-more-alerts
Add PrometheusOperatorListErrors and fix PrometheusOperatorWatchErrors threshold
This commit is contained in:
commit
b55c2825f7
@ -5,15 +5,28 @@
|
||||
name: 'prometheus-operator',
|
||||
rules: [
|
||||
{
|
||||
alert: 'PrometheusOperatorWatchErrors',
|
||||
alert: 'PrometheusOperatorListErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[1h]))) > 0.1
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[1h]))) > 0.4
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while performing watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
message: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
},
|
||||
'for': '15m',
|
||||
},
|
||||
{
|
||||
alert: 'PrometheusOperatorWatchErrors',
|
||||
expr: |||
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[1h]))) > 0.4
|
||||
||| % $._config,
|
||||
labels: {
|
||||
severity: 'warning',
|
||||
},
|
||||
annotations: {
|
||||
message: 'Errors while performing Watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
|
||||
},
|
||||
'for': '15m',
|
||||
},
|
||||
|
||||
@ -1793,12 +1793,21 @@ spec:
|
||||
severity: warning
|
||||
- name: prometheus-operator
|
||||
rules:
|
||||
- alert: PrometheusOperatorWatchErrors
|
||||
- alert: PrometheusOperatorListErrors
|
||||
annotations:
|
||||
message: Errors while performing watch operations in controller {{$labels.controller}}
|
||||
message: Errors while performing List operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[1h]))) > 0.1
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[1h]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
- alert: PrometheusOperatorWatchErrors
|
||||
annotations:
|
||||
message: Errors while performing Watch operations in controller {{$labels.controller}}
|
||||
in {{$labels.namespace}} namespace.
|
||||
expr: |
|
||||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[1h])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[1h]))) > 0.4
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user