Merge pull request #677 from paulfantom/mixin

Use prometheus-operator mixin
This commit is contained in:
Frederic Branczyk 2020-09-21 10:14:22 +02:00 committed by GitHub
commit e85046ad6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 80 additions and 114 deletions

View File

@@ -1,4 +1,3 @@
(import 'alertmanager.libsonnet') + (import 'alertmanager.libsonnet') +
(import 'general.libsonnet') + (import 'general.libsonnet') +
(import 'node.libsonnet') + (import 'node.libsonnet')
(import 'prometheus-operator.libsonnet')

View File

@@ -1,63 +0,0 @@
{
prometheusAlerts+:: {
groups+: [
{
name: 'prometheus-operator',
rules: [
{
alert: 'PrometheusOperatorListErrors',
expr: |||
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while performing List operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
},
'for': '15m',
},
{
alert: 'PrometheusOperatorWatchErrors',
expr: |||
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{%(prometheusOperatorSelector)s}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{%(prometheusOperatorSelector)s}[10m]))) > 0.4
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while performing Watch operations in controller {{$labels.controller}} in {{$labels.namespace}} namespace.',
},
'for': '15m',
},
{
alert: 'PrometheusOperatorReconcileErrors',
expr: |||
rate(prometheus_operator_reconcile_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace }} Namespace.',
},
'for': '10m',
},
{
alert: 'PrometheusOperatorNodeLookupErrors',
expr: |||
rate(prometheus_operator_node_address_lookup_errors_total{%(prometheusOperatorSelector)s}[5m]) > 0.1
||| % $._config,
labels: {
severity: 'warning',
},
annotations: {
message: 'Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.',
},
'for': '10m',
},
],
},
],
},
}

View File

@@ -28,6 +28,15 @@
}, },
"version": "release-0.42" "version": "release-0.42"
}, },
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator",
"subdir": "jsonnet/mixin"
}
},
"version": "master"
},
{ {
"source": { "source": {
"git": { "git": {

View File

@@ -10,6 +10,7 @@ local kubeRbacProxyContainer = import './kube-rbac-proxy/container.libsonnet';
(import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') + (import 'github.com/prometheus/node_exporter/docs/node-mixin/mixin.libsonnet') +
(import './alertmanager/alertmanager.libsonnet') + (import './alertmanager/alertmanager.libsonnet') +
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') + (import 'github.com/prometheus-operator/prometheus-operator/jsonnet/prometheus-operator/prometheus-operator.libsonnet') +
(import 'github.com/prometheus-operator/prometheus-operator/jsonnet/mixin/mixin.libsonnet') +
(import './prometheus/prometheus.libsonnet') + (import './prometheus/prometheus.libsonnet') +
(import './prometheus-adapter/prometheus-adapter.libsonnet') + (import './prometheus-adapter/prometheus-adapter.libsonnet') +
(import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') + (import 'github.com/kubernetes-monitoring/kubernetes-mixin/mixin.libsonnet') +

View File

@@ -4,7 +4,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/brancz/kubernetes-grafana.git", "remote": "https://github.com/brancz/kubernetes-grafana",
"subdir": "grafana" "subdir": "grafana"
} }
}, },
@@ -14,7 +14,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/etcd-io/etcd.git", "remote": "https://github.com/etcd-io/etcd",
"subdir": "Documentation/etcd-mixin" "subdir": "Documentation/etcd-mixin"
} }
}, },
@@ -24,7 +24,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/grafana/grafonnet-lib.git", "remote": "https://github.com/grafana/grafonnet-lib",
"subdir": "grafonnet" "subdir": "grafonnet"
} }
}, },
@@ -34,7 +34,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/grafana/jsonnet-libs.git", "remote": "https://github.com/grafana/jsonnet-libs",
"subdir": "grafana-builder" "subdir": "grafana-builder"
} }
}, },
@@ -44,7 +44,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/ksonnet/ksonnet-lib.git", "remote": "https://github.com/ksonnet/ksonnet-lib",
"subdir": "" "subdir": ""
} }
}, },
@@ -55,7 +55,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "" "subdir": ""
} }
}, },
@@ -65,7 +65,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git", "remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin",
"subdir": "lib/promgrafonnet" "subdir": "lib/promgrafonnet"
} }
}, },
@@ -75,7 +75,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git", "remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics" "subdir": "jsonnet/kube-state-metrics"
} }
}, },
@@ -85,7 +85,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/kubernetes/kube-state-metrics.git", "remote": "https://github.com/kubernetes/kube-state-metrics",
"subdir": "jsonnet/kube-state-metrics-mixin" "subdir": "jsonnet/kube-state-metrics-mixin"
} }
}, },
@@ -95,7 +95,17 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator.git", "remote": "https://github.com/prometheus-operator/prometheus-operator",
"subdir": "jsonnet/mixin"
}
},
"version": "64863c320adddf1ab7da9cd0af1bea6978f9a72b",
"sum": "vqz67twCROf5kVgo/61luBOx25Mk7Okbt8YP+/7xjT0="
},
{
"source": {
"git": {
"remote": "https://github.com/prometheus-operator/prometheus-operator",
"subdir": "jsonnet/prometheus-operator" "subdir": "jsonnet/prometheus-operator"
} }
}, },
@@ -105,7 +115,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/prometheus/node_exporter.git", "remote": "https://github.com/prometheus/node_exporter",
"subdir": "docs/node-mixin" "subdir": "docs/node-mixin"
} }
}, },
@@ -115,7 +125,7 @@
{ {
"source": { "source": {
"git": { "git": {
"remote": "https://github.com/prometheus/prometheus.git", "remote": "https://github.com/prometheus/prometheus",
"subdir": "documentation/prometheus-mixin" "subdir": "documentation/prometheus-mixin"
} }
}, },

View File

@@ -1044,6 +1044,53 @@ spec:
node_md_disks{state="fail"} > 0 node_md_disks{state="fail"} > 0
labels: labels:
severity: warning severity: warning
- name: prometheus-operator
rules:
- alert: PrometheusOperatorListErrors
annotations:
description: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorlisterrors
summary: Errors while performing list operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorWatchErrors
annotations:
description: Errors while performing watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorwatcherrors
summary: Errors while performing watch operations in controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorReconcileErrors
annotations:
description: '{{ $value | humanizePercentage }} of reconciling operations
failed for {{ $labels.controller }} controller in {{ $labels.namespace }}
namespace.'
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatorreconcileerrors
summary: Errors while reconciling controller.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]))) / (sum by (controller,namespace) (rate(prometheus_operator_reconcile_operations_total{job="prometheus-operator",namespace="monitoring"}[5m]))) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorNodeLookupErrors
annotations:
description: Errors while reconciling Prometheus in {{ $labels.namespace }}
Namespace.
runbook_url: https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-prometheusoperatornodelookuperrors
summary: Errors while reconciling Prometheus.
expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
for: 10m
labels:
severity: warning
- name: kubernetes-apps - name: kubernetes-apps
rules: rules:
- alert: KubePodCrashLooping - alert: KubePodCrashLooping
@@ -2031,40 +2078,3 @@ spec:
for: 2m for: 2m
labels: labels:
severity: warning severity: warning
- name: prometheus-operator
rules:
- alert: PrometheusOperatorListErrors
annotations:
message: Errors while performing List operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_list_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_list_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorWatchErrors
annotations:
message: Errors while performing Watch operations in controller {{$labels.controller}}
in {{$labels.namespace}} namespace.
expr: |
(sum by (controller,namespace) (rate(prometheus_operator_watch_operations_failed_total{job="prometheus-operator",namespace="monitoring"}[10m])) / sum by (controller,namespace) (rate(prometheus_operator_watch_operations_total{job="prometheus-operator",namespace="monitoring"}[10m]))) > 0.4
for: 15m
labels:
severity: warning
- alert: PrometheusOperatorReconcileErrors
annotations:
message: Errors while reconciling {{ $labels.controller }} in {{ $labels.namespace
}} Namespace.
expr: |
rate(prometheus_operator_reconcile_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
for: 10m
labels:
severity: warning
- alert: PrometheusOperatorNodeLookupErrors
annotations:
message: Errors while reconciling Prometheus in {{ $labels.namespace }} Namespace.
expr: |
rate(prometheus_operator_node_address_lookup_errors_total{job="prometheus-operator",namespace="monitoring"}[5m]) > 0.1
for: 10m
labels:
severity: warning