From dd174963a26139e135dedb4c703f9037d9fb3010 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Fri, 2 Aug 2019 10:07:54 +0200 Subject: [PATCH] prometheus-mixin: remove PrometheusTSDBWALCorruptions The counter is only increased when tsdb.Open() is called, which Prometheus does only once in its lifetime (when it initializes). If the corruption can't be recovered, tsdb.Open() returns an error and Prometheus exits. Hence the metric is either 0 (no corruption) or 1 (corruption detected and repaired). If the latter, the alert isn't actionable and the only way to resolve it is to restart Prometheus, which would reset the counter. Signed-off-by: Simon Pasquier --- documentation/prometheus-mixin/alerts.libsonnet | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet index 06c5274579..9cd3029844 100644 --- a/documentation/prometheus-mixin/alerts.libsonnet +++ b/documentation/prometheus-mixin/alerts.libsonnet @@ -124,20 +124,6 @@ description: 'Prometheus %(prometheusName)s has detected {{$value | humanize}} compaction failures over the last 3h.' % $._config, }, }, - { - alert: 'PrometheusTSDBWALCorruptions', - expr: ||| - increase(tsdb_wal_corruptions_total{%(prometheusSelector)s}[3h]) > 0 - ||| % $._config, - 'for': '4h', - labels: { - severity: 'warning', - }, - annotations: { - summary: 'Prometheus is detecting WAL corruptions.', - description: 'Prometheus %(prometheusName)s has detected {{$value | humanize}} corruptions of the write-ahead log (WAL) over the last 3h.' % $._config, - }, - }, { alert: 'PrometheusNotIngestingSamples', expr: |||