From aaaba57184848b7038d3b8221b3da732982650fa Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Sun, 2 Apr 2017 20:03:28 -0300 Subject: [PATCH 1/2] Export number of missed rule evaluations In case the execution of all rules takes longer than the configured rule evaluation interval, one or more iterations will be skipped. This needs to be visible to the operator. --- rules/manager.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rules/manager.go b/rules/manager.go index ca5d4185a0..7e9094e5a9 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -75,10 +75,15 @@ var ( Name: "evaluator_iterations_skipped_total", Help: "The total number of rule group evaluations skipped due to throttled metric storage.", }) + iterationsMissed = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: namespace, + Name: "evaluator_iterations_missed_total", + Help: "The total number of rule group evaluations missed due to slow rule group evaluation.", + }) iterationsScheduled = prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, Name: "evaluator_iterations_total", - Help: "The total number of scheduled rule group evaluations, whether skipped or executed.", + Help: "The total number of scheduled rule group evaluations, whether executed, missed or skipped.", }) ) @@ -90,6 +95,7 @@ func init() { prometheus.MustRegister(iterationDuration) prometheus.MustRegister(iterationsSkipped) + prometheus.MustRegister(iterationsMissed) prometheus.MustRegister(evalFailures) prometheus.MustRegister(evalDuration) } @@ -158,6 +164,7 @@ func (g *Group) run() { iterationDuration.Observe(time.Since(start).Seconds()) } + lastTriggered := time.Now() iter() tick := time.NewTicker(g.interval) @@ -172,6 +179,12 @@ func (g *Group) run() { case <-g.done: return case <-tick.C: + missed := (time.Since(lastTriggered).Nanoseconds() / g.interval.Nanoseconds()) - 1 + if missed > 0 { + iterationsMissed.Add(float64(missed)) + iterationsScheduled.Add(float64(missed)) 
+ } + lastTriggered = time.Now() iter() } } From eaf33759fbaed5050348c73aaf83c884f71366b8 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Sun, 2 Apr 2017 20:32:56 -0300 Subject: [PATCH 2/2] Register forgotten prometheus_evaluator_iterations_total metric --- rules/manager.go | 1 + 1 file changed, 1 insertion(+) diff --git a/rules/manager.go b/rules/manager.go index 7e9094e5a9..09d19a5da5 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -94,6 +94,7 @@ func init() { evalFailures.WithLabelValues(string(ruleTypeRecording)) prometheus.MustRegister(iterationDuration) + prometheus.MustRegister(iterationsScheduled) prometheus.MustRegister(iterationsSkipped) prometheus.MustRegister(iterationsMissed) prometheus.MustRegister(evalFailures)