From 3d5696c96083ea7173833a83eb18e8809315fe13 Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 10 Jul 2025 14:28:57 +0200 Subject: [PATCH] model: add time tracking to Matcher ### Background In grafana/mimir we sometimes see very expensive regular expressions. Unfortunately, apart from profiling and profiling recording rules, there is no insight into how impactful these regular expressions are. ### What this PR does Adds `NewMatcherWithTimeTracker` which increments an `*atomic.Duration` when the matcher is executed. This helps to put the time of regular expressions into perspective. For example, we can use it to measure the porion of time of serving a query which went to running regular expressions. Signed-off-by: Dimitar Dimitrov --- model/labels/matcher.go | 22 ++++++++++++++++++++++ model/labels/regexp.go | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index a09c838e3f..37b21f1af4 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -16,6 +16,8 @@ package labels import ( "bytes" "strconv" + + "go.uber.org/atomic" ) // MatchType is an enum for label matching types. @@ -78,6 +80,26 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { return m } +// NewMatcherWithTimeTracker returns a matcher which can track the time spent running regular expression matchers. +// duration is incremented when the MatchType is MatchRegexp or MatchNotRegexp. +// duration is incremented every 64 Matcher.Matches() invocations and multiplied by 64; +// the assumption is that all previous 63 invocations took the same time. +func NewMatcherWithTimeTracker(t MatchType, n, v string, duration *atomic.Duration) (*Matcher, error) { + m := &Matcher{ + Type: t, + Name: n, + Value: v, + } + if t == MatchRegexp || t == MatchNotRegexp { + re, err := NewFastRegexMatcherWithTimeTracker(v, duration) + if err != nil { + return nil, err + } + m.re = re + } + return m, nil +} + func (m *Matcher) String() string { // Start a buffer with a pre-allocated size on stack to cover most needs. var bytea [1024]byte diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 1636aacc21..9551a13fce 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -16,11 +16,13 @@ package labels import ( "slices" "strings" + "time" "unicode" "unicode/utf8" "github.com/grafana/regexp" "github.com/grafana/regexp/syntax" + "go.uber.org/atomic" "golang.org/x/text/unicode/norm" ) @@ -86,6 +88,33 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { return m, nil } +// NewFastRegexMatcherWithTimeTracker returns a matcher which will track the time spent running the matcher. +// duration is incremented every 64 Matcher.Matches() invocations and multiplied by 64; +// the assumption is that all previous 63 invocations took the same time. +func NewFastRegexMatcherWithTimeTracker(regex string, duration *atomic.Duration) (*FastRegexMatcher, error) { + m, err := NewFastRegexMatcher(regex) + if err != nil { + return nil, err + } + withDifferentObserver := *m + sampler := atomic.NewInt64(-1) + oldMatchString := m.matchString + withDifferentObserver.matchString = func(s string) bool { + const sampleRate = 64 + if tick := sampler.Inc(); tick%sampleRate == 0 { + defer func(start time.Time) { + d := time.Since(start) + if tick != 0 { + d *= sampleRate + } + duration.Add(d) + }(time.Now()) + } + return oldMatchString(s) + } + return &withDifferentObserver, nil +} + // compileMatchStringFunction returns the function to run by MatchString(). func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool { // If the only optimization available is the string matcher, then we can just run it.