From 3d5696c96083ea7173833a83eb18e8809315fe13 Mon Sep 17 00:00:00 2001
From: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>
Date: Thu, 10 Jul 2025 14:28:57 +0200
Subject: [PATCH] model: add time tracking to Matcher

### Background

In grafana/mimir we sometimes see very expensive regular expressions. Unfortunately, apart from profiling and profiling recording rules, there is no insight into how impactful these regular expressions are.

### What this PR does

Adds `NewMatcherWithTimeTracker` which increments an `*atomic.Duration` when the matcher is executed. This helps to put the time of regular expressions into perspective. For example, we can use it to measure the porion of time of serving a query which went to running regular expressions.

Signed-off-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>
---
 model/labels/matcher.go | 22 ++++++++++++++++++++++
 model/labels/regexp.go  | 29 +++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/model/labels/matcher.go b/model/labels/matcher.go
index a09c838e3f..37b21f1af4 100644
--- a/model/labels/matcher.go
+++ b/model/labels/matcher.go
@@ -16,6 +16,8 @@ package labels
 import (
 	"bytes"
 	"strconv"
+
+	"go.uber.org/atomic"
 )
 
 // MatchType is an enum for label matching types.
@@ -78,6 +80,26 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher {
 	return m
 }
 
+// NewMatcherWithTimeTracker returns a matcher which can track the time spent running regular expression matchers.
+// duration is incremented when the MatchType is MatchRegexp or MatchNotRegexp.
+// duration is incremented every 64 Matcher.Matches() invocations and multiplied by 64;
+// the assumption is that all previous 63 invocations took the same time.
+func NewMatcherWithTimeTracker(t MatchType, n, v string, duration *atomic.Duration) (*Matcher, error) {
+	m := &Matcher{
+		Type:  t,
+		Name:  n,
+		Value: v,
+	}
+	if t == MatchRegexp || t == MatchNotRegexp {
+		re, err := NewFastRegexMatcherWithTimeTracker(v, duration)
+		if err != nil {
+			return nil, err
+		}
+		m.re = re
+	}
+	return m, nil
+}
+
 func (m *Matcher) String() string {
 	// Start a buffer with a pre-allocated size on stack to cover most needs.
 	var bytea [1024]byte
diff --git a/model/labels/regexp.go b/model/labels/regexp.go
index 1636aacc21..9551a13fce 100644
--- a/model/labels/regexp.go
+++ b/model/labels/regexp.go
@@ -16,11 +16,13 @@ package labels
 import (
 	"slices"
 	"strings"
+	"time"
 	"unicode"
 	"unicode/utf8"
 
 	"github.com/grafana/regexp"
 	"github.com/grafana/regexp/syntax"
+	"go.uber.org/atomic"
 	"golang.org/x/text/unicode/norm"
 )
 
@@ -86,6 +88,33 @@ func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) {
 	return m, nil
 }
 
+// NewFastRegexMatcherWithTimeTracker returns a matcher which will track the time spent running the matcher.
+// duration is incremented every 64 Matcher.Matches() invocations and multiplied by 64;
+// the assumption is that all previous 63 invocations took the same time.
+func NewFastRegexMatcherWithTimeTracker(regex string, duration *atomic.Duration) (*FastRegexMatcher, error) {
+	m, err := NewFastRegexMatcher(regex)
+	if err != nil {
+		return nil, err
+	}
+	withDifferentObserver := *m
+	sampler := atomic.NewInt64(-1)
+	oldMatchString := m.matchString
+	withDifferentObserver.matchString = func(s string) bool {
+		const sampleRate = 64
+		if tick := sampler.Inc(); tick%sampleRate == 0 {
+			defer func(start time.Time) {
+				d := time.Since(start)
+				if tick != 0 {
+					d *= sampleRate
+				}
+				duration.Add(d)
+			}(time.Now())
+		}
+		return oldMatchString(s)
+	}
+	return &withDifferentObserver, nil
+}
+
 // compileMatchStringFunction returns the function to run by MatchString().
 func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool {
 	// If the only optimization available is the string matcher, then we can just run it.