Merge pull request #17021 from ringerc/add_first_over_time

Add first_over_time and ts_of_first_over_time as experimental functions
Julien 2025-09-02 14:56:00 +02:00 committed by GitHub
commit 637b176a9b
10 changed files with 192 additions and 16 deletions

View File

@ -864,14 +864,17 @@ additional functions are available:
that has the maximum value of all float samples in the specified interval.
* `ts_of_last_over_time(range-vector)`: the timestamp of the last sample in the
specified interval.
* `first_over_time(range-vector)`: the oldest sample in the specified interval.
* `ts_of_first_over_time(range-vector)`: the timestamp of the earliest sample in
the specified interval.
Note that all values in the specified interval have the same weight in the
aggregation even if the values are not equally spaced throughout the interval.
These functions act on histograms in the following way:
- `count_over_time`, `first_over_time`, `last_over_time`, and
`present_over_time()` act on float and histogram samples in the same way.
- `avg_over_time()` and `sum_over_time()` act on histogram samples in a way
that corresponds to the respective aggregation operators. If a series
contains a mix of float samples and histogram samples within the range, the
@ -883,6 +886,13 @@ These functions act on histograms in the following way:
samples are processed and the omission of the histogram samples is flagged by
an info-level annotation.
`first_over_time(m[1m])` differs from `m offset 1m`: the former selects the
first sample of `m` _within_ the 1m range, whereas `m offset 1m` selects the
most recent sample within the lookback window _outside and prior to_ that 1m
range. This is particularly useful as `first_over_time(m[step()])` in range
queries (available when `--enable-feature=promql-duration-expr` is set) to
ensure that the selected sample falls within the current range step.
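For illustration, a minimal sketch in the promqltest format used elsewhere in this PR, with a hypothetical series `m` sampled every 30s and assuming the left-open range selectors of Prometheus 3.x:

```
load 30s
  m 0 1 2 3 4 5 6

# At t=3m, first_over_time picks the oldest sample inside (2m, 3m],
# i.e. the value 5 at t=2m30s, and keeps the metric name.
eval instant at 3m first_over_time(m[1m])
  m 5

# m offset 1m instead looks back from 3m - 1m = 2m and returns the
# newest sample at or before that point, the value 4 at t=2m.
eval instant at 3m m offset 1m
  m 4
```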
## Trigonometric Functions
The trigonometric functions work in radians. They ignore histogram samples in

View File

@ -1803,11 +1803,11 @@ func (ev *evaluator) eval(ctx context.Context, expr parser.Expr) (parser.Value,
it := storage.NewBuffer(selRange)
var chkIter chunkenc.Iterator
// The last_over_time and first_over_time functions act like
// offset; thus, they should keep the metric name. For all the
// other range vector functions, the only change needed is to
// drop the metric name in the output.
dropName := (e.Func.Name != "last_over_time" && e.Func.Name != "first_over_time")
vectorVals := make([]Vector, len(e.Args)-1)
for i, s := range selVS.Series {
if err := contextDone(ctx, "expression evaluation"); err != nil {

View File

@ -3829,6 +3829,10 @@ eval instant at 10m rate(metric_total{env="1"}[10m])
eval instant at 10m last_over_time(metric_total{env="1"}[10m])
metric_total{env="1"} 120
# Does not drop __name__ for first_over_time function
eval instant at 10m first_over_time(metric_total{env="1"}[10m])
metric_total{env="1"} 60
# Drops name for other _over_time functions
eval instant at 10m max_over_time(metric_total{env="1"}[10m])
{env="1"} 120

View File

@ -780,6 +780,34 @@ func funcCountOverTime(_ []Vector, matrixVals Matrix, _ parser.Expressions, enh
}), nil
}
// === first_over_time(Matrix parser.ValueTypeMatrix) (Vector, Notes) ===
func funcFirstOverTime(_ []Vector, matrixVal Matrix, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
el := matrixVal[0]
var f FPoint
if len(el.Floats) > 0 {
f = el.Floats[0]
}
var h HPoint
if len(el.Histograms) > 0 {
h = el.Histograms[0]
}
// If a float sample exists and is older than the oldest histogram
// sample (or there is no histogram sample at all), return the float.
if h.H == nil || (len(el.Floats) > 0 && f.T < h.T) {
return append(enh.Out, Sample{
Metric: el.Metric,
F: f.F,
}), nil
}
return append(enh.Out, Sample{
Metric: el.Metric,
H: h.H.Copy(),
}), nil
}
// === last_over_time(Matrix parser.ValueTypeMatrix) (Vector, Notes) ===
func funcLastOverTime(_ []Vector, matrixVal Matrix, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
el := matrixVal[0]
@ -831,6 +859,26 @@ func funcMadOverTime(_ []Vector, matrixVal Matrix, args parser.Expressions, enh
}), annos
}
// === ts_of_first_over_time(Matrix parser.ValueTypeMatrix) (Vector, Notes) ===
func funcTsOfFirstOverTime(_ []Vector, matrixVal Matrix, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
el := matrixVal[0]
var tf int64 = math.MaxInt64
if len(el.Floats) > 0 {
tf = el.Floats[0].T
}
var th int64 = math.MaxInt64
if len(el.Histograms) > 0 {
th = el.Histograms[0].T
}
// Return the earlier of the two timestamps, converting from
// milliseconds to seconds.
return append(enh.Out, Sample{
Metric: el.Metric,
F: float64(min(tf, th)) / 1000,
}), nil
}
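The division by 1000 above converts the sample timestamp from milliseconds to seconds. A minimal promqltest sketch of the resulting behaviour, using a hypothetical series `up`:

```
load 10s
  up _ _ _ 1 1 1

# The oldest sample in (0, 1m] sits at t=30s (30000ms), so the result is
# 30000 / 1000 = 30. Like the other ts_of_* functions, the metric name is
# dropped from the output.
eval instant at 1m ts_of_first_over_time(up[1m])
  {} 30
```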
// === ts_of_last_over_time(Matrix parser.ValueTypeMatrix) (Vector, Notes) ===
func funcTsOfLastOverTime(_ []Vector, matrixVal Matrix, _ parser.Expressions, enh *EvalNodeHelper) (Vector, annotations.Annotations) {
el := matrixVal[0]
@ -1801,6 +1849,7 @@ var FunctionCalls = map[string]FunctionCall{
"delta": funcDelta,
"deriv": funcDeriv,
"exp": funcExp,
"first_over_time": funcFirstOverTime,
"floor": funcFloor,
"histogram_avg": funcHistogramAvg,
"histogram_count": funcHistogramCount,
@ -1824,6 +1873,7 @@ var FunctionCalls = map[string]FunctionCall{
"mad_over_time": funcMadOverTime,
"max_over_time": funcMaxOverTime,
"min_over_time": funcMinOverTime,
"ts_of_first_over_time": funcTsOfFirstOverTime,
"ts_of_last_over_time": funcTsOfLastOverTime,
"ts_of_max_over_time": funcTsOfMaxOverTime,
"ts_of_min_over_time": funcTsOfMinOverTime,

View File

@ -162,6 +162,12 @@ var Functions = map[string]*Function{
ArgTypes: []ValueType{ValueTypeVector},
ReturnType: ValueTypeVector,
},
"first_over_time": {
Name: "first_over_time",
ArgTypes: []ValueType{ValueTypeMatrix},
ReturnType: ValueTypeVector,
Experimental: true,
},
"floor": {
Name: "floor",
ArgTypes: []ValueType{ValueTypeVector},
@ -283,6 +289,12 @@ var Functions = map[string]*Function{
ArgTypes: []ValueType{ValueTypeMatrix},
ReturnType: ValueTypeVector,
},
"ts_of_first_over_time": {
Name: "ts_of_first_over_time",
ArgTypes: []ValueType{ValueTypeMatrix},
ReturnType: ValueTypeVector,
Experimental: true,
},
"ts_of_max_over_time": {
Name: "ts_of_max_over_time",
ArgTypes: []ValueType{ValueTypeMatrix},

View File

@ -1320,6 +1320,28 @@ eval instant at 95s ts_of_last_over_time(metric_histogram{type="only_histogram"}
eval instant at 95s ts_of_last_over_time(metric_histogram{type="mix"}[90s])
{type="mix"} 50.265
# Tests for ts_of_first_over_time
clear
load 10s53ms
metric _ _ 1 2 3 _ _
metric_histogram{type="only_histogram"} {{schema:1 sum:2 count:3}}x4
metric_histogram{type="mix"} _ 1 1 1 {{schema:1 sum:2 count:3}} {{schema:1 sum:2 count:3}} 1
eval instant at 90s ts_of_first_over_time(metric[90s])
{} 20.106
eval instant at 95s ts_of_first_over_time(metric[90s])
{} 20.106
eval instant at 15s ts_of_first_over_time(metric[90s])
#empty
eval instant at 95s ts_of_first_over_time(metric_histogram{type="only_histogram"}[90s])
{type="only_histogram"} 10.053
eval instant at 95s ts_of_first_over_time(metric_histogram{type="mix"}[90s])
{type="mix"} 10.053
# Tests for quantile_over_time
clear
@ -1520,7 +1542,19 @@ load 10s
data{type="some_nan3"} NaN 0 1
data{type="only_nan"} NaN NaN NaN
data_histogram{type="only_histogram"} {{schema:0 sum:1 count:2}} {{schema:0 sum:2 count:3}} {{schema:0 sum:3 count:4}}
data_histogram{type="mix_samples"} 0 1 {{schema:0 sum:1 count:2}} {{schema:0 sum:2 count:3}}
data_histogram{type="mix_samples_hlast"} 0 1 {{schema:0 sum:1 count:2}} {{schema:0 sum:2 count:3}}
data_sparse{type="sparse_numbers"} _ 5 2 _ 4 _
data_empty{type="empty_series"} _ _ _ _ _ _ _ _ _ _ _ _ _
# Workaround for https://github.com/prometheus/prometheus/issues/17025, which causes histogram samples
# before float samples in a load directive to be silently dropped as (incorrectly) out-of-order.
# Splitting the series across two load directives forces a commit in between, so the
# ordering is handled correctly.
load 10s
data_histogram{type="mix_samples_hfirst"} {{schema:0 sum:1 count:2}} {{schema:0 sum:9 count:3}}
load 10s
data_histogram{type="mix_samples_hfirst"} _ _ 4 1
eval instant at 1m min_over_time(data[2m])
expect no_info
@ -1534,9 +1568,13 @@ eval instant at 1m min_over_time(data_histogram{type="only_histogram"}[2m])
expect no_info
#empty
eval instant at 1m min_over_time(data_histogram{type="mix_samples"}[2m])
eval instant at 1m min_over_time(data_histogram{type=~"mix_samples.*"}[2m])
expect info
{type="mix_samples"} 0
{type="mix_samples_hlast"} 0
{type="mix_samples_hfirst"} 1
eval instant at 1m min_over_time(data_sparse[2m])
{type="sparse_numbers"} 2
eval instant at 1m max_over_time(data[2m])
expect no_info
@ -1550,11 +1588,15 @@ eval instant at 1m max_over_time(data_histogram{type="only_histogram"}[2m])
expect no_info
#empty
eval instant at 1m max_over_time(data_histogram{type="mix_samples"}[2m])
eval instant at 1m max_over_time(data_histogram{type=~"mix_samples.*"}[2m])
expect info
{type="mix_samples"} 1
{type="mix_samples_hlast"} 1
{type="mix_samples_hfirst"} 4
eval instant at 1m last_over_time({__name__=~"data(_histogram)?"}[2m])
eval instant at 1m max_over_time(data_sparse[2m])
{type="sparse_numbers"} 5
eval instant at 1m last_over_time({__name__=~"data(_histogram|_sparse|_empty)?"}[2m])
expect no_info
data{type="numbers"} 3
data{type="some_nan"} NaN
@ -1562,9 +1604,23 @@ eval instant at 1m last_over_time({__name__=~"data(_histogram)?"}[2m])
data{type="some_nan3"} 1
data{type="only_nan"} NaN
data_histogram{type="only_histogram"} {{schema:0 sum:3 count:4}}
data_histogram{type="mix_samples"} {{schema:0 sum:2 count:3}}
data_histogram{type="mix_samples_hlast"} {{schema:0 sum:2 count:3}}
data_histogram{type="mix_samples_hfirst"} 1
data_sparse{type="sparse_numbers"} 4
eval instant at 1m count_over_time({__name__=~"data(_histogram)?"}[2m])
eval instant at 1m first_over_time({__name__=~"data(_histogram|_sparse|_empty)?"}[2m])
expect no_info
data{type="numbers"} 2
data{type="some_nan"} 2
data{type="some_nan2"} 2
data{type="some_nan3"} NaN
data{type="only_nan"} NaN
data_histogram{type="only_histogram"} {{schema:0 sum:1 count:2}}
data_histogram{type="mix_samples_hlast"} 0
data_histogram{type="mix_samples_hfirst"} {{schema:0 sum:1 count:2}}
data_sparse{type="sparse_numbers"} 5
eval instant at 1m count_over_time({__name__=~"data(_histogram|_sparse|_empty)?"}[2m])
expect no_info
{type="numbers"} 3
{type="some_nan"} 3
@ -1572,7 +1628,9 @@ eval instant at 1m count_over_time({__name__=~"data(_histogram)?"}[2m])
{type="some_nan3"} 3
{type="only_nan"} 3
{type="only_histogram"} 3
{type="mix_samples"} 4
{type="mix_samples_hlast"} 4
{type="mix_samples_hfirst"} 4
{type="sparse_numbers"} 3
clear

View File

@ -43,6 +43,10 @@ eval instant at 10m rate(metric_total{env="1"}[10m])
eval instant at 10m last_over_time(metric_total{env="1"}[10m])
metric_total{env="1"} 120
# Does not drop __name__ for first_over_time function.
eval instant at 10m first_over_time(metric_total{env="1"}[10m])
metric_total{env="1"} 60
# Drops name for other _over_time functions.
eval instant at 10m max_over_time(metric_total{env="1"}[10m])
{env="1"} 120

View File

@ -38,6 +38,7 @@ import {
Delta,
Deriv,
Exp,
FirstOverTime,
Floor,
HistogramAvg,
HistogramCount,
@ -61,6 +62,7 @@ import {
MadOverTime,
MaxOverTime,
MinOverTime,
TsOfFirstOverTime,
TsOfMaxOverTime,
TsOfMinOverTime,
TsOfLastOverTime,
@ -268,6 +270,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
variadic: 0,
returnType: ValueType.vector,
},
[FirstOverTime]: {
name: 'first_over_time',
argTypes: [ValueType.matrix],
variadic: 0,
returnType: ValueType.vector,
},
[Floor]: {
name: 'floor',
argTypes: [ValueType.vector],
@ -406,6 +414,12 @@ const promqlFunctions: { [key: number]: PromQLFunction } = {
variadic: 0,
returnType: ValueType.vector,
},
[TsOfFirstOverTime]: {
name: 'ts_of_first_over_time',
argTypes: [ValueType.matrix],
variadic: 0,
returnType: ValueType.vector,
},
[TsOfMaxOverTime]: {
name: 'ts_of_max_over_time',
argTypes: [ValueType.matrix],

View File

@ -133,6 +133,7 @@ FunctionIdentifier {
Delta |
Deriv |
Exp |
FirstOverTime |
Floor |
HistogramCount |
HistogramFraction |
@ -156,6 +157,7 @@ FunctionIdentifier {
MadOverTime |
MaxOverTime |
MinOverTime |
TsOfFirstOverTime |
TsOfMaxOverTime |
TsOfMinOverTime |
TsOfLastOverTime |
@ -384,6 +386,7 @@ NumberDurationLiteralInDurationContext {
Delta { condFn<"delta"> }
Deriv { condFn<"deriv"> }
Exp { condFn<"exp"> }
FirstOverTime { condFn<"first_over_time"> }
Floor { condFn<"floor"> }
HistogramAvg { condFn<"histogram_avg"> }
HistogramCount { condFn<"histogram_count"> }
@ -407,6 +410,8 @@ NumberDurationLiteralInDurationContext {
MadOverTime { condFn<"mad_over_time"> }
MaxOverTime { condFn<"max_over_time"> }
MinOverTime { condFn<"min_over_time"> }
TsOfFirstOverTime { condFn<"ts_of_first_over_time"> }
TsOfMaxOverTime { condFn<"ts_of_max_over_time"> }
TsOfMinOverTime { condFn<"ts_of_min_over_time"> }
TsOfLastOverTime { condFn<"ts_of_last_over_time"> }

View File

@ -363,6 +363,25 @@ PromQL(
)
)
# Function first_over_time
first_over_time(data[1m])
==>
PromQL(
FunctionCall(
FunctionIdentifier(FirstOverTime),
FunctionCallBody(
MatrixSelector(
VectorSelector(
Identifier
),
NumberDurationLiteralInDurationContext
)
)
)
)
# Function sgn
sgn(data)