From 26bddcf068365e39b99583dfd408a02d6c88b023 Mon Sep 17 00:00:00 2001 From: Aman <95525722+amanycodes@users.noreply.github.com> Date: Thu, 24 Apr 2025 18:18:58 +0530 Subject: [PATCH] promql: histogram_stddev and histogram_stdvar should use arithmetic mean for custom buckets Signed-off-by: amanycodes --- docs/querying/functions.md | 8 +++++--- promql/functions.go | 10 ++++++++-- promql/promqltest/testdata/histograms.test | 8 ++++---- promql/promqltest/testdata/native_histograms.test | 2 +- web/ui/mantine-ui/src/promql/functionDocs.tsx | 6 ++++-- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/docs/querying/functions.md b/docs/querying/functions.md index 1539f4b0a5..40cc697055 100644 --- a/docs/querying/functions.md +++ b/docs/querying/functions.md @@ -428,9 +428,11 @@ annotation, you should find and remove the source of the invalid data. ## `histogram_stddev()` and `histogram_stdvar()` `histogram_stddev(v instant-vector)` returns the estimated standard deviation -of observations for each histogram sample in `v`, based on the geometric mean -of the buckets where the observations lie. Float samples are ignored and do not -show up in the returned vector. +of observations for each histogram sample in `v`. For this estimation, all observations +in a bucket are assumed to have the value of the mean of the bucket boundaries. For +the zero bucket and for buckets with custom boundaries, the arithmetic mean is used. +For the usual exponential buckets, the geometric mean is used. Float samples are ignored +and do not show up in the returned vector. Similarly, `histogram_stdvar(v instant-vector)` returns the estimated standard variance of observations for each histogram sample in `v`. diff --git a/promql/functions.go b/promql/functions.go index 1cd9d2df7b..0662c8d451 100644 --- a/promql/functions.go +++ b/promql/functions.go @@ -1350,9 +1350,15 @@ func histogramVariance(vals []parser.Value, enh *EvalNodeHelper, varianceToResul continue } var val float64 - if bucket.Lower <= 0 && 0 <= bucket.Upper { + switch { + case sample.H.UsesCustomBuckets(): + // Use arithmetic mean in case of custom buckets. + val = (bucket.Upper + bucket.Lower) / 2.0 + case bucket.Lower <= 0 && bucket.Upper >= 0: + // Use zero (effectively the arithmetic mean) in the zero bucket of a standard exponential histogram. val = 0 - } else { + default: + // Use geometric mean in case of standard exponential buckets. val = math.Sqrt(bucket.Upper * bucket.Lower) if bucket.Upper < 0 { val = -val diff --git a/promql/promqltest/testdata/histograms.test b/promql/promqltest/testdata/histograms.test index 8b5b471b04..45492d89f3 100644 --- a/promql/promqltest/testdata/histograms.test +++ b/promql/promqltest/testdata/histograms.test @@ -95,13 +95,13 @@ eval instant at 50m histogram_avg(testhistogram3) # Test histogram_stddev. This has no classic equivalent. eval instant at 50m histogram_stddev(testhistogram3) - {start="positive"} 2.8189265757336734 - {start="negative"} 4.182715937754936 + {start="positive"} 2.7435461458749795 + {start="negative"} 4.187667907081458 # Test histogram_stdvar. This has no classic equivalent. eval instant at 50m histogram_stdvar(testhistogram3) - {start="positive"} 7.946347039377573 - {start="negative"} 17.495112615949154 + {start="positive"} 7.527045454545455 + {start="negative"} 17.5365625 # Test histogram_fraction. # diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 751039e029..edca4eec4b 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -337,7 +337,7 @@ load 10m histogram_stddev_stdvar_3 {{schema:3 count:7 sum:62 z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 eval instant at 10m histogram_stddev(histogram_stddev_stdvar_3) - {} 42.947236400258 + {} 42.94723640026 eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_3) {} 1844.4651144196398 diff --git a/web/ui/mantine-ui/src/promql/functionDocs.tsx b/web/ui/mantine-ui/src/promql/functionDocs.tsx index 45fcd03b7f..99ffdd74f5 100644 --- a/web/ui/mantine-ui/src/promql/functionDocs.tsx +++ b/web/ui/mantine-ui/src/promql/functionDocs.tsx @@ -1266,9 +1266,11 @@ const funcDocs: Record = {

+ histogram_stddev(v instant-vector) returns the estimated standard deviation of observations in a native - histogram, based on the geometric mean of the buckets where the observations lie. Samples that are not native - histograms are ignored and do not show up in the returned vector. + histogram. For this estimation, all observations in a bucket are assumed to have the value of the mean of the bucket boundaries. + For the zero bucket and for buckets with custom boundaries, the arithmetic mean is used. For the usual exponential buckets, + the geometric mean is used. Samples that are not native histograms are ignored and do not show up in the returned vector.