Merge pull request #17166 from Naman-B-Parlecha/NamanParlecha/NHCBtoCH

Unroll NHCBs to Classic Histograms func for RW
This commit is contained in:
George Krajcsovits 2025-10-30 08:44:26 +01:00 committed by GitHub
commit 37418b5910
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 522 additions and 44 deletions

145
model/histogram/convert.go Normal file
View File

@ -0,0 +1,145 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package histogram
import (
"errors"
"fmt"
"math"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
)
// ConvertNHCBToClassic converts Native Histogram Custom Buckets (NHCB) to classic histogram series.
// This conversion is needed in various scenarios where users need to get NHCB back to classic histogram format,
// such as Remote Write v1 for external system compatibility and migration use cases.
//
// nhcb must be a *Histogram or *FloatHistogram using the custom buckets
// schema; any other type or schema yields an error, as does a histogram that
// fails Validate. lset supplies the labels of the source series and must
// carry a non-empty metric name. lsetBuilder is used as scratch space and is
// reset to the labels it held on entry before the function returns.
//
// emitSeriesFn is called once per custom bucket boundary (in the order of
// CustomValues) with the cumulative count, then for the "+Inf" bucket, then
// for the "_count" and "_sum" series. The first error it returns aborts the
// conversion and is returned unchanged.
//
// When calling this function, caller must ensure that provided nhcb is valid NHCB histogram.
func ConvertNHCBToClassic(nhcb any, lset labels.Labels, lsetBuilder *labels.Builder, emitSeriesFn func(labels labels.Labels, value float64) error) error {
	baseName := lset.Get(model.MetricNameLabel)
	if baseName == "" {
		return errors.New("metric name label '__name__' is missing")
	}

	// We preserve original labels and restore them after conversion.
	// This is to ensure that no modifications are made to the original labels
	// that the queue_manager relies on.
	oldLabels := lsetBuilder.Labels()
	defer lsetBuilder.Reset(oldLabels)

	var (
		customValues    []float64
		positiveBuckets []float64
		count, sum      float64
		idx             int // Position in the dense, per-boundary bucket slice.
		currIdx         int // Position in the histogram's populated bucket slice.
	)

	switch h := nhcb.(type) {
	case *Histogram:
		if !IsCustomBucketsSchema(h.Schema) {
			return errors.New("unsupported histogram schema, not a NHCB")
		}

		// Validate once and hand the original error back so that callers can
		// still inspect it with errors.Is / errors.As.
		if err := h.Validate(); err != nil {
			return err
		}

		customValues = h.CustomValues
		// One slot per custom boundary plus one for the overflow (+Inf) bucket.
		positiveBuckets = make([]float64, len(customValues)+1)

		// Integer histograms store each populated bucket as a delta to the
		// previously populated bucket, so accumulate to get absolute counts.
		// Buckets that fall into a span gap are empty: skip over them and
		// leave them at zero rather than repeating the running total.
		acc := int64(0)
		for _, s := range h.PositiveSpans {
			idx += int(s.Offset)
			for i := 0; i < int(s.Length); i++ {
				acc += h.PositiveBuckets[currIdx]
				positiveBuckets[idx] = float64(acc)
				idx++
				currIdx++
			}
		}
		count = float64(h.Count)
		sum = h.Sum
	case *FloatHistogram:
		if !IsCustomBucketsSchema(h.Schema) {
			return errors.New("unsupported histogram schema, not a NHCB")
		}

		// Validate once and hand the original error back so that callers can
		// still inspect it with errors.Is / errors.As.
		if err := h.Validate(); err != nil {
			return err
		}

		customValues = h.CustomValues
		// One slot per custom boundary plus one for the overflow (+Inf) bucket.
		positiveBuckets = make([]float64, len(customValues)+1)

		for _, span := range h.PositiveSpans {
			// Float histogram buckets are already absolute counts, so the
			// buckets in a span gap simply stay at zero: jump to the next
			// populated bucket index.
			idx += int(span.Offset)
			for i := 0; i < int(span.Length); i++ {
				positiveBuckets[idx] = h.PositiveBuckets[currIdx]
				idx++
				currIdx++
			}
		}
		count = h.Count
		sum = h.Sum
	default:
		return fmt.Errorf("unsupported histogram type: %T", h)
	}

	// emit builds the series labels from lset with the metric name replaced by
	// baseName+suffix and, when le is non-empty, the bucket label set to le.
	emit := func(suffix, le string, value float64) error {
		lsetBuilder.Reset(lset)
		lsetBuilder.Set(model.MetricNameLabel, baseName+suffix)
		if le != "" {
			lsetBuilder.Set(model.BucketLabel, le)
		}
		return emitSeriesFn(lsetBuilder.Labels(), value)
	}

	// Classic buckets are cumulative, so keep a running total while walking
	// the per-bucket counts.
	currCount := float64(0)
	for i, val := range customValues {
		currCount += positiveBuckets[i]
		if err := emit("_bucket", labels.FormatOpenMetricsFloat(val), currCount); err != nil {
			return err
		}
	}

	// The last slot holds the observations above the highest boundary.
	currCount += positiveBuckets[len(positiveBuckets)-1]
	if err := emit("_bucket", labels.FormatOpenMetricsFloat(math.Inf(1)), currCount); err != nil {
		return err
	}

	if err := emit("_count", "", count); err != nil {
		return err
	}
	return emit("_sum", "", sum)
}

View File

@ -0,0 +1,314 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package histogram
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/labels"
)
// sample is one series emitted by ConvertNHCBToClassic: the series labels
// together with the value passed to the emit callback.
type sample struct {
	lset labels.Labels
	val  float64
}

// TestConvertNHCBToClassicHistogram feeds integer and float histograms to
// ConvertNHCBToClassic and checks the emitted series, in order, against the
// expected bucket, count and sum samples. Cases with expectErr set only
// require that the conversion fails.
func TestConvertNHCBToClassicHistogram(t *testing.T) {
	tests := []struct {
		name      string
		nhcb      any
		labels    labels.Labels
		expectErr bool
		expected  []sample
	}{
		{
			name: "valid histogram",
			nhcb: &Histogram{
				CustomValues:    []float64{1, 2, 3},
				PositiveBuckets: []int64{10, 20, 30},
				PositiveSpans: []Span{
					{Offset: 0, Length: 3},
				},
				Count:  100,
				Sum:    100.0,
				Schema: CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "2.0"), val: 40},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "3.0"), val: 100},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 100},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 100},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 100},
			},
		},
		{
			name: "valid floatHistogram",
			nhcb: &FloatHistogram{
				CustomValues: []float64{1, 2, 3},
				// Cumulative: 20 -> 60 -> 120.
				PositiveBuckets: []float64{20.0, 40.0, 60.0},
				PositiveSpans: []Span{
					{Offset: 0, Length: 3},
				},
				Count:  120.0,
				Sum:    100.0,
				Schema: CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 20},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "2.0"), val: 60},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "3.0"), val: 120},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 120},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 120},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 100},
			},
		},
		{
			name: "empty histogram",
			nhcb: &Histogram{
				CustomValues:    []float64{},
				PositiveBuckets: []int64{},
				PositiveSpans:   []Span{},
				Count:           0,
				Sum:             0.0,
				Schema:          CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 0},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 0},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 0},
			},
		},
		{
			name: "missing __name__ label",
			nhcb: &Histogram{
				CustomValues:    []float64{1, 2, 3},
				PositiveBuckets: []int64{10, 20, 30},
				Count:           100,
				Sum:             100.0,
				Schema:          CustomBucketsSchema,
			},
			labels:    labels.FromStrings("job", "test_job"),
			expectErr: true,
		},
		{
			name:      "unsupported histogram type",
			nhcb:      nil,
			labels:    labels.FromStrings("__name__", "test_metric"),
			expectErr: true,
		},
		{
			name: "histogram with zero bucket counts",
			nhcb: &Histogram{
				CustomValues:    []float64{1, 2, 3},
				PositiveBuckets: []int64{0, 10, 0},
				PositiveSpans: []Span{
					{Offset: 0, Length: 3},
				},
				Count:  20,
				Sum:    50.0,
				Schema: CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 0},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "2.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "3.0"), val: 20},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 20},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 20},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 50},
			},
		},
		{
			// The third populated bucket has no custom boundary of its own
			// and therefore only shows up in the +Inf bucket.
			name: "extra bucket counts than custom values",
			nhcb: &Histogram{
				CustomValues:    []float64{1, 2},
				PositiveBuckets: []int64{10, 20, 30},
				PositiveSpans:   []Span{{Offset: 0, Length: 3}},
				Count:           100,
				Sum:             100.0,
				Schema:          CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "2.0"), val: 40},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 100},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 100},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 100},
			},
		},
		{
			// The span announces two buckets but only one is provided.
			name: "mismatched bucket lengths with less filled bucket count",
			nhcb: &Histogram{
				CustomValues:    []float64{1, 2},
				PositiveBuckets: []int64{10},
				PositiveSpans:   []Span{{Offset: 0, Length: 2}},
				Count:           100,
				Sum:             100.0,
				Schema:          CustomBucketsSchema,
			},
			labels:    labels.FromStrings("__name__", "test_metric_bucket"),
			expectErr: true,
		},
		{
			name: "single series Histogram",
			nhcb: &Histogram{
				CustomValues:    []float64{1},
				PositiveBuckets: []int64{10},
				PositiveSpans: []Span{
					{Offset: 0, Length: 1},
				},
				Count:  10,
				Sum:    20.0,
				Schema: CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 20},
			},
		},
		{
			// Labels other than the metric name must be carried over to
			// every emitted series.
			name: "multiset label histogram",
			nhcb: &Histogram{
				CustomValues:    []float64{1},
				PositiveBuckets: []int64{10},
				PositiveSpans: []Span{
					{Offset: 0, Length: 1},
				},
				Count:  10,
				Sum:    20.0,
				Schema: CustomBucketsSchema,
			},
			labels: labels.FromStrings("__name__", "test_metric", "job", "test_job", "instance", "localhost:9090"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "job", "test_job", "instance", "localhost:9090", "le", "1.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "job", "test_job", "instance", "localhost:9090", "le", "+Inf"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_count", "job", "test_job", "instance", "localhost:9090"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_sum", "job", "test_job", "instance", "localhost:9090"), val: 20},
			},
		},
		{
			// Schema 1 is not the custom buckets schema, so the conversion
			// must refuse it.
			name: "exponential histogram",
			nhcb: &FloatHistogram{
				Schema:        1,
				ZeroThreshold: 0.01,
				ZeroCount:     5.5,
				Count:         3493.3,
				Sum:           2349209.324,
				PositiveSpans: []Span{
					{-2, 1},
					{2, 3},
				},
				PositiveBuckets: []float64{1, 3.3, 4.2, 0.1},
				NegativeSpans: []Span{
					{3, 2},
					{3, 2},
				},
				NegativeBuckets: []float64{3.1, 3, 1.234e5, 1000},
			},
			labels:    labels.FromStrings("__name__", "test_metric_bucket"),
			expectErr: true,
		},
		{
			name: "sparse histogram",
			nhcb: &Histogram{
				Schema:       CustomBucketsSchema,
				CustomValues: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
				PositiveSpans: []Span{
					{0, 2},
					{4, 1},
					{1, 2},
				},
				// Deltas between populated buckets; buckets in span gaps are empty.
				// Per-bucket counts: 1 -> 3 -> 0 -> 0 -> 0 -> 0 -> 6 -> 0 -> 10 -> 15
				// Cumulative counts: 1 -> 4 -> 4 -> 4 -> 4 -> 4 -> 10 -> 10 -> 20 -> 35
				PositiveBuckets: []int64{1, 2, 3, 4, 5},
				// Sum of the per-bucket counts; the +Inf bucket must match it.
				Count: 35,
				Sum:   123,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 1},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "2.0"), val: 4},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "3.0"), val: 4},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "4.0"), val: 4},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "5.0"), val: 4},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "6.0"), val: 4},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "7.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "8.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "9.0"), val: 20},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "10.0"), val: 35},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 35},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 35},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 123},
			},
		},
		{
			name: "sparse float histogram",
			nhcb: &FloatHistogram{
				Schema:       CustomBucketsSchema,
				CustomValues: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
				PositiveSpans: []Span{
					{0, 2},
					{4, 1},
					{1, 2},
				},
				// Absolute counts of the populated buckets; gaps are empty.
				// Per-bucket counts: 1 -> 2 -> 0 -> 0 -> 0 -> 0 -> 3 -> 0 -> 4 -> 5
				// Cumulative counts: 1 -> 3 -> 3 -> 3 -> 3 -> 3 -> 6 -> 6 -> 10 -> 15
				PositiveBuckets: []float64{1, 2, 3, 4, 5},
				Count:           15,
				Sum:             123,
			},
			labels: labels.FromStrings("__name__", "test_metric"),
			expected: []sample{
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "1.0"), val: 1},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "2.0"), val: 3},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "3.0"), val: 3},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "4.0"), val: 3},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "5.0"), val: 3},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "6.0"), val: 3},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "7.0"), val: 6},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "8.0"), val: 6},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "9.0"), val: 10},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "10.0"), val: 15},
				{lset: labels.FromStrings("__name__", "test_metric_bucket", "le", "+Inf"), val: 15},
				{lset: labels.FromStrings("__name__", "test_metric_count"), val: 15},
				{lset: labels.FromStrings("__name__", "test_metric_sum"), val: 123},
			},
		},
	}

	// A single builder is shared by all cases on purpose: the conversion is
	// expected to hand it back in the state it received it.
	labelBuilder := labels.NewBuilder(labels.EmptyLabels())
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			var emittedSamples []sample
			err := ConvertNHCBToClassic(tt.nhcb, tt.labels, labelBuilder, func(lbls labels.Labels, val float64) error {
				emittedSamples = append(emittedSamples, sample{lset: lbls, val: val})
				return nil
			})
			if tt.expectErr {
				require.Error(t, err)
				return
			}
			require.NoError(t, err)

			// Series must come out in the expected order with matching
			// labels and values.
			require.Len(t, emittedSamples, len(tt.expected))
			for i, expSample := range tt.expected {
				require.True(t, labels.Equal(expSample.lset, emittedSamples[i].lset), "labels mismatch at index %d: expected %v, got %v", i, expSample.lset, emittedSamples[i].lset)
				require.Equal(t, expSample.val, emittedSamples[i].val, "value mismatch at index %d", i)
			}
		})
	}
}

60
model/labels/float.go Normal file
View File

@ -0,0 +1,60 @@
// Copyright The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package labels
import (
"bytes"
"math"
"strconv"
"sync"
)
// floatFormatBufPool hands out reusable scratch buffers. It is exclusively
// used in FormatOpenMetricsFloat.
var floatFormatBufPool = sync.Pool{
	New: func() any {
		// 24 bytes are enough for the at most 17 digits of a float64 plus
		// the surrounding syntax (sign, decimal point, exponent).
		scratch := make([]byte, 0, 24)
		return &scratch
	},
}

// FormatOpenMetricsFloat renders f the way Go normally formats a float, except
// that ".0" is appended whenever the result would otherwise contain neither a
// "." nor an "e".
func FormatOpenMetricsFloat(f float64) string {
	// Non-finite values have fixed spellings.
	if math.IsNaN(f) {
		return "NaN"
	}
	if math.IsInf(f, +1) {
		return "+Inf"
	}
	if math.IsInf(f, -1) {
		return "-Inf"
	}

	// A few very common values are answered without touching the pool.
	switch f {
	case 1:
		return "1.0"
	case 0:
		return "0.0"
	case -1:
		return "-1.0"
	}

	bufPtr := floatFormatBufPool.Get().(*[]byte)
	defer floatFormatBufPool.Put(bufPtr)

	out := strconv.AppendFloat((*bufPtr)[:0], f, 'g', -1, 64)
	if !bytes.ContainsAny(out, "e.") {
		out = append(out, '.', '0')
	}
	// Store the (possibly regrown) slice back so the pool keeps its capacity.
	*bufPtr = out
	return string(out)
}

View File

@ -773,7 +773,7 @@ func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string {
if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) { if (t == model.MetricTypeSummary && l == model.QuantileLabel) || (t == model.MetricTypeHistogram && l == model.BucketLabel) {
f, err := strconv.ParseFloat(v, 64) f, err := strconv.ParseFloat(v, 64)
if err == nil { if err == nil {
return formatOpenMetricsFloat(f) return labels.FormatOpenMetricsFloat(f)
} }
} }
return v return v

View File

@ -19,9 +19,7 @@ import (
"fmt" "fmt"
"io" "io"
"math" "math"
"strconv"
"strings" "strings"
"sync"
"unicode/utf8" "unicode/utf8"
"github.com/gogo/protobuf/types" "github.com/gogo/protobuf/types"
@ -35,15 +33,6 @@ import (
"github.com/prometheus/prometheus/util/convertnhcb" "github.com/prometheus/prometheus/util/convertnhcb"
) )
// floatFormatBufPool is exclusively used in formatOpenMetricsFloat.
var floatFormatBufPool = sync.Pool{
New: func() any {
// To contain at most 17 digits and additional syntax for a float64.
b := make([]byte, 0, 24)
return &b
},
}
// ProtobufParser parses the old Prometheus protobuf format and present it // ProtobufParser parses the old Prometheus protobuf format and present it
// as the text-style textparse.Parser interface. // as the text-style textparse.Parser interface.
// //
@ -698,7 +687,7 @@ func (p *ProtobufParser) getMagicLabel() (bool, string, string) {
qq := p.dec.GetSummary().GetQuantile() qq := p.dec.GetSummary().GetQuantile()
q := qq[p.fieldPos] q := qq[p.fieldPos]
p.fieldsDone = p.fieldPos == len(qq)-1 p.fieldsDone = p.fieldPos == len(qq)-1
return true, model.QuantileLabel, formatOpenMetricsFloat(q.GetQuantile()) return true, model.QuantileLabel, labels.FormatOpenMetricsFloat(q.GetQuantile())
case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM: case dto.MetricType_HISTOGRAM, dto.MetricType_GAUGE_HISTOGRAM:
bb := p.dec.GetHistogram().GetBucket() bb := p.dec.GetHistogram().GetBucket()
if p.fieldPos >= len(bb) { if p.fieldPos >= len(bb) {
@ -707,41 +696,11 @@ func (p *ProtobufParser) getMagicLabel() (bool, string, string) {
} }
b := bb[p.fieldPos] b := bb[p.fieldPos]
p.fieldsDone = math.IsInf(b.GetUpperBound(), +1) p.fieldsDone = math.IsInf(b.GetUpperBound(), +1)
return true, model.BucketLabel, formatOpenMetricsFloat(b.GetUpperBound()) return true, model.BucketLabel, labels.FormatOpenMetricsFloat(b.GetUpperBound())
} }
return false, "", "" return false, "", ""
} }
// formatOpenMetricsFloat works like the usual Go string formatting of a float
// but appends ".0" if the resulting number would otherwise contain neither a
// "." nor an "e".
func formatOpenMetricsFloat(f float64) string {
// A few common cases hardcoded.
switch {
case f == 1:
return "1.0"
case f == 0:
return "0.0"
case f == -1:
return "-1.0"
case math.IsNaN(f):
return "NaN"
case math.IsInf(f, +1):
return "+Inf"
case math.IsInf(f, -1):
return "-Inf"
}
bp := floatFormatBufPool.Get().(*[]byte)
defer floatFormatBufPool.Put(bp)
*bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64)
if bytes.ContainsAny(*bp, "e.") {
return string(*bp)
}
*bp = append(*bp, '.', '0')
return string(*bp)
}
// isNativeHistogram returns false iff the provided histograms has no spans at // isNativeHistogram returns false iff the provided histograms has no spans at
// all (neither positive nor negative) and a zero threshold of 0 and a zero // all (neither positive nor negative) and a zero threshold of 0 and a zero
// count of 0. In principle, this could still be meant to be a native histogram // count of 0. In principle, this could still be meant to be a native histogram