fix(textparse/protobuf): metric family name corrupted by NHCB parser (#17156)

* fix(textparse): implement NHCB parsing in ProtoBuf parser directly

The NHCB conversion does some validation, but we can only return error
from Parser.Next() not Parser.Histogram(). So the conversion needs to
happen in Next().

There are 2 cases:
1. "always_scrape_classic_histograms" is enabled, in which case we
convert after returning the classic series. This is to be consistent
with the PromParser text parser, which collects NHCB while spitting out
classic series; then returns the NHCB.
2. "always_scrape_classic_histograms" is disabled. In which case we never
return the classic series.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* refactor(textparse): skip classic series instead of adding NHCB around

Do not return the first classic series from the EntryType state,
switch to EntrySeries. This means we need to start the histogram
field state from -3 , not -2.

In EntrySeries, skip classic series if needed.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* reuse nhcb converter

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>

* test(textparse/nhcb): test corrupting metric family name

NHCB parse doesn't always copy the metric name from the underlying
parser. When called via HELP, UNIT, the string is directly referenced
which means that the read-ahead of NHCB can corrupt it.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
This commit is contained in:
George Krajcsovits 2025-09-08 17:26:41 +02:00 committed by GitHub
parent 979aea1d49
commit acd9aa0afb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 1391 additions and 90 deletions

View File

@ -43,6 +43,15 @@ type Label struct {
}
func (ls Labels) String() string {
return ls.stringImpl(true)
}
// StringNoSpace is like String but does not add a space after commas.
func (ls Labels) StringNoSpace() string {
return ls.stringImpl(false)
}
func (ls Labels) stringImpl(addSpace bool) string {
var bytea [1024]byte // On stack to avoid memory allocation while building the output.
b := bytes.NewBuffer(bytea[:0])
@ -51,7 +60,9 @@ func (ls Labels) String() string {
ls.Range(func(l Label) {
if i > 0 {
b.WriteByte(',')
b.WriteByte(' ')
if addSpace {
b.WriteByte(' ')
}
}
if !model.LegacyValidation.IsValidLabelName(l.Name) {
b.Write(strconv.AppendQuote(b.AvailableBuffer(), l.Name))

View File

@ -149,7 +149,7 @@ func benchParse(b *testing.B, data []byte, parser string) {
}
case "promproto":
newParserFn = func(b []byte, st *labels.SymbolTable) Parser {
return NewProtobufParser(b, true, false, st)
return NewProtobufParser(b, true, false, false, st)
}
case "omtext":
newParserFn = func(b []byte, st *labels.SymbolTable) Parser {
@ -276,7 +276,7 @@ func BenchmarkCreatedTimestampPromProto(b *testing.B) {
data := createTestProtoBuf(b).Bytes()
st := labels.NewSymbolTable()
p := NewProtobufParser(data, true, false, st)
p := NewProtobufParser(data, true, false, false, st)
found := false
Inner:

View File

@ -142,7 +142,7 @@ func New(b []byte, contentType, fallbackType string, parseClassicHistograms, con
o.enableTypeAndUnitLabels = enableTypeAndUnitLabels
})
case "application/vnd.google.protobuf":
baseParser = NewProtobufParser(b, parseClassicHistograms, enableTypeAndUnitLabels, st)
return NewProtobufParser(b, parseClassicHistograms, convertClassicHistogramsToNHCB, enableTypeAndUnitLabels, st), err
case "text/plain":
baseParser = NewPromParser(b, st, enableTypeAndUnitLabels)
default:

View File

@ -18,7 +18,6 @@ import (
"io"
"math"
"strconv"
"strings"
"github.com/prometheus/common/model"
@ -373,7 +372,16 @@ func (p *NHCBParser) processNHCB() bool {
p.hNHCB = nil
p.fhNHCB = fh
}
p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName) + strings.ReplaceAll(p.tempLsetNHCB.DropMetricName().String(), ", ", ",")
lblsWithMetricName := p.tempLsetNHCB.DropMetricName()
// Ensure we return `metric` instead of `metric{}` for name only
// series, for consistency with wrapped parsers.
if lblsWithMetricName.IsEmpty() {
p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName)
} else {
p.metricStringNHCB = p.tempLsetNHCB.Get(labels.MetricName) + lblsWithMetricName.StringNoSpace()
}
p.bytesNHCB = []byte(p.metricStringNHCB)
p.lsetNHCB = p.tempLsetNHCB
p.swapExemplars()

View File

@ -15,18 +15,15 @@ package textparse
import (
"bytes"
"encoding/binary"
"strconv"
"testing"
"github.com/gogo/protobuf/proto"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/prometheus/prometheus/model/exemplar"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/model/labels"
dto "github.com/prometheus/prometheus/prompb/io/prometheus/client"
)
func TestNHCBParserOnOMParser(t *testing.T) {
@ -182,7 +179,7 @@ foobar{quantile="0.99"} 150.1`
m: "hh",
typ: model.MetricTypeHistogram,
}, {
m: `hh{}`,
m: `hh`,
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 1,
@ -203,7 +200,7 @@ foobar{quantile="0.99"} 150.1`
m: "hhh",
typ: model.MetricTypeHistogram,
}, {
m: `hhh{}`,
m: `hhh`,
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 1,
@ -333,7 +330,7 @@ foobar{quantile="0.99"} 150.1`
m: "baz",
typ: model.MetricTypeHistogram,
}, {
m: `baz{}`,
m: `baz`,
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 17,
@ -361,7 +358,7 @@ foobar{quantile="0.99"} 150.1`
m: "something",
typ: model.MetricTypeHistogram,
}, {
m: `something{}`,
m: `something`,
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 18,
@ -479,7 +476,7 @@ something_bucket{a="b",le="+Inf"} 9 # {id="something-test"} 2e100 123.000
m: "something",
typ: model.MetricTypeHistogram,
}, {
m: `something{}`,
m: `something`,
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 18,
@ -737,7 +734,7 @@ func TestNHCBParser_NoNHCBWhenExponential(t *testing.T) {
// Always expect NHCB series after classic.
nhcbSeries := []parsedEntry{
{
m: metric + "{}",
m: metric,
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 175,
@ -893,24 +890,7 @@ metric: <
>
`}
varintBuf := make([]byte, binary.MaxVarintLen32)
buf := &bytes.Buffer{}
for _, tmf := range testMetricFamilies {
pb := &dto.MetricFamily{}
// From text to proto message.
require.NoError(t, proto.UnmarshalText(tmf, pb))
// From proto message to binary protobuf.
protoBuf, err := proto.Marshal(pb)
require.NoError(t, err)
// Write first length, then binary protobuf.
varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf)))
buf.Write(varintBuf[:varintLength])
buf.Write(protoBuf)
}
return buf
return metricFamiliesToProtobuf(t, testMetricFamilies)
}
func createTestOpenMetricsHistogram() string {
@ -1054,22 +1034,7 @@ metric: <
timestamp_ms: 1234568
>`}
varintBuf := make([]byte, binary.MaxVarintLen32)
buf := &bytes.Buffer{}
for _, tmf := range testMetricFamilies {
pb := &dto.MetricFamily{}
// From text to proto message.
require.NoError(t, proto.UnmarshalText(tmf, pb))
// From proto message to binary protobuf.
protoBuf, err := proto.Marshal(pb)
require.NoError(t, err)
// Write first length, then binary protobuf.
varintLength := binary.PutUvarint(varintBuf, uint64(len(protoBuf)))
buf.Write(varintBuf[:varintLength])
buf.Write(protoBuf)
}
buf := metricFamiliesToProtobuf(t, testMetricFamilies)
exp := []parsedEntry{
{
@ -1107,7 +1072,7 @@ metric: <
typ: model.MetricTypeHistogram,
},
{
m: "test_histogram2{}",
m: "test_histogram2",
shs: &histogram.Histogram{
Schema: histogram.CustomBucketsSchema,
Count: 175,
@ -1128,3 +1093,87 @@ metric: <
got := testParse(t, p)
requireEntries(t, exp, got)
}
// TestNHCBNotCorruptMetricNameAfterRead is a regression test for https://github.com/prometheus/prometheus/issues/17075.
func TestNHCBNotCorruptMetricNameAfterRead(t *testing.T) {
inputOM := `# HELP test_histogram_seconds Just a test histogram
# TYPE test_histogram_seconds histogram
test_histogram_seconds_count 10
test_histogram_seconds_sum 100
test_histogram_seconds_bucket{le="10"} 10
test_histogram_seconds_bucket{le="+Inf"} 10
# HELP different_metric Just a different metric
# TYPE different_metric histogram
different_metric_count 5
different_metric_sum 50
different_metric_bucket{le="10"} 5
different_metric_bucket{le="+Inf"} 5
# EOF`
testMetricFamilies := []string{`name: "test_histogram_seconds"
help: "Just a test histogram"
type: HISTOGRAM
metric: <
histogram: <
sample_count: 10
sample_sum: 100
bucket: <
cumulative_count: 10
upper_bound: 10
>
>
>`, `name: "different_metric"
help: "Just a different metric"
type: HISTOGRAM
metric: <
histogram: <
sample_count: 5
sample_sum: 50
bucket: <
cumulative_count: 5
upper_bound: 10
>
>
>`}
buf := metricFamiliesToProtobuf(t, testMetricFamilies)
testCases := []struct {
input []byte
typ string
}{
{input: buf.Bytes(), typ: "application/vnd.google.protobuf"},
{input: []byte(inputOM), typ: "text/plain"},
{input: []byte(inputOM), typ: "application/openmetrics-text"},
}
for _, tc := range testCases {
t.Run(tc.typ, func(t *testing.T) {
p, err := New(tc.input, tc.typ, "", false, true, false, false, labels.NewSymbolTable())
require.NoError(t, err)
require.NotNil(t, p)
getNext := func() Entry {
e, err := p.Next()
require.NoError(t, err)
return e
}
require.Equal(t, EntryHelp, getNext())
lastMFName, lastHelp := p.Help()
require.Equal(t, "test_histogram_seconds", string(lastMFName))
require.Equal(t, "Just a test histogram", string(lastHelp))
require.Equal(t, EntryType, getNext())
var lastType model.MetricType
lastMFName, lastType = p.Type()
require.Equal(t, "test_histogram_seconds", string(lastMFName))
require.Equal(t, model.MetricTypeHistogram, lastType)
require.Equal(t, EntryHistogram, getNext())
_, _, h, _ := p.Histogram()
require.NotNil(t, h)
require.Equal(t, "test_histogram_seconds", string(lastMFName))
})
}
}

View File

@ -32,6 +32,7 @@ import (
"github.com/prometheus/prometheus/model/labels"
dto "github.com/prometheus/prometheus/prompb/io/prometheus/client"
"github.com/prometheus/prometheus/schema"
"github.com/prometheus/prometheus/util/convertnhcb"
)
// floatFormatBufPool is exclusively used in formatOpenMetricsFloat.
@ -78,20 +79,31 @@ type ProtobufParser struct {
// Whether to also parse a classic histogram that is also present as a
// native histogram.
parseClassicHistograms bool
parseClassicHistograms bool
// Whether to add type and unit labels.
enableTypeAndUnitLabels bool
// Whether to convert classic histograms to native histograms with custom buckets.
convertClassicHistogramsToNHCB bool
// Reusable classic to NHCB converter.
tmpNHCB convertnhcb.TempHistogram
// We need to preload NHCB since we cannot do error handling in Histogram().
nhcbH *histogram.Histogram
nhcbFH *histogram.FloatHistogram
}
// NewProtobufParser returns a parser for the payload in the byte slice.
func NewProtobufParser(b []byte, parseClassicHistograms, enableTypeAndUnitLabels bool, st *labels.SymbolTable) Parser {
func NewProtobufParser(b []byte, parseClassicHistograms, convertClassicHistogramsToNHCB, enableTypeAndUnitLabels bool, st *labels.SymbolTable) Parser {
return &ProtobufParser{
dec: dto.NewMetricStreamingDecoder(b),
entryBytes: &bytes.Buffer{},
builder: labels.NewScratchBuilderWithSymbolTable(st, 16), // TODO(bwplotka): Try base builder.
state: EntryInvalid,
parseClassicHistograms: parseClassicHistograms,
enableTypeAndUnitLabels: enableTypeAndUnitLabels,
state: EntryInvalid,
parseClassicHistograms: parseClassicHistograms,
enableTypeAndUnitLabels: enableTypeAndUnitLabels,
convertClassicHistogramsToNHCB: convertClassicHistogramsToNHCB,
tmpNHCB: convertnhcb.NewTempHistogram(),
}
}
@ -182,6 +194,15 @@ func (p *ProtobufParser) Histogram() ([]byte, *int64, *histogram.Histogram, *his
h = p.dec.GetHistogram()
)
if !isNativeHistogram(h) {
// This only happens if we have a classic histogram and
// we converted it to NHCB already in Next.
if *ts != 0 {
return p.entryBytes.Bytes(), ts, p.nhcbH, p.nhcbFH
}
return p.entryBytes.Bytes(), nil, p.nhcbH, p.nhcbFH
}
if p.parseClassicHistograms && len(h.GetBucket()) > 0 {
p.redoClassic = true
}
@ -406,6 +427,8 @@ func (p *ProtobufParser) CreatedTimestamp() int64 {
// read.
func (p *ProtobufParser) Next() (Entry, error) {
p.exemplarReturned = false
p.nhcbH = nil
p.nhcbFH = nil
switch p.state {
// Invalid state occurs on:
// * First Next() call.
@ -468,8 +491,12 @@ func (p *ProtobufParser) Next() (Entry, error) {
p.state = EntryType
case EntryType:
t := p.dec.GetType()
if (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) &&
isNativeHistogram(p.dec.GetHistogram()) {
if t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM {
if !isNativeHistogram(p.dec.GetHistogram()) {
p.state = EntrySeries
p.fieldPos = -3 // We have not returned anything, let p.Next() increment it to -2.
return p.Next()
}
p.state = EntryHistogram
} else {
p.state = EntrySeries
@ -480,14 +507,18 @@ func (p *ProtobufParser) Next() (Entry, error) {
case EntrySeries:
// Potentially a second series in the metric family.
t := p.dec.GetType()
decodeNext := true
if t == dto.MetricType_SUMMARY ||
t == dto.MetricType_HISTOGRAM ||
t == dto.MetricType_GAUGE_HISTOGRAM {
// Non-trivial series (complex metrics, with magic suffixes).
isClassicHistogram := (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) && !isNativeHistogram(p.dec.GetHistogram())
skipSeries := p.convertClassicHistogramsToNHCB && isClassicHistogram && !p.parseClassicHistograms
// Did we iterate over all the classic representations fields?
// NOTE: p.fieldsDone is updated on p.onSeriesOrHistogramUpdate.
if !p.fieldsDone {
if !p.fieldsDone && !skipSeries {
// Still some fields to iterate over.
p.fieldPos++
if err := p.onSeriesOrHistogramUpdate(); err != nil {
@ -504,25 +535,39 @@ func (p *ProtobufParser) Next() (Entry, error) {
// If this is a metric family containing native
// histograms, it means we are here thanks to redoClassic state.
// Return to native histograms for the consistent flow.
if (t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM) &&
isNativeHistogram(p.dec.GetHistogram()) {
p.state = EntryHistogram
// If this is a metric family containing classic histograms,
// it means we might need to do NHCB conversion.
if t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM {
if !isClassicHistogram {
p.state = EntryHistogram
} else if p.convertClassicHistogramsToNHCB {
// We still need to spit out the NHCB.
var err error
p.nhcbH, p.nhcbFH, err = p.convertToNHCB(t)
if err != nil {
return EntryInvalid, err
}
p.state = EntryHistogram
// We have an NHCB to emit, no need to decode the next series.
decodeNext = false
}
}
}
// Is there another series?
if err := p.dec.NextMetric(); err != nil {
if errors.Is(err, io.EOF) {
p.state = EntryInvalid
return p.Next()
if decodeNext {
if err := p.dec.NextMetric(); err != nil {
if errors.Is(err, io.EOF) {
p.state = EntryInvalid
return p.Next()
}
return EntryInvalid, err
}
return EntryInvalid, err
}
if err := p.onSeriesOrHistogramUpdate(); err != nil {
return EntryInvalid, err
}
case EntryHistogram:
// Was Histogram() called and parseClassicHistograms is true?
if p.redoClassic {
switchToClassic := func() (Entry, error) {
p.redoClassic = false
p.fieldPos = -3
p.fieldsDone = false
@ -530,6 +575,11 @@ func (p *ProtobufParser) Next() (Entry, error) {
return p.Next() // Switch to classic histogram.
}
// Was Histogram() called and parseClassicHistograms is true?
if p.redoClassic {
return switchToClassic()
}
// Is there another series?
if err := p.dec.NextMetric(); err != nil {
if errors.Is(err, io.EOF) {
@ -538,6 +588,14 @@ func (p *ProtobufParser) Next() (Entry, error) {
}
return EntryInvalid, err
}
// If this is a metric family does not contain native
// histograms, it means we are here thanks to NHCB conversion.
// Return to classic histograms for the consistent flow.
if !isNativeHistogram(p.dec.GetHistogram()) {
return switchToClassic()
}
if err := p.onSeriesOrHistogramUpdate(); err != nil {
return EntryInvalid, err
}
@ -690,3 +748,43 @@ func isNativeHistogram(h *dto.Histogram) bool {
h.GetZeroThreshold() > 0 ||
h.GetZeroCount() > 0
}
func (p *ProtobufParser) convertToNHCB(t dto.MetricType) (*histogram.Histogram, *histogram.FloatHistogram, error) {
h := p.dec.GetHistogram()
p.tmpNHCB.Reset()
// TODO(krajorama): convertnhcb should support setting integer mode up
// front since we know it here. That would avoid the converter having
// to guess it based on counts.
v := h.GetSampleCountFloat()
if v == 0 {
v = float64(h.GetSampleCount())
}
if err := p.tmpNHCB.SetCount(v); err != nil {
return nil, nil, err
}
if err := p.tmpNHCB.SetSum(h.GetSampleSum()); err != nil {
return nil, nil, err
}
for _, b := range h.GetBucket() {
v := b.GetCumulativeCountFloat()
if v == 0 {
v = float64(b.GetCumulativeCount())
}
if err := p.tmpNHCB.SetBucketCount(b.GetUpperBound(), v); err != nil {
return nil, nil, err
}
}
ch, cfh, err := p.tmpNHCB.Convert()
if err != nil {
return nil, nil, err
}
if t == dto.MetricType_GAUGE_HISTOGRAM {
if ch != nil {
ch.CounterResetHint = histogram.GaugeType
} else {
cfh.CounterResetHint = histogram.GaugeType
}
}
return ch, cfh, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -392,7 +392,7 @@ func TestFederationWithNativeHistograms(t *testing.T) {
require.Equal(t, http.StatusOK, res.Code)
body, err := io.ReadAll(res.Body)
require.NoError(t, err)
p := textparse.NewProtobufParser(body, false, false, labels.NewSymbolTable())
p := textparse.NewProtobufParser(body, false, false, false, labels.NewSymbolTable())
var actVec promql.Vector
metricFamilies := 0
l := labels.Labels{}