Merge 93bc3721fc into 25aee26a57

2025-08-06 06:07:11 +02:00 · 2025-08-05 12:57:43 +01:00 · 2025-08-05 12:57:43 +01:00 · 4d82829171
commit 4d82829171
parent 25aee26a57 93bc3721fc
7 changed files with 234 additions and 90 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -40,7 +40,7 @@ jobs:
      - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7
      - uses: ./.github/promci/actions/setup_environment
      - run: go test --tags=dedupelabels ./...
-      - run: go test --tags=slicelabels -race ./cmd/prometheus ./prompb/io/prometheus/client
+      - run: go test --tags=slicelabels -race ./...
      - run: go test --tags=forcedirectio -race ./tsdb/
      - run: GOARCH=386 go test ./...
      - uses: ./.github/promci/actions/check_proto
--- a/model/labels/labels_dedupelabels.go
+++ b/model/labels/labels_dedupelabels.go
@ -787,7 +787,7 @@ func (b *ScratchBuilder) Add(name, value string) {
 }

 // Add a name/value pair, using []byte instead of string to reduce memory allocations.
-// The values must remain live until Labels() is called.
+// The values must remain live until Labels() or Overwrite() is called.
 func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) {
 	b.add = append(b.add, Label{Name: yoloString(name), Value: yoloString(value)})
 }
@ -830,6 +830,12 @@ func (b *ScratchBuilder) Overwrite(ls *Labels) {
 	ls.data = yoloString(b.overwriteBuffer)
 }

+// UnsafeString must be used to wrap any string passed to Add that aliases a reusable buffer via the unsafe package.
+// For example, b.Add("__name__", labels.UnsafeString(yoloString(buf))).
+func UnsafeString(s string) string {
+	return s // No-op - the call to Labels() or Overwrite() will create a copy
+}
+
 // SizeOfLabels returns the approximate space required for n copies of a label.
 func SizeOfLabels(name, value string, n uint64) uint64 {
 	return uint64(len(name)+len(value)) + n*4 // Assuming most symbol-table entries are 2 bytes long.
--- a/model/labels/labels_slicelabels.go
+++ b/model/labels/labels_slicelabels.go
@ -506,6 +506,12 @@ func (b *ScratchBuilder) Overwrite(ls *Labels) {
 	*ls = append((*ls)[:0], b.add...)
 }

+// UnsafeString must be used to wrap any string passed to Add that aliases a reusable buffer via the unsafe package.
+// For example, b.Add("__name__", labels.UnsafeString(yoloString(buf))).
+func UnsafeString(s string) string {
+	return strings.Clone(s) // We need to clone such strings for slicelabels implementation.
+}
+
 // SizeOfLabels returns the approximate space required for n copies of a label.
 func SizeOfLabels(name, value string, n uint64) uint64 {
 	return (uint64(len(name)) + uint64(unsafe.Sizeof(name)) + uint64(len(value)) + uint64(unsafe.Sizeof(value))) * n
--- a/model/labels/labels_stringlabels.go
+++ b/model/labels/labels_stringlabels.go
@ -619,7 +619,7 @@ func (b *ScratchBuilder) Add(name, value string) {
 }

 // UnsafeAddBytes adds a name/value pair using []byte instead of string to reduce memory allocations.
-// The values must remain live until Labels() is called.
+// The values must remain live until Labels() or Overwrite() is called.
 func (b *ScratchBuilder) UnsafeAddBytes(name, value []byte) {
 	b.add = append(b.add, Label{Name: yoloString(name), Value: yoloString(value)})
 }
@ -680,6 +680,12 @@ func (b *ScratchBuilder) SetSymbolTable(_ *SymbolTable) {
 	// no-op
 }

+// UnsafeString must be used to wrap any string passed to Add that aliases a reusable buffer via the unsafe package.
+// For example, b.Add("__name__", labels.UnsafeString(yoloString(buf))).
+func UnsafeString(s string) string {
+	return s // No-op - the call to Labels() or Overwrite() will create a copy
+}
+
 // SizeOfLabels returns the approximate space required for n copies of a label.
 func SizeOfLabels(name, value string, n uint64) uint64 {
 	return uint64(labelSize(&Label{Name: name, Value: value})) * n
--- a/model/textparse/protobufparse.go
+++ b/model/textparse/protobufparse.go
@ -555,13 +555,19 @@ func (p *ProtobufParser) Next() (Entry, error) {
 func (p *ProtobufParser) onSeriesOrHistogramUpdate() error {
 	p.builder.Reset()

+	name, safe := p.getMagicName()
+	if !safe {
+		name = labels.UnsafeString(name) // Make sure the name is safe to use in labels.
+	}
+
 	if p.enableTypeAndUnitLabels {
 		_, typ := p.Type()

 		m := schema.Metadata{
-			Name: p.getMagicName(),
+			Name: name,
 			Type: typ,
-			Unit: p.dec.GetUnit(),
+			// The unit value becomes invalid after the next call to dec.NextMetricFamily, so wrap it with labels.UnsafeString.
+			Unit: labels.UnsafeString(p.dec.GetUnit()),
 		}
 		m.AddToLabels(&p.builder)
 		if err := p.dec.Label(schema.IgnoreOverriddenMetadataLabelsScratchBuilder{
@ -571,7 +577,7 @@ func (p *ProtobufParser) onSeriesOrHistogramUpdate() error {
 			return err
 		}
 	} else {
-		p.builder.Add(labels.MetricName, p.getMagicName())
+		p.builder.Add(labels.MetricName, name)
 		if err := p.dec.Label(&p.builder); err != nil {
 			return err
 		}
@ -600,24 +606,28 @@ func (p *ProtobufParser) onSeriesOrHistogramUpdate() error {
 	return nil
 }

-// getMagicName usually just returns p.mf.GetType() but adds a magic suffix
-// ("_count", "_sum", "_bucket") if needed according to the current parser
-// state.
-func (p *ProtobufParser) getMagicName() string {
+// getMagicName returns the name of the metric. It usually just returns
+// p.dec.GetName() but adds a magic suffix ("_count", "_sum", "_bucket") if
+// needed according to the current parser state.
+// The second return value is set to true if the metric name is safe to use
+// in labels without wrapping with labels.UnsafeString. When it is false, the
+// returned name will become invalid with the next call to dec.NextMetricFamily
+// and must be wrapped with labels.UnsafeString when used in labels.
+func (p *ProtobufParser) getMagicName() (string, bool) {
 	t := p.dec.GetType()
 	if p.state == EntryHistogram || (t != dto.MetricType_HISTOGRAM && t != dto.MetricType_GAUGE_HISTOGRAM && t != dto.MetricType_SUMMARY) {
-		return p.dec.GetName()
+		return p.dec.GetName(), false
 	}
 	if p.fieldPos == -2 {
-		return p.dec.GetName() + "_count"
+		return p.dec.GetName() + "_count", true
 	}
 	if p.fieldPos == -1 {
-		return p.dec.GetName() + "_sum"
+		return p.dec.GetName() + "_sum", true
 	}
 	if t == dto.MetricType_HISTOGRAM || t == dto.MetricType_GAUGE_HISTOGRAM {
-		return p.dec.GetName() + "_bucket"
+		return p.dec.GetName() + "_bucket", true
 	}
-	return p.dec.GetName()
+	return p.dec.GetName(), false
 }

 // getMagicLabel returns if a magic label ("quantile" or "le") is needed and, if
--- a/model/textparse/protobufparse_test.go
+++ b/model/textparse/protobufparse_test.go
@ -16,9 +16,15 @@ package textparse
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"math/rand"
+	"strings"
 	"testing"

 	"github.com/gogo/protobuf/proto"
+	"github.com/gogo/protobuf/types"
 	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"

@ -26,6 +32,7 @@ import (
 	"github.com/prometheus/prometheus/model/histogram"
 	"github.com/prometheus/prometheus/model/labels"
 	dto "github.com/prometheus/prometheus/prompb/io/prometheus/client"
+	"github.com/prometheus/prometheus/util/pool"
 )

 func createTestProtoBuf(t testing.TB) *bytes.Buffer {
@ -3214,3 +3221,187 @@ func TestProtobufParse(t *testing.T) {
 		})
 	}
 }
+
+func FuzzProtobufParser_Labels(f *testing.F) {
+	// Add to the "seed corpus" the values that are known to reproduce issues
+	// which this test has found in the past. These cases run during regular
+	// testing, as well as the first step of the fuzzing process.
+	f.Add(true, true, int64(123))
+	f.Add(true, false, int64(129))
+	f.Add(false, true, int64(159))
+	f.Add(false, true, int64(-127))
+	f.Fuzz(func(
+		t *testing.T,
+		parseClassicHistogram bool,
+		enableTypeAndUnitLabels bool,
+		randSeed int64,
+	) {
+		var (
+			r              = rand.New(rand.NewSource(randSeed))
+			buffers        = pool.New(1+r.Intn(128), 128+r.Intn(1024), 2, func(sz int) interface{} { return make([]byte, 0, sz) })
+			lastScrapeSize = 0
+			observedLabels []labels.Labels
+			st             = labels.NewSymbolTable()
+		)
+
+		for i := 0; i < 20; i++ { // run multiple iterations to encounter memory corruptions
+			// Get buffer from pool like in scrape.go
+			b := buffers.Get(lastScrapeSize).([]byte)
+			buf := bytes.NewBuffer(b)
+
+			// Generate some scraped data to parse
+			mf := generateFuzzMetricFamily(r)
+			protoBuf, err := proto.Marshal(mf)
+			require.NoError(t, err)
+			sizeBuf := make([]byte, binary.MaxVarintLen32)
+			sizeBufSize := binary.PutUvarint(sizeBuf, uint64(len(protoBuf)))
+			buf.Write(sizeBuf[:sizeBufSize])
+			buf.Write(protoBuf)
+
+			// Use protobuf parser to parse like in real usage
+			b = buf.Bytes()
+			p := NewProtobufParser(b, parseClassicHistogram, enableTypeAndUnitLabels, st)
+
+			for {
+				entry, err := p.Next()
+				if errors.Is(err, io.EOF) {
+					break
+				}
+				require.NoError(t, err)
+				switch entry {
+				case EntryHelp:
+					name, help := p.Help()
+					require.Equal(t, mf.Name, string(name))
+					require.Equal(t, mf.Help, string(help))
+				case EntryType:
+					name, _ := p.Type()
+					require.Equal(t, mf.Name, string(name))
+				case EntryUnit:
+					name, unit := p.Unit()
+					require.Equal(t, mf.Name, string(name))
+					require.Equal(t, mf.Unit, string(unit))
+				case EntrySeries, EntryHistogram:
+					var lbs labels.Labels
+					p.Labels(&lbs)
+					observedLabels = append(observedLabels, lbs)
+				}
+
+				// Get labels from exemplars
+				for {
+					var e exemplar.Exemplar
+					if !p.Exemplar(&e) {
+						break
+					}
+					observedLabels = append(observedLabels, e.Labels)
+				}
+			}
+
+			// Validate all labels seen so far remain valid. This can find memory corruption issues.
+			for _, l := range observedLabels {
+				require.True(t, l.IsValid(model.LegacyValidation), "encountered corrupted labels: %v", l)
+			}
+
+			lastScrapeSize = len(b)
+			buffers.Put(b)
+		}
+	})
+}
+
+func generateFuzzMetricFamily(
+	r *rand.Rand,
+) *dto.MetricFamily {
+	unit := generateValidLabelName(r)
+	metricName := fmt.Sprintf("%s_%s", generateValidMetricName(r), unit)
+	metricTypeProto := dto.MetricType(r.Intn(len(dto.MetricType_name)))
+	metricFamily := &dto.MetricFamily{
+		Name: metricName,
+		Help: generateHelp(r),
+		Type: metricTypeProto,
+		Unit: unit,
+	}
+	metricsCount := r.Intn(20)
+	for i := 0; i < metricsCount; i++ {
+		metric := dto.Metric{
+			Label: generateFuzzLabels(r),
+		}
+		switch metricTypeProto {
+		case dto.MetricType_GAUGE:
+			metric.Gauge = &dto.Gauge{Value: r.Float64()}
+		case dto.MetricType_COUNTER:
+			metric.Counter = &dto.Counter{Value: r.Float64()}
+		case dto.MetricType_SUMMARY:
+			metric.Summary = &dto.Summary{Quantile: []dto.Quantile{{Quantile: 0.5, Value: r.Float64()}}}
+		case dto.MetricType_HISTOGRAM:
+			metric.Histogram = &dto.Histogram{Exemplars: generateExemplars(r)}
+		}
+		metricFamily.Metric = append(metricFamily.Metric, metric)
+	}
+	return metricFamily
+}
+
+func generateExemplars(r *rand.Rand) []*dto.Exemplar {
+	exemplarsCount := r.Intn(5)
+	exemplars := make([]*dto.Exemplar, 0, exemplarsCount)
+	for i := 0; i < exemplarsCount; i++ {
+		exemplars = append(exemplars, &dto.Exemplar{
+			Label: generateFuzzLabels(r),
+			Value: r.Float64(),
+			Timestamp: &types.Timestamp{
+				Seconds: int64(r.Intn(1000000000)),
+				Nanos:   int32(r.Intn(1000000000)),
+			},
+		})
+	}
+	return exemplars
+}
+
+func generateFuzzLabels(r *rand.Rand) []dto.LabelPair {
+	labelsCount := r.Intn(10)
+	ls := make([]dto.LabelPair, 0, labelsCount)
+	for i := 0; i < labelsCount; i++ {
+		ls = append(ls, dto.LabelPair{
+			Name:  generateValidLabelName(r),
+			Value: generateValidLabelName(r),
+		})
+	}
+	return ls
+}
+
+func generateHelp(r *rand.Rand) string {
+	result := make([]string, 1+r.Intn(20))
+	for i := 0; i < len(result); i++ {
+		result[i] = generateValidLabelName(r)
+	}
+	return strings.Join(result, "_")
+}
+
+func generateValidLabelName(r *rand.Rand) string {
+	return generateString(r, validFirstRunes, validLabelNameRunes)
+}
+
+func generateValidMetricName(r *rand.Rand) string {
+	return generateString(r, validFirstRunes, validMetricNameRunes)
+}
+
+func generateString(r *rand.Rand, firstRunes, restRunes []rune) string {
+	result := make([]rune, 1+r.Intn(20))
+	for i := range result {
+		if i == 0 {
+			result[i] = firstRunes[r.Intn(len(firstRunes))]
+		} else {
+			result[i] = restRunes[r.Intn(len(restRunes))]
+		}
+	}
+	return string(result)
+}
+
+var (
+	validMetricNameRunes = []rune{
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+		'_', ':',
+	}
+	validLabelNameRunes = validMetricNameRunes[:len(validMetricNameRunes)-1] // skip the colon
+	validFirstRunes     = validMetricNameRunes[:52]                          // only the letters
+)
--- a/prompb/io/prometheus/client/decoder_test.go
+++ b/prompb/io/prometheus/client/decoder_test.go
@ -17,17 +17,13 @@ import (
 	"bytes"
 	"encoding/binary"
 	"errors"
-	"fmt"
 	"io"
-	"math/rand"
 	"testing"

 	"github.com/gogo/protobuf/proto"
-	"github.com/prometheus/common/model"
 	"github.com/stretchr/testify/require"

 	"github.com/prometheus/prometheus/model/labels"
-	"github.com/prometheus/prometheus/util/pool"
 )

 const (
@ -173,74 +169,3 @@ func TestMetricStreamingDecoder(t *testing.T) {
 	// Expect labels and metricBytes to be static and reusable even after parsing.
 	require.Equal(t, `{checksum="", path="github.com/prometheus/client_golang", version="(devel)"}`, firstMetricLset.String())
 }
-
-func TestMetricStreamingDecoder_LabelsCorruption(t *testing.T) {
-	lastScrapeSize := 0
-	var allPreviousLabels []labels.Labels
-	buffers := pool.New(128, 1024, 2, func(sz int) interface{} { return make([]byte, 0, sz) })
-	builder := labels.NewScratchBuilder(0)
-	for _, labelsCount := range []int{1, 2, 3, 5, 8, 5, 3, 2, 1} {
-		// Get buffer from pool like in scrape.go
-		b := buffers.Get(lastScrapeSize).([]byte)
-		buf := bytes.NewBuffer(b)
-
-		// Generate some scraped data to parse
-		mf := &MetricFamily{}
-		data := generateMetricFamilyText(labelsCount)
-		require.NoError(t, proto.UnmarshalText(data, mf))
-		protoBuf, err := proto.Marshal(mf)
-		require.NoError(t, err)
-		sizeBuf := make([]byte, binary.MaxVarintLen32)
-		sizeBufSize := binary.PutUvarint(sizeBuf, uint64(len(protoBuf)))
-		buf.Write(sizeBuf[:sizeBufSize])
-		buf.Write(protoBuf)
-
-		// Use decoder like protobufparse.go would
-		b = buf.Bytes()
-		d := NewMetricStreamingDecoder(b)
-		require.NoError(t, d.NextMetricFamily())
-		require.NoError(t, d.NextMetric())
-
-		// Get the labels
-		builder.Reset()
-		require.NoError(t, d.Label(&builder)) // <- this uses unsafe strings to create labels
-		lbs := builder.Labels()
-		allPreviousLabels = append(allPreviousLabels, lbs)
-
-		// Validate all labels seen so far remain valid and not corrupted
-		for _, l := range allPreviousLabels {
-			require.True(t, l.IsValid(model.LegacyValidation), "encountered corrupted labels: %v", l)
-		}
-
-		lastScrapeSize = len(b)
-		buffers.Put(b)
-	}
-}
-
-func generateLabels() string {
-	randomName := fmt.Sprintf("instance_%d", rand.Intn(1000))
-	randomValue := fmt.Sprintf("value_%d", rand.Intn(1000))
-	return fmt.Sprintf(`label: <
-    name: "%s"
-    value: "%s"
-  >`, randomName, randomValue)
-}
-
-func generateMetricFamilyText(labelsCount int) string {
-	randomName := fmt.Sprintf("metric_%d", rand.Intn(1000))
-	randomHelp := fmt.Sprintf("Test metric to demonstrate forced corruption %d.", rand.Intn(1000))
-	labels10 := ""
-	for i := 0; i < labelsCount; i++ {
-		labels10 += generateLabels()
-	}
-	return fmt.Sprintf(`name: "%s"
-help: "%s"
-type: GAUGE
-metric: <
-  %s
-  gauge: <
-    value: 1.0
-  >
->
-`, randomName, randomHelp, labels10)
-}