From 3f96458782e4d09c4d3f79aba9eb689fb39fd638 Mon Sep 17 00:00:00 2001 From: Piotr <17101802+thampiotr@users.noreply.github.com> Date: Thu, 31 Jul 2025 13:04:49 +0100 Subject: [PATCH] Add reproducer for metric name label corruption Signed-off-by: Piotr <17101802+thampiotr@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- model/textparse/protobufparse_test.go | 191 ++++++++++++++++++++ prompb/io/prometheus/client/decoder_test.go | 75 -------- 3 files changed, 192 insertions(+), 76 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 487b5ce083..6c62e8c7a0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -40,7 +40,7 @@ jobs: - uses: prometheus/promci@443c7fc2397e946bc9f5029e313a9c3441b9b86d # v0.4.7 - uses: ./.github/promci/actions/setup_environment - run: go test --tags=dedupelabels ./... - - run: go test --tags=slicelabels -race ./cmd/prometheus ./prompb/io/prometheus/client + - run: go test --tags=slicelabels -race ./... - run: go test --tags=forcedirectio -race ./tsdb/ - run: GOARCH=386 go test ./... - uses: ./.github/promci/actions/check_proto diff --git a/model/textparse/protobufparse_test.go b/model/textparse/protobufparse_test.go index 35a4238fdb..efd73ea0d3 100644 --- a/model/textparse/protobufparse_test.go +++ b/model/textparse/protobufparse_test.go @@ -16,9 +16,15 @@ package textparse import ( "bytes" "encoding/binary" + "errors" + "fmt" + "io" + "math/rand" + "strings" "testing" "github.com/gogo/protobuf/proto" + "github.com/gogo/protobuf/types" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" @@ -26,6 +32,7 @@ import ( "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" dto "github.com/prometheus/prometheus/prompb/io/prometheus/client" + "github.com/prometheus/prometheus/util/pool" ) func createTestProtoBuf(t testing.TB) *bytes.Buffer { @@ -3214,3 +3221,187 @@ func TestProtobufParse(t *testing.T) { }) } } + +func FuzzProtobufParser_Labels(f *testing.F) { + // Add to the "seed corpus" the values that are known to reproduce issues + // which this test has found in the past. These cases run during regular + // testing, as well as the first step of the fuzzing process. + f.Add(true, true, int64(123)) + f.Add(true, false, int64(129)) + f.Add(false, true, int64(159)) + f.Add(false, true, int64(-127)) + f.Fuzz(func( + t *testing.T, + parseClassicHistogram bool, + enableTypeAndUnitLabels bool, + randSeed int64, + ) { + var ( + r = rand.New(rand.NewSource(randSeed)) + buffers = pool.New(1+r.Intn(128), 128+r.Intn(1024), 2, func(sz int) interface{} { return make([]byte, 0, sz) }) + lastScrapeSize = 0 + observedLabels []labels.Labels + st = labels.NewSymbolTable() + ) + + for i := 0; i < 20; i++ { // run multiple iterations to encounter memory corruptions + // Get buffer from pool like in scrape.go + b := buffers.Get(lastScrapeSize).([]byte) + buf := bytes.NewBuffer(b) + + // Generate some scraped data to parse + mf := generateFuzzMetricFamily(r) + protoBuf, err := proto.Marshal(mf) + require.NoError(t, err) + sizeBuf := make([]byte, binary.MaxVarintLen32) + sizeBufSize := binary.PutUvarint(sizeBuf, uint64(len(protoBuf))) + buf.Write(sizeBuf[:sizeBufSize]) + buf.Write(protoBuf) + + // Use protobuf parser to parse like in real usage + b = buf.Bytes() + p := NewProtobufParser(b, parseClassicHistogram, enableTypeAndUnitLabels, st) + + for { + entry, err := p.Next() + if errors.Is(err, io.EOF) { + break + } + require.NoError(t, err) + switch entry { + case EntryHelp: + name, help := p.Help() + require.Equal(t, mf.Name, string(name)) + require.Equal(t, mf.Help, string(help)) + case EntryType: + name, _ := p.Type() + require.Equal(t, mf.Name, string(name)) + case EntryUnit: + name, unit := p.Unit() + require.Equal(t, mf.Name, string(name)) + require.Equal(t, mf.Unit, string(unit)) + case EntrySeries, EntryHistogram: + var lbs labels.Labels + p.Labels(&lbs) + observedLabels = append(observedLabels, lbs) + } + + // Get labels from exemplars + for { + var e exemplar.Exemplar + if !p.Exemplar(&e) { + break + } + observedLabels = append(observedLabels, e.Labels) + } + } + + // Validate all labels seen so far remain valid. This can find memory corruption issues. + for _, l := range observedLabels { + require.True(t, l.IsValid(model.LegacyValidation), "encountered corrupted labels: %v", l) + } + + lastScrapeSize = len(b) + buffers.Put(b) + } + }) +} + +func generateFuzzMetricFamily( + r *rand.Rand, +) *dto.MetricFamily { + unit := generateValidLabelName(r) + metricName := fmt.Sprintf("%s_%s", generateValidMetricName(r), unit) + metricTypeProto := dto.MetricType(r.Intn(len(dto.MetricType_name))) + metricFamily := &dto.MetricFamily{ + Name: metricName, + Help: generateHelp(r), + Type: metricTypeProto, + Unit: unit, + } + metricsCount := r.Intn(20) + for i := 0; i < metricsCount; i++ { + metric := dto.Metric{ + Label: generateFuzzLabels(r), + } + switch metricTypeProto { + case dto.MetricType_GAUGE: + metric.Gauge = &dto.Gauge{Value: r.Float64()} + case dto.MetricType_COUNTER: + metric.Counter = &dto.Counter{Value: r.Float64()} + case dto.MetricType_SUMMARY: + metric.Summary = &dto.Summary{Quantile: []dto.Quantile{{Quantile: 0.5, Value: r.Float64()}}} + case dto.MetricType_HISTOGRAM: + metric.Histogram = &dto.Histogram{Exemplars: generateExemplars(r)} + } + metricFamily.Metric = append(metricFamily.Metric, metric) + } + return metricFamily +} + +func generateExemplars(r *rand.Rand) []*dto.Exemplar { + exemplarsCount := r.Intn(5) + exemplars := make([]*dto.Exemplar, 0, exemplarsCount) + for i := 0; i < exemplarsCount; i++ { + exemplars = append(exemplars, &dto.Exemplar{ + Label: generateFuzzLabels(r), + Value: r.Float64(), + Timestamp: &types.Timestamp{ + Seconds: int64(r.Intn(1000000000)), + Nanos: int32(r.Intn(1000000000)), + }, + }) + } + return exemplars +} + +func generateFuzzLabels(r *rand.Rand) []dto.LabelPair { + labelsCount := r.Intn(10) + ls := make([]dto.LabelPair, 0, labelsCount) + for i := 0; i < labelsCount; i++ { + ls = append(ls, dto.LabelPair{ + Name: generateValidLabelName(r), + Value: generateValidLabelName(r), + }) + } + return ls +} + +func generateHelp(r *rand.Rand) string { + result := make([]string, 1+r.Intn(20)) + for i := 0; i < len(result); i++ { + result[i] = generateValidLabelName(r) + } + return strings.Join(result, "_") +} + +func generateValidLabelName(r *rand.Rand) string { + return generateString(r, validFirstRunes, validLabelNameRunes) +} + +func generateValidMetricName(r *rand.Rand) string { + return generateString(r, validFirstRunes, validMetricNameRunes) +} + +func generateString(r *rand.Rand, firstRunes, restRunes []rune) string { + result := make([]rune, 1+r.Intn(20)) + for i := range result { + if i == 0 { + result[i] = firstRunes[r.Intn(len(firstRunes))] + } else { + result[i] = restRunes[r.Intn(len(restRunes))] + } + } + return string(result) +} + +var ( + validMetricNameRunes = []rune{ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '_', ':', + } + validLabelNameRunes = validMetricNameRunes[:len(validMetricNameRunes)-1] // skip the colon + validFirstRunes = validMetricNameRunes[:52] // only the letters +) diff --git a/prompb/io/prometheus/client/decoder_test.go b/prompb/io/prometheus/client/decoder_test.go index 18cf186127..8697b78fca 100644 --- a/prompb/io/prometheus/client/decoder_test.go +++ b/prompb/io/prometheus/client/decoder_test.go @@ -17,17 +17,13 @@ import ( "bytes" "encoding/binary" "errors" - "fmt" "io" - "math/rand" "testing" "github.com/gogo/protobuf/proto" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" - "github.com/prometheus/prometheus/util/pool" ) const ( @@ -173,74 +169,3 @@ func TestMetricStreamingDecoder(t *testing.T) { // Expect labels and metricBytes to be static and reusable even after parsing. require.Equal(t, `{checksum="", path="github.com/prometheus/client_golang", version="(devel)"}`, firstMetricLset.String()) } - -func TestMetricStreamingDecoder_LabelsCorruption(t *testing.T) { - lastScrapeSize := 0 - var allPreviousLabels []labels.Labels - buffers := pool.New(128, 1024, 2, func(sz int) interface{} { return make([]byte, 0, sz) }) - builder := labels.NewScratchBuilder(0) - for _, labelsCount := range []int{1, 2, 3, 5, 8, 5, 3, 2, 1} { - // Get buffer from pool like in scrape.go - b := buffers.Get(lastScrapeSize).([]byte) - buf := bytes.NewBuffer(b) - - // Generate some scraped data to parse - mf := &MetricFamily{} - data := generateMetricFamilyText(labelsCount) - require.NoError(t, proto.UnmarshalText(data, mf)) - protoBuf, err := proto.Marshal(mf) - require.NoError(t, err) - sizeBuf := make([]byte, binary.MaxVarintLen32) - sizeBufSize := binary.PutUvarint(sizeBuf, uint64(len(protoBuf))) - buf.Write(sizeBuf[:sizeBufSize]) - buf.Write(protoBuf) - - // Use decoder like protobufparse.go would - b = buf.Bytes() - d := NewMetricStreamingDecoder(b) - require.NoError(t, d.NextMetricFamily()) - require.NoError(t, d.NextMetric()) - - // Get the labels - builder.Reset() - require.NoError(t, d.Label(&builder)) // <- this uses unsafe strings to create labels - lbs := builder.Labels() - allPreviousLabels = append(allPreviousLabels, lbs) - - // Validate all labels seen so far remain valid and not corrupted - for _, l := range allPreviousLabels { - require.True(t, l.IsValid(model.LegacyValidation), "encountered corrupted labels: %v", l) - } - - lastScrapeSize = len(b) - buffers.Put(b) - } -} - -func generateLabels() string { - randomName := fmt.Sprintf("instance_%d", rand.Intn(1000)) - randomValue := fmt.Sprintf("value_%d", rand.Intn(1000)) - return fmt.Sprintf(`label: < - name: "%s" - value: "%s" - >`, randomName, randomValue) -} - -func generateMetricFamilyText(labelsCount int) string { - randomName := fmt.Sprintf("metric_%d", rand.Intn(1000)) - randomHelp := fmt.Sprintf("Test metric to demonstrate forced corruption %d.", rand.Intn(1000)) - labels10 := "" - for i := 0; i < labelsCount; i++ { - labels10 += generateLabels() - } - return fmt.Sprintf(`name: "%s" -help: "%s" -type: GAUGE -metric: < - %s - gauge: < - value: 1.0 - > -> -`, randomName, randomHelp, labels10) -}