From 9da75328ea76f4ebb9a02009b93331a8374f38cf Mon Sep 17 00:00:00 2001 From: Owen Williams Date: Wed, 28 Aug 2024 11:15:42 -0400 Subject: [PATCH] fix(utf8): ensure correct validation when legacy mode turned on (#14736) fix(utf8): ensure correct validation when legacy mode turned on This depends on the included update of the prometheus/common dependency. --------- Signed-off-by: Owen Williams --- go.mod | 4 +- go.sum | 8 ++-- model/labels/labels_common.go | 19 ++++++-- model/labels/labels_test.go | 78 ++++++++++++++++++++++++++++++++- promql/parser/printer.go | 2 +- scrape/scrape.go | 8 +++- scrape/scrape_test.go | 37 ++++++++++++++++ storage/remote/write_handler.go | 5 ++- 8 files changed, 146 insertions(+), 15 deletions(-) diff --git a/go.mod b/go.mod index 50d560bc3a..af327c64ad 100644 --- a/go.mod +++ b/go.mod @@ -52,9 +52,9 @@ require ( github.com/oklog/ulid v1.3.1 github.com/ovh/go-ovh v1.6.0 github.com/prometheus/alertmanager v0.27.0 - github.com/prometheus/client_golang v1.19.1 + github.com/prometheus/client_golang v1.20.0 github.com/prometheus/client_model v0.6.1 - github.com/prometheus/common v0.55.0 + github.com/prometheus/common v0.56.0 github.com/prometheus/common/assets v0.2.0 github.com/prometheus/common/sigv4 v0.1.0 github.com/prometheus/exporter-toolkit v0.11.0 diff --git a/go.sum b/go.sum index bd4aa4f6b3..933ef94201 100644 --- a/go.sum +++ b/go.sum @@ -608,8 +608,8 @@ github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeD github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= -github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= +github.com/prometheus/client_golang v1.20.0 h1:jBzTZ7B099Rg24tny+qngoynol8LtVYlA2bqx3vEloI= +github.com/prometheus/client_golang v1.20.0/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -625,8 +625,8 @@ github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8b github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.29.0/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.56.0 h1:UffReloqkBtvtQEYDg2s+uDPGRrJyC6vZWPGXf6OhPY= +github.com/prometheus/common v0.56.0/go.mod h1:7uRPFSUTbfZWsJ7MHY56sqt7hLQu3bxXHDnNhl8E9qI= github.com/prometheus/common/assets v0.2.0 h1:0P5OrzoHrYBOSM1OigWL3mY8ZvV2N4zIE/5AahrSrfM= github.com/prometheus/common/assets v0.2.0/go.mod h1:D17UVUE12bHbim7HzwUvtqm6gwBEaDQ0F+hIGbFbccI= github.com/prometheus/common/sigv4 v0.1.0 h1:qoVebwtwwEhS85Czm2dSROY5fTo2PAPEVdDeppTwGX4= diff --git a/model/labels/labels_common.go b/model/labels/labels_common.go index 6db86b03c7..d7bdc1e076 100644 --- a/model/labels/labels_common.go +++ b/model/labels/labels_common.go @@ -95,12 +95,23 @@ func (ls *Labels) UnmarshalYAML(unmarshal func(interface{}) error) error { } // IsValid checks if the metric name or label names are valid. -func (ls Labels) IsValid() bool { +func (ls Labels) IsValid(validationScheme model.ValidationScheme) bool { err := ls.Validate(func(l Label) error { - if l.Name == model.MetricNameLabel && !model.IsValidMetricName(model.LabelValue(l.Value)) { - return strconv.ErrSyntax + if l.Name == model.MetricNameLabel { + // If the default validation scheme has been overridden with legacy mode, + // we need to call the special legacy validation checker. + if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation && !model.IsValidLegacyMetricName(string(model.LabelValue(l.Value))) { + return strconv.ErrSyntax + } + if !model.IsValidMetricName(model.LabelValue(l.Value)) { + return strconv.ErrSyntax + } } - if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { + if validationScheme == model.LegacyValidation && model.NameValidationScheme == model.UTF8Validation { + if !model.LabelName(l.Name).IsValidLegacy() || !model.LabelValue(l.Value).IsValid() { + return strconv.ErrSyntax + } + } else if !model.LabelName(l.Name).IsValid() || !model.LabelValue(l.Value).IsValid() { return strconv.ErrSyntax } return nil diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index d8910cdc85..9208908311 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -21,6 +21,7 @@ import ( "strings" "testing" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" ) @@ -272,11 +273,86 @@ func TestLabels_IsValid(t *testing.T) { }, } { t.Run("", func(t *testing.T) { - require.Equal(t, test.expected, test.input.IsValid()) + require.Equal(t, test.expected, test.input.IsValid(model.LegacyValidation)) }) } } +func TestLabels_ValidationModes(t *testing.T) { + for _, test := range []struct { + input Labels + globalMode model.ValidationScheme + callMode model.ValidationScheme + expected bool + }{ + { + input: FromStrings( + "__name__", "test.metric", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.UTF8Validation, + expected: true, + }, + { + input: FromStrings( + "__name__", "test", + "\xc5 bad utf8", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.UTF8Validation, + expected: false, + }, + { + // Setting the common model to legacy validation and then trying to check for UTF-8 on a + // per-call basis is not supported. + input: FromStrings( + "__name__", "test.utf8.metric", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.LegacyValidation, + callMode: model.UTF8Validation, + expected: false, + }, + { + input: FromStrings( + "__name__", "test", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.LegacyValidation, + callMode: model.LegacyValidation, + expected: true, + }, + { + input: FromStrings( + "__name__", "test.utf8.metric", + "hostname", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.LegacyValidation, + expected: false, + }, + { + input: FromStrings( + "__name__", "test", + "host.name", "localhost", + "job", "check", + ), + globalMode: model.UTF8Validation, + callMode: model.LegacyValidation, + expected: false, + }, + } { + model.NameValidationScheme = test.globalMode + require.Equal(t, test.expected, test.input.IsValid(test.callMode)) + } +} + func TestLabels_Equal(t *testing.T) { labels := FromStrings( "aaa", "111", diff --git a/promql/parser/printer.go b/promql/parser/printer.go index 5613956f7a..69f5f082d6 100644 --- a/promql/parser/printer.go +++ b/promql/parser/printer.go @@ -88,7 +88,7 @@ func (node *AggregateExpr) getAggOpStr() string { func joinLabels(ss []string) string { for i, s := range ss { // If the label is already quoted, don't quote it again. - if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(model.LabelValue(s)) { + if s[0] != '"' && s[0] != '\'' && s[0] != '`' && !model.IsValidLegacyMetricName(string(model.LabelValue(s))) { ss[i] = fmt.Sprintf("\"%s\"", s) } } diff --git a/scrape/scrape.go b/scrape/scrape.go index 222cc62f93..5fcd623c0d 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -111,6 +111,7 @@ type scrapeLoopOptions struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool + validationScheme model.ValidationScheme mrc []*relabel.Config cache *scrapeCache @@ -186,6 +187,7 @@ func newScrapePool(cfg *config.ScrapeConfig, app storage.Appendable, offsetSeed options.PassMetadataInContext, metrics, options.skipOffsetting, + opts.validationScheme, ) } sp.metrics.targetScrapePoolTargetLimit.WithLabelValues(sp.config.JobName).Set(float64(sp.config.TargetLimit)) @@ -346,6 +348,7 @@ func (sp *scrapePool) restartLoops(reuseCache bool) { cache: cache, interval: interval, timeout: timeout, + validationScheme: validationScheme, }) ) if err != nil { @@ -853,6 +856,7 @@ type scrapeLoop struct { interval time.Duration timeout time.Duration scrapeClassicHistograms bool + validationScheme model.ValidationScheme // Feature flagged options. enableNativeHistogramIngestion bool @@ -1160,6 +1164,7 @@ func newScrapeLoop(ctx context.Context, passMetadataInContext bool, metrics *scrapeMetrics, skipOffsetting bool, + validationScheme model.ValidationScheme, ) *scrapeLoop { if l == nil { l = log.NewNopLogger() @@ -1211,6 +1216,7 @@ func newScrapeLoop(ctx context.Context, appendMetadataToWAL: appendMetadataToWAL, metrics: metrics, skipOffsetting: skipOffsetting, + validationScheme: validationScheme, } sl.ctx, sl.cancel = context.WithCancel(ctx) @@ -1631,7 +1637,7 @@ loop: err = errNameLabelMandatory break loop } - if !lset.IsValid() { + if !lset.IsValid(sl.validationScheme) { err = fmt.Errorf("invalid metric name or label names: %s", lset.String()) break loop } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 9b30615219..b703f21d46 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -684,6 +684,7 @@ func newBasicScrapeLoop(t testing.TB, ctx context.Context, scraper scraper, app false, newTestScrapeMetrics(t), false, + model.LegacyValidation, ) } @@ -826,6 +827,7 @@ func TestScrapeLoopRun(t *testing.T) { false, scrapeMetrics, false, + model.LegacyValidation, ) // The loop must terminate during the initial offset if the context @@ -970,6 +972,7 @@ func TestScrapeLoopMetadata(t *testing.T) { false, scrapeMetrics, false, + model.LegacyValidation, ) defer cancel() @@ -1065,6 +1068,40 @@ func TestScrapeLoopFailWithInvalidLabelsAfterRelabel(t *testing.T) { require.Equal(t, 0, seriesAdded) } +func TestScrapeLoopFailLegacyUnderUTF8(t *testing.T) { + // Test that scrapes fail when default validation is utf8 but scrape config is + // legacy. + model.NameValidationScheme = model.UTF8Validation + defer func() { + model.NameValidationScheme = model.LegacyValidation + }() + s := teststorage.New(t) + defer s.Close() + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + sl := newBasicScrapeLoop(t, ctx, &testScraper{}, s.Appender, 0) + sl.validationScheme = model.LegacyValidation + + slApp := sl.appender(ctx) + total, added, seriesAdded, err := sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{}) + require.ErrorContains(t, err, "invalid metric name or label names") + require.NoError(t, slApp.Rollback()) + require.Equal(t, 1, total) + require.Equal(t, 0, added) + require.Equal(t, 0, seriesAdded) + + // When scrapeloop has validation set to UTF-8, the metric is allowed. + sl.validationScheme = model.UTF8Validation + + slApp = sl.appender(ctx) + total, added, seriesAdded, err = sl.append(slApp, []byte("{\"test.metric\"} 1\n"), "", time.Time{}) + require.NoError(t, err) + require.Equal(t, 1, total) + require.Equal(t, 1, added) + require.Equal(t, 1, seriesAdded) +} + func makeTestMetrics(n int) []byte { // Construct a metrics string to parse sb := bytes.Buffer{} diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index 9ea3f2bf93..58fb668cc1 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -28,6 +28,7 @@ import ( "github.com/golang/snappy" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/model/exemplar" @@ -239,7 +240,7 @@ func (h *writeHandler) write(ctx context.Context, req *prompb.WriteRequest) (err // TODO(bwplotka): Even as per 1.0 spec, this should be a 400 error, while other samples are // potentially written. Perhaps unify with fixed writeV2 implementation a bit. - if !ls.Has(labels.MetricName) || !ls.IsValid() { + if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { level.Warn(h.logger).Log("msg", "Invalid metric names or labels", "got", ls.String()) samplesWithInvalidLabels++ continue @@ -380,7 +381,7 @@ func (h *writeHandler) appendV2(app storage.Appender, req *writev2.Request, rs * // Validate series labels early. // NOTE(bwplotka): While spec allows UTF-8, Prometheus Receiver may impose // specific limits and follow https://prometheus.io/docs/specs/remote_write_spec_2_0/#invalid-samples case. - if !ls.Has(labels.MetricName) || !ls.IsValid() { + if !ls.Has(labels.MetricName) || !ls.IsValid(model.NameValidationScheme) { badRequestErrs = append(badRequestErrs, fmt.Errorf("invalid metric name or labels, got %v", ls.String())) samplesWithInvalidLabels += len(ts.Samples) + len(ts.Histograms) continue