From 34072ff9c16faf2cb7e9123f955a8b57cf3dfeff Mon Sep 17 00:00:00 2001 From: Mickael Carl Date: Sat, 6 May 2023 07:20:58 +0100 Subject: [PATCH 01/47] makefile: let golangci-lint also run on arm64 Signed-off-by: Mickael Carl --- Makefile.common | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.common b/Makefile.common index bc2a07d728..8f36309210 100644 --- a/Makefile.common +++ b/Makefile.common @@ -62,10 +62,10 @@ SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= GOLANGCI_LINT_VERSION ?= v1.55.2 -# golangci-lint only supports linux, darwin and windows platforms on i386/amd64. +# golangci-lint only supports linux, darwin and windows platforms on i386/amd64 and arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) - ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386)) + ifeq ($(GOHOSTARCH),$(filter $(GOHOSTARCH),amd64 i386 arm64)) # If we're in CI and there is an Actions file, that means the linter # is being run in Actions, so we don't need to run it here. ifneq (,$(SKIP_GOLANGCI_LINT)) From 420b324c3e295ba1d26b81d45f2cfa9d25f75b58 Mon Sep 17 00:00:00 2001 From: Mickael Carl Date: Sat, 6 May 2023 08:21:15 +0100 Subject: [PATCH 02/47] docs: add a quick note on linting in contributing guidelines Signed-off-by: Mickael Carl --- CONTRIBUTING.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 57055ef38c..f666caae9c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -42,7 +42,12 @@ go build ./cmd/prometheus/ make test # Make sure all the tests pass before you commit and push :) ``` -We use [`golangci-lint`](https://github.com/golangci/golangci-lint) for linting the code. If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. +To run a collection of Go linters through [`golangci-lint`](https://github.com/golangci/golangci-lint), do: +```bash +make lint +``` + +If it reports an issue and you think that the warning needs to be disregarded or is a false-positive, you can add a special comment `//nolint:linter1[,linter2,...]` before the offending line. Use this sparingly though, fixing the code to comply with the linter's recommendation is in general the preferred course of action. See [this section of the golangci-lint documentation](https://golangci-lint.run/usage/false-positives/#nolint-directive) for more information. All our issues are regularly tagged so that you can also filter down the issues involving the components you want to work on. For our labeling policy refer [the wiki page](https://github.com/prometheus/prometheus/wiki/Label-Names-and-Descriptions). From 7e8f03663ae91ee99bd231518733d7cf8498ca6e Mon Sep 17 00:00:00 2001 From: "roger.wang" Date: Thu, 29 Feb 2024 09:33:17 +0800 Subject: [PATCH 03/47] Code optimization: The relabel operation is used very frequently, and strconv.FormatInt() with better performance should be used. Signed-off-by: roger.wang --- model/relabel/relabel.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/model/relabel/relabel.go b/model/relabel/relabel.go index d29c3d07ae..fee39d0b15 100644 --- a/model/relabel/relabel.go +++ b/model/relabel/relabel.go @@ -17,6 +17,7 @@ import ( "crypto/md5" "encoding/binary" "fmt" + "strconv" "strings" "github.com/grafana/regexp" @@ -290,7 +291,7 @@ func relabel(cfg *Config, lb *labels.Builder) (keep bool) { hash := md5.Sum([]byte(val)) // Use only the last 8 bytes of the hash to give the same result as earlier versions of this code. mod := binary.BigEndian.Uint64(hash[8:]) % cfg.Modulus - lb.Set(cfg.TargetLabel, fmt.Sprintf("%d", mod)) + lb.Set(cfg.TargetLabel, strconv.FormatUint(mod, 10)) case LabelMap: lb.Range(func(l labels.Label) { if cfg.Regex.MatchString(l.Name) { From 2aab70b8390348b2814b7b9c7645f81dcf98a53b Mon Sep 17 00:00:00 2001 From: Federico Leva Date: Wed, 13 Mar 2024 11:28:02 +0200 Subject: [PATCH 04/47] Clarify batch_send_deadline docs This is the time period covered by a batch of samples, when the number of waiting samples is lower than max_samples_per_send. It does not affect timeouts or retries. Co-authored-by: Bartlomiej Plotka Signed-off-by: Federico Leva --- docs/configuration/configuration.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index d751a4084e..7d4fbc959e 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -3661,7 +3661,8 @@ queue_config: [ min_shards: | default = 1 ] # Maximum number of samples per send. [ max_samples_per_send: | default = 2000] - # Maximum time a sample will wait in buffer. + # Maximum time a sample will wait for a send. The sample might wait less + # if the buffer is full. Further time might pass due to potential retries. [ batch_send_deadline: | default = 5s ] # Initial retry delay. Gets doubled for every retry. [ min_backoff: | default = 30ms ] From c5a1cc914856670e65d6537f9f76eefdc111312a Mon Sep 17 00:00:00 2001 From: machine424 Date: Wed, 29 Nov 2023 17:49:01 +0100 Subject: [PATCH 05/47] chore(tsdb): add a sandboxDir to DBReadOnly, the directory can be used for transient file writes. use it in loadDataAsQueryable to make sure the RO Head doesn't truncate or cut new chunks in data/chunks_head/. add a -sandbox-dir-root flag to "promtool tsdb dump/dump-openmetrics" to control the root of that sandbox dirrectory. Signed-off-by: machine424 --- cmd/promtool/backfill.go | 2 +- cmd/promtool/main.go | 6 ++- cmd/promtool/tsdb.go | 8 ++-- cmd/promtool/tsdb_test.go | 1 + docs/command-line/promtool.md | 2 + tsdb/chunks/head_chunks.go | 27 +++++++++++ tsdb/compact_test.go | 2 +- tsdb/db.go | 45 +++++++++++++----- tsdb/db_test.go | 89 +++++++++++++++++++++++++++++++++-- 9 files changed, 158 insertions(+), 24 deletions(-) diff --git a/cmd/promtool/backfill.go b/cmd/promtool/backfill.go index 601c3ced9f..79db428c71 100644 --- a/cmd/promtool/backfill.go +++ b/cmd/promtool/backfill.go @@ -88,7 +88,7 @@ func createBlocks(input []byte, mint, maxt, maxBlockDuration int64, maxSamplesIn blockDuration := getCompatibleBlockDuration(maxBlockDuration) mint = blockDuration * (mint / blockDuration) - db, err := tsdb.OpenDBReadOnly(outputDir, nil) + db, err := tsdb.OpenDBReadOnly(outputDir, "", nil) if err != nil { return err } diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index a62ae4fbf4..e2a7a37f59 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -235,12 +235,14 @@ func main() { tsdbDumpCmd := tsdbCmd.Command("dump", "Dump samples from a TSDB.") dumpPath := tsdbDumpCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() + dumpSandboxDirRoot := tsdbDumpCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() dumpMinTime := tsdbDumpCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped.") dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() + dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() dumpOpenMetricsMaxTime := tsdbDumpOpenMetricsCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpOpenMetricsMatch := tsdbDumpOpenMetricsCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() @@ -396,9 +398,9 @@ func main() { os.Exit(checkErr(listBlocks(*listPath, *listHumanReadable))) case tsdbDumpCmd.FullCommand(): - os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet))) + os.Exit(checkErr(dumpSamples(ctx, *dumpPath, *dumpSandboxDirRoot, *dumpMinTime, *dumpMaxTime, *dumpMatch, formatSeriesSet))) case tsdbDumpOpenMetricsCmd.FullCommand(): - os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics))) + os.Exit(checkErr(dumpSamples(ctx, *dumpOpenMetricsPath, *dumpOpenMetricsSandboxDirRoot, *dumpOpenMetricsMinTime, *dumpOpenMetricsMaxTime, *dumpOpenMetricsMatch, formatSeriesSetOpenMetrics))) // TODO(aSquare14): Work on adding support for custom block size. case openMetricsImportCmd.FullCommand(): os.Exit(backfillOpenMetrics(*importFilePath, *importDBPath, *importHumanReadable, *importQuiet, *maxBlockDuration)) diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index b786c92976..4de3b3e06c 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -338,7 +338,7 @@ func readPrometheusLabels(r io.Reader, n int) ([]labels.Labels, error) { } func listBlocks(path string, humanReadable bool) error { - db, err := tsdb.OpenDBReadOnly(path, nil) + db, err := tsdb.OpenDBReadOnly(path, "", nil) if err != nil { return err } @@ -393,7 +393,7 @@ func getFormatedBytes(bytes int64, humanReadable bool) string { } func openBlock(path, blockID string) (*tsdb.DBReadOnly, tsdb.BlockReader, error) { - db, err := tsdb.OpenDBReadOnly(path, nil) + db, err := tsdb.OpenDBReadOnly(path, "", nil) if err != nil { return nil, nil, err } @@ -708,8 +708,8 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb. type SeriesSetFormatter func(series storage.SeriesSet) error -func dumpSamples(ctx context.Context, path string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) { - db, err := tsdb.OpenDBReadOnly(path, nil) +func dumpSamples(ctx context.Context, dbDir, sandboxDirRoot string, mint, maxt int64, match []string, formatter SeriesSetFormatter) (err error) { + db, err := tsdb.OpenDBReadOnly(dbDir, sandboxDirRoot, nil) if err != nil { return err } diff --git a/cmd/promtool/tsdb_test.go b/cmd/promtool/tsdb_test.go index 36a65d73e4..d3c4e8e05f 100644 --- a/cmd/promtool/tsdb_test.go +++ b/cmd/promtool/tsdb_test.go @@ -64,6 +64,7 @@ func getDumpedSamples(t *testing.T, path string, mint, maxt int64, match []strin err := dumpSamples( context.Background(), path, + t.TempDir(), mint, maxt, match, diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 3eceed48f2..9ed51fb7c9 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -566,6 +566,7 @@ Dump samples from a TSDB. | Flag | Description | Default | | --- | --- | --- | +| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | | --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | @@ -592,6 +593,7 @@ Dump samples from a TSDB. | Flag | Description | Default | | --- | --- | --- | +| --sandbox-dir-root | Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end. | `data/` | | --min-time | Minimum timestamp to dump. | `-9223372036854775808` | | --max-time | Maximum timestamp to dump. | `9223372036854775807` | | --match | Series selector. Can be specified multiple times. | `{__name__=~'(?s:.*)'}` | diff --git a/tsdb/chunks/head_chunks.go b/tsdb/chunks/head_chunks.go index 087f25fbb3..ec5a81263c 100644 --- a/tsdb/chunks/head_chunks.go +++ b/tsdb/chunks/head_chunks.go @@ -381,6 +381,33 @@ func listChunkFiles(dir string) (map[int]string, error) { return res, nil } +// HardLinkChunkFiles creates hardlinks for chunk files from src to dst. +// It does nothing if src doesn't exist and ensures dst is created if not. +func HardLinkChunkFiles(src, dst string) error { + _, err := os.Stat(src) + if os.IsNotExist(err) { + return nil + } + if err != nil { + return fmt.Errorf("check source chunks dir: %w", err) + } + if err := os.MkdirAll(dst, 0o777); err != nil { + return fmt.Errorf("set up destination chunks dir: %w", err) + } + files, err := listChunkFiles(src) + if err != nil { + return fmt.Errorf("list chunks: %w", err) + } + for _, filePath := range files { + _, fileName := filepath.Split(filePath) + err := os.Link(filepath.Join(src, fileName), filepath.Join(dst, fileName)) + if err != nil { + return fmt.Errorf("hardlink a chunk: %w", err) + } + } + return nil +} + // repairLastChunkFile deletes the last file if it's empty. // Because we don't fsync when creating these files, we could end // up with an empty file at the end during an abrupt shutdown. diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index b2d2ea6e7f..f2e9024e05 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -1297,7 +1297,7 @@ func TestCancelCompactions(t *testing.T) { // This checks that the `context.Canceled` error is properly checked at all levels: // - tsdb_errors.NewMulti() should have the Is() method implemented for correct checks. // - callers should check with errors.Is() instead of ==. - readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, log.NewNopLogger()) + readOnlyDB, err := OpenDBReadOnly(tmpdirCopy, "", log.NewNopLogger()) require.NoError(t, err) blocks, err := readOnlyDB.Blocks() require.NoError(t, err) diff --git a/tsdb/db.go b/tsdb/db.go index 22292ab16e..51e03a0147 100644 --- a/tsdb/db.go +++ b/tsdb/db.go @@ -383,26 +383,36 @@ var ErrClosed = errors.New("db already closed") // Current implementation doesn't support concurrency so // all API calls should happen in the same go routine. type DBReadOnly struct { - logger log.Logger - dir string - closers []io.Closer - closed chan struct{} + logger log.Logger + dir string + sandboxDir string + closers []io.Closer + closed chan struct{} } // OpenDBReadOnly opens DB in the given directory for read only operations. -func OpenDBReadOnly(dir string, l log.Logger) (*DBReadOnly, error) { +func OpenDBReadOnly(dir, sandboxDirRoot string, l log.Logger) (*DBReadOnly, error) { if _, err := os.Stat(dir); err != nil { return nil, fmt.Errorf("opening the db dir: %w", err) } + if sandboxDirRoot == "" { + sandboxDirRoot = dir + } + sandboxDir, err := os.MkdirTemp(sandboxDirRoot, "tmp_dbro_sandbox") + if err != nil { + return nil, fmt.Errorf("setting up sandbox dir: %w", err) + } + if l == nil { l = log.NewNopLogger() } return &DBReadOnly{ - logger: l, - dir: dir, - closed: make(chan struct{}), + logger: l, + dir: dir, + sandboxDir: sandboxDir, + closed: make(chan struct{}), }, nil } @@ -491,7 +501,14 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue } opts := DefaultHeadOptions() - opts.ChunkDirRoot = db.dir + // Hard link the chunk files to a dir in db.sandboxDir in case the Head needs to truncate some of them + // or cut new ones while replaying the WAL. + // See https://github.com/prometheus/prometheus/issues/11618. + err = chunks.HardLinkChunkFiles(mmappedChunksDir(db.dir), mmappedChunksDir(db.sandboxDir)) + if err != nil { + return nil, err + } + opts.ChunkDirRoot = db.sandboxDir head, err := NewHead(nil, db.logger, nil, nil, opts, NewHeadStats()) if err != nil { return nil, err @@ -519,7 +536,7 @@ func (db *DBReadOnly) loadDataAsQueryable(maxt int64) (storage.SampleAndChunkQue } } opts := DefaultHeadOptions() - opts.ChunkDirRoot = db.dir + opts.ChunkDirRoot = db.sandboxDir head, err = NewHead(nil, db.logger, w, wbl, opts, NewHeadStats()) if err != nil { return nil, err @@ -690,8 +707,14 @@ func (db *DBReadOnly) Block(blockID string) (BlockReader, error) { return block, nil } -// Close all block readers. +// Close all block readers and delete the sandbox dir. func (db *DBReadOnly) Close() error { + defer func() { + // Delete the temporary sandbox directory that was created when opening the DB. + if err := os.RemoveAll(db.sandboxDir); err != nil { + level.Error(db.logger).Log("msg", "delete sandbox dir", "err", err) + } + }() select { case <-db.closed: return ErrClosed diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 71b2f05ac7..ca7b005ed8 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -25,6 +25,7 @@ import ( "os" "path" "path/filepath" + "runtime" "sort" "strconv" "sync" @@ -2494,7 +2495,7 @@ func TestDBReadOnly(t *testing.T) { } // Open a read only db and ensure that the API returns the same result as the normal DB. - dbReadOnly, err := OpenDBReadOnly(dbDir, logger) + dbReadOnly, err := OpenDBReadOnly(dbDir, "", logger) require.NoError(t, err) defer func() { require.NoError(t, dbReadOnly.Close()) }() @@ -2548,10 +2549,14 @@ func TestDBReadOnly(t *testing.T) { // TestDBReadOnlyClosing ensures that after closing the db // all api methods return an ErrClosed. func TestDBReadOnlyClosing(t *testing.T) { - dbDir := t.TempDir() - db, err := OpenDBReadOnly(dbDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) + sandboxDir := t.TempDir() + db, err := OpenDBReadOnly(t.TempDir(), sandboxDir, log.NewLogfmtLogger(log.NewSyncWriter(os.Stderr))) require.NoError(t, err) + // The sandboxDir was there. + require.DirExists(t, db.sandboxDir) require.NoError(t, db.Close()) + // The sandboxDir was deleted when closing. + require.NoDirExists(t, db.sandboxDir) require.Equal(t, db.Close(), ErrClosed) _, err = db.Blocks() require.Equal(t, err, ErrClosed) @@ -2587,7 +2592,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { } // Flush WAL. - db, err := OpenDBReadOnly(dbDir, logger) + db, err := OpenDBReadOnly(dbDir, "", logger) require.NoError(t, err) flush := t.TempDir() @@ -2595,7 +2600,7 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { require.NoError(t, db.Close()) // Reopen the DB from the flushed WAL block. - db, err = OpenDBReadOnly(flush, logger) + db, err = OpenDBReadOnly(flush, "", logger) require.NoError(t, err) defer func() { require.NoError(t, db.Close()) }() blocks, err := db.Blocks() @@ -2624,6 +2629,80 @@ func TestDBReadOnly_FlushWAL(t *testing.T) { require.Equal(t, 1000.0, sum) } +func TestDBReadOnly_Querier_NoAlteration(t *testing.T) { + countChunks := func(dir string) int { + files, err := os.ReadDir(mmappedChunksDir(dir)) + require.NoError(t, err) + return len(files) + } + + dirHash := func(dir string) (hash []byte) { + // Windows requires the DB to be closed: "xxx\lock: The process cannot access the file because it is being used by another process." + // But closing the DB alters the directory in this case (it'll cut a new chunk). + if runtime.GOOS != "windows" { + hash = testutil.DirHash(t, dir) + } + return + } + + spinUpQuerierAndCheck := func(dir, sandboxDir string, chunksCount int) { + dBDirHash := dirHash(dir) + // Bootsrap a RO db from the same dir and set up a querier. + dbReadOnly, err := OpenDBReadOnly(dir, sandboxDir, nil) + require.NoError(t, err) + require.Equal(t, chunksCount, countChunks(dir)) + q, err := dbReadOnly.Querier(math.MinInt, math.MaxInt) + require.NoError(t, err) + require.NoError(t, q.Close()) + require.NoError(t, dbReadOnly.Close()) + // The RO Head doesn't alter RW db chunks_head/. + require.Equal(t, chunksCount, countChunks(dir)) + require.Equal(t, dirHash(dir), dBDirHash) + } + + t.Run("doesn't cut chunks while replaying WAL", func(t *testing.T) { + db := openTestDB(t, nil, nil) + defer func() { + require.NoError(t, db.Close()) + }() + + // Append until the first mmaped head chunk. + for i := 0; i < 121; i++ { + app := db.Appender(context.Background()) + _, err := app.Append(0, labels.FromStrings("foo", "bar"), int64(i), 0) + require.NoError(t, err) + require.NoError(t, app.Commit()) + } + + spinUpQuerierAndCheck(db.dir, t.TempDir(), 0) + + // The RW Head should have no problem cutting its own chunk, + // this also proves that a chunk needed to be cut. + require.NotPanics(t, func() { db.ForceHeadMMap() }) + require.Equal(t, 1, countChunks(db.dir)) + }) + + t.Run("doesn't truncate corrupted chunks", func(t *testing.T) { + db := openTestDB(t, nil, nil) + require.NoError(t, db.Close()) + + // Simulate a corrupted chunk: without a header. + _, err := os.Create(path.Join(mmappedChunksDir(db.dir), "000001")) + require.NoError(t, err) + + spinUpQuerierAndCheck(db.dir, t.TempDir(), 1) + + // The RW Head should have no problem truncating its corrupted file: + // this proves that the chunk needed to be truncated. + db, err = Open(db.dir, nil, nil, nil, nil) + defer func() { + require.NoError(t, db.Close()) + }() + require.NoError(t, err) + require.Equal(t, 0, countChunks(db.dir)) + }) +} + func TestDBCannotSeePartialCommits(t *testing.T) { if defaultIsolationDisabled { t.Skip("skipping test since tsdb isolation is disabled") From 0cd3a22a1844ca06ece00e8b99fe534f2ca1b173 Mon Sep 17 00:00:00 2001 From: Jiekun Date: Tue, 30 Apr 2024 17:35:25 +0800 Subject: [PATCH 06/47] docs: [ovh sd] Added missing label for OVH dedicated server in service discovery doc Signed-off-by: Jiekun --- docs/configuration/configuration.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 2f2e07a0c2..746e9123e9 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -1467,6 +1467,7 @@ For OVHcloud's [public cloud instances](https://www.ovhcloud.com/en/public-cloud * `__meta_ovhcloud_dedicated_server_ipv6`: the IPv6 of the server * `__meta_ovhcloud_dedicated_server_link_speed`: the link speed of the server * `__meta_ovhcloud_dedicated_server_name`: the name of the server +* `__meta_ovhcloud_dedicated_server_no_intervention`: whether datacenter intervention is disabled for the server * `__meta_ovhcloud_dedicated_server_os`: the operating system of the server * `__meta_ovhcloud_dedicated_server_rack`: the rack of the server * `__meta_ovhcloud_dedicated_server_reverse`: the reverse DNS name of the server From 87427682fd8d059614f71c1bc053c992b11d779d Mon Sep 17 00:00:00 2001 From: Pranshu Srivastava Date: Tue, 7 May 2024 22:11:59 +0530 Subject: [PATCH 07/47] bugfix: allow opting-out of multi-cluster setups Allow users to opt-out of the multi-cluster setup for Prometheus dashboard, in environments where it isn't applicable. Refer: https://github.com/prometheus/prometheus/pull/13180. Signed-off-by: Pranshu Srivastava --- .../prometheus-mixin/config.libsonnet | 5 ++ .../prometheus-mixin/dashboards.libsonnet | 81 +++++++++++++++---- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/documentation/prometheus-mixin/config.libsonnet b/documentation/prometheus-mixin/config.libsonnet index ab9079a5e3..70d46a2212 100644 --- a/documentation/prometheus-mixin/config.libsonnet +++ b/documentation/prometheus-mixin/config.libsonnet @@ -44,5 +44,10 @@ // The default refresh time for all dashboards, default to 60s refresh: '60s', }, + + // Opt-out of multi-cluster dashboards by overriding this. + showMultiCluster: true, + // The cluster label to infer the cluster name from. + clusterLabel: 'cluster', }, } diff --git a/documentation/prometheus-mixin/dashboards.libsonnet b/documentation/prometheus-mixin/dashboards.libsonnet index efe53dbac9..2bdd168cc9 100644 --- a/documentation/prometheus-mixin/dashboards.libsonnet +++ b/documentation/prometheus-mixin/dashboards.libsonnet @@ -10,21 +10,32 @@ local template = grafana.template; { grafanaDashboards+:: { 'prometheus.json': - g.dashboard( + local showMultiCluster = $._config.showMultiCluster; + local dashboard = g.dashboard( '%(prefix)sOverview' % $._config.grafanaPrometheus - ) - .addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'cluster') - .addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job') - .addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance') + ); + local templatedDashboard = if showMultiCluster then + dashboard + .addMultiTemplate('cluster', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, $._config.clusterLabel) + .addMultiTemplate('job', 'prometheus_build_info{cluster=~"$cluster"}', 'job') + .addMultiTemplate('instance', 'prometheus_build_info{cluster=~"$cluster", job=~"$job"}', 'instance') + else + dashboard + .addMultiTemplate('job', 'prometheus_build_info{%(prometheusSelector)s}' % $._config, 'job') + .addMultiTemplate('instance', 'prometheus_build_info{job=~"$job"}', 'instance'); + templatedDashboard .addRow( g.row('Prometheus Stats') .addPanel( g.panel('Prometheus Stats') + - g.tablePanel([ + g.tablePanel(if showMultiCluster then [ 'count by (cluster, job, instance, version) (prometheus_build_info{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', 'max by (cluster, job, instance) (time() - process_start_time_seconds{cluster=~"$cluster", job=~"$job", instance=~"$instance"})', + ] else [ + 'count by (job, instance, version) (prometheus_build_info{job=~"$job", instance=~"$instance"})', + 'max by (job, instance) (time() - process_start_time_seconds{job=~"$job", instance=~"$instance"})', ], { - cluster: { alias: 'Cluster' }, + cluster: { alias: if showMultiCluster then 'Cluster' else '' }, job: { alias: 'Job' }, instance: { alias: 'Instance' }, version: { alias: 'Version' }, @@ -37,12 +48,18 @@ local template = grafana.template; g.row('Discovery') .addPanel( g.panel('Target Sync') + - g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3', '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}') + + g.queryPanel(if showMultiCluster then 'sum(rate(prometheus_target_sync_length_seconds_sum{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[5m])) by (cluster, job, scrape_job, instance) * 1e3' + else 'sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}}:{{scrape_job}}' + else '{{scrape_job}}') + { yaxes: g.yaxes('ms') } ) .addPanel( g.panel('Targets') + - g.queryPanel('sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})', '{{cluster}}:{{job}}:{{instance}}') + + g.queryPanel(if showMultiCluster then 'sum by (cluster, job, instance) (prometheus_sd_discovered_targets{cluster=~"$cluster", job=~"$job",instance=~"$instance"})' + else 'sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}}' + else 'Targets') + g.stack ) ) @@ -50,29 +67,47 @@ local template = grafana.template; g.row('Retrieval') .addPanel( g.panel('Average Scrape Interval Duration') + - g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{cluster}}:{{job}}:{{instance}} {{interval}} configured') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_target_interval_length_seconds_sum{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m]) * 1e3' + else 'rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', + if showMultiCluster then '{{cluster}}:{{job}}:{{instance}} {{interval}} configured' + else '{{interval}} configured') + { yaxes: g.yaxes('ms') } ) .addPanel( g.panel('Scrape failures') + - g.queryPanel([ + g.queryPanel(if showMultiCluster then [ 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_exceeded_sample_limit_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_bounds_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', 'sum by (cluster, job, instance) (rate(prometheus_target_scrapes_sample_out_of_order_total{cluster=~"$cluster",job=~"$job",instance=~"$instance"}[1m]))', - ], [ + ] else [ + 'sum by (job) (rate(prometheus_target_scrapes_exceeded_body_size_limit_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_exceeded_sample_limit_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_duplicate_timestamp_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_bounds_total[1m]))', + 'sum by (job) (rate(prometheus_target_scrapes_sample_out_of_order_total[1m]))', + ], if showMultiCluster then [ 'exceeded body size limit: {{cluster}} {{job}} {{instance}}', 'exceeded sample limit: {{cluster}} {{job}} {{instance}}', 'duplicate timestamp: {{cluster}} {{job}} {{instance}}', 'out of bounds: {{cluster}} {{job}} {{instance}}', 'out of order: {{cluster}} {{job}} {{instance}}', + ] else [ + 'exceeded body size limit: {{job}}', + 'exceeded sample limit: {{job}}', + 'duplicate timestamp: {{job}}', + 'out of bounds: {{job}}', + 'out of order: {{job}}', ]) + g.stack ) .addPanel( g.panel('Appended Samples') + - g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])', '{{cluster}} {{job}} {{instance}}') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_tsdb_head_samples_appended_total{cluster=~"$cluster", job=~"$job",instance=~"$instance"}[5m])' + else 'rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', + if showMultiCluster then '{{cluster}} {{job}} {{instance}}' + else '{{job}} {{instance}}') + g.stack ) ) @@ -80,12 +115,18 @@ local template = grafana.template; g.row('Storage') .addPanel( g.panel('Head Series') + - g.queryPanel('prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head series') + + g.queryPanel(if showMultiCluster then 'prometheus_tsdb_head_series{cluster=~"$cluster",job=~"$job",instance=~"$instance"}' + else 'prometheus_tsdb_head_series{job=~"$job",instance=~"$instance"}', + if showMultiCluster then '{{cluster}} {{job}} {{instance}} head series' + else '{{job}} {{instance}} head series') + g.stack ) .addPanel( g.panel('Head Chunks') + - g.queryPanel('prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}', '{{cluster}} {{job}} {{instance}} head chunks') + + g.queryPanel(if showMultiCluster then 'prometheus_tsdb_head_chunks{cluster=~"$cluster",job=~"$job",instance=~"$instance"}' + else 'prometheus_tsdb_head_chunks{job=~"$job",instance=~"$instance"}', + if showMultiCluster then '{{cluster}} {{job}} {{instance}} head chunks' + else '{{job}} {{instance}} head chunks') + g.stack ) ) @@ -93,12 +134,18 @@ local template = grafana.template; g.row('Query') .addPanel( g.panel('Query Rate') + - g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{cluster}} {{job}} {{instance}}') + + g.queryPanel(if showMultiCluster then 'rate(prometheus_engine_query_duration_seconds_count{cluster=~"$cluster",job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])' + else 'rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', + if showMultiCluster then '{{cluster}} {{job}} {{instance}}' + else '{{job}} {{instance}}') + g.stack, ) .addPanel( g.panel('Stage Duration') + - g.queryPanel('max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3', '{{slice}}') + + g.queryPanel(if showMultiCluster then 'max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",cluster=~"$cluster", job=~"$job",instance=~"$instance"}) * 1e3' + else 'max by (slice) (prometheus_engine_query_duration_seconds{quantile="0.9",job=~"$job",instance=~"$instance"}) * 1e3', + if showMultiCluster then '{{slice}}' + else '{{slice}}') + { yaxes: g.yaxes('ms') } + g.stack, ) From dabd789fd5d3a027ca070e175bd94e295edfe4d2 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Wed, 8 May 2024 16:24:58 +0200 Subject: [PATCH 08/47] Quote label name in matchers when needed When the label name of a matcher contains non-standard characters, like a dot, or starts with a digit, it should be quoted. If it's not quoted, then `VectorSelector.String()` isn't a valid PromQL. Signed-off-by: Oleg Zaytsev --- model/labels/matcher.go | 15 +++- model/labels/matcher_test.go | 126 ++++++++++++++++++++++++++++++++++ promql/parser/printer_test.go | 10 +++ 3 files changed, 150 insertions(+), 1 deletion(-) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index 1282f80d63..291eac1c73 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -78,7 +78,20 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + if !m.shouldQuoteName() { + return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + } + return fmt.Sprintf("%q%s%q", m.Name, m.Type, m.Value) +} + +func (m *Matcher) shouldQuoteName() bool { + for i, c := range m.Name { + if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (i > 0 && c >= '0' && c <= '9') { + continue + } + return true + } + return false } // Matches returns whether the matcher matches the given string value. diff --git a/model/labels/matcher_test.go b/model/labels/matcher_test.go index c23deafe61..ff39d40d0f 100644 --- a/model/labels/matcher_test.go +++ b/model/labels/matcher_test.go @@ -15,6 +15,7 @@ package labels import ( "fmt" + "math/rand" "testing" "github.com/stretchr/testify/require" @@ -225,3 +226,128 @@ func BenchmarkNewMatcher(b *testing.B) { } }) } + +func BenchmarkMatcher_String(b *testing.B) { + type benchCase struct { + name string + matchers []*Matcher + } + cases := []benchCase{ + { + name: "short name equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", "bar"), + MustNewMatcher(MatchEqual, "bar", "baz"), + MustNewMatcher(MatchEqual, "abc", "def"), + MustNewMatcher(MatchEqual, "ghi", "klm"), + MustNewMatcher(MatchEqual, "nop", "qrs"), + }, + }, + { + name: "short quoted name not equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "f.o", "bar"), + MustNewMatcher(MatchEqual, "b.r", "baz"), + MustNewMatcher(MatchEqual, "a.c", "def"), + MustNewMatcher(MatchEqual, "g.i", "klm"), + MustNewMatcher(MatchEqual, "n.p", "qrs"), + }, + }, + { + name: "short quoted name with quotes not equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, `"foo"`, "bar"), + MustNewMatcher(MatchEqual, `"foo"`, "baz"), + MustNewMatcher(MatchEqual, `"foo"`, "def"), + MustNewMatcher(MatchEqual, `"foo"`, "klm"), + MustNewMatcher(MatchEqual, `"foo"`, "qrs"), + }, + }, + { + name: "short name value with quotes equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", `"bar"`), + MustNewMatcher(MatchEqual, "bar", `"baz"`), + MustNewMatcher(MatchEqual, "abc", `"def"`), + MustNewMatcher(MatchEqual, "ghi", `"klm"`), + MustNewMatcher(MatchEqual, "nop", `"qrs"`), + }, + }, + { + name: "short name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "foo", "five_six_seven_eight_nine_ten_one_two_three_four"), + MustNewMatcher(MatchRegexp, "bar", "one_two_three_four_five_six_seven_eight_nine_ten"), + MustNewMatcher(MatchRegexp, "abc", "two_three_four_five_six_seven_eight_nine_ten_one"), + MustNewMatcher(MatchRegexp, "ghi", "three_four_five_six_seven_eight_nine_ten_one_two"), + MustNewMatcher(MatchRegexp, "nop", "four_five_six_seven_eight_nine_ten_one_two_three"), + }, + }, + { + name: "short name and long value with quotes equal", + matchers: []*Matcher{ + MustNewMatcher(MatchEqual, "foo", `five_six_seven_eight_nine_ten_"one"_two_three_four`), + MustNewMatcher(MatchEqual, "bar", `one_two_three_four_five_six_"seven"_eight_nine_ten`), + MustNewMatcher(MatchEqual, "abc", `two_three_four_five_six_seven_"eight"_nine_ten_one`), + MustNewMatcher(MatchEqual, "ghi", `three_four_five_six_seven_eight_"nine"_ten_one_two`), + MustNewMatcher(MatchEqual, "nop", `four_five_six_seven_eight_nine_"ten"_one_two_three`), + }, + }, + { + name: "long name regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one_two_three_four_five_six_seven_eight_nine_ten", "val"), + MustNewMatcher(MatchRegexp, "two_three_four_five_six_seven_eight_nine_ten_one", "val"), + MustNewMatcher(MatchRegexp, "three_four_five_six_seven_eight_nine_ten_one_two", "val"), + MustNewMatcher(MatchRegexp, "four_five_six_seven_eight_nine_ten_one_two_three", "val"), + MustNewMatcher(MatchRegexp, "five_six_seven_eight_nine_ten_one_two_three_four", "val"), + }, + }, + { + name: "long quoted name regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one.two.three.four.five.six.seven.eight.nine.ten", "val"), + MustNewMatcher(MatchRegexp, "two.three.four.five.six.seven.eight.nine.ten.one", "val"), + MustNewMatcher(MatchRegexp, "three.four.five.six.seven.eight.nine.ten.one.two", "val"), + MustNewMatcher(MatchRegexp, "four.five.six.seven.eight.nine.ten.one.two.three", "val"), + MustNewMatcher(MatchRegexp, "five.six.seven.eight.nine.ten.one.two.three.four", "val"), + }, + }, + { + name: "long name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one_two_three_four_five_six_seven_eight_nine_ten", "five_six_seven_eight_nine_ten_one_two_three_four"), + MustNewMatcher(MatchRegexp, "two_three_four_five_six_seven_eight_nine_ten_one", "one_two_three_four_five_six_seven_eight_nine_ten"), + MustNewMatcher(MatchRegexp, "three_four_five_six_seven_eight_nine_ten_one_two", "two_three_four_five_six_seven_eight_nine_ten_one"), + MustNewMatcher(MatchRegexp, "four_five_six_seven_eight_nine_ten_one_two_three", "three_four_five_six_seven_eight_nine_ten_one_two"), + MustNewMatcher(MatchRegexp, "five_six_seven_eight_nine_ten_one_two_three_four", "four_five_six_seven_eight_nine_ten_one_two_three"), + }, + }, + { + name: "long quoted name and long value regexp", + matchers: []*Matcher{ + MustNewMatcher(MatchRegexp, "one.two.three.four.five.six.seven.eight.nine.ten", "five.six.seven.eight.nine.ten.one.two.three.four"), + MustNewMatcher(MatchRegexp, "two.three.four.five.six.seven.eight.nine.ten.one", "one.two.three.four.five.six.seven.eight.nine.ten"), + MustNewMatcher(MatchRegexp, "three.four.five.six.seven.eight.nine.ten.one.two", "two.three.four.five.six.seven.eight.nine.ten.one"), + MustNewMatcher(MatchRegexp, "four.five.six.seven.eight.nine.ten.one.two.three", "three.four.five.six.seven.eight.nine.ten.one.two"), + MustNewMatcher(MatchRegexp, "five.six.seven.eight.nine.ten.one.two.three.four", "four.five.six.seven.eight.nine.ten.one.two.three"), + }, + }, + } + + var mixed []*Matcher + for _, bc := range cases { + mixed = append(mixed, bc.matchers...) + } + rand.Shuffle(len(mixed), func(i, j int) { mixed[i], mixed[j] = mixed[j], mixed[i] }) + cases = append(cases, benchCase{name: "mixed", matchers: mixed}) + + for _, bc := range cases { + b.Run(bc.name, func(b *testing.B) { + for i := 0; i <= b.N; i++ { + m := bc.matchers[i%len(bc.matchers)] + _ = m.String() + } + }) + } +} diff --git a/promql/parser/printer_test.go b/promql/parser/printer_test.go index 554fb69626..f224d841d0 100644 --- a/promql/parser/printer_test.go +++ b/promql/parser/printer_test.go @@ -138,6 +138,16 @@ func TestExprString(t *testing.T) { { in: `{__name__="",a="x"}`, }, + { + in: `{"a.b"="c"}`, + }, + { + in: `{"0"="1"}`, + }, + { + in: `{"_0"="1"}`, + out: `{_0="1"}`, + }, } for _, test := range inputs { From 6ebda5a7bc8c1f5611e41cc63a769bd9afc14592 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Wed, 8 May 2024 17:05:27 +0200 Subject: [PATCH 09/47] Optimize Matcher.String() Signed-off-by: Oleg Zaytsev --- model/labels/matcher.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index 291eac1c73..ce9e42471a 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -14,7 +14,8 @@ package labels import ( - "fmt" + "strconv" + "unsafe" ) // MatchType is an enum for label matching types. @@ -78,10 +79,20 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - if !m.shouldQuoteName() { - return fmt.Sprintf("%s%s%q", m.Name, m.Type, m.Value) + const quote = 1 + const matcher = 2 + // As we're not on go1.22 yet and we don't have the new fancy AvailableBuffer method on strings.Builder, + // we'll use a plain byte slice and then do the unsafe conversion to string just like strings.Builder does. + // We pre-allocate pessimistically for quoting the label name, and optimistically for not having to escape any quotes. + b := make([]byte, 0, quote+len(m.Name)+quote+matcher+quote+len(m.Value)+quote) + if m.shouldQuoteName() { + b = strconv.AppendQuote(b, m.Name) + } else { + b = append(b, m.Name...) } - return fmt.Sprintf("%q%s%q", m.Name, m.Type, m.Value) + b = append(b, m.Type.String()...) + b = strconv.AppendQuote(b, m.Value) + return *((*string)(unsafe.Pointer(&b))) } func (m *Matcher) shouldQuoteName() bool { From b7b4355807ca53dbff39dc9ed5baa80a8b34fed4 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Thu, 9 May 2024 10:00:24 +0200 Subject: [PATCH 10/47] Use bytes.Buffer from stack buf in Matcher.String() Also removed the growing until there's a benchmark for that. Signed-off-by: Oleg Zaytsev --- model/labels/matcher.go | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/model/labels/matcher.go b/model/labels/matcher.go index ce9e42471a..8e220e392d 100644 --- a/model/labels/matcher.go +++ b/model/labels/matcher.go @@ -14,8 +14,8 @@ package labels import ( + "bytes" "strconv" - "unsafe" ) // MatchType is an enum for label matching types. @@ -79,20 +79,19 @@ func MustNewMatcher(mt MatchType, name, val string) *Matcher { } func (m *Matcher) String() string { - const quote = 1 - const matcher = 2 - // As we're not on go1.22 yet and we don't have the new fancy AvailableBuffer method on strings.Builder, - // we'll use a plain byte slice and then do the unsafe conversion to string just like strings.Builder does. - // We pre-allocate pessimistically for quoting the label name, and optimistically for not having to escape any quotes. - b := make([]byte, 0, quote+len(m.Name)+quote+matcher+quote+len(m.Value)+quote) + // Start a buffer with a pre-allocated size on stack to cover most needs. + var bytea [1024]byte + b := bytes.NewBuffer(bytea[:0]) + if m.shouldQuoteName() { - b = strconv.AppendQuote(b, m.Name) + b.Write(strconv.AppendQuote(b.AvailableBuffer(), m.Name)) } else { - b = append(b, m.Name...) + b.WriteString(m.Name) } - b = append(b, m.Type.String()...) - b = strconv.AppendQuote(b, m.Value) - return *((*string)(unsafe.Pointer(&b))) + b.WriteString(m.Type.String()) + b.Write(strconv.AppendQuote(b.AvailableBuffer(), m.Value)) + + return b.String() } func (m *Matcher) shouldQuoteName() bool { From aabe4d6e4ab39d4d87611668312ec4d93616b5b8 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Mon, 29 Apr 2024 16:16:51 +0200 Subject: [PATCH 11/47] promql.ActiveQueryTracker: Unmap mmapped file when done Signed-off-by: Arve Knudsen --- promql/engine_test.go | 4 +++- promql/query_logger.go | 32 ++++++++++++++++++++++++++++---- promql/query_logger_test.go | 27 ++++++++++++++------------- 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index cc91855468..485239399d 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -59,7 +59,9 @@ func TestQueryConcurrency(t *testing.T) { require.NoError(t, err) defer os.RemoveAll(dir) queryTracker := promql.NewActiveQueryTracker(dir, maxConcurrency, nil) - t.Cleanup(queryTracker.Close) + t.Cleanup(func() { + require.NoError(t, queryTracker.Close()) + }) opts := promql.EngineOpts{ Logger: nil, diff --git a/promql/query_logger.go b/promql/query_logger.go index 7ddd8c2d5a..76528f9584 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -16,6 +16,7 @@ package promql import ( "context" "encoding/json" + "fmt" "io" "os" "path/filepath" @@ -36,6 +37,8 @@ type ActiveQueryTracker struct { maxConcurrent int } +var _ io.Closer = &ActiveQueryTracker{} + type Entry struct { Query string `json:"query"` Timestamp int64 `json:"timestamp_sec"` @@ -83,6 +86,23 @@ func logUnfinishedQueries(filename string, filesize int, logger log.Logger) { } } +type mmapedFile struct { + f io.Closer + m mmap.MMap +} + +func (f *mmapedFile) Close() error { + err := f.m.Unmap() + if fErr := f.f.Close(); fErr != nil && err == nil { + return fmt.Errorf("close mmapedFile.f: %w", fErr) + } + + if err != nil { + return fmt.Errorf("mmapedFile: unmapping: %w", err) + } + return nil +} + func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { file, err := os.OpenFile(filename, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0o666) if err != nil { @@ -108,7 +128,7 @@ func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io return nil, nil, err } - return fileAsBytes, file, err + return fileAsBytes, &mmapedFile{f: file, m: fileAsBytes}, err } func NewActiveQueryTracker(localStoragePath string, maxConcurrent int, logger log.Logger) *ActiveQueryTracker { @@ -204,9 +224,13 @@ func (tracker ActiveQueryTracker) Insert(ctx context.Context, query string) (int } } -func (tracker *ActiveQueryTracker) Close() { +// Close closes tracker. +func (tracker *ActiveQueryTracker) Close() error { if tracker == nil || tracker.closer == nil { - return + return nil } - tracker.closer.Close() + if err := tracker.closer.Close(); err != nil { + return fmt.Errorf("close ActiveQueryTracker.closer: %w", err) + } + return nil } diff --git a/promql/query_logger_test.go b/promql/query_logger_test.go index 376d61b641..7bd93781ec 100644 --- a/promql/query_logger_test.go +++ b/promql/query_logger_test.go @@ -16,6 +16,7 @@ package promql import ( "context" "os" + "path/filepath" "testing" "github.com/grafana/regexp" @@ -104,26 +105,26 @@ func TestIndexReuse(t *testing.T) { } func TestMMapFile(t *testing.T) { - file, err := os.CreateTemp("", "mmapedFile") + dir := t.TempDir() + fpath := filepath.Join(dir, "mmapedFile") + const data = "ab" + + fileAsBytes, closer, err := getMMapedFile(fpath, 2, nil) require.NoError(t, err) + copy(fileAsBytes, data) + require.NoError(t, closer.Close()) - filename := file.Name() - defer os.Remove(filename) - - fileAsBytes, _, err := getMMapedFile(filename, 2, nil) - - require.NoError(t, err) - copy(fileAsBytes, "ab") - - f, err := os.Open(filename) + f, err := os.Open(fpath) require.NoError(t, err) + t.Cleanup(func() { + _ = f.Close() + }) bytes := make([]byte, 4) n, err := f.Read(bytes) - require.Equal(t, 2, n) require.NoError(t, err, "Unexpected error while reading file.") - - require.Equal(t, fileAsBytes, bytes[:2], "Mmap failed") + require.Equal(t, 2, n) + require.Equal(t, []byte(data), bytes[:2], "Mmap failed") } func TestParseBrokenJSON(t *testing.T) { From fdfc6d472556171c7f95c9ac185803b4d5cabd89 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 15:36:55 +0200 Subject: [PATCH 12/47] Benchmark zeroOrOneCharacterStringMatcher.Matches This adds some more test cases for unicode values, and also a benchmark for zeroOrOneCharacterStringMatcher.Matches() Signed-off-by: Oleg Zaytsev --- model/labels/regexp_test.go | 80 +++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 12 deletions(-) diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 47d3eeb4a2..41160cc054 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -19,6 +19,7 @@ import ( "strings" "testing" "time" + "unicode/utf8" "github.com/grafana/regexp" "github.com/grafana/regexp/syntax" @@ -926,19 +927,74 @@ func BenchmarkOptimizeEqualStringMatchers(b *testing.B) { } func TestZeroOrOneCharacterStringMatcher(t *testing.T) { - matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} - require.True(t, matcher.Matches("")) - require.True(t, matcher.Matches("x")) - require.True(t, matcher.Matches("\n")) - require.False(t, matcher.Matches("xx")) - require.False(t, matcher.Matches("\n\n")) + t.Run("match newline", func(t *testing.T) { + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + require.True(t, matcher.Matches("")) + require.True(t, matcher.Matches("x")) + require.True(t, matcher.Matches("\n")) + require.False(t, matcher.Matches("xx")) + require.False(t, matcher.Matches("\n\n")) + }) - matcher = &zeroOrOneCharacterStringMatcher{matchNL: false} - require.True(t, matcher.Matches("")) - require.True(t, matcher.Matches("x")) - require.False(t, matcher.Matches("\n")) - require.False(t, matcher.Matches("xx")) - require.False(t, matcher.Matches("\n\n")) + t.Run("do not match newline", func(t *testing.T) { + matcher := &zeroOrOneCharacterStringMatcher{matchNL: false} + require.True(t, matcher.Matches("")) + require.True(t, matcher.Matches("x")) + require.False(t, matcher.Matches("\n")) + require.False(t, matcher.Matches("xx")) + require.False(t, matcher.Matches("\n\n")) + }) + + t.Run("unicode", func(t *testing.T) { + // Just for documentation purposes, emoji1 is 1 rune, emoji2 is 2 runes. + // Having this in mind, will make future readers fixing tests easier. + emoji1 := "😀" + emoji2 := "❤️" + require.Equal(t, 1, utf8.RuneCountInString(emoji1)) + require.Equal(t, 2, utf8.RuneCountInString(emoji2)) + + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + require.True(t, matcher.Matches(emoji1)) + require.False(t, matcher.Matches(emoji2)) + require.False(t, matcher.Matches(emoji1+emoji1)) + require.False(t, matcher.Matches("x"+emoji1)) + require.False(t, matcher.Matches(emoji1+"x")) + require.False(t, matcher.Matches(emoji1+emoji2)) + }) +} + +func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) { + type benchCase struct { + str string + matches bool + } + + emoji1 := "😀" + emoji2 := "❤️" + cases := []benchCase{ + {"", true}, + {"x", true}, + {"\n", true}, + {"xx", false}, + {"\n\n", false}, + {emoji1, true}, + {emoji2, false}, + {emoji1 + emoji1, false}, + {strings.Repeat("x", 100), false}, + {strings.Repeat(emoji1, 100), false}, + {strings.Repeat(emoji2, 100), false}, + } + + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + b.ResetTimer() + + for n := 0; n < b.N; n++ { + c := cases[n%len(cases)] + got := matcher.Matches(c.str) + if got != c.matches { + b.Fatalf("unexpected result for %q: got %t, want %t", c.str, got, c.matches) + } + } } func TestLiteralPrefixStringMatcher(t *testing.T) { From bcff5059e6764084cea51342efd138f86eb4da42 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 15:41:00 +0200 Subject: [PATCH 13/47] Use utf8.DecodeRuneInString(s) This replaces the custom `moreThanOneRune` function with the standard `utf8.DecodeRuneInString(s)` that can be used to figure out the size of the first rune. Signed-off-by: Oleg Zaytsev --- model/labels/regexp.go | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index 79e340984a..cc2ab366f1 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -828,7 +828,8 @@ type zeroOrOneCharacterStringMatcher struct { } func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { - if moreThanOneRune(s) { + // If there's more than one rune in the string, then it can't match. + if _, size := utf8.DecodeRuneInString(s); size < len(s) { return false } @@ -840,27 +841,6 @@ func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { return s[0] != '\n' } -// moreThanOneRune returns true if there are more than one runes in the string. -// It doesn't check whether the string is valid UTF-8. -// The return value should be always equal to utf8.RuneCountInString(s) > 1, -// but the function is optimized for the common case where the string prefix is ASCII. -func moreThanOneRune(s string) bool { - // If len(s) is exactly one or zero, there can't be more than one rune. - // Exit through this path quickly. - if len(s) <= 1 { - return false - } - - // There's one or more bytes: - // If first byte is ASCII then there are multiple runes if there are more bytes after that. - if s[0] < utf8.RuneSelf { - return len(s) > 1 - } - - // Less common case: first is a multibyte rune. - return utf8.RuneCountInString(s) > 1 -} - // trueMatcher is a stringMatcher which matches any string (always returns true). type trueMatcher struct{} From dbe88fae22d4e8106580e90f3aa4f328d7cb484c Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 17:05:31 +0200 Subject: [PATCH 14/47] Add invalid utf8 test cases to regexp Signed-off-by: Oleg Zaytsev --- model/labels/regexp_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index 41160cc054..e9f12d7b64 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -37,6 +37,7 @@ var ( ".*foo", "^.*foo$", "^.+foo$", + ".?", ".*", ".+", "foo.+", @@ -89,6 +90,12 @@ var ( // Values matching / not matching the test regexps on long alternations. "zQPbMkNO", "zQPbMkNo", "jyyfj00j0061", "jyyfj00j006", "jyyfj00j00612", "NNSPdvMi", "NNSPdvMiXXX", "NNSPdvMixxx", "nnSPdvMi", "nnSPdvMiXXX", + + // Invalid utf8 + "\xfefoo", + "foo\xfe", + "\xfd", + "\xff\xff", } ) From 8b4c9459a2226945e6596489e58dbbe4999e537b Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Mon, 13 May 2024 17:44:07 +0200 Subject: [PATCH 15/47] Check utf8.RuneError result Signed-off-by: Oleg Zaytsev --- model/labels/regexp.go | 6 +++++- model/labels/regexp_test.go | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/model/labels/regexp.go b/model/labels/regexp.go index cc2ab366f1..b484e27168 100644 --- a/model/labels/regexp.go +++ b/model/labels/regexp.go @@ -829,7 +829,11 @@ type zeroOrOneCharacterStringMatcher struct { func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { // If there's more than one rune in the string, then it can't match. - if _, size := utf8.DecodeRuneInString(s); size < len(s) { + if r, size := utf8.DecodeRuneInString(s); r == utf8.RuneError { + // Size is 0 for empty strings, 1 for invalid rune. + // Empty string matches, invalid rune matches if there isn't anything else. + return size == len(s) + } else if size < len(s) { return false } diff --git a/model/labels/regexp_test.go b/model/labels/regexp_test.go index e9f12d7b64..1db90a473d 100644 --- a/model/labels/regexp_test.go +++ b/model/labels/regexp_test.go @@ -968,6 +968,23 @@ func TestZeroOrOneCharacterStringMatcher(t *testing.T) { require.False(t, matcher.Matches(emoji1+"x")) require.False(t, matcher.Matches(emoji1+emoji2)) }) + + t.Run("invalid unicode", func(t *testing.T) { + // Just for reference, we also compare to what `^.?$` regular expression matches. + re := regexp.MustCompile("^.?$") + matcher := &zeroOrOneCharacterStringMatcher{matchNL: true} + + requireMatches := func(s string, expected bool) { + t.Helper() + require.Equal(t, expected, matcher.Matches(s)) + require.Equal(t, re.MatchString(s), matcher.Matches(s)) + } + + requireMatches("\xff", true) + requireMatches("x\xff", false) + requireMatches("\xffx", false) + requireMatches("\xff\xfe", false) + }) } func BenchmarkZeroOrOneCharacterStringMatcher(b *testing.B) { From 548bd9d6fbc7baba9dc3306ae495a5ba9266c052 Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Mon, 26 Feb 2024 01:16:26 +0530 Subject: [PATCH 16/47] adds TestNativeHistogramRate func to promql test framework Signed-off-by: Neeraj Gartia --- promql/promqltest/testdata/native_histograms.test | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 1da68a385f..281dce7864 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -269,3 +269,13 @@ eval instant at 50m histogram_sum(sum(incr_sum_histogram)) eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 + +# Test Native histogram rate func +load 5m + test_histogram {{schema:1 count:5 sum:20 buckets:[1 2 1 1]}}+{{schema:1 count:10 sum:5 buckets:[1 2 3 4]}}x10 + +eval instant at 50m rate(test_histogram[5m]) + {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} + +eval instant at 50m rate(test_histogram[10m]) + {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} From 8b838a05d99080a023093099feb4b694f21b774a Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Fri, 22 Mar 2024 03:17:47 +0530 Subject: [PATCH 17/47] adds test for native histogram rate func in promql testing framework Signed-off-by: Neeraj Gartia --- promql/promqltest/testdata/native_histograms.test | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 281dce7864..a5e2af3447 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -271,11 +271,8 @@ eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 # Test Native histogram rate func -load 5m - test_histogram {{schema:1 count:5 sum:20 buckets:[1 2 1 1]}}+{{schema:1 count:10 sum:5 buckets:[1 2 3 4]}}x10 +load 15s + test_histogram {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 -eval instant at 50m rate(test_histogram[5m]) - {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} - -eval instant at 50m rate(test_histogram[10m]) - {} {{schema:1 count:0.03333333333333333 sum:0.016666666666666666 buckets:[0.0033333333333333335 0.006666666666666667 0.01 0.013333333333333334]}} +eval instant at 5m rate(test_histogram[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} \ No newline at end of file From adf5a36c1ef3056750013a565b6c44e3c822accf Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Thu, 11 Apr 2024 01:47:09 +0530 Subject: [PATCH 18/47] adds test for sum, count, stddev, stdvar, quantile and fraction func to promql testing framework Signed-off-by: Neeraj Gartia --- .../testdata/native_histograms.test | 456 +++++++++++++++++- 1 file changed, 452 insertions(+), 4 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index a5e2af3447..8df633819a 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -270,9 +270,457 @@ eval instant at 50m histogram_sum(sum(incr_sum_histogram)) eval instant at 50m histogram_sum(sum(last_over_time(incr_sum_histogram[5m]))) {} 30 -# Test Native histogram rate func +# Apply rate function to histogram. load 15s - test_histogram {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 + histogram_rate {{schema:1 count:12 sum:18.4 z_bucket:2 z_bucket_w:0.001 buckets:[1 2 0 1 1] n_buckets:[1 2 0 1 1]}}+{{schema:1 count:9 sum:18.4 z_bucket:1 z_bucket_w:0.001 buckets:[1 1 0 1 1] n_buckets:[1 1 0 1 1]}}x100 -eval instant at 5m rate(test_histogram[45s]) - {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} \ No newline at end of file +eval instant at 5m rate(histogram_rate[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}} + +eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) + {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 + +# Apply count and sum function to empty histogram. +load 10m + histogram_count_sum_1 {{}}x1 + +eval instant at 10m histogram_count(histogram_count_sum_1) + {} NaN + +eval instant at 10m histogram_sum(histogram_count_sum_1) + {} NaN + +# Apply count and sum function to non-empty histogram. +load 10m + histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_count(histogram_count_sum_2) + {} 24 + +eval instant at 10m histogram_sum(histogram_count_sum_2) + {} 100 + +# Apply stddev and stdvar function to histogram with {1, 2, 3, 4} (low res). +load 10m + histogram_stddev_stdvar_1 {{schema:2 count:4 sum:10 buckets:[1 0 0 0 1 0 0 1 1]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_1) + {} 1.0787993180043811 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_1) + {} 1.163807968526718 + +# Apply stddev and stdvar function to histogram with {1, 1, 1, 1} (high res). +load 10m + histogram_stddev_stdvar_2 {{schema:8 count:10 sum:10 buckets:[1 2 3 4]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_2) + {} 0.0048960313898237465 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_2) + {} 2.3971123370139447e-05 + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9}. +load 10m + histogram_stddev_stdvar_3 {{schema:3 count:7 sum:62 z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_3) + {} 42.947236400258 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_3) + {} 1844.4651144196398 + +# Apply stddev and stdvar function to histogram with {-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3}. +load 10m + histogram_stddev_stdvar_4 {{schema:0 count:10 sum:-112946 z_bucket:0 n_buckets:[0 0 1 1 1 0 1 1 0 0 3 0 0 0 1 0 0 1]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_4) + {} 27556.344499842 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_4) + {} 759352122.1939945 + +# Apply stddev and stdvar function to histogram with {-10x10}. +load 10m + histogram_stddev_stdvar_5 {{schema:0 count:10 sum:-100 z_bucket:0 n_buckets:[0 0 0 0 10]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_5) + {} 1.3137084989848 + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_5) + {} 1.725830020304794 + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, NaN}. +load 10m + histogram_stddev_stdvar_6 {{schema:3 count:7 sum:NaN z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_6) + {} NaN + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_6) + {} NaN + +# Apply stddev and stdvar function to histogram with {-50, -8, 0, 3, 8, 9, Inf}. +load 10m + histogram_stddev_stdvar_7 {{schema:3 count:7 sum:Inf z_bucket:1 buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ] n_buckets:[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 ]}}x1 + +eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) + {} NaN + +eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) + {} NaN + +# Apply quantile function to histogram with all positive buckets with zero bucket +load 10m + histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_1) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_1) + {} 16 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_1) + {} 15.759999999999998 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_1) + {} 13.600000000000001 + +eval instant at 10m histogram_quantile(0.6, histogram_quantile_1) + {} 4.799999999999997 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_1) + {} 1.6666666666666665 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_1) + {} 0.0006000000000000001 + +eval instant at 10m histogram_quantile(0, histogram_quantile_1) + {} 0 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_1) + {} -Inf + +# Apply quantile function to histogram with all negative buckets with zero bucket +load 10m + histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_2) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_2) + {} 0 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_2) + {} -6.000000000000048e-05 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_2) + {} -0.0005999999999999996 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_2) + {} -1.6666666666666667 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_2) + {} -13.6 + +eval instant at 10m histogram_quantile(0, histogram_quantile_2) + {} -16 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_2) + {} -Inf + +# Apply quantile function to histogram with both positive and negative buckets with zero bucket +load 10m + histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_quantile(1.001, histogram_quantile_3) + {} Inf + +eval instant at 10m histogram_quantile(1, histogram_quantile_3) + {} 16 + +eval instant at 10m histogram_quantile(0.99, histogram_quantile_3) + {} 15.519999999999996 + +eval instant at 10m histogram_quantile(0.9, histogram_quantile_3) + {} 11.200000000000003 + +eval instant at 10m histogram_quantile(0.7, histogram_quantile_3) + {} 1.2666666666666657 + +eval instant at 10m histogram_quantile(0.55, histogram_quantile_3) + {} 0.0006000000000000005 + +eval instant at 10m histogram_quantile(0.5, histogram_quantile_3) + {} 0 + +eval instant at 10m histogram_quantile(0.45, histogram_quantile_3) + {} -0.0005999999999999996 + +eval instant at 10m histogram_quantile(0.3, histogram_quantile_3) + {} -1.266666666666667 + +eval instant at 10m histogram_quantile(0.1, histogram_quantile_3) + {} -11.2 + +eval instant at 10m histogram_quantile(0.01, histogram_quantile_3) + {} -15.52 + +eval instant at 10m histogram_quantile(0, histogram_quantile_3) + {} -16 + +eval instant at 10m histogram_quantile(-1, histogram_quantile_3) + {} -Inf + +# Apply fraction function to empty histogram. +load 10m + histogram_fraction_1 {{}}x1 + +eval instant at 10m histogram_fraction(3.1415, 42, histogram_fraction_1) + {} NaN + +# Apply fraction function to histogram with positive and zero buckets. +load 10m + histogram_fraction_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_2) + {} 1 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_2) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(0, 0.0005, histogram_fraction_2) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_2) + {} 0.8333333333333334 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_2) + {} 0.25 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_2) + {} 0.125 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_2) + {} 0.3333333333333333 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_2) + {} 0.2916666666666667 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_2) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_2) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_2) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_2) + {} 1 + +# Apply fraction function to histogram with negative and zero buckets. +load 10m + histogram_fraction_3 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_3) + {} 1 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_3) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-0.0005, 0, histogram_fraction_3) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_3) + {} 0.8333333333333334 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_3) + {} 0.25 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_3) + {} 0.125 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_3) + {} 0.3333333333333333 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_3) + {} 0.2916666666666667 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_3) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_3) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_3) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_3) + {} 1 + +# Apply fraction function to histogram with both positive, negative and zero buckets. +load 10m + histogram_fraction_4 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 + +eval instant at 10m histogram_fraction(0, +Inf, histogram_fraction_4) + {} 0.5 + +eval instant at 10m histogram_fraction(-Inf, 0, histogram_fraction_4) + {} 0.5 + +eval instant at 10m histogram_fraction(-0.001, 0, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0, 0.001, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(-0.0005, 0.0005, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(0.001, inf, histogram_fraction_4) + {} 0.4166666666666667 + +eval instant at 10m histogram_fraction(-inf, -0.001, histogram_fraction_4) + {} 0.4166666666666667 + +eval instant at 10m histogram_fraction(1, 2, histogram_fraction_4) + {} 0.125 + +eval instant at 10m histogram_fraction(1.5, 2, histogram_fraction_4) + {} 0.0625 + +eval instant at 10m histogram_fraction(1, 8, histogram_fraction_4) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(1, 6, histogram_fraction_4) + {} 0.14583333333333334 + +eval instant at 10m histogram_fraction(1.5, 6, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(-2, -1, histogram_fraction_4) + {} 0.125 + +eval instant at 10m histogram_fraction(-2, -1.5, histogram_fraction_4) + {} 0.0625 + +eval instant at 10m histogram_fraction(-8, -1, histogram_fraction_4) + {} 0.16666666666666666 + +eval instant at 10m histogram_fraction(-6, -1, histogram_fraction_4) + {} 0.14583333333333334 + +eval instant at 10m histogram_fraction(-6, -1.5, histogram_fraction_4) + {} 0.08333333333333333 + +eval instant at 10m histogram_fraction(42, 3.1415, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(0, 0, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(0.000001, 0.000001, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(42, 42, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(-3.1, -3.1, histogram_fraction_4) + {} 0 + +eval instant at 10m histogram_fraction(3.1415, NaN, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(NaN, 42, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_4) + {} NaN + +eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4) + {} 1 \ No newline at end of file From 6119124d0ee6c656fa0768500fae63a01da46061 Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Thu, 11 Apr 2024 18:13:49 +0530 Subject: [PATCH 19/47] some nits Signed-off-by: Neeraj Gartia --- .../promqltest/testdata/native_histograms.test | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 8df633819a..7fc0403e3b 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -280,17 +280,7 @@ eval instant at 5m rate(histogram_rate[45s]) eval range from 5m to 5m30s step 30s rate(histogram_rate[45s]) {} {{schema:1 count:0.6 sum:1.2266666666666652 z_bucket:0.06666666666666667 z_bucket_w:0.001 buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667] n_buckets:[0.06666666666666667 0.06666666666666667 0 0.06666666666666667 0.06666666666666667]}}x1 -# Apply count and sum function to empty histogram. -load 10m - histogram_count_sum_1 {{}}x1 - -eval instant at 10m histogram_count(histogram_count_sum_1) - {} NaN - -eval instant at 10m histogram_sum(histogram_count_sum_1) - {} NaN - -# Apply count and sum function to non-empty histogram. +# Apply count and sum function to histogram. load 10m histogram_count_sum_2 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 @@ -370,7 +360,7 @@ eval instant at 10m histogram_stddev(histogram_stddev_stdvar_7) eval instant at 10m histogram_stdvar(histogram_stddev_stdvar_7) {} NaN -# Apply quantile function to histogram with all positive buckets with zero bucket +# Apply quantile function to histogram with all positive buckets with zero bucket. load 10m histogram_quantile_1 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 buckets:[2 3 0 1 4]}}x1 @@ -401,7 +391,7 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_1) eval instant at 10m histogram_quantile(-1, histogram_quantile_1) {} -Inf -# Apply quantile function to histogram with all negative buckets with zero bucket +# Apply quantile function to histogram with all negative buckets with zero bucket. load 10m histogram_quantile_2 {{schema:0 count:12 sum:100 z_bucket:2 z_bucket_w:0.001 n_buckets:[2 3 0 1 4]}}x1 @@ -429,7 +419,7 @@ eval instant at 10m histogram_quantile(0, histogram_quantile_2) eval instant at 10m histogram_quantile(-1, histogram_quantile_2) {} -Inf -# Apply quantile function to histogram with both positive and negative buckets with zero bucket +# Apply quantile function to histogram with both positive and negative buckets with zero bucket. load 10m histogram_quantile_3 {{schema:0 count:24 sum:100 z_bucket:4 z_bucket_w:0.001 buckets:[2 3 0 1 4] n_buckets:[2 3 0 1 4]}}x1 From 661856cb6596eabd0d5545fe16f8f09c1b98360d Mon Sep 17 00:00:00 2001 From: Neeraj Gartia Date: Tue, 7 May 2024 00:23:47 +0530 Subject: [PATCH 20/47] removes the added tests from engine_test.go Signed-off-by: Neeraj Gartia --- promql/engine_test.go | 1072 ----------------- .../testdata/native_histograms.test | 2 +- 2 files changed, 1 insertion(+), 1073 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index cc91855468..45e0054f06 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -34,8 +34,6 @@ import ( "github.com/prometheus/prometheus/promql/parser/posrange" "github.com/prometheus/prometheus/promql/promqltest" "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/tsdb/tsdbutil" - "github.com/prometheus/prometheus/util/almost" "github.com/prometheus/prometheus/util/annotations" "github.com/prometheus/prometheus/util/stats" "github.com/prometheus/prometheus/util/teststorage" @@ -3209,1076 +3207,6 @@ func TestRangeQuery(t *testing.T) { } } -func TestNativeHistogramRate(t *testing.T) { - // TODO(beorn7): Integrate histograms into the PromQL testing framework - // and write more tests there. - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - app := storage.Appender(context.Background()) - for i, h := range tsdbutil.GenerateTestHistograms(100) { - _, err := app.AppendHistogram(0, lbls, int64(i)*int64(15*time.Second/time.Millisecond), h, nil) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("rate(%s[45s])", seriesName) - t.Run("instant_query", func(t *testing.T) { - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) - }) - - t.Run("range_query", func(t *testing.T) { - step := 30 * time.Second - start := timestamp.Time(int64(5 * time.Minute / time.Millisecond)) - end := start.Add(step) - qry, err := engine.NewRangeQuery(context.Background(), storage, nil, queryString, start, end, step) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - matrix, err := res.Matrix() - require.NoError(t, err) - require.Len(t, matrix, 1) - require.Len(t, matrix[0].Histograms, 2) - actualHistograms := matrix[0].Histograms - expectedHistograms := []promql.HPoint{{ - T: 300000, - H: &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - }, - }, { - T: 330000, - H: &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.2266666666666663, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - }, - }} - require.Equal(t, expectedHistograms, actualHistograms) - }) -} - -func TestNativeFloatHistogramRate(t *testing.T) { - // TODO(beorn7): Integrate histograms into the PromQL testing framework - // and write more tests there. - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - app := storage.Appender(context.Background()) - for i, fh := range tsdbutil.GenerateTestFloatHistograms(100) { - _, err := app.AppendHistogram(0, lbls, int64(i)*int64(15*time.Second/time.Millisecond), nil, fh) - require.NoError(t, err) - } - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("rate(%s[1m])", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(int64(5*time.Minute/time.Millisecond))) - require.NoError(t, err) - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - vector, err := res.Vector() - require.NoError(t, err) - require.Len(t, vector, 1) - actualHistogram := vector[0].H - expectedHistogram := &histogram.FloatHistogram{ - CounterResetHint: histogram.GaugeType, - Schema: 1, - ZeroThreshold: 0.001, - ZeroCount: 1. / 15., - Count: 9. / 15., - Sum: 1.226666666666667, - PositiveSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - PositiveBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - NegativeSpans: []histogram.Span{{Offset: 0, Length: 2}, {Offset: 1, Length: 2}}, - NegativeBuckets: []float64{1. / 15., 1. / 15., 1. / 15., 1. / 15.}, - } - require.Equal(t, expectedHistogram, actualHistogram) -} - -func TestNativeHistogram_HistogramCountAndSum(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - h := &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - } - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("floatHistogram=%t", floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := int64(10 * time.Minute / time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("histogram_count(%s)", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if floatHisto { - require.Equal(t, h.ToFloat(nil).Count, vector[0].F) - } else { - require.Equal(t, float64(h.Count), vector[0].F) - } - - queryString = fmt.Sprintf("histogram_sum(%s)", seriesName) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res = qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err = res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if floatHisto { - require.Equal(t, h.ToFloat(nil).Sum, vector[0].F) - } else { - require.Equal(t, h.Sum, vector[0].F) - } - }) - } -} - -func TestNativeHistogram_HistogramStdDevVar(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - testCases := []struct { - name string - h *histogram.Histogram - stdVar float64 - }{ - { - name: "1, 2, 3, 4 low-res", - h: &histogram.Histogram{ - Count: 4, - Sum: 10, - Schema: 2, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 3, Length: 1}, - {Offset: 2, Length: 2}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - }, - stdVar: 1.163807968526718, // actual variance: 1.25 - }, - { - name: "1, 2, 3, 4 hi-res", - h: &histogram.Histogram{ - Count: 4, - Sum: 10, - Schema: 8, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 1}, - {Offset: 255, Length: 1}, - {Offset: 149, Length: 1}, - {Offset: 105, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - }, - stdVar: 1.2471347737158793, // actual variance: 1.25 - }, - { - name: "-50, -8, 0, 3, 8, 9, 100", - h: &histogram.Histogram{ - Count: 7, - ZeroCount: 1, - Sum: 62, - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: 1844.4651144196398, // actual variance: 1738.4082 - }, - { - name: "-100000, -10000, -1000, -888, -888, -100, -50, -9, -8, -3", - h: &histogram.Histogram{ - Count: 10, - ZeroCount: 0, - Sum: -112946, - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 2, Length: 3}, - {Offset: 1, Length: 2}, - {Offset: 2, Length: 1}, - {Offset: 3, Length: 1}, - {Offset: 2, Length: 1}, - }, - NegativeBuckets: []int64{1, 0, 0, 0, 0, 2, -2, 0}, - }, - stdVar: 759352122.1939945, // actual variance: 882690990 - }, - { - name: "-10 x10", - h: &histogram.Histogram{ - Count: 10, - ZeroCount: 0, - Sum: -100, - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 4, Length: 1}, - }, - NegativeBuckets: []int64{10}, - }, - stdVar: 1.725830020304794, // actual variance: 0 - }, - { - name: "-50, -8, 0, 3, 8, 9, 100, NaN", - h: &histogram.Histogram{ - Count: 8, - ZeroCount: 1, - Sum: math.NaN(), - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: math.NaN(), - }, - { - name: "-50, -8, 0, 3, 8, 9, 100, +Inf", - h: &histogram.Histogram{ - Count: 7, - ZeroCount: 1, - Sum: math.Inf(1), - Schema: 3, - PositiveSpans: []histogram.Span{ - {Offset: 13, Length: 1}, - {Offset: 10, Length: 1}, - {Offset: 1, Length: 1}, - {Offset: 27, Length: 1}, - }, - PositiveBuckets: []int64{1, 0, 0, 0}, - NegativeSpans: []histogram.Span{ - {Offset: 24, Length: 1}, - {Offset: 21, Length: 1}, - }, - NegativeBuckets: []int64{1, 0}, - }, - stdVar: math.NaN(), - }, - } - for _, tc := range testCases { - for _, floatHisto := range []bool{true, false} { - t.Run(fmt.Sprintf("%s floatHistogram=%t", tc.name, floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := int64(10 * time.Minute / time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, tc.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, tc.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - queryString := fmt.Sprintf("histogram_stdvar(%s)", seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.InEpsilon(t, tc.stdVar, vector[0].F, 1e-12) - - queryString = fmt.Sprintf("histogram_stddev(%s)", seriesName) - qry, err = engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res = qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err = res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.InEpsilon(t, math.Sqrt(tc.stdVar), vector[0].F, 1e-12) - }) - } - } -} - -func TestNativeHistogram_HistogramQuantile(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - type subCase struct { - quantile string - value float64 - } - - cases := []struct { - text string - // Histogram to test. - h *histogram.Histogram - // Different quantiles to test for this histogram. - subCases []subCase - }{ - { - text: "all positive buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { - quantile: "1", - value: 16, - }, - { - quantile: "0.99", - value: 15.759999999999998, - }, - { - quantile: "0.9", - value: 13.600000000000001, - }, - { - quantile: "0.6", - value: 4.799999999999997, - }, - { - quantile: "0.5", - value: 1.6666666666666665, - }, - { // Zero bucket. - quantile: "0.1", - value: 0.0006000000000000001, - }, - { - quantile: "0", - value: 0, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - { - text: "all negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { // Zero bucket. - quantile: "1", - value: 0, - }, - { // Zero bucket. - quantile: "0.99", - value: -6.000000000000048e-05, - }, - { // Zero bucket. - quantile: "0.9", - value: -0.0005999999999999996, - }, - { - quantile: "0.5", - value: -1.6666666666666667, - }, - { - quantile: "0.1", - value: -13.6, - }, - { - quantile: "0", - value: -16, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - { - text: "both positive and negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: []subCase{ - { - quantile: "1.0001", - value: math.Inf(1), - }, - { - quantile: "1", - value: 16, - }, - { - quantile: "0.99", - value: 15.519999999999996, - }, - { - quantile: "0.9", - value: 11.200000000000003, - }, - { - quantile: "0.7", - value: 1.2666666666666657, - }, - { // Zero bucket. - quantile: "0.55", - value: 0.0006000000000000005, - }, - { // Zero bucket. - quantile: "0.5", - value: 0, - }, - { // Zero bucket. - quantile: "0.45", - value: -0.0005999999999999996, - }, - { - quantile: "0.3", - value: -1.266666666666667, - }, - { - quantile: "0.1", - value: -11.2, - }, - { - quantile: "0.01", - value: -15.52, - }, - { - quantile: "0", - value: -16, - }, - { - quantile: "-1", - value: math.Inf(-1), - }, - }, - }, - } - - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - idx := int64(0) - for _, floatHisto := range []bool{true, false} { - for _, c := range cases { - t.Run(fmt.Sprintf("%s floatHistogram=%t", c.text, floatHisto), func(t *testing.T) { - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - ts := idx * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - for j, sc := range c.subCases { - t.Run(fmt.Sprintf("%d %s", j, sc.quantile), func(t *testing.T) { - queryString := fmt.Sprintf("histogram_quantile(%s, %s)", sc.quantile, seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - require.True(t, almost.Equal(sc.value, vector[0].F, defaultEpsilon)) - }) - } - idx++ - }) - } - } -} - -func TestNativeHistogram_HistogramFraction(t *testing.T) { - // TODO(codesome): Integrate histograms into the PromQL testing framework - // and write more tests there. - type subCase struct { - lower, upper string - value float64 - } - - invariantCases := []subCase{ - { - lower: "42", - upper: "3.1415", - value: 0, - }, - { - lower: "0", - upper: "0", - value: 0, - }, - { - lower: "0.000001", - upper: "0.000001", - value: 0, - }, - { - lower: "42", - upper: "42", - value: 0, - }, - { - lower: "-3.1", - upper: "-3.1", - value: 0, - }, - { - lower: "3.1415", - upper: "NaN", - value: math.NaN(), - }, - { - lower: "NaN", - upper: "42", - value: math.NaN(), - }, - { - lower: "NaN", - upper: "NaN", - value: math.NaN(), - }, - { - lower: "-Inf", - upper: "+Inf", - value: 1, - }, - } - - cases := []struct { - text string - // Histogram to test. - h *histogram.Histogram - // Different ranges to test for this histogram. - subCases []subCase - }{ - { - text: "empty histogram", - h: &histogram.Histogram{}, - subCases: []subCase{ - { - lower: "3.1415", - upper: "42", - value: math.NaN(), - }, - }, - }, - { - text: "all positive buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, // Abs: 2, 3, 1, 4 - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 1, - }, - { - lower: "-Inf", - upper: "0", - value: 0, - }, - { - lower: "-0.001", - upper: "0", - value: 0, - }, - { - lower: "0", - upper: "0.001", - value: 2. / 12., - }, - { - lower: "0", - upper: "0.0005", - value: 1. / 12., - }, - { - lower: "0.001", - upper: "inf", - value: 10. / 12., - }, - { - lower: "-inf", - upper: "-0.001", - value: 0, - }, - { - lower: "1", - upper: "2", - value: 3. / 12., - }, - { - lower: "1.5", - upper: "2", - value: 1.5 / 12., - }, - { - lower: "1", - upper: "8", - value: 4. / 12., - }, - { - lower: "1", - upper: "6", - value: 3.5 / 12., - }, - { - lower: "1.5", - upper: "6", - value: 2. / 12., - }, - { - lower: "-2", - upper: "-1", - value: 0, - }, - { - lower: "-2", - upper: "-1.5", - value: 0, - }, - { - lower: "-8", - upper: "-1", - value: 0, - }, - { - lower: "-6", - upper: "-1", - value: 0, - }, - { - lower: "-6", - upper: "-1.5", - value: 0, - }, - }, invariantCases...), - }, - { - text: "all negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 12, - ZeroCount: 2, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 0, - }, - { - lower: "-Inf", - upper: "0", - value: 1, - }, - { - lower: "-0.001", - upper: "0", - value: 2. / 12., - }, - { - lower: "0", - upper: "0.001", - value: 0, - }, - { - lower: "-0.0005", - upper: "0", - value: 1. / 12., - }, - { - lower: "0.001", - upper: "inf", - value: 0, - }, - { - lower: "-inf", - upper: "-0.001", - value: 10. / 12., - }, - { - lower: "1", - upper: "2", - value: 0, - }, - { - lower: "1.5", - upper: "2", - value: 0, - }, - { - lower: "1", - upper: "8", - value: 0, - }, - { - lower: "1", - upper: "6", - value: 0, - }, - { - lower: "1.5", - upper: "6", - value: 0, - }, - { - lower: "-2", - upper: "-1", - value: 3. / 12., - }, - { - lower: "-2", - upper: "-1.5", - value: 1.5 / 12., - }, - { - lower: "-8", - upper: "-1", - value: 4. / 12., - }, - { - lower: "-6", - upper: "-1", - value: 3.5 / 12., - }, - { - lower: "-6", - upper: "-1.5", - value: 2. / 12., - }, - }, invariantCases...), - }, - { - text: "both positive and negative buckets with zero bucket", - h: &histogram.Histogram{ - Count: 24, - ZeroCount: 4, - ZeroThreshold: 0.001, - Sum: 100, // Does not matter. - Schema: 0, - PositiveSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - PositiveBuckets: []int64{2, 1, -2, 3}, - NegativeSpans: []histogram.Span{ - {Offset: 0, Length: 2}, - {Offset: 1, Length: 2}, - }, - NegativeBuckets: []int64{2, 1, -2, 3}, - }, - subCases: append([]subCase{ - { - lower: "0", - upper: "+Inf", - value: 0.5, - }, - { - lower: "-Inf", - upper: "0", - value: 0.5, - }, - { - lower: "-0.001", - upper: "0", - value: 2. / 24, - }, - { - lower: "0", - upper: "0.001", - value: 2. / 24., - }, - { - lower: "-0.0005", - upper: "0.0005", - value: 2. / 24., - }, - { - lower: "0.001", - upper: "inf", - value: 10. / 24., - }, - { - lower: "-inf", - upper: "-0.001", - value: 10. / 24., - }, - { - lower: "1", - upper: "2", - value: 3. / 24., - }, - { - lower: "1.5", - upper: "2", - value: 1.5 / 24., - }, - { - lower: "1", - upper: "8", - value: 4. / 24., - }, - { - lower: "1", - upper: "6", - value: 3.5 / 24., - }, - { - lower: "1.5", - upper: "6", - value: 2. / 24., - }, - { - lower: "-2", - upper: "-1", - value: 3. / 24., - }, - { - lower: "-2", - upper: "-1.5", - value: 1.5 / 24., - }, - { - lower: "-8", - upper: "-1", - value: 4. / 24., - }, - { - lower: "-6", - upper: "-1", - value: 3.5 / 24., - }, - { - lower: "-6", - upper: "-1.5", - value: 2. / 24., - }, - }, invariantCases...), - }, - } - idx := int64(0) - for _, floatHisto := range []bool{true, false} { - for _, c := range cases { - t.Run(fmt.Sprintf("%s floatHistogram=%t", c.text, floatHisto), func(t *testing.T) { - engine := newTestEngine() - storage := teststorage.New(t) - t.Cleanup(func() { storage.Close() }) - - seriesName := "sparse_histogram_series" - lbls := labels.FromStrings("__name__", seriesName) - - ts := idx * int64(10*time.Minute/time.Millisecond) - app := storage.Appender(context.Background()) - var err error - if floatHisto { - _, err = app.AppendHistogram(0, lbls, ts, nil, c.h.ToFloat(nil)) - } else { - _, err = app.AppendHistogram(0, lbls, ts, c.h, nil) - } - require.NoError(t, err) - require.NoError(t, app.Commit()) - - for j, sc := range c.subCases { - t.Run(fmt.Sprintf("%d %s %s", j, sc.lower, sc.upper), func(t *testing.T) { - queryString := fmt.Sprintf("histogram_fraction(%s, %s, %s)", sc.lower, sc.upper, seriesName) - qry, err := engine.NewInstantQuery(context.Background(), storage, nil, queryString, timestamp.Time(ts)) - require.NoError(t, err) - - res := qry.Exec(context.Background()) - require.NoError(t, res.Err) - - vector, err := res.Vector() - require.NoError(t, err) - - require.Len(t, vector, 1) - require.Nil(t, vector[0].H) - if math.IsNaN(sc.value) { - require.True(t, math.IsNaN(vector[0].F)) - return - } - require.Equal(t, sc.value, vector[0].F) - }) - } - idx++ - }) - } - } -} - func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { // TODO(codesome): Integrate histograms into the PromQL testing framework // and write more tests there. diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 7fc0403e3b..37818e4f88 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -713,4 +713,4 @@ eval instant at 10m histogram_fraction(NaN, NaN, histogram_fraction_4) {} NaN eval instant at 10m histogram_fraction(-Inf, +Inf, histogram_fraction_4) - {} 1 \ No newline at end of file + {} 1 From 10eb23bd6b0d3f4b4207e7af701fef85c1bd5fee Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Tue, 14 May 2024 17:45:39 +0100 Subject: [PATCH 21/47] [TEST] Rules: Sleep 15ms to fit Windows behaviour better On Windows, Go will sleep 15ms if you ask for less. TestAsyncRuleEvaluation compares actual delay to the nominal time, so using 15ms should work better on Windows, and be hardly noticeable elsewhere. Signed-off-by: Bryan Boreham --- rules/manager_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules/manager_test.go b/rules/manager_test.go index 1862b58075..94d0755f9d 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -2043,7 +2043,7 @@ func TestBoundedRuleEvalConcurrency(t *testing.T) { require.EqualValues(t, maxInflight.Load(), int32(maxConcurrency)+int32(groupCount)) } -const artificialDelay = 10 * time.Millisecond +const artificialDelay = 15 * time.Millisecond func optsFactory(storage storage.Storage, maxInflight, inflightQueries *atomic.Int32, maxConcurrent int64) *ManagerOptions { var inflightMu sync.Mutex From fdaafdb041dcefddd5dfc9e29edc19951b807fe4 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Wed, 15 May 2024 06:26:19 +0200 Subject: [PATCH 22/47] tsdb: check for context cancel before regex matching postings (#14096) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * tsdb: check for context cancel before regex matching postings Regex matching can be heavy if the regex takes a lot of cycles to evaluate and we can get stuck evaluating postings for a long time without this fix. The constant checkContextEveryNIterations=100 may be changed later. Signed-off-by: György Krajcsovits --- tsdb/index/index.go | 8 ++++ tsdb/index/index_test.go | 33 ++++++++++++++++ tsdb/index/postings.go | 5 +++ tsdb/index/postings_test.go | 17 +++++++++ tsdb/querier.go | 13 +++++++ tsdb/querier_test.go | 75 +++++++++++++++++++++++++++++++++++++ util/testutil/context.go | 27 ++++++++++++- 7 files changed, 177 insertions(+), 1 deletion(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index a36c33c4fb..4ded4cbe20 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -51,6 +51,9 @@ const ( indexFilename = "index" seriesByteAlign = 16 + + // checkContextEveryNIterations is used in some tight loops to check if the context is done. + checkContextEveryNIterations = 100 ) type indexWriterSeries struct { @@ -1797,7 +1800,12 @@ func (r *Reader) postingsForLabelMatchingV1(ctx context.Context, name string, ma } var its []Postings + count := 1 for val, offset := range e { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return ErrPostings(ctx.Err()) + } + count++ if !match(val) { continue } diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index c451c38dd2..22133d0b70 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -719,3 +719,36 @@ func TestChunksTimeOrdering(t *testing.T) { require.NoError(t, idx.Close()) } + +func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + dir := t.TempDir() + + idx, err := NewWriter(context.Background(), filepath.Join(dir, "index")) + require.NoError(t, err) + + seriesCount := 1000 + for i := 1; i <= seriesCount; i++ { + require.NoError(t, idx.AddSymbol(fmt.Sprintf("%4d", i))) + } + require.NoError(t, idx.AddSymbol("__name__")) + + for i := 1; i <= seriesCount; i++ { + require.NoError(t, idx.AddSeries(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), + chunks.Meta{Ref: 1, MinTime: 0, MaxTime: 10}, + )) + } + + require.NoError(t, idx.Close()) + + ir, err := NewFileReader(filepath.Join(dir, "index")) + require.NoError(t, err) + defer ir.Close() + + failAfter := uint64(seriesCount / 2) // Fail after processing half of the series. + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + p := ir.PostingsForLabelMatching(ctx, "__name__", func(string) bool { + return true + }) + require.Error(t, p.Err()) + require.Equal(t, failAfter, ctx.Count()) +} diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index 136b3441eb..c6cf20e0e5 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -416,7 +416,12 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, } var its []Postings + count := 1 for _, v := range vals { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return ErrPostings(ctx.Err()) + } + count++ if match(v) { its = append(its, NewListPostings(e[v])) } diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 9e6bd23f8c..cabca59774 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -28,6 +28,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/util/testutil" ) func TestMemPostings_addFor(t *testing.T) { @@ -1282,3 +1283,19 @@ func BenchmarkListPostings(b *testing.B) { }) } } + +func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + memP := NewMemPostings() + seriesCount := 10 * checkContextEveryNIterations + for i := 1; i <= seriesCount; i++ { + memP.Add(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i))) + } + + failAfter := uint64(seriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + p := memP.PostingsForLabelMatching(ctx, "__name__", func(string) bool { + return true + }) + require.Error(t, p.Err()) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. +} diff --git a/tsdb/querier.go b/tsdb/querier.go index 1170493beb..efd4daf26b 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -33,6 +33,9 @@ import ( "github.com/prometheus/prometheus/util/annotations" ) +// checkContextEveryNIterations is used in some tight loops to check if the context is done. +const checkContextEveryNIterations = 100 + type blockBaseQuerier struct { blockID ulid.ULID index IndexReader @@ -358,7 +361,12 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma if m.Type == labels.MatchEqual && m.Value == "" { res = vals } else { + count := 1 for _, val := range vals { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return nil, ctx.Err() + } + count++ if !m.Matches(val) { res = append(res, val) } @@ -387,7 +395,12 @@ func labelValuesWithMatchers(ctx context.Context, r IndexReader, name string, ma // re-use the allValues slice to avoid allocations // this is safe because the iteration is always ahead of the append filteredValues := allValues[:0] + count := 1 for _, v := range allValues { + if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + return nil, ctx.Err() + } + count++ if m.Matches(v) { filteredValues = append(filteredValues, v) } diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index 16de6373d0..bb13531d7d 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -38,6 +38,7 @@ import ( "github.com/prometheus/prometheus/tsdb/tombstones" "github.com/prometheus/prometheus/tsdb/tsdbutil" "github.com/prometheus/prometheus/util/annotations" + "github.com/prometheus/prometheus/util/testutil" ) // TODO(bwplotka): Replace those mocks with remote.concreteSeriesSet. @@ -3638,3 +3639,77 @@ func TestQueryWithOneChunkCompletelyDeleted(t *testing.T) { require.NoError(t, css.Err()) require.Equal(t, 1, seriesCount) } + +func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { + ir := mockReaderOfLabels{} + + failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + _, err := labelValuesWithMatchers(ctx, ir, "__name__", labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".+")) + + require.Error(t, err) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. +} + +func TestReader_InversePostingsForMatcherHonorsContextCancel(t *testing.T) { + ir := mockReaderOfLabels{} + + failAfter := uint64(mockReaderOfLabelsSeriesCount / 2 / checkContextEveryNIterations) + ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} + _, err := inversePostingsForMatcher(ctx, ir, labels.MustNewMatcher(labels.MatchRegexp, "__name__", ".*")) + + require.Error(t, err) + require.Equal(t, failAfter+1, ctx.Count()) // Plus one for the Err() call that puts the error in the result. +} + +type mockReaderOfLabels struct{} + +const mockReaderOfLabelsSeriesCount = checkContextEveryNIterations * 10 + +func (m mockReaderOfLabels) LabelValues(context.Context, string, ...*labels.Matcher) ([]string, error) { + return make([]string, mockReaderOfLabelsSeriesCount), nil +} + +func (m mockReaderOfLabels) LabelValueFor(context.Context, storage.SeriesRef, string) (string, error) { + panic("LabelValueFor called") +} + +func (m mockReaderOfLabels) SortedLabelValues(context.Context, string, ...*labels.Matcher) ([]string, error) { + panic("SortedLabelValues called") +} + +func (m mockReaderOfLabels) Close() error { + return nil +} + +func (m mockReaderOfLabels) LabelNames(context.Context, ...*labels.Matcher) ([]string, error) { + panic("LabelNames called") +} + +func (m mockReaderOfLabels) LabelNamesFor(context.Context, ...storage.SeriesRef) ([]string, error) { + panic("LabelNamesFor called") +} + +func (m mockReaderOfLabels) PostingsForLabelMatching(context.Context, string, func(string) bool) index.Postings { + panic("PostingsForLabelMatching called") +} + +func (m mockReaderOfLabels) Postings(context.Context, string, ...string) (index.Postings, error) { + panic("Postings called") +} + +func (m mockReaderOfLabels) ShardedPostings(index.Postings, uint64, uint64) index.Postings { + panic("Postings called") +} + +func (m mockReaderOfLabels) SortedPostings(index.Postings) index.Postings { + panic("SortedPostings called") +} + +func (m mockReaderOfLabels) Series(storage.SeriesRef, *labels.ScratchBuilder, *[]chunks.Meta) error { + panic("Series called") +} + +func (m mockReaderOfLabels) Symbols() index.StringIter { + panic("Series called") +} diff --git a/util/testutil/context.go b/util/testutil/context.go index 3f63b030d7..0c9e0f6f64 100644 --- a/util/testutil/context.go +++ b/util/testutil/context.go @@ -13,7 +13,12 @@ package testutil -import "time" +import ( + "context" + "time" + + "go.uber.org/atomic" +) // A MockContext provides a simple stub implementation of a Context. type MockContext struct { @@ -40,3 +45,23 @@ func (c *MockContext) Err() error { func (c *MockContext) Value(interface{}) interface{} { return nil } + +// MockContextErrAfter is a MockContext that will return an error after a certain +// number of calls to Err(). +type MockContextErrAfter struct { + MockContext + count atomic.Uint64 + FailAfter uint64 +} + +func (c *MockContextErrAfter) Err() error { + c.count.Inc() + if c.count.Load() >= c.FailAfter { + return context.Canceled + } + return c.MockContext.Err() +} + +func (c *MockContextErrAfter) Count() uint64 { + return c.count.Load() +} From b215a41be46cd463d5cfcc3deb7a7ab7028c1903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gy=C3=B6rgy=20Krajcsovits?= Date: Wed, 15 May 2024 14:02:39 +0200 Subject: [PATCH 23/47] tsdb/index/postings: fix missing lock unlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Followup to #14096 Unfortunately the previous PR introduced this bug by not releasing the lock before returning. Signed-off-by: György Krajcsovits --- tsdb/index/postings.go | 1 + 1 file changed, 1 insertion(+) diff --git a/tsdb/index/postings.go b/tsdb/index/postings.go index c6cf20e0e5..159f6416e2 100644 --- a/tsdb/index/postings.go +++ b/tsdb/index/postings.go @@ -419,6 +419,7 @@ func (p *MemPostings) PostingsForLabelMatching(ctx context.Context, name string, count := 1 for _, v := range vals { if count%checkContextEveryNIterations == 0 && ctx.Err() != nil { + p.mtx.RUnlock() return ErrPostings(ctx.Err()) } count++ From f10c3454e972e2b0e9ad5deec1e76996ca8aeb4d Mon Sep 17 00:00:00 2001 From: Oleksandr Redko Date: Mon, 13 May 2024 18:36:19 +0300 Subject: [PATCH 24/47] Enable perfsprint linter and fix up code Signed-off-by: Oleksandr Redko --- .golangci.yml | 8 +++++++- cmd/prometheus/main_test.go | 3 ++- cmd/promtool/main_test.go | 3 ++- cmd/promtool/tsdb.go | 6 +++--- cmd/promtool/unittest.go | 2 +- discovery/aws/ec2.go | 3 ++- discovery/aws/lightsail.go | 3 ++- discovery/azure/azure.go | 3 ++- discovery/consul/consul.go | 4 ++-- discovery/digitalocean/digitalocean.go | 2 +- discovery/dns/dns.go | 5 +++-- discovery/hetzner/hcloud.go | 9 ++++----- discovery/hetzner/robot.go | 2 +- discovery/legacymanager/manager_test.go | 2 +- discovery/linode/linode.go | 12 +++++------ discovery/manager_test.go | 2 +- discovery/marathon/marathon.go | 2 +- discovery/moby/network.go | 6 +++--- discovery/moby/nodes.go | 2 +- discovery/moby/services.go | 2 +- discovery/moby/tasks.go | 2 +- discovery/openstack/hypervisor.go | 5 +++-- discovery/openstack/instance.go | 5 +++-- discovery/ovhcloud/dedicated_server.go | 4 ++-- discovery/ovhcloud/vps.go | 13 ++++++------ discovery/puppetdb/puppetdb.go | 2 +- discovery/uyuni/uyuni.go | 3 ++- discovery/zookeeper/zookeeper.go | 10 +++++----- .../examples/custom-sd/adapter-usage/main.go | 4 ++-- model/histogram/float_histogram_test.go | 6 +++--- model/histogram/histogram_test.go | 8 ++++---- model/labels/labels_test.go | 3 ++- model/relabel/relabel_test.go | 4 ++-- notifier/notifier_test.go | 20 +++++++++---------- promql/engine_test.go | 5 +++-- rules/manager_test.go | 3 ++- scrape/scrape_test.go | 2 +- scrape/target_test.go | 3 ++- storage/remote/queue_manager_test.go | 2 +- tsdb/agent/series_test.go | 4 ++-- tsdb/compact_test.go | 5 +++-- tsdb/db_test.go | 2 +- tsdb/exemplar_test.go | 2 +- tsdb/head_test.go | 6 +++--- tsdb/index/postings_test.go | 4 ++-- tsdb/wlog/checkpoint_test.go | 13 ++++++------ web/api/v1/api_test.go | 3 ++- web/federate_test.go | 5 +++-- 48 files changed, 126 insertions(+), 103 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index a85a76cdf1..c63184877a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -21,6 +21,7 @@ linters: - goimports - misspell - nolintlint + - perfsprint - predeclared - revive - testifylint @@ -44,7 +45,9 @@ issues: - linters: - godot source: "^// ===" - + - linters: + - perfsprint + text: "fmt.Sprintf can be replaced with string addition" linters-settings: depguard: rules: @@ -85,6 +88,9 @@ linters-settings: local-prefixes: github.com/prometheus/prometheus gofumpt: extra-rules: true + perfsprint: + # Optimizes `fmt.Errorf`. + errorf: false revive: # By default, revive will enable only the linting rules that are named in the configuration file. # So, it's needed to explicitly set in configuration all required rules. diff --git a/cmd/prometheus/main_test.go b/cmd/prometheus/main_test.go index 03f3a9bc39..89c171bb5b 100644 --- a/cmd/prometheus/main_test.go +++ b/cmd/prometheus/main_test.go @@ -24,6 +24,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "testing" @@ -189,7 +190,7 @@ func TestSendAlerts(t *testing.T) { for i, tc := range testCases { tc := tc - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { senderFunc := senderFunc(func(alerts ...*notifier.Alert) { require.NotEmpty(t, tc.in, "sender called with 0 alert") require.Equal(t, tc.exp, alerts) diff --git a/cmd/promtool/main_test.go b/cmd/promtool/main_test.go index 7306a3e648..78500fe937 100644 --- a/cmd/promtool/main_test.go +++ b/cmd/promtool/main_test.go @@ -25,6 +25,7 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" "strings" "syscall" "testing" @@ -410,7 +411,7 @@ func TestExitCodes(t *testing.T) { } { t.Run(c.file, func(t *testing.T) { for _, lintFatal := range []bool{true, false} { - t.Run(fmt.Sprintf("%t", lintFatal), func(t *testing.T) { + t.Run(strconv.FormatBool(lintFatal), func(t *testing.T) { args := []string{"-test.main", "check", "config", "testdata/" + c.file} if lintFatal { args = append(args, "--lint-fatal") diff --git a/cmd/promtool/tsdb.go b/cmd/promtool/tsdb.go index 2ad969438c..6868102fa3 100644 --- a/cmd/promtool/tsdb.go +++ b/cmd/promtool/tsdb.go @@ -856,9 +856,9 @@ func displayHistogram(dataType string, datas []int, total int) { } avg := sum / len(datas) fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1]) - maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end))) - maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step))) - maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount))) + maxLeftLen := strconv.Itoa(len(strconv.Itoa(end))) + maxRightLen := strconv.Itoa(len(strconv.Itoa(end + step))) + maxCountLen := strconv.Itoa(len(strconv.Itoa(maxCount))) for bucket, count := range buckets { percentage := 100.0 * count / total fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage)) diff --git a/cmd/promtool/unittest.go b/cmd/promtool/unittest.go index 27d5dd98b9..5451c5296c 100644 --- a/cmd/promtool/unittest.go +++ b/cmd/promtool/unittest.go @@ -573,7 +573,7 @@ func (la labelsAndAnnotations) String() string { } s := "[\n0:" + indentLines("\n"+la[0].String(), " ") for i, l := range la[1:] { - s += ",\n" + fmt.Sprintf("%d", i+1) + ":" + indentLines("\n"+l.String(), " ") + s += ",\n" + strconv.Itoa(i+1) + ":" + indentLines("\n"+l.String(), " ") } s += "\n]" diff --git a/discovery/aws/ec2.go b/discovery/aws/ec2.go index aa79fd9c62..a6a0a82577 100644 --- a/discovery/aws/ec2.go +++ b/discovery/aws/ec2.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -279,7 +280,7 @@ func (d *EC2Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error if inst.PrivateDnsName != nil { labels[ec2LabelPrivateDNS] = model.LabelValue(*inst.PrivateDnsName) } - addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.cfg.Port)) + addr := net.JoinHostPort(*inst.PrivateIpAddress, strconv.Itoa(d.cfg.Port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.Platform != nil { diff --git a/discovery/aws/lightsail.go b/discovery/aws/lightsail.go index 86b138be55..0ad7f2d541 100644 --- a/discovery/aws/lightsail.go +++ b/discovery/aws/lightsail.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "time" @@ -229,7 +230,7 @@ func (d *LightsailDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, lightsailLabelRegion: model.LabelValue(d.cfg.Region), } - addr := net.JoinHostPort(*inst.PrivateIpAddress, fmt.Sprintf("%d", d.cfg.Port)) + addr := net.JoinHostPort(*inst.PrivateIpAddress, strconv.Itoa(d.cfg.Port)) labels[model.AddressLabel] = model.LabelValue(addr) if inst.PublicIpAddress != nil { diff --git a/discovery/azure/azure.go b/discovery/azure/azure.go index 7c2ece2c7b..70d95b9f3a 100644 --- a/discovery/azure/azure.go +++ b/discovery/azure/azure.go @@ -20,6 +20,7 @@ import ( "math/rand" "net" "net/http" + "strconv" "strings" "sync" "time" @@ -492,7 +493,7 @@ func (d *Discovery) vmToLabelSet(ctx context.Context, client client, vm virtualM } if ip.Properties != nil && ip.Properties.PrivateIPAddress != nil { labels[azureLabelMachinePrivateIP] = model.LabelValue(*ip.Properties.PrivateIPAddress) - address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, fmt.Sprintf("%d", d.port)) + address := net.JoinHostPort(*ip.Properties.PrivateIPAddress, strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(address) return labels, nil } diff --git a/discovery/consul/consul.go b/discovery/consul/consul.go index 40eed7697a..bdc1fc8dce 100644 --- a/discovery/consul/consul.go +++ b/discovery/consul/consul.go @@ -539,9 +539,9 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr // since the service may be registered remotely through a different node. var addr string if serviceNode.Service.Address != "" { - addr = net.JoinHostPort(serviceNode.Service.Address, fmt.Sprintf("%d", serviceNode.Service.Port)) + addr = net.JoinHostPort(serviceNode.Service.Address, strconv.Itoa(serviceNode.Service.Port)) } else { - addr = net.JoinHostPort(serviceNode.Node.Address, fmt.Sprintf("%d", serviceNode.Service.Port)) + addr = net.JoinHostPort(serviceNode.Node.Address, strconv.Itoa(serviceNode.Service.Port)) } labels := model.LabelSet{ diff --git a/discovery/digitalocean/digitalocean.go b/discovery/digitalocean/digitalocean.go index 18380b7296..ecee60cb1f 100644 --- a/discovery/digitalocean/digitalocean.go +++ b/discovery/digitalocean/digitalocean.go @@ -177,7 +177,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { } labels := model.LabelSet{ - doLabelID: model.LabelValue(fmt.Sprintf("%d", droplet.ID)), + doLabelID: model.LabelValue(strconv.Itoa(droplet.ID)), doLabelName: model.LabelValue(droplet.Name), doLabelImage: model.LabelValue(droplet.Image.Slug), doLabelImageName: model.LabelValue(droplet.Image.Name), diff --git a/discovery/dns/dns.go b/discovery/dns/dns.go index cf56a2ad02..314c3d38cd 100644 --- a/discovery/dns/dns.go +++ b/discovery/dns/dns.go @@ -18,6 +18,7 @@ import ( "errors" "fmt" "net" + "strconv" "strings" "sync" "time" @@ -200,7 +201,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ tg := &targetgroup.Group{} hostPort := func(a string, p int) model.LabelValue { - return model.LabelValue(net.JoinHostPort(a, fmt.Sprintf("%d", p))) + return model.LabelValue(net.JoinHostPort(a, strconv.Itoa(p))) } for _, record := range response.Answer { @@ -209,7 +210,7 @@ func (d *Discovery) refreshOne(ctx context.Context, name string, ch chan<- *targ switch addr := record.(type) { case *dns.SRV: dnsSrvRecordTarget = model.LabelValue(addr.Target) - dnsSrvRecordPort = model.LabelValue(fmt.Sprintf("%d", addr.Port)) + dnsSrvRecordPort = model.LabelValue(strconv.Itoa(int(addr.Port))) // Remove the final dot from rooted DNS names to make them look more usual. addr.Target = strings.TrimRight(addr.Target, ".") diff --git a/discovery/hetzner/hcloud.go b/discovery/hetzner/hcloud.go index 6d0599dfa2..df56f94c5f 100644 --- a/discovery/hetzner/hcloud.go +++ b/discovery/hetzner/hcloud.go @@ -15,7 +15,6 @@ package hetzner import ( "context" - "fmt" "net" "net/http" "strconv" @@ -92,7 +91,7 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er for i, server := range servers { labels := model.LabelSet{ hetznerLabelRole: model.LabelValue(HetznerRoleHcloud), - hetznerLabelServerID: model.LabelValue(fmt.Sprintf("%d", server.ID)), + hetznerLabelServerID: model.LabelValue(strconv.FormatInt(server.ID, 10)), hetznerLabelServerName: model.LabelValue(server.Name), hetznerLabelDatacenter: model.LabelValue(server.Datacenter.Name), hetznerLabelPublicIPv4: model.LabelValue(server.PublicNet.IPv4.IP.String()), @@ -102,10 +101,10 @@ func (d *hcloudDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, er hetznerLabelHcloudDatacenterLocation: model.LabelValue(server.Datacenter.Location.Name), hetznerLabelHcloudDatacenterLocationNetworkZone: model.LabelValue(server.Datacenter.Location.NetworkZone), hetznerLabelHcloudType: model.LabelValue(server.ServerType.Name), - hetznerLabelHcloudCPUCores: model.LabelValue(fmt.Sprintf("%d", server.ServerType.Cores)), + hetznerLabelHcloudCPUCores: model.LabelValue(strconv.Itoa(server.ServerType.Cores)), hetznerLabelHcloudCPUType: model.LabelValue(server.ServerType.CPUType), - hetznerLabelHcloudMemoryGB: model.LabelValue(fmt.Sprintf("%d", int(server.ServerType.Memory))), - hetznerLabelHcloudDiskGB: model.LabelValue(fmt.Sprintf("%d", server.ServerType.Disk)), + hetznerLabelHcloudMemoryGB: model.LabelValue(strconv.Itoa(int(server.ServerType.Memory))), + hetznerLabelHcloudDiskGB: model.LabelValue(strconv.Itoa(server.ServerType.Disk)), model.AddressLabel: model.LabelValue(net.JoinHostPort(server.PublicNet.IPv4.IP.String(), strconv.FormatUint(uint64(d.port), 10))), } diff --git a/discovery/hetzner/robot.go b/discovery/hetzner/robot.go index b862c33f5b..516470b05a 100644 --- a/discovery/hetzner/robot.go +++ b/discovery/hetzner/robot.go @@ -112,7 +112,7 @@ func (d *robotDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) hetznerLabelPublicIPv4: model.LabelValue(server.Server.ServerIP), hetznerLabelServerStatus: model.LabelValue(server.Server.Status), hetznerLabelRobotProduct: model.LabelValue(server.Server.Product), - hetznerLabelRobotCancelled: model.LabelValue(fmt.Sprintf("%t", server.Server.Canceled)), + hetznerLabelRobotCancelled: model.LabelValue(strconv.FormatBool(server.Server.Canceled)), model.AddressLabel: model.LabelValue(net.JoinHostPort(server.Server.ServerIP, strconv.FormatUint(uint64(d.port), 10))), } diff --git a/discovery/legacymanager/manager_test.go b/discovery/legacymanager/manager_test.go index 1ed699645d..a455a8e341 100644 --- a/discovery/legacymanager/manager_test.go +++ b/discovery/legacymanager/manager_test.go @@ -720,7 +720,7 @@ func staticConfig(addrs ...string) discovery.StaticConfig { var cfg discovery.StaticConfig for i, addr := range addrs { cfg = append(cfg, &targetgroup.Group{ - Source: fmt.Sprint(i), + Source: strconv.Itoa(i), Targets: []model.LabelSet{ {model.AddressLabel: model.LabelValue(addr)}, }, diff --git a/discovery/linode/linode.go b/discovery/linode/linode.go index 2a5475b854..f09c1a734c 100644 --- a/discovery/linode/linode.go +++ b/discovery/linode/linode.go @@ -325,7 +325,7 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro } labels := model.LabelSet{ - linodeLabelID: model.LabelValue(fmt.Sprintf("%d", instance.ID)), + linodeLabelID: model.LabelValue(strconv.Itoa(instance.ID)), linodeLabelName: model.LabelValue(instance.Label), linodeLabelImage: model.LabelValue(instance.Image), linodeLabelPrivateIPv4: model.LabelValue(privateIPv4), @@ -338,13 +338,13 @@ func (d *Discovery) refreshData(ctx context.Context) ([]*targetgroup.Group, erro linodeLabelType: model.LabelValue(instance.Type), linodeLabelStatus: model.LabelValue(instance.Status), linodeLabelGroup: model.LabelValue(instance.Group), - linodeLabelGPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.GPUs)), + linodeLabelGPUs: model.LabelValue(strconv.Itoa(instance.Specs.GPUs)), linodeLabelHypervisor: model.LabelValue(instance.Hypervisor), linodeLabelBackups: model.LabelValue(backupsStatus), - linodeLabelSpecsDiskBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Disk)<<20)), - linodeLabelSpecsMemoryBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Memory)<<20)), - linodeLabelSpecsVCPUs: model.LabelValue(fmt.Sprintf("%d", instance.Specs.VCPUs)), - linodeLabelSpecsTransferBytes: model.LabelValue(fmt.Sprintf("%d", int64(instance.Specs.Transfer)<<20)), + linodeLabelSpecsDiskBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Disk)<<20, 10)), + linodeLabelSpecsMemoryBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Memory)<<20, 10)), + linodeLabelSpecsVCPUs: model.LabelValue(strconv.Itoa(instance.Specs.VCPUs)), + linodeLabelSpecsTransferBytes: model.LabelValue(strconv.FormatInt(int64(instance.Specs.Transfer)<<20, 10)), } addr := net.JoinHostPort(publicIPv4, strconv.FormatUint(uint64(d.port), 10)) diff --git a/discovery/manager_test.go b/discovery/manager_test.go index 656d7c3c66..be07edbdb4 100644 --- a/discovery/manager_test.go +++ b/discovery/manager_test.go @@ -720,7 +720,7 @@ func staticConfig(addrs ...string) StaticConfig { var cfg StaticConfig for i, addr := range addrs { cfg = append(cfg, &targetgroup.Group{ - Source: fmt.Sprint(i), + Source: strconv.Itoa(i), Targets: []model.LabelSet{ {model.AddressLabel: model.LabelValue(addr)}, }, diff --git a/discovery/marathon/marathon.go b/discovery/marathon/marathon.go index 3e9e15967f..38b47accff 100644 --- a/discovery/marathon/marathon.go +++ b/discovery/marathon/marathon.go @@ -505,7 +505,7 @@ func targetEndpoint(task *task, port uint32, containerNet bool) string { host = task.Host } - return net.JoinHostPort(host, fmt.Sprintf("%d", port)) + return net.JoinHostPort(host, strconv.Itoa(int(port))) } // Get a list of ports and a list of labels from a PortMapping. diff --git a/discovery/moby/network.go b/discovery/moby/network.go index 0e0d0041de..794d2e607d 100644 --- a/discovery/moby/network.go +++ b/discovery/moby/network.go @@ -15,7 +15,7 @@ package moby import ( "context" - "fmt" + "strconv" "github.com/docker/docker/api/types" "github.com/docker/docker/client" @@ -44,8 +44,8 @@ func getNetworksLabels(ctx context.Context, client *client.Client, labelPrefix s labelPrefix + labelNetworkID: network.ID, labelPrefix + labelNetworkName: network.Name, labelPrefix + labelNetworkScope: network.Scope, - labelPrefix + labelNetworkInternal: fmt.Sprintf("%t", network.Internal), - labelPrefix + labelNetworkIngress: fmt.Sprintf("%t", network.Ingress), + labelPrefix + labelNetworkInternal: strconv.FormatBool(network.Internal), + labelPrefix + labelNetworkIngress: strconv.FormatBool(network.Ingress), } for k, v := range network.Labels { ln := strutil.SanitizeLabelName(k) diff --git a/discovery/moby/nodes.go b/discovery/moby/nodes.go index a7c5551c02..b5be844eda 100644 --- a/discovery/moby/nodes.go +++ b/discovery/moby/nodes.go @@ -66,7 +66,7 @@ func (d *Discovery) refreshNodes(ctx context.Context) ([]*targetgroup.Group, err swarmLabelNodeAddress: model.LabelValue(n.Status.Addr), } if n.ManagerStatus != nil { - labels[swarmLabelNodeManagerLeader] = model.LabelValue(fmt.Sprintf("%t", n.ManagerStatus.Leader)) + labels[swarmLabelNodeManagerLeader] = model.LabelValue(strconv.FormatBool(n.ManagerStatus.Leader)) labels[swarmLabelNodeManagerReachability] = model.LabelValue(n.ManagerStatus.Reachability) labels[swarmLabelNodeManagerAddr] = model.LabelValue(n.ManagerStatus.Addr) } diff --git a/discovery/moby/services.go b/discovery/moby/services.go index 1d472b5c00..c61b499259 100644 --- a/discovery/moby/services.go +++ b/discovery/moby/services.go @@ -116,7 +116,7 @@ func (d *Discovery) refreshServices(ctx context.Context) ([]*targetgroup.Group, labels[model.LabelName(k)] = model.LabelValue(v) } - addr := net.JoinHostPort(ip.String(), fmt.Sprintf("%d", d.port)) + addr := net.JoinHostPort(ip.String(), strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, labels) diff --git a/discovery/moby/tasks.go b/discovery/moby/tasks.go index 2505a7b07a..38b9d33de2 100644 --- a/discovery/moby/tasks.go +++ b/discovery/moby/tasks.go @@ -150,7 +150,7 @@ func (d *Discovery) refreshTasks(ctx context.Context) ([]*targetgroup.Group, err labels[model.LabelName(k)] = model.LabelValue(v) } - addr := net.JoinHostPort(ip.String(), fmt.Sprintf("%d", d.port)) + addr := net.JoinHostPort(ip.String(), strconv.Itoa(d.port)) labels[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, labels) diff --git a/discovery/openstack/hypervisor.go b/discovery/openstack/hypervisor.go index 16964cfb62..8964da9294 100644 --- a/discovery/openstack/hypervisor.go +++ b/discovery/openstack/hypervisor.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "net" + "strconv" "github.com/go-kit/log" "github.com/gophercloud/gophercloud" @@ -72,7 +73,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group } tg := &targetgroup.Group{ - Source: fmt.Sprintf("OS_" + h.region), + Source: "OS_" + h.region, } // OpenStack API reference // https://developer.openstack.org/api-ref/compute/#list-hypervisors-details @@ -84,7 +85,7 @@ func (h *HypervisorDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group } for _, hypervisor := range hypervisorList { labels := model.LabelSet{} - addr := net.JoinHostPort(hypervisor.HostIP, fmt.Sprintf("%d", h.port)) + addr := net.JoinHostPort(hypervisor.HostIP, strconv.Itoa(h.port)) labels[model.AddressLabel] = model.LabelValue(addr) labels[openstackLabelHypervisorID] = model.LabelValue(hypervisor.ID) labels[openstackLabelHypervisorHostName] = model.LabelValue(hypervisor.HypervisorHostname) diff --git a/discovery/openstack/instance.go b/discovery/openstack/instance.go index 9b28c1d6e1..750d414a2b 100644 --- a/discovery/openstack/instance.go +++ b/discovery/openstack/instance.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "net" + "strconv" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -120,7 +121,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, } pager := servers.List(client, opts) tg := &targetgroup.Group{ - Source: fmt.Sprintf("OS_" + i.region), + Source: "OS_" + i.region, } err = pager.EachPage(func(page pagination.Page) (bool, error) { if ctx.Err() != nil { @@ -194,7 +195,7 @@ func (i *InstanceDiscovery) refresh(ctx context.Context) ([]*targetgroup.Group, if val, ok := floatingIPList[floatingIPKey{id: s.ID, fixed: addr}]; ok { lbls[openstackLabelPublicIP] = model.LabelValue(val) } - addr = net.JoinHostPort(addr, fmt.Sprintf("%d", i.port)) + addr = net.JoinHostPort(addr, strconv.Itoa(i.port)) lbls[model.AddressLabel] = model.LabelValue(addr) tg.Targets = append(tg.Targets, lbls) diff --git a/discovery/ovhcloud/dedicated_server.go b/discovery/ovhcloud/dedicated_server.go index bb5dadcd7b..a70857a08b 100644 --- a/discovery/ovhcloud/dedicated_server.go +++ b/discovery/ovhcloud/dedicated_server.go @@ -144,12 +144,12 @@ func (d *dedicatedServerDiscovery) refresh(context.Context) ([]*targetgroup.Grou model.InstanceLabel: model.LabelValue(server.Name), dedicatedServerLabelPrefix + "state": model.LabelValue(server.State), dedicatedServerLabelPrefix + "commercial_range": model.LabelValue(server.CommercialRange), - dedicatedServerLabelPrefix + "link_speed": model.LabelValue(fmt.Sprintf("%d", server.LinkSpeed)), + dedicatedServerLabelPrefix + "link_speed": model.LabelValue(strconv.Itoa(server.LinkSpeed)), dedicatedServerLabelPrefix + "rack": model.LabelValue(server.Rack), dedicatedServerLabelPrefix + "no_intervention": model.LabelValue(strconv.FormatBool(server.NoIntervention)), dedicatedServerLabelPrefix + "os": model.LabelValue(server.Os), dedicatedServerLabelPrefix + "support_level": model.LabelValue(server.SupportLevel), - dedicatedServerLabelPrefix + "server_id": model.LabelValue(fmt.Sprintf("%d", server.ServerID)), + dedicatedServerLabelPrefix + "server_id": model.LabelValue(strconv.FormatInt(server.ServerID, 10)), dedicatedServerLabelPrefix + "reverse": model.LabelValue(server.Reverse), dedicatedServerLabelPrefix + "datacenter": model.LabelValue(server.Datacenter), dedicatedServerLabelPrefix + "name": model.LabelValue(server.Name), diff --git a/discovery/ovhcloud/vps.go b/discovery/ovhcloud/vps.go index e2d1dee364..58ceeabd87 100644 --- a/discovery/ovhcloud/vps.go +++ b/discovery/ovhcloud/vps.go @@ -19,6 +19,7 @@ import ( "net/netip" "net/url" "path" + "strconv" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -161,21 +162,21 @@ func (d *vpsDiscovery) refresh(context.Context) ([]*targetgroup.Group, error) { model.InstanceLabel: model.LabelValue(server.Name), vpsLabelPrefix + "offer": model.LabelValue(server.Model.Offer), vpsLabelPrefix + "datacenter": model.LabelValue(fmt.Sprintf("%+v", server.Model.Datacenter)), - vpsLabelPrefix + "model_vcore": model.LabelValue(fmt.Sprintf("%d", server.Model.Vcore)), - vpsLabelPrefix + "maximum_additional_ip": model.LabelValue(fmt.Sprintf("%d", server.Model.MaximumAdditionalIP)), + vpsLabelPrefix + "model_vcore": model.LabelValue(strconv.Itoa(server.Model.Vcore)), + vpsLabelPrefix + "maximum_additional_ip": model.LabelValue(strconv.Itoa(server.Model.MaximumAdditionalIP)), vpsLabelPrefix + "version": model.LabelValue(server.Model.Version), vpsLabelPrefix + "model_name": model.LabelValue(server.Model.Name), - vpsLabelPrefix + "disk": model.LabelValue(fmt.Sprintf("%d", server.Model.Disk)), - vpsLabelPrefix + "memory": model.LabelValue(fmt.Sprintf("%d", server.Model.Memory)), + vpsLabelPrefix + "disk": model.LabelValue(strconv.Itoa(server.Model.Disk)), + vpsLabelPrefix + "memory": model.LabelValue(strconv.Itoa(server.Model.Memory)), vpsLabelPrefix + "zone": model.LabelValue(server.Zone), vpsLabelPrefix + "display_name": model.LabelValue(server.DisplayName), vpsLabelPrefix + "cluster": model.LabelValue(server.Cluster), vpsLabelPrefix + "state": model.LabelValue(server.State), vpsLabelPrefix + "name": model.LabelValue(server.Name), vpsLabelPrefix + "netboot_mode": model.LabelValue(server.NetbootMode), - vpsLabelPrefix + "memory_limit": model.LabelValue(fmt.Sprintf("%d", server.MemoryLimit)), + vpsLabelPrefix + "memory_limit": model.LabelValue(strconv.Itoa(server.MemoryLimit)), vpsLabelPrefix + "offer_type": model.LabelValue(server.OfferType), - vpsLabelPrefix + "vcore": model.LabelValue(fmt.Sprintf("%d", server.Vcore)), + vpsLabelPrefix + "vcore": model.LabelValue(strconv.Itoa(server.Vcore)), vpsLabelPrefix + "ipv4": model.LabelValue(ipv4), vpsLabelPrefix + "ipv6": model.LabelValue(ipv6), } diff --git a/discovery/puppetdb/puppetdb.go b/discovery/puppetdb/puppetdb.go index 8c9ccde0a4..8f89acbf93 100644 --- a/discovery/puppetdb/puppetdb.go +++ b/discovery/puppetdb/puppetdb.go @@ -237,7 +237,7 @@ func (d *Discovery) refresh(ctx context.Context) ([]*targetgroup.Group, error) { pdbLabelResource: model.LabelValue(resource.Resource), pdbLabelType: model.LabelValue(resource.Type), pdbLabelTitle: model.LabelValue(resource.Title), - pdbLabelExported: model.LabelValue(fmt.Sprintf("%t", resource.Exported)), + pdbLabelExported: model.LabelValue(strconv.FormatBool(resource.Exported)), pdbLabelFile: model.LabelValue(resource.File), pdbLabelEnvironment: model.LabelValue(resource.Environment), } diff --git a/discovery/uyuni/uyuni.go b/discovery/uyuni/uyuni.go index e885ef2e80..c8af2f1587 100644 --- a/discovery/uyuni/uyuni.go +++ b/discovery/uyuni/uyuni.go @@ -20,6 +20,7 @@ import ( "net/http" "net/url" "path" + "strconv" "strings" "time" @@ -269,7 +270,7 @@ func (d *Discovery) getEndpointLabels( model.AddressLabel: model.LabelValue(addr), uyuniLabelMinionHostname: model.LabelValue(networkInfo.Hostname), uyuniLabelPrimaryFQDN: model.LabelValue(networkInfo.PrimaryFQDN), - uyuniLablelSystemID: model.LabelValue(fmt.Sprintf("%d", endpoint.SystemID)), + uyuniLablelSystemID: model.LabelValue(strconv.Itoa(endpoint.SystemID)), uyuniLablelGroups: model.LabelValue(strings.Join(managedGroupNames, d.separator)), uyuniLablelEndpointName: model.LabelValue(endpoint.EndpointName), uyuniLablelExporter: model.LabelValue(endpoint.ExporterName), diff --git a/discovery/zookeeper/zookeeper.go b/discovery/zookeeper/zookeeper.go index 303c7ca6d0..92904dd71c 100644 --- a/discovery/zookeeper/zookeeper.go +++ b/discovery/zookeeper/zookeeper.go @@ -280,17 +280,17 @@ func parseServersetMember(data []byte, path string) (model.LabelSet, error) { labels := model.LabelSet{} labels[serversetPathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( - net.JoinHostPort(member.ServiceEndpoint.Host, fmt.Sprintf("%d", member.ServiceEndpoint.Port))) + net.JoinHostPort(member.ServiceEndpoint.Host, strconv.Itoa(member.ServiceEndpoint.Port))) labels[serversetEndpointLabelPrefix+"_host"] = model.LabelValue(member.ServiceEndpoint.Host) - labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.ServiceEndpoint.Port)) + labels[serversetEndpointLabelPrefix+"_port"] = model.LabelValue(strconv.Itoa(member.ServiceEndpoint.Port)) for name, endpoint := range member.AdditionalEndpoints { cleanName := model.LabelName(strutil.SanitizeLabelName(name)) labels[serversetEndpointLabelPrefix+"_host_"+cleanName] = model.LabelValue( endpoint.Host) labels[serversetEndpointLabelPrefix+"_port_"+cleanName] = model.LabelValue( - fmt.Sprintf("%d", endpoint.Port)) + strconv.Itoa(endpoint.Port)) } labels[serversetStatusLabel] = model.LabelValue(member.Status) @@ -321,10 +321,10 @@ func parseNerveMember(data []byte, path string) (model.LabelSet, error) { labels := model.LabelSet{} labels[nervePathLabel] = model.LabelValue(path) labels[model.AddressLabel] = model.LabelValue( - net.JoinHostPort(member.Host, fmt.Sprintf("%d", member.Port))) + net.JoinHostPort(member.Host, strconv.Itoa(member.Port))) labels[nerveEndpointLabelPrefix+"_host"] = model.LabelValue(member.Host) - labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(fmt.Sprintf("%d", member.Port)) + labels[nerveEndpointLabelPrefix+"_port"] = model.LabelValue(strconv.Itoa(member.Port)) labels[nerveEndpointLabelPrefix+"_name"] = model.LabelValue(member.Name) return labels, nil diff --git a/documentation/examples/custom-sd/adapter-usage/main.go b/documentation/examples/custom-sd/adapter-usage/main.go index bfbca7b70c..8ccbafe6f1 100644 --- a/documentation/examples/custom-sd/adapter-usage/main.go +++ b/documentation/examples/custom-sd/adapter-usage/main.go @@ -127,9 +127,9 @@ func (d *discovery) parseServiceNodes(resp *http.Response, name string) (*target // since the service may be registered remotely through a different node. var addr string if node.ServiceAddress != "" { - addr = net.JoinHostPort(node.ServiceAddress, fmt.Sprintf("%d", node.ServicePort)) + addr = net.JoinHostPort(node.ServiceAddress, strconv.Itoa(node.ServicePort)) } else { - addr = net.JoinHostPort(node.Address, fmt.Sprintf("%d", node.ServicePort)) + addr = net.JoinHostPort(node.Address, strconv.Itoa(node.ServicePort)) } target := model.LabelSet{model.AddressLabel: model.LabelValue(addr)} diff --git a/model/histogram/float_histogram_test.go b/model/histogram/float_histogram_test.go index 49fb77ab0b..759da6540c 100644 --- a/model/histogram/float_histogram_test.go +++ b/model/histogram/float_histogram_test.go @@ -14,9 +14,9 @@ package histogram import ( - "fmt" "math" "math/rand" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -2134,7 +2134,7 @@ func TestAllFloatBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { var expBuckets, actBuckets []Bucket[float64] if c.includeNeg { @@ -2360,7 +2360,7 @@ func TestAllReverseFloatBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { var expBuckets, actBuckets []Bucket[float64] if c.includePos { diff --git a/model/histogram/histogram_test.go b/model/histogram/histogram_test.go index 14a948e644..d1a074135c 100644 --- a/model/histogram/histogram_test.go +++ b/model/histogram/histogram_test.go @@ -14,8 +14,8 @@ package histogram import ( - "fmt" "math" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -72,7 +72,7 @@ func TestHistogramString(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { actualString := c.histogram.String() require.Equal(t, c.expectedString, actualString) }) @@ -211,7 +211,7 @@ func TestCumulativeBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { it := c.histogram.CumulativeBucketIterator() actualBuckets := make([]Bucket[uint64], 0, len(c.expectedBuckets)) for it.Next() { @@ -371,7 +371,7 @@ func TestRegularBucketIterator(t *testing.T) { } for i, c := range cases { - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { it := c.histogram.PositiveBucketIterator() actualPositiveBuckets := make([]Bucket[uint64], 0, len(c.expectedPositiveBuckets)) for it.Next() { diff --git a/model/labels/labels_test.go b/model/labels/labels_test.go index 3d6e7659f4..6464d007d3 100644 --- a/model/labels/labels_test.go +++ b/model/labels/labels_test.go @@ -17,6 +17,7 @@ import ( "encoding/json" "fmt" "net/http" + "strconv" "strings" "testing" @@ -732,7 +733,7 @@ func TestScratchBuilder(t *testing.T) { want: FromStrings("ddd", "444"), }, } { - t.Run(fmt.Sprint(i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { b := NewScratchBuilder(len(tcase.add)) for _, lbl := range tcase.add { b.Add(lbl.Name, lbl.Value) diff --git a/model/relabel/relabel_test.go b/model/relabel/relabel_test.go index 6798fb02a5..507ea101dd 100644 --- a/model/relabel/relabel_test.go +++ b/model/relabel/relabel_test.go @@ -14,7 +14,7 @@ package relabel import ( - "fmt" + "strconv" "testing" "github.com/prometheus/common/model" @@ -657,7 +657,7 @@ func TestRelabelValidate(t *testing.T) { }, } for i, test := range tests { - t.Run(fmt.Sprint(i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { err := test.config.Validate() if test.expected == "" { require.NoError(t, err) diff --git a/notifier/notifier_test.go b/notifier/notifier_test.go index e7a9243bc2..d2e72ca33b 100644 --- a/notifier/notifier_test.go +++ b/notifier/notifier_test.go @@ -74,7 +74,7 @@ func TestHandlerNextBatch(t *testing.T) { for i := range make([]struct{}, 2*maxBatchSize+1) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -186,10 +186,10 @@ func TestHandlerSendAll(t *testing.T) { for i := range make([]struct{}, maxBatchSize) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) expected = append(expected, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -297,23 +297,23 @@ func TestHandlerSendAllRemapPerAm(t *testing.T) { for i := range make([]struct{}, maxBatchSize/2) { h.queue = append(h.queue, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }, &Alert{ - Labels: labels.FromStrings("alertname", "test", "alertnamedrop", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", "test", "alertnamedrop", strconv.Itoa(i)), }, ) expected1 = append(expected1, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }, &Alert{ - Labels: labels.FromStrings("alertname", "test", "alertnamedrop", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", "test", "alertnamedrop", strconv.Itoa(i)), }, ) expected2 = append(expected2, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -502,7 +502,7 @@ func TestHandlerQueuing(t *testing.T) { var alerts []*Alert for i := range make([]struct{}, 20*maxBatchSize) { alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } @@ -762,7 +762,7 @@ func TestHangingNotifier(t *testing.T) { var alerts []*Alert for i := range make([]struct{}, 20*maxBatchSize) { alerts = append(alerts, &Alert{ - Labels: labels.FromStrings("alertname", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("alertname", strconv.Itoa(i)), }) } diff --git a/promql/engine_test.go b/promql/engine_test.go index cc91855468..220eaca9da 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -20,6 +20,7 @@ import ( "math" "os" "sort" + "strconv" "testing" "time" @@ -4406,7 +4407,7 @@ func TestNativeHistogram_Sum_Count_Add_AvgOperator(t *testing.T) { _, err := app.Append(0, labels.FromStrings("__name__", "float_series", "idx", "0"), ts, 42) require.NoError(t, err) for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx1)) + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) // Since we mutate h later, we need to create a copy here. var err error if floatHisto { @@ -4678,7 +4679,7 @@ func TestNativeHistogram_SubOperator(t *testing.T) { ts := idx0 * int64(10*time.Minute/time.Millisecond) app := storage.Appender(context.Background()) for idx1, h := range c.histograms { - lbls := labels.FromStrings("__name__", seriesName, "idx", fmt.Sprintf("%d", idx1)) + lbls := labels.FromStrings("__name__", seriesName, "idx", strconv.Itoa(idx1)) // Since we mutate h later, we need to create a copy here. var err error if floatHisto { diff --git a/rules/manager_test.go b/rules/manager_test.go index 1862b58075..aeb3276603 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -19,6 +19,7 @@ import ( "math" "os" "sort" + "strconv" "sync" "testing" "time" @@ -1361,7 +1362,7 @@ func TestNativeHistogramsInRecordingRules(t *testing.T) { ts := time.Now() app := db.Appender(context.Background()) for i, h := range hists { - l := labels.FromStrings("__name__", "histogram_metric", "idx", fmt.Sprintf("%d", i)) + l := labels.FromStrings("__name__", "histogram_metric", "idx", strconv.Itoa(i)) _, err := app.AppendHistogram(0, l, ts.UnixMilli(), h.Copy(), nil) require.NoError(t, err) } diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 51bd377e47..b5a31cb650 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -1285,7 +1285,7 @@ func TestScrapeLoopCacheMemoryExhaustionProtection(t *testing.T) { for i := 0; i < 500; i++ { s = fmt.Sprintf("%smetric_%d_%d 42\n", s, i, numScrapes) } - w.Write([]byte(fmt.Sprintf(s + "&"))) + w.Write([]byte(s + "&")) } else { cancel() } diff --git a/scrape/target_test.go b/scrape/target_test.go index f91e31050a..b43ff24063 100644 --- a/scrape/target_test.go +++ b/scrape/target_test.go @@ -21,6 +21,7 @@ import ( "net/http/httptest" "net/url" "os" + "strconv" "strings" "testing" "time" @@ -67,7 +68,7 @@ func TestTargetOffset(t *testing.T) { // Calculate offsets for 10000 different targets. for i := range offsets { target := newTestTarget("example.com:80", 0, labels.FromStrings( - "label", fmt.Sprintf("%d", i), + "label", strconv.Itoa(i), )) offsets[i] = target.offset(interval, offsetSeed) } diff --git a/storage/remote/queue_manager_test.go b/storage/remote/queue_manager_test.go index e32a3ace05..6121fb6c03 100644 --- a/storage/remote/queue_manager_test.go +++ b/storage/remote/queue_manager_test.go @@ -468,7 +468,7 @@ func TestReleaseNoninternedString(t *testing.T) { m.StoreSeries([]record.RefSeries{ { Ref: chunks.HeadSeriesRef(i), - Labels: labels.FromStrings("asdf", fmt.Sprintf("%d", i)), + Labels: labels.FromStrings("asdf", strconv.Itoa(i)), }, }, 0) m.SeriesReset(1) diff --git a/tsdb/agent/series_test.go b/tsdb/agent/series_test.go index ae327d8582..bc5a4af5d3 100644 --- a/tsdb/agent/series_test.go +++ b/tsdb/agent/series_test.go @@ -14,8 +14,8 @@ package agent import ( - "fmt" "math" + "strconv" "sync" "testing" "time" @@ -53,7 +53,7 @@ func TestNoDeadlock(t *testing.T) { series := &memSeries{ ref: chunks.HeadSeriesRef(i), lset: labels.FromMap(map[string]string{ - "id": fmt.Sprintf("%d", i), + "id": strconv.Itoa(i), }), } stripeSeries.Set(series.lset.Hash(), series) diff --git a/tsdb/compact_test.go b/tsdb/compact_test.go index b2d2ea6e7f..10c90e30dc 100644 --- a/tsdb/compact_test.go +++ b/tsdb/compact_test.go @@ -22,6 +22,7 @@ import ( "os" "path" "path/filepath" + "strconv" "sync" "testing" "time" @@ -1129,7 +1130,7 @@ func BenchmarkCompactionFromHead(b *testing.B) { for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) for lv := 0; lv < labelValues; lv++ { - app.Append(0, labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0) + app.Append(0, labels.FromStrings(strconv.Itoa(ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)), 0, 0) } require.NoError(b, app.Commit()) } @@ -1161,7 +1162,7 @@ func BenchmarkCompactionFromOOOHead(b *testing.B) { for ln := 0; ln < labelNames; ln++ { app := h.Appender(context.Background()) for lv := 0; lv < labelValues; lv++ { - lbls := labels.FromStrings(fmt.Sprintf("%d", ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)) + lbls := labels.FromStrings(strconv.Itoa(ln), fmt.Sprintf("%d%s%d", lv, postingsBenchSuffix, ln)) _, err = app.Append(0, lbls, int64(totalSamples), 0) require.NoError(b, err) for ts := 0; ts < totalSamples; ts++ { diff --git a/tsdb/db_test.go b/tsdb/db_test.go index 71b2f05ac7..a682f46554 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -1065,7 +1065,7 @@ func TestWALSegmentSizeOptions(t *testing.T) { for i := int64(0); i < 155; i++ { app := db.Appender(context.Background()) - ref, err := app.Append(0, labels.FromStrings("wal"+fmt.Sprintf("%d", i), "size"), i, rand.Float64()) + ref, err := app.Append(0, labels.FromStrings("wal"+strconv.Itoa(int(i)), "size"), i, rand.Float64()) require.NoError(t, err) for j := int64(1); j <= 78; j++ { _, err := app.Append(ref, labels.EmptyLabels(), i+j, rand.Float64()) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index 21030e4577..b2be221700 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -416,7 +416,7 @@ func BenchmarkAddExemplar(b *testing.B) { exLabels := labels.FromStrings("trace_id", "89620921") for _, n := range []int{10000, 100000, 1000000} { - b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + b.Run(strconv.Itoa(n), func(b *testing.B) { for j := 0; j < b.N; j++ { b.StopTimer() exs, err := NewCircularExemplarStorage(int64(n), eMetrics) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index d9631b3b91..804886ad7b 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3383,7 +3383,7 @@ func TestWaitForPendingReadersInTimeRange(t *testing.T) { func TestAppendHistogram(t *testing.T) { l := labels.FromStrings("a", "b") for _, numHistograms := range []int{1, 10, 150, 200, 250, 300} { - t.Run(fmt.Sprintf("%d", numHistograms), func(t *testing.T) { + t.Run(strconv.Itoa(numHistograms), func(t *testing.T) { head, _ := newTestHead(t, 1000, wlog.CompressionNone, false) t.Cleanup(func() { require.NoError(t, head.Close()) @@ -3692,7 +3692,7 @@ func TestChunkSnapshot(t *testing.T) { e := ex{ seriesLabels: lbls, e: exemplar.Exemplar{ - Labels: labels.FromStrings("trace_id", fmt.Sprintf("%d", rand.Int())), + Labels: labels.FromStrings("trace_id", strconv.Itoa(rand.Int())), Value: rand.Float64(), Ts: ts, }, @@ -5032,7 +5032,7 @@ func TestOOOAppendWithNoSeries(t *testing.T) { require.Equal(t, expSamples, ms.headChunks.chunk.NumSamples()) } - newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", fmt.Sprintf("%d", idx)) } + newLabels := func(idx int) labels.Labels { return labels.FromStrings("foo", strconv.Itoa(idx)) } s1 := newLabels(1) appendSample(s1, 300) // At 300m. diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index cabca59774..7fa0a892b9 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -50,7 +50,7 @@ func TestMemPostings_ensureOrder(t *testing.T) { for j := range l { l[j] = storage.SeriesRef(rand.Uint64()) } - v := fmt.Sprintf("%d", i) + v := strconv.Itoa(i) p.m["a"][v] = l } @@ -391,7 +391,7 @@ func BenchmarkMerge(t *testing.B) { its := make([]Postings, len(refs)) for _, nSeries := range []int{1, 10, 100, 1000, 10000, 100000} { - t.Run(fmt.Sprint(nSeries), func(bench *testing.B) { + t.Run(strconv.Itoa(nSeries), func(bench *testing.B) { ctx := context.Background() for i := 0; i < bench.N; i++ { // Reset the ListPostings to their original values each time round the loop. diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index 279f7c4356..ea11c5dd75 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -19,6 +19,7 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "testing" @@ -232,10 +233,10 @@ func TestCheckpoint(t *testing.T) { // Write changing metadata for each series. In the end, only the latest // version should end up in the checkpoint. b = enc.Metadata([]record.RefMetadata{ - {Ref: 0, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 1, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 2, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, - {Ref: 3, Unit: fmt.Sprintf("%d", last), Help: fmt.Sprintf("%d", last)}, + {Ref: 0, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 1, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, + {Ref: 3, Unit: strconv.FormatInt(last, 10), Help: strconv.FormatInt(last, 10)}, }, nil) require.NoError(t, w.Log(b)) @@ -324,8 +325,8 @@ func TestCheckpoint(t *testing.T) { testutil.RequireEqual(t, expectedRefSeries, series) expectedRefMetadata := []record.RefMetadata{ - {Ref: 0, Unit: fmt.Sprintf("%d", last-100), Help: fmt.Sprintf("%d", last-100)}, - {Ref: 2, Unit: fmt.Sprintf("%d", last-100), Help: fmt.Sprintf("%d", last-100)}, + {Ref: 0, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, + {Ref: 2, Unit: strconv.FormatInt(last-100, 10), Help: strconv.FormatInt(last-100, 10)}, {Ref: 4, Unit: "unit", Help: "help"}, } sort.Slice(metadata, func(i, j int) bool { return metadata[i].Ref < metadata[j].Ref }) diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 9d7d1d502a..7d55dd11a0 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -25,6 +25,7 @@ import ( "reflect" "runtime" "sort" + "strconv" "strings" "testing" "time" @@ -3544,7 +3545,7 @@ func TestTSDBStatus(t *testing.T) { }, } { tc := tc - t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { + t.Run(strconv.Itoa(i), func(t *testing.T) { api := &API{db: tc.db, gatherer: prometheus.DefaultGatherer} endpoint := tc.endpoint(api) req, err := http.NewRequest(tc.method, fmt.Sprintf("?%s", tc.values.Encode()), nil) diff --git a/web/federate_test.go b/web/federate_test.go index e5adb97eb5..056a95d67f 100644 --- a/web/federate_test.go +++ b/web/federate_test.go @@ -22,6 +22,7 @@ import ( "net/http" "net/http/httptest" "sort" + "strconv" "strings" "testing" "time" @@ -341,8 +342,8 @@ func TestFederationWithNativeHistograms(t *testing.T) { } app := db.Appender(context.Background()) for i := 0; i < 6; i++ { - l := labels.FromStrings("__name__", "test_metric", "foo", fmt.Sprintf("%d", i)) - expL := labels.FromStrings("__name__", "test_metric", "instance", "", "foo", fmt.Sprintf("%d", i)) + l := labels.FromStrings("__name__", "test_metric", "foo", strconv.Itoa(i)) + expL := labels.FromStrings("__name__", "test_metric", "instance", "", "foo", strconv.Itoa(i)) var err error switch i { case 0, 3: From 0f01d4b336cb4829475fbee6595041a25d85e594 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 15 May 2024 21:58:56 +0200 Subject: [PATCH 25/47] Fix flaky test Signed-off-by: Arve Knudsen --- promql/engine_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/promql/engine_test.go b/promql/engine_test.go index c47ceb2460..f431ab41e8 100644 --- a/promql/engine_test.go +++ b/promql/engine_test.go @@ -21,6 +21,7 @@ import ( "os" "sort" "strconv" + "sync" "testing" "time" @@ -94,9 +95,14 @@ func TestQueryConcurrency(t *testing.T) { return nil } + var wg sync.WaitGroup for i := 0; i < maxConcurrency; i++ { q := engine.NewTestQuery(f) - go q.Exec(ctx) + wg.Add(1) + go func() { + q.Exec(ctx) + wg.Done() + }() select { case <-processing: // Expected. @@ -106,7 +112,11 @@ func TestQueryConcurrency(t *testing.T) { } q := engine.NewTestQuery(f) - go q.Exec(ctx) + wg.Add(1) + go func() { + q.Exec(ctx) + wg.Done() + }() select { case <-processing: @@ -129,6 +139,8 @@ func TestQueryConcurrency(t *testing.T) { for i := 0; i < maxConcurrency; i++ { block <- struct{}{} } + + wg.Wait() } // contextDone returns an error if the context was canceled or timed out. From 5ca56eeb6bc7fb17c143a0a1ac075e7bf9e3414b Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Thu, 16 May 2024 11:51:46 +0200 Subject: [PATCH 26/47] tsdb/index: Refactor Reader tests (#14071) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tsdb/index: Refactor Reader tests Co-authored-by: Björn Rabenstein Signed-off-by: Arve Knudsen --------- Signed-off-by: Arve Knudsen Co-authored-by: Björn Rabenstein --- tsdb/index/index_test.go | 218 +++++++++++++-------------------------- 1 file changed, 69 insertions(+), 149 deletions(-) diff --git a/tsdb/index/index_test.go b/tsdb/index/index_test.go index 22133d0b70..5c6d64e076 100644 --- a/tsdb/index/index_test.go +++ b/tsdb/index/index_test.go @@ -20,7 +20,9 @@ import ( "hash/crc32" "os" "path/filepath" + "slices" "sort" + "strconv" "testing" "github.com/stretchr/testify/require" @@ -160,39 +162,14 @@ func TestIndexRW_Create_Open(t *testing.T) { } func TestIndexRW_Postings(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(context.Background(), fn) - require.NoError(t, err) - - series := []labels.Labels{ - labels.FromStrings("a", "1", "b", "1"), - labels.FromStrings("a", "1", "b", "2"), - labels.FromStrings("a", "1", "b", "3"), - labels.FromStrings("a", "1", "b", "4"), + var input indexWriterSeriesSlice + for i := 1; i < 5; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("a", "1", "b", strconv.Itoa(i)), + }) } - - require.NoError(t, iw.AddSymbol("1")) - require.NoError(t, iw.AddSymbol("2")) - require.NoError(t, iw.AddSymbol("3")) - require.NoError(t, iw.AddSymbol("4")) - require.NoError(t, iw.AddSymbol("a")) - require.NoError(t, iw.AddSymbol("b")) - - // Postings lists are only written if a series with the respective - // reference was added before. - require.NoError(t, iw.AddSeries(1, series[0])) - require.NoError(t, iw.AddSeries(2, series[1])) - require.NoError(t, iw.AddSeries(3, series[2])) - require.NoError(t, iw.AddSeries(4, series[3])) - - require.NoError(t, iw.Close()) - - ir, err := NewFileReader(fn) - require.NoError(t, err) + ir, fn, _ := createFileReader(ctx, t, input) p, err := ir.Postings(ctx, "a", "1") require.NoError(t, err) @@ -205,7 +182,7 @@ func TestIndexRW_Postings(t *testing.T) { require.NoError(t, err) require.Empty(t, c) - testutil.RequireEqual(t, series[i], builder.Labels()) + testutil.RequireEqual(t, input[i].labels, builder.Labels()) } require.NoError(t, p.Err()) @@ -240,8 +217,6 @@ func TestIndexRW_Postings(t *testing.T) { "b": {"1", "2", "3", "4"}, }, labelIndices) - require.NoError(t, ir.Close()) - t.Run("ShardedPostings()", func(t *testing.T) { ir, err := NewFileReader(fn) require.NoError(t, err) @@ -296,42 +271,16 @@ func TestIndexRW_Postings(t *testing.T) { } func TestPostingsMany(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(context.Background(), fn) - require.NoError(t, err) - // Create a label in the index which has 999 values. - symbols := map[string]struct{}{} - series := []labels.Labels{} + var input indexWriterSeriesSlice for i := 1; i < 1000; i++ { v := fmt.Sprintf("%03d", i) - series = append(series, labels.FromStrings("i", v, "foo", "bar")) - symbols[v] = struct{}{} + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("i", v, "foo", "bar"), + }) } - symbols["i"] = struct{}{} - symbols["foo"] = struct{}{} - symbols["bar"] = struct{}{} - syms := []string{} - for s := range symbols { - syms = append(syms, s) - } - sort.Strings(syms) - for _, s := range syms { - require.NoError(t, iw.AddSymbol(s)) - } - - for i, s := range series { - require.NoError(t, iw.AddSeries(storage.SeriesRef(i), s)) - } - require.NoError(t, iw.Close()) - - ir, err := NewFileReader(fn) - require.NoError(t, err) - defer func() { require.NoError(t, ir.Close()) }() + ir, _, symbols := createFileReader(ctx, t, input) cases := []struct { in []string @@ -387,25 +336,13 @@ func TestPostingsMany(t *testing.T) { } func TestPersistence_index_e2e(t *testing.T) { - dir := t.TempDir() ctx := context.Background() - lbls, err := labels.ReadLabels(filepath.Join("..", "testdata", "20kseries.json"), 20000) require.NoError(t, err) - // Sort labels as the index writer expects series in sorted order. sort.Sort(labels.Slice(lbls)) - symbols := map[string]struct{}{} - for _, lset := range lbls { - lset.Range(func(l labels.Label) { - symbols[l.Name] = struct{}{} - symbols[l.Value] = struct{}{} - }) - } - var input indexWriterSeriesSlice - ref := uint64(0) // Generate ChunkMetas for every label set. for i, lset := range lbls { @@ -426,17 +363,7 @@ func TestPersistence_index_e2e(t *testing.T) { }) } - iw, err := NewWriter(context.Background(), filepath.Join(dir, indexFilename)) - require.NoError(t, err) - - syms := []string{} - for s := range symbols { - syms = append(syms, s) - } - sort.Strings(syms) - for _, s := range syms { - require.NoError(t, iw.AddSymbol(s)) - } + ir, _, _ := createFileReader(ctx, t, input) // Population procedure as done by compaction. var ( @@ -447,8 +374,6 @@ func TestPersistence_index_e2e(t *testing.T) { mi := newMockIndex() for i, s := range input { - err = iw.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...) - require.NoError(t, err) require.NoError(t, mi.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...)) s.labels.Range(func(l labels.Label) { @@ -462,12 +387,6 @@ func TestPersistence_index_e2e(t *testing.T) { postings.Add(storage.SeriesRef(i), s.labels) } - err = iw.Close() - require.NoError(t, err) - - ir, err := NewFileReader(filepath.Join(dir, indexFilename)) - require.NoError(t, err) - for p := range mi.postings { gotp, err := ir.Postings(ctx, p.Name, p.Value) require.NoError(t, err) @@ -523,8 +442,6 @@ func TestPersistence_index_e2e(t *testing.T) { } sort.Strings(expSymbols) require.Equal(t, expSymbols, gotSymbols) - - require.NoError(t, ir.Close()) } func TestWriter_ShouldReturnErrorOnSeriesWithDuplicatedLabelNames(t *testing.T) { @@ -624,39 +541,14 @@ func BenchmarkReader_ShardedPostings(b *testing.B) { numShards = 16 ) - dir, err := os.MkdirTemp("", "benchmark_reader_sharded_postings") - require.NoError(b, err) - defer func() { - require.NoError(b, os.RemoveAll(dir)) - }() - ctx := context.Background() - - // Generate an index. - fn := filepath.Join(dir, indexFilename) - - iw, err := NewWriter(ctx, fn) - require.NoError(b, err) - + var input indexWriterSeriesSlice for i := 1; i <= numSeries; i++ { - require.NoError(b, iw.AddSymbol(fmt.Sprintf("%10d", i))) + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("const", fmt.Sprintf("%10d", 1), "unique", fmt.Sprintf("%10d", i)), + }) } - require.NoError(b, iw.AddSymbol("const")) - require.NoError(b, iw.AddSymbol("unique")) - - for i := 1; i <= numSeries; i++ { - require.NoError(b, iw.AddSeries(storage.SeriesRef(i), - labels.FromStrings("const", fmt.Sprintf("%10d", 1), "unique", fmt.Sprintf("%10d", i)))) - } - - require.NoError(b, iw.Close()) - - b.ResetTimer() - - // Create a reader to read back all postings from the index. - ir, err := NewFileReader(fn) - require.NoError(b, err) - + ir, _, _ := createFileReader(ctx, b, input) b.ResetTimer() for n := 0; n < b.N; n++ { @@ -721,28 +613,17 @@ func TestChunksTimeOrdering(t *testing.T) { } func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { - dir := t.TempDir() - - idx, err := NewWriter(context.Background(), filepath.Join(dir, "index")) - require.NoError(t, err) - - seriesCount := 1000 - for i := 1; i <= seriesCount; i++ { - require.NoError(t, idx.AddSymbol(fmt.Sprintf("%4d", i))) + const seriesCount = 1000 + var input indexWriterSeriesSlice + for i := 1; i < seriesCount; i++ { + input = append(input, &indexWriterSeries{ + labels: labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), + chunks: []chunks.Meta{ + {Ref: 1, MinTime: 0, MaxTime: 10}, + }, + }) } - require.NoError(t, idx.AddSymbol("__name__")) - - for i := 1; i <= seriesCount; i++ { - require.NoError(t, idx.AddSeries(storage.SeriesRef(i), labels.FromStrings("__name__", fmt.Sprintf("%4d", i)), - chunks.Meta{Ref: 1, MinTime: 0, MaxTime: 10}, - )) - } - - require.NoError(t, idx.Close()) - - ir, err := NewFileReader(filepath.Join(dir, "index")) - require.NoError(t, err) - defer ir.Close() + ir, _, _ := createFileReader(context.Background(), t, input) failAfter := uint64(seriesCount / 2) // Fail after processing half of the series. ctx := &testutil.MockContextErrAfter{FailAfter: failAfter} @@ -752,3 +633,42 @@ func TestReader_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { require.Error(t, p.Err()) require.Equal(t, failAfter, ctx.Count()) } + +// createFileReader creates a temporary index file. It writes the provided input to this file. +// It returns a Reader for this file, the file's name, and the symbol map. +func createFileReader(ctx context.Context, tb testing.TB, input indexWriterSeriesSlice) (*Reader, string, map[string]struct{}) { + tb.Helper() + + fn := filepath.Join(tb.TempDir(), indexFilename) + + iw, err := NewWriter(ctx, fn) + require.NoError(tb, err) + + symbols := map[string]struct{}{} + for _, s := range input { + s.labels.Range(func(l labels.Label) { + symbols[l.Name] = struct{}{} + symbols[l.Value] = struct{}{} + }) + } + + syms := []string{} + for s := range symbols { + syms = append(syms, s) + } + slices.Sort(syms) + for _, s := range syms { + require.NoError(tb, iw.AddSymbol(s)) + } + for i, s := range input { + require.NoError(tb, iw.AddSeries(storage.SeriesRef(i), s.labels, s.chunks...)) + } + require.NoError(tb, iw.Close()) + + ir, err := NewFileReader(fn) + require.NoError(tb, err) + tb.Cleanup(func() { + require.NoError(tb, ir.Close()) + }) + return ir, fn, symbols +} From 0fea1065feb7b6e93cf529a7ff33a40f4e8a10cd Mon Sep 17 00:00:00 2001 From: kushagra Shukla Date: Sat, 18 May 2024 07:26:59 -0400 Subject: [PATCH 27/47] added line When set, query.max-concurrency may need to be adjusted accordingly. Signed-off-by: kushagra Shukla Signed-off-by: kushagra Shukla --- cmd/prometheus/main.go | 2 +- docs/command-line/prometheus.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 8218ffb18d..e250a95c82 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -418,7 +418,7 @@ func main() { serverOnlyFlag(a, "rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). Default("1m").SetValue(&cfg.resendDelay) - serverOnlyFlag(a, "rules.max-concurrent-evals", "Global concurrency limit for independent rules that can run concurrently."). + serverOnlyFlag(a, "rules.max-concurrent-evals", "Global concurrency limit for independent rules that can run concurrently. When set, \"query.max-concurrency\" may need to be adjusted accordingly."). Default("4").Int64Var(&cfg.maxConcurrentEvals) a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to `scrape.timestamp-tolerance` to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release."). diff --git a/docs/command-line/prometheus.md b/docs/command-line/prometheus.md index 93eaf251d0..aa9bf3bfb0 100644 --- a/docs/command-line/prometheus.md +++ b/docs/command-line/prometheus.md @@ -48,7 +48,7 @@ The Prometheus monitoring server | --rules.alert.for-outage-tolerance | Max time to tolerate prometheus outage for restoring "for" state of alert. Use with server mode only. | `1h` | | --rules.alert.for-grace-period | Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. Use with server mode only. | `10m` | | --rules.alert.resend-delay | Minimum amount of time to wait before resending an alert to Alertmanager. Use with server mode only. | `1m` | -| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. Use with server mode only. | `4` | +| --rules.max-concurrent-evals | Global concurrency limit for independent rules that can run concurrently. When set, "query.max-concurrency" may need to be adjusted accordingly. Use with server mode only. | `4` | | --alertmanager.notification-queue-capacity | The capacity of the queue for pending Alertmanager notifications. Use with server mode only. | `10000` | | --query.lookback-delta | The maximum lookback duration for retrieving metrics during expression evaluations and federation. Use with server mode only. | `5m` | | --query.timeout | Maximum time a query may take before being aborted. Use with server mode only. | `2m` | From 52f68a96a45b1c60eda9986656f332472babd047 Mon Sep 17 00:00:00 2001 From: George Krajcsovits Date: Sat, 18 May 2024 16:12:33 +0200 Subject: [PATCH 28/47] web/api: export defaultStatsRenderer (#14121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit defaultStatsRenderer->DefaultStatsRenderer Add short docstrings. I'd like to use the stats renderer to peek at the statistics in https://github.com/grafana/mimir/pull/7966 However I cannot call the original function without this export afterwards. Signed-off-by: György Krajcsovits --- web/api/v1/api.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index dc22365073..7bbf38a697 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -116,9 +116,11 @@ type RulesRetriever interface { AlertingRules() []*rules.AlertingRule } +// StatsRenderer converts engine statistics into a format suitable for the API. type StatsRenderer func(context.Context, *stats.Statistics, string) stats.QueryStats -func defaultStatsRenderer(_ context.Context, s *stats.Statistics, param string) stats.QueryStats { +// DefaultStatsRenderer is the default stats renderer for the API. +func DefaultStatsRenderer(_ context.Context, s *stats.Statistics, param string) stats.QueryStats { if param != "" { return stats.NewQueryStats(s) } @@ -272,7 +274,7 @@ func NewAPI( buildInfo: buildInfo, gatherer: gatherer, isAgent: isAgent, - statsRenderer: defaultStatsRenderer, + statsRenderer: DefaultStatsRenderer, remoteReadHandler: remote.NewReadHandler(logger, registerer, q, configFunc, remoteReadSampleLimit, remoteReadConcurrencyLimit, remoteReadMaxBytesInFrame), } @@ -461,7 +463,7 @@ func (api *API) query(r *http.Request) (result apiFuncResult) { // Optional stats field in response if parameter "stats" is not empty. sr := api.statsRenderer if sr == nil { - sr = defaultStatsRenderer + sr = DefaultStatsRenderer } qs := sr(ctx, qry.Stats(), r.FormValue("stats")) @@ -563,7 +565,7 @@ func (api *API) queryRange(r *http.Request) (result apiFuncResult) { // Optional stats field in response if parameter "stats" is not empty. sr := api.statsRenderer if sr == nil { - sr = defaultStatsRenderer + sr = DefaultStatsRenderer } qs := sr(ctx, qry.Stats(), r.FormValue("stats")) From fe9cb5a803f69b1bad21b374e6fbd9084d6481d2 Mon Sep 17 00:00:00 2001 From: Oleg Zaytsev Date: Tue, 21 May 2024 11:30:43 +0200 Subject: [PATCH 29/47] Check context every 128 labels instead of 100 (#14118) Follow up on https://github.com/prometheus/prometheus/pull/14096 As promised, I bring a benchmark, which shows a very small improvement if context is checked every 128 iterations of label instead of every 100. It's much easier for a computer to check modulo 128 than modulo 100. This is a very small 0-2% improvement but I'd say this is one of the hottest paths of the app so this is still relevant. Signed-off-by: Oleg Zaytsev --- tsdb/index/index.go | 2 +- tsdb/index/postings_test.go | 54 +++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/tsdb/index/index.go b/tsdb/index/index.go index 4ded4cbe20..480e6a8fc7 100644 --- a/tsdb/index/index.go +++ b/tsdb/index/index.go @@ -53,7 +53,7 @@ const ( seriesByteAlign = 16 // checkContextEveryNIterations is used in some tight loops to check if the context is done. - checkContextEveryNIterations = 100 + checkContextEveryNIterations = 128 ) type indexWriterSeries struct { diff --git a/tsdb/index/postings_test.go b/tsdb/index/postings_test.go index 7fa0a892b9..2cbc14ac64 100644 --- a/tsdb/index/postings_test.go +++ b/tsdb/index/postings_test.go @@ -22,8 +22,10 @@ import ( "math/rand" "sort" "strconv" + "strings" "testing" + "github.com/grafana/regexp" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/labels" @@ -1284,6 +1286,58 @@ func BenchmarkListPostings(b *testing.B) { } } +func slowRegexpString() string { + nums := map[int]struct{}{} + for i := 10_000; i < 20_000; i++ { + if i%3 == 0 { + nums[i] = struct{}{} + } + } + + var sb strings.Builder + sb.WriteString(".*(9999") + for i := range nums { + sb.WriteString("|") + sb.WriteString(strconv.Itoa(i)) + } + sb.WriteString(").*") + return sb.String() +} + +func BenchmarkMemPostings_PostingsForLabelMatching(b *testing.B) { + fast := regexp.MustCompile("^(100|200)$") + slowRegexp := "^" + slowRegexpString() + "$" + b.Logf("Slow regexp length = %d", len(slowRegexp)) + slow := regexp.MustCompile(slowRegexp) + + for _, labelValueCount := range []int{1_000, 10_000, 100_000} { + b.Run(fmt.Sprintf("labels=%d", labelValueCount), func(b *testing.B) { + mp := NewMemPostings() + for i := 0; i < labelValueCount; i++ { + mp.Add(storage.SeriesRef(i), labels.FromStrings("label", strconv.Itoa(i))) + } + + fp, err := ExpandPostings(mp.PostingsForLabelMatching(context.Background(), "label", fast.MatchString)) + require.NoError(b, err) + b.Logf("Fast matcher matches %d series", len(fp)) + b.Run("matcher=fast", func(b *testing.B) { + for i := 0; i < b.N; i++ { + mp.PostingsForLabelMatching(context.Background(), "label", fast.MatchString).Next() + } + }) + + sp, err := ExpandPostings(mp.PostingsForLabelMatching(context.Background(), "label", slow.MatchString)) + require.NoError(b, err) + b.Logf("Slow matcher matches %d series", len(sp)) + b.Run("matcher=slow", func(b *testing.B) { + for i := 0; i < b.N; i++ { + mp.PostingsForLabelMatching(context.Background(), "label", slow.MatchString).Next() + } + }) + }) + } +} + func TestMemPostings_PostingsForLabelMatchingHonorsContextCancel(t *testing.T) { memP := NewMemPostings() seriesCount := 10 * checkContextEveryNIterations From 3127a4029ea5e860c5b93441ecf5122f6cf82df0 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 21 May 2024 14:44:55 +0200 Subject: [PATCH 30/47] doc: Clarify the limits of dumping/backfilling via OpenMetrics This is about native histograms (not yet supported) and staleness markers (for which OpenMetrics support isn't even planned). Signed-off-by: beorn7 --- cmd/promtool/main.go | 2 +- docs/command-line/promtool.md | 2 +- docs/storage.md | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cmd/promtool/main.go b/cmd/promtool/main.go index 40d933376c..e1d275e97e 100644 --- a/cmd/promtool/main.go +++ b/cmd/promtool/main.go @@ -240,7 +240,7 @@ func main() { dumpMaxTime := tsdbDumpCmd.Flag("max-time", "Maximum timestamp to dump.").Default(strconv.FormatInt(math.MaxInt64, 10)).Int64() dumpMatch := tsdbDumpCmd.Flag("match", "Series selector. Can be specified multiple times.").Default("{__name__=~'(?s:.*)'}").Strings() - tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped.") + tsdbDumpOpenMetricsCmd := tsdbCmd.Command("dump-openmetrics", "[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics.") dumpOpenMetricsPath := tsdbDumpOpenMetricsCmd.Arg("db path", "Database path (default is "+defaultDBPath+").").Default(defaultDBPath).String() dumpOpenMetricsSandboxDirRoot := tsdbDumpOpenMetricsCmd.Flag("sandbox-dir-root", "Root directory where a sandbox directory would be created in case WAL replay generates chunks. The sandbox directory is cleaned up at the end.").Default(defaultDBPath).String() dumpOpenMetricsMinTime := tsdbDumpOpenMetricsCmd.Flag("min-time", "Minimum timestamp to dump.").Default(strconv.FormatInt(math.MinInt64, 10)).Int64() diff --git a/docs/command-line/promtool.md b/docs/command-line/promtool.md index 9ed51fb7c9..443cd3f0cb 100644 --- a/docs/command-line/promtool.md +++ b/docs/command-line/promtool.md @@ -585,7 +585,7 @@ Dump samples from a TSDB. ##### `promtool tsdb dump-openmetrics` -[Experimental] Dump samples from a TSDB into OpenMetrics format. Native histograms are not dumped. +[Experimental] Dump samples from a TSDB into OpenMetrics text format, excluding native histograms and staleness markers, which are not representable in OpenMetrics. diff --git a/docs/storage.md b/docs/storage.md index 46bb7210e0..b66f2062af 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -197,6 +197,9 @@ or time-series database to Prometheus. To do so, the user must first convert the source data into [OpenMetrics](https://openmetrics.io/) format, which is the input format for the backfilling as described below. +Note that native histograms and staleness markers are not supported by this +procedure, as they cannot be represented in the OpenMetrics format. + ### Usage Backfilling can be used via the Promtool command line. Promtool will write the blocks From fabcd7e7c6db57ebd5456d7f5239f22cde37ba32 Mon Sep 17 00:00:00 2001 From: Ayoub Mrini Date: Tue, 21 May 2024 19:07:29 +0200 Subject: [PATCH 31/47] fix(api): Send warnings only if the limit is really exceeded (#14116) for the the series, label names and label values APIs Add warnings count check to TestEndpoints The limit param was added in https://github.com/prometheus/prometheus/pull/13396 Signed-off-by: machine424 --- web/api/v1/api.go | 7 ++++--- web/api/v1/api_test.go | 40 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/web/api/v1/api.go b/web/api/v1/api.go index 7bbf38a697..f0884926e1 100644 --- a/web/api/v1/api.go +++ b/web/api/v1/api.go @@ -704,7 +704,7 @@ func (api *API) labelNames(r *http.Request) apiFuncResult { names = []string{} } - if len(names) >= limit { + if len(names) > limit { names = names[:limit] warnings = warnings.Add(errors.New("results truncated due to limit")) } @@ -793,7 +793,7 @@ func (api *API) labelValues(r *http.Request) (result apiFuncResult) { slices.Sort(vals) - if len(vals) >= limit { + if len(vals) > limit { vals = vals[:limit] warnings = warnings.Add(errors.New("results truncated due to limit")) } @@ -889,7 +889,8 @@ func (api *API) series(r *http.Request) (result apiFuncResult) { } metrics = append(metrics, set.At().Labels()) - if len(metrics) >= limit { + if len(metrics) > limit { + metrics = metrics[:limit] warnings.Add(errors.New("results truncated due to limit")) return apiFuncResult{metrics, nil, warnings, closer} } diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 7d55dd11a0..74cd2239d5 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -1060,6 +1060,7 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E responseLen int // If nonzero, check only the length; `response` is ignored. responseMetadataTotal int responseAsJSON string + warningsCount int errType errorType sorter func(interface{}) metadata []targetMetadata @@ -1417,7 +1418,17 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E "match[]": []string{"test_metric1"}, "limit": []string{"1"}, }, - responseLen: 1, // API does not specify which particular value will come back. + responseLen: 1, // API does not specify which particular value will come back. + warningsCount: 1, + }, + { + endpoint: api.series, + query: url.Values{ + "match[]": []string{"test_metric1"}, + "limit": []string{"2"}, + }, + responseLen: 2, // API does not specify which particular value will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, // Missing match[] query params in series requests. { @@ -2700,7 +2711,19 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "limit": []string{"2"}, }, - responseLen: 2, // API does not specify which particular values will come back. + responseLen: 2, // API does not specify which particular values will come back. + warningsCount: 1, + }, + { + endpoint: api.labelValues, + params: map[string]string{ + "name": "__name__", + }, + query: url.Values{ + "limit": []string{"4"}, + }, + responseLen: 4, // API does not specify which particular values will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, // Label names. { @@ -2847,7 +2870,16 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E query: url.Values{ "limit": []string{"2"}, }, - responseLen: 2, // API does not specify which particular values will come back. + responseLen: 2, // API does not specify which particular values will come back. + warningsCount: 1, + }, + { + endpoint: api.labelNames, + query: url.Values{ + "limit": []string{"3"}, + }, + responseLen: 3, // API does not specify which particular values will come back. + warningsCount: 0, // No warnings if limit isn't exceeded. }, }...) } @@ -2924,6 +2956,8 @@ func testEndpoints(t *testing.T, api *API, tr *testTargetRetriever, es storage.E require.NoError(t, err) require.JSONEq(t, test.responseAsJSON, string(s)) } + + require.Len(t, res.warnings, test.warningsCount) }) } }) From 2d2b440304d0ec599bf19b102724aab67650b91e Mon Sep 17 00:00:00 2001 From: Jayapriya Pai Date: Tue, 21 May 2024 22:38:35 +0530 Subject: [PATCH 32/47] fix: correct the typo in azuread sdk auth (#14106) Signed-off-by: Jayapriya Pai --- storage/remote/azuread/azuread.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/remote/azuread/azuread.go b/storage/remote/azuread/azuread.go index e2058fb54d..58520c6a5d 100644 --- a/storage/remote/azuread/azuread.go +++ b/storage/remote/azuread/azuread.go @@ -75,7 +75,7 @@ type AzureADConfig struct { //nolint:revive // exported. // OAuth is the oauth config that is being used to authenticate. OAuth *OAuthConfig `yaml:"oauth,omitempty"` - // OAuth is the oauth config that is being used to authenticate. + // SDK is the SDK config that is being used to authenticate. SDK *SDKConfig `yaml:"sdk,omitempty"` // Cloud is the Azure cloud in which the service is running. Example: AzurePublic/AzureGovernment/AzureChina. From 5be753f17731b1d37646737d5e0c8fe73d2b67d2 Mon Sep 17 00:00:00 2001 From: Mohamed Awnallah Date: Fri, 24 May 2024 09:15:00 +0300 Subject: [PATCH 33/47] .gitpod.Dockerfile: Auto-fetch Go and goyacc vers In this commit we auto-fetch Go version from go.mod and goyacc version from Makefile in the Prometheus repo. Signed-off-by: Mohamed Awnallah --- .gitpod.Dockerfile | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile index d645db5de1..2370ec5f5c 100644 --- a/.gitpod.Dockerfile +++ b/.gitpod.Dockerfile @@ -1,15 +1,33 @@ FROM gitpod/workspace-full +# Set Node.js version as an environment variable. ENV CUSTOM_NODE_VERSION=16 -ENV CUSTOM_GO_VERSION=1.19 -ENV GOPATH=$HOME/go-packages -ENV GOROOT=$HOME/go -ENV PATH=$GOROOT/bin:$GOPATH/bin:$PATH +# Install and use the specified Node.js version via nvm. RUN bash -c ". .nvm/nvm.sh && nvm install ${CUSTOM_NODE_VERSION} && nvm use ${CUSTOM_NODE_VERSION} && nvm alias default ${CUSTOM_NODE_VERSION}" +# Ensure nvm uses the default Node.js version in all new shells. RUN echo "nvm use default &>/dev/null" >> ~/.bashrc.d/51-nvm-fix -RUN curl -fsSL https://dl.google.com/go/go${GO_VERSION}.linux-amd64.tar.gz | tar xzs \ - && printf '%s\n' 'export GOPATH=/workspace/go' \ - 'export PATH=$GOPATH/bin:$PATH' > $HOME/.bashrc.d/300-go +# Remove any existing Go installation in $HOME path. +RUN rm -rf $HOME/go $HOME/go-packages + +# Export go environment variables. +RUN echo "export GOPATH=/workspace/go" >> ~/.bashrc.d/300-go && \ + echo "export GOBIN=\$GOPATH/bin" >> ~/.bashrc.d/300-go && \ + echo "export GOROOT=${HOME}/go" >> ~/.bashrc.d/300-go && \ + echo "export PATH=\$GOROOT/bin:\$GOBIN:\$PATH" >> ~/.bashrc + +# Reload the environment variables to ensure go environment variables are +# available in subsequent commands. +RUN bash -c "source ~/.bashrc && source ~/.bashrc.d/300-go" + +# Fetch the Go version dynamically from the Prometheus go.mod file and Install Go in $HOME path. +RUN export CUSTOM_GO_VERSION=$(curl -sSL "https://raw.githubusercontent.com/prometheus/prometheus/main/go.mod" | awk '/^go/{print $2".0"}') && \ + curl -fsSL "https://dl.google.com/go/go${CUSTOM_GO_VERSION}.linux-amd64.tar.gz" | \ + tar -xz -C $HOME + +# Fetch the goyacc parser version dynamically from the Prometheus Makefile +# and install it globally in $GOBIN path. +RUN GOYACC_VERSION=$(curl -fsSL "https://raw.githubusercontent.com/prometheus/prometheus/main/Makefile" | awk -F'=' '/GOYACC_VERSION \?=/{gsub(/ /, "", $2); print $2}') && \ + go install "golang.org/x/tools/cmd/goyacc@${GOYACC_VERSION}" From 8894d65cd6135635c4ac9cf100464e6aacd29593 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Fri, 24 May 2024 19:43:21 -0700 Subject: [PATCH 34/47] Fix head stats and hooks when replaying a corrupted snapshot (#14079) * Fixing head stats and hooks when replaying a corrupted snapshot Signed-off-by: alanprot * Fixing create/removed series metrics Signed-off-by: alanprot * Refactoring to have common code between gc and flush method Signed-off-by: alanprot * Update tsdb/head.go Co-authored-by: Ayoub Mrini Signed-off-by: Alan Protasio * refactor Signed-off-by: alanprot * Update tsdb/head_test.go Co-authored-by: Ganesh Vernekar Signed-off-by: Alan Protasio * Update tsdb/head_test.go Co-authored-by: Ganesh Vernekar Signed-off-by: Alan Protasio --------- Signed-off-by: alanprot Signed-off-by: Alan Protasio Co-authored-by: Ayoub Mrini Co-authored-by: Ganesh Vernekar --- tsdb/head.go | 74 +++++++++++++++++++++++++++++------------------ tsdb/head_test.go | 48 ++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 28 deletions(-) diff --git a/tsdb/head.go b/tsdb/head.go index 8b3d9787ca..d5f7144fdb 100644 --- a/tsdb/head.go +++ b/tsdb/head.go @@ -310,12 +310,22 @@ func (h *Head) resetInMemoryState() error { return err } + if h.series != nil { + // reset the existing series to make sure we call the appropriated hooks + // and increment the series removed metrics + fs := h.series.iterForDeletion(func(_ int, _ uint64, s *memSeries, flushedForCallback map[chunks.HeadSeriesRef]labels.Labels) { + // All series should be flushed + flushedForCallback[s.ref] = s.lset + }) + h.metrics.seriesRemoved.Add(float64(fs)) + } + + h.series = newStripeSeries(h.opts.StripeSize, h.opts.SeriesCallback) h.iso = newIsolation(h.opts.IsolationDisabled) h.oooIso = newOOOIsolation() - + h.numSeries.Store(0) h.exemplarMetrics = em h.exemplars = es - h.series = newStripeSeries(h.opts.StripeSize, h.opts.SeriesCallback) h.postings = index.NewUnorderedMemPostings() h.tombstones = tombstones.NewMemTombstones() h.deleted = map[chunks.HeadSeriesRef]int{} @@ -1861,11 +1871,10 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st // minMmapFile is the min mmap file number seen in the series (in-order and out-of-order) after gc'ing the series. func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (_ map[storage.SeriesRef]struct{}, _ int, _, _ int64, minMmapFile int) { var ( - deleted = map[storage.SeriesRef]struct{}{} - rmChunks = 0 - actualMint int64 = math.MaxInt64 - minOOOTime int64 = math.MaxInt64 - deletedFromPrevStripe = 0 + deleted = map[storage.SeriesRef]struct{}{} + rmChunks = 0 + actualMint int64 = math.MaxInt64 + minOOOTime int64 = math.MaxInt64 ) minMmapFile = math.MaxInt32 @@ -1923,27 +1932,7 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( deletedForCallback[series.ref] = series.lset } - // Run through all series shard by shard, checking which should be deleted. - for i := 0; i < s.size; i++ { - deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe) - s.locks[i].Lock() - - // Delete conflicts first so seriesHashmap.del doesn't move them to the `unique` field, - // after deleting `unique`. - for hash, all := range s.hashes[i].conflicts { - for _, series := range all { - check(i, hash, series, deletedForCallback) - } - } - for hash, series := range s.hashes[i].unique { - check(i, hash, series, deletedForCallback) - } - - s.locks[i].Unlock() - - s.seriesLifecycleCallback.PostDeletion(deletedForCallback) - deletedFromPrevStripe = len(deletedForCallback) - } + s.iterForDeletion(check) if actualMint == math.MaxInt64 { actualMint = mint @@ -1952,6 +1941,35 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) ( return deleted, rmChunks, actualMint, minOOOTime, minMmapFile } +// The iterForDeletion function iterates through all series, invoking the checkDeletedFunc for each. +// The checkDeletedFunc takes a map as input and should add to it all series that were deleted and should be included +// when invoking the PostDeletion hook. +func (s *stripeSeries) iterForDeletion(checkDeletedFunc func(int, uint64, *memSeries, map[chunks.HeadSeriesRef]labels.Labels)) int { + seriesSetFromPrevStripe := 0 + totalDeletedSeries := 0 + // Run through all series shard by shard + for i := 0; i < s.size; i++ { + seriesSet := make(map[chunks.HeadSeriesRef]labels.Labels, seriesSetFromPrevStripe) + s.locks[i].Lock() + // Iterate conflicts first so f doesn't move them to the `unique` field, + // after deleting `unique`. + for hash, all := range s.hashes[i].conflicts { + for _, series := range all { + checkDeletedFunc(i, hash, series, seriesSet) + } + } + + for hash, series := range s.hashes[i].unique { + checkDeletedFunc(i, hash, series, seriesSet) + } + s.locks[i].Unlock() + s.seriesLifecycleCallback.PostDeletion(seriesSet) + totalDeletedSeries += len(seriesSet) + seriesSetFromPrevStripe = len(seriesSet) + } + return totalDeletedSeries +} + func (s *stripeSeries) getByID(id chunks.HeadSeriesRef) *memSeries { i := uint64(id) & uint64(s.size-1) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 804886ad7b..6b4ec4ca41 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -4007,6 +4007,9 @@ func TestSnapshotError(t *testing.T) { require.NoError(t, err) f, err := os.OpenFile(path.Join(snapDir, files[0].Name()), os.O_RDWR, 0) require.NoError(t, err) + // Create snapshot backup to be restored on future test cases. + snapshotBackup, err := io.ReadAll(f) + require.NoError(t, err) _, err = f.WriteAt([]byte{0b11111111}, 18) require.NoError(t, err) require.NoError(t, f.Close()) @@ -4021,10 +4024,44 @@ func TestSnapshotError(t *testing.T) { // There should be no series in the memory after snapshot error since WAL was removed. require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + require.Equal(t, uint64(0), head.NumSeries()) require.Nil(t, head.series.getByHash(lbls.Hash(), lbls)) tm, err = head.tombstones.Get(1) require.NoError(t, err) require.Empty(t, tm) + require.NoError(t, head.Close()) + + // Test corruption in the middle of the snapshot. + f, err = os.OpenFile(path.Join(snapDir, files[0].Name()), os.O_RDWR, 0) + require.NoError(t, err) + _, err = f.WriteAt(snapshotBackup, 0) + require.NoError(t, err) + _, err = f.WriteAt([]byte{0b11111111}, 300) + require.NoError(t, err) + require.NoError(t, f.Close()) + + c := &countSeriesLifecycleCallback{} + opts := head.opts + opts.SeriesCallback = c + + w, err = wlog.NewSize(nil, nil, head.wal.Dir(), 32768, wlog.CompressionNone) + require.NoError(t, err) + head, err = NewHead(prometheus.NewRegistry(), nil, w, nil, head.opts, nil) + require.NoError(t, err) + require.NoError(t, head.Init(math.MinInt64)) + + // There should be no series in the memory after snapshot error since WAL was removed. + require.Equal(t, 1.0, prom_testutil.ToFloat64(head.metrics.snapshotReplayErrorTotal)) + require.Nil(t, head.series.getByHash(lbls.Hash(), lbls)) + require.Equal(t, uint64(0), head.NumSeries()) + + // Since the snapshot could replay certain series, we continue invoking the create hooks. + // In such instances, we need to ensure that we also trigger the delete hooks when resetting the memory. + require.Equal(t, int64(2), c.created.Load()) + require.Equal(t, int64(2), c.deleted.Load()) + + require.Equal(t, 2.0, prom_testutil.ToFloat64(head.metrics.seriesRemoved)) + require.Equal(t, 2.0, prom_testutil.ToFloat64(head.metrics.seriesCreated)) } func TestHistogramMetrics(t *testing.T) { @@ -5829,3 +5866,14 @@ func TestHeadCompactableDoesNotCompactEmptyHead(t *testing.T) { require.False(t, head.compactable()) } + +type countSeriesLifecycleCallback struct { + created atomic.Int64 + deleted atomic.Int64 +} + +func (c *countSeriesLifecycleCallback) PreCreation(labels.Labels) error { return nil } +func (c *countSeriesLifecycleCallback) PostCreation(labels.Labels) { c.created.Inc() } +func (c *countSeriesLifecycleCallback) PostDeletion(s map[chunks.HeadSeriesRef]labels.Labels) { + c.deleted.Add(int64(len(s))) +} From f3b8750339d65fc25714c8f92f8afacefb6a727d Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Mon, 27 May 2024 17:14:17 +0200 Subject: [PATCH 35/47] Join errors Signed-off-by: Arve Knudsen --- promql/query_logger.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/promql/query_logger.go b/promql/query_logger.go index 76528f9584..7e06ebb97f 100644 --- a/promql/query_logger.go +++ b/promql/query_logger.go @@ -16,6 +16,7 @@ package promql import ( "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -93,14 +94,14 @@ type mmapedFile struct { func (f *mmapedFile) Close() error { err := f.m.Unmap() - if fErr := f.f.Close(); fErr != nil && err == nil { - return fmt.Errorf("close mmapedFile.f: %w", fErr) + if err != nil { + err = fmt.Errorf("mmapedFile: unmapping: %w", err) + } + if fErr := f.f.Close(); fErr != nil { + return errors.Join(fmt.Errorf("close mmapedFile.f: %w", fErr), err) } - if err != nil { - return fmt.Errorf("mmapedFile: unmapping: %w", err) - } - return nil + return err } func getMMapedFile(filename string, filesize int, logger log.Logger) ([]byte, io.Closer, error) { From e6f1f7e32db72df1cf42bedc4df087d99e1f39d3 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Mon, 27 May 2024 18:25:02 +0200 Subject: [PATCH 36/47] docs/configuration: clarify OpenStack metadata labels (#14149) On several occasions, users assumed that the `__meta_openstack_tag_` labels were about tags [1] instead of metadata [2]. While we can't really change the Prometheus label name, we can at least clarify in the documentation what's the information carried in the label. [1] https://specs.openstack.org/openstack/api-wg/guidelines/tags.html [2] https://docs.openstack.org/api-ref/compute/#server-metadata-servers-metadata Signed-off-by: Simon Pasquier --- docs/configuration/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index 61e86988e1..dedd7a0f7c 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -1349,7 +1349,7 @@ interface. The following meta labels are available on targets during [relabeling](#relabel_config): * `__meta_openstack_address_pool`: the pool of the private IP. -* `__meta_openstack_instance_flavor`: the flavor of the OpenStack instance. +* `__meta_openstack_instance_flavor`: the flavor ID of the OpenStack instance. * `__meta_openstack_instance_id`: the OpenStack instance ID. * `__meta_openstack_instance_image`: the ID of the image the OpenStack instance is using. * `__meta_openstack_instance_name`: the OpenStack instance name. @@ -1357,7 +1357,7 @@ The following meta labels are available on targets during [relabeling](#relabel_ * `__meta_openstack_private_ip`: the private IP of the OpenStack instance. * `__meta_openstack_project_id`: the project (tenant) owning this instance. * `__meta_openstack_public_ip`: the public IP of the OpenStack instance. -* `__meta_openstack_tag_`: each tag value of the instance. +* `__meta_openstack_tag_`: each metadata item of the instance, with any unsupported characters converted to an underscore. * `__meta_openstack_user_id`: the user account owning the tenant. See below for the configuration options for OpenStack discovery: From b2396c0c8f6420bbd0681d4500c33851e75681f3 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Mon, 27 May 2024 21:57:45 +0200 Subject: [PATCH 37/47] Upgrade to golangci-lint v1.59.0 Signed-off-by: Arve Knudsen --- .github/workflows/ci.yml | 2 +- .golangci.yml | 2 +- Makefile.common | 2 +- cmd/prometheus/query_log_test.go | 4 ++-- scripts/golangci-lint.yml | 2 +- tsdb/chunks/queue_test.go | 2 +- tsdb/db_test.go | 10 +++++----- tsdb/head_test.go | 4 ++-- tsdb/wlog/checkpoint_test.go | 2 +- web/api/v1/api_test.go | 2 ++ 10 files changed, 17 insertions(+), 15 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cead7abfdb..6d14b9324e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -175,7 +175,7 @@ jobs: with: args: --verbose # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. - version: v1.56.2 + version: v1.59.0 fuzzing: uses: ./.github/workflows/fuzzing.yml if: github.event_name == 'pull_request' diff --git a/.golangci.yml b/.golangci.yml index c63184877a..f81b29ed2d 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -47,7 +47,7 @@ issues: source: "^// ===" - linters: - perfsprint - text: "fmt.Sprintf can be replaced with string addition" + text: "fmt.Sprintf can be replaced with string concatenation" linters-settings: depguard: rules: diff --git a/Makefile.common b/Makefile.common index 0e9ace29b4..1617292350 100644 --- a/Makefile.common +++ b/Makefile.common @@ -61,7 +61,7 @@ PROMU_URL := https://github.com/prometheus/promu/releases/download/v$(PROMU_ SKIP_GOLANGCI_LINT := GOLANGCI_LINT := GOLANGCI_LINT_OPTS ?= -GOLANGCI_LINT_VERSION ?= v1.56.2 +GOLANGCI_LINT_VERSION ?= v1.59.0 # golangci-lint only supports linux, darwin and windows platforms on i386/amd64/arm64. # windows isn't included here because of the path separator being different. ifeq ($(GOHOSTOS),$(filter $(GOHOSTOS),linux darwin)) diff --git a/cmd/prometheus/query_log_test.go b/cmd/prometheus/query_log_test.go index dd6b56672c..9a7a3ed855 100644 --- a/cmd/prometheus/query_log_test.go +++ b/cmd/prometheus/query_log_test.go @@ -296,7 +296,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Equal(t, 1, qc) } else { - require.Greater(t, qc, 0, "no queries logged") + require.Positive(t, qc, "no queries logged") } p.validateLastQuery(t, ql) @@ -366,7 +366,7 @@ func (p *queryLogTest) run(t *testing.T) { if p.exactQueryCount() { require.Equal(t, 1, qc) } else { - require.Greater(t, qc, 0, "no queries logged") + require.Positive(t, qc, "no queries logged") } } diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index b36f71c3c1..da99a131e7 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -35,4 +35,4 @@ jobs: - name: Lint uses: golangci/golangci-lint-action@9d1e0624a798bb64f6c3cea93db47765312263dc # v5.1.0 with: - version: v1.56.2 + version: v1.59.0 diff --git a/tsdb/chunks/queue_test.go b/tsdb/chunks/queue_test.go index 5756e45856..9f761a5f3b 100644 --- a/tsdb/chunks/queue_test.go +++ b/tsdb/chunks/queue_test.go @@ -55,7 +55,7 @@ func (q *writeJobQueue) assertInvariants(t *testing.T) { require.Len(t, s.segment, s.nextWrite) } // Last segment must have at least one element, or we wouldn't have created it. - require.Greater(t, s.nextWrite, 0) + require.Positive(t, s.nextWrite) } require.Equal(t, q.size, totalSize) diff --git a/tsdb/db_test.go b/tsdb/db_test.go index f0b27dcc2a..5965e53179 100644 --- a/tsdb/db_test.go +++ b/tsdb/db_test.go @@ -4574,7 +4574,7 @@ func TestOOOCompaction(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) require.Len(t, ms.ooo.oooMmappedChunks, 14) // 7 original, 7 duplicate. } checkNonEmptyOOOChunk(series1) @@ -4715,7 +4715,7 @@ func TestOOOCompactionWithNormalCompaction(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) } // If the normal Head is not compacted, the OOO head compaction does not take place. @@ -4816,7 +4816,7 @@ func TestOOOCompactionWithDisabledWriteLog(t *testing.T) { ms, created, err := db.head.getOrCreate(lbls.Hash(), lbls) require.NoError(t, err) require.False(t, created) - require.Greater(t, ms.ooo.oooHeadChunk.chunk.NumSamples(), 0) + require.Positive(t, ms.ooo.oooHeadChunk.chunk.NumSamples()) } // If the normal Head is not compacted, the OOO head compaction does not take place. @@ -5517,8 +5517,8 @@ func TestWBLAndMmapReplay(t *testing.T) { addedRecs++ require.NoError(t, newWbl.Log(rec)) } - require.Greater(t, markers, 0) - require.Greater(t, addedRecs, 0) + require.Positive(t, markers) + require.Positive(t, addedRecs) require.NoError(t, newWbl.Close()) require.NoError(t, sr.Close()) require.NoError(t, os.RemoveAll(wblDir)) diff --git a/tsdb/head_test.go b/tsdb/head_test.go index 6b4ec4ca41..bb437ab598 100644 --- a/tsdb/head_test.go +++ b/tsdb/head_test.go @@ -3557,7 +3557,7 @@ func TestHistogramInWALAndMmapChunk(t *testing.T) { expMmapChunks = append(expMmapChunks, &cpy) } expHeadChunkSamples := ms.headChunks.chunk.NumSamples() - require.Greater(t, expHeadChunkSamples, 0) + require.Positive(t, expHeadChunkSamples) // Series with mix of histograms and float. s2 := labels.FromStrings("a", "b2") @@ -4638,7 +4638,7 @@ func TestChunkSnapshotTakenAfterIncompleteSnapshot(t *testing.T) { require.NoError(t, err) require.NotEqual(t, "", name) require.Equal(t, 0, idx) - require.Greater(t, offset, 0) + require.Positive(t, offset) } // TestWBLReplay checks the replay at a low level. diff --git a/tsdb/wlog/checkpoint_test.go b/tsdb/wlog/checkpoint_test.go index ea11c5dd75..a9786454de 100644 --- a/tsdb/wlog/checkpoint_test.go +++ b/tsdb/wlog/checkpoint_test.go @@ -251,7 +251,7 @@ func TestCheckpoint(t *testing.T) { require.NoError(t, w.Truncate(107)) require.NoError(t, DeleteCheckpoints(w.Dir(), 106)) require.Equal(t, histogramsInWAL+floatHistogramsInWAL+samplesInWAL, stats.TotalSamples) - require.Greater(t, stats.DroppedSamples, 0) + require.Positive(t, stats.DroppedSamples) // Only the new checkpoint should be left. files, err := os.ReadDir(dir) diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go index 74cd2239d5..b30890893b 100644 --- a/web/api/v1/api_test.go +++ b/web/api/v1/api_test.go @@ -2973,8 +2973,10 @@ func assertAPIError(t *testing.T, got *apiError, exp errorType) { t.Helper() if exp == errorNone { + //nolint:testifylint require.Nil(t, got) } else { + //nolint:testifylint require.NotNil(t, got) require.Equal(t, exp, got.typ, "(%q)", got) } From 013998fa7fca99b210c0b6cf13f9cea0d33f62b6 Mon Sep 17 00:00:00 2001 From: Matthieu MOREL Date: Tue, 28 May 2024 10:06:08 +0200 Subject: [PATCH 38/47] Bump golangci-lint action (#14154) * Bump golangci-lint action to 6.0.1 * Synchronize script/golangci-lint.yml and workflows/ci.yml Signed-off-by: Matthieu MOREL --- .github/workflows/ci.yml | 5 ++--- scripts/golangci-lint.yml | 7 ++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d14b9324e..a56140c191 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -161,17 +161,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4.1.4 + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: - cache: false go-version: 1.22.x - name: Install snmp_exporter/generator dependencies run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@3cfe3a4abbb849e10058ce4af15d205b6da42804 # v4.0.0 + uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1 with: args: --verbose # Make sure to sync this with Makefile.common and scripts/golangci-lint.yml. diff --git a/scripts/golangci-lint.yml b/scripts/golangci-lint.yml index da99a131e7..5ceb590991 100644 --- a/scripts/golangci-lint.yml +++ b/scripts/golangci-lint.yml @@ -24,8 +24,8 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - - name: install Go + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + - name: Install Go uses: actions/setup-go@0c52d547c9bc32b1aa3301fd7a9cb496313a4491 # v5.0.0 with: go-version: 1.22.x @@ -33,6 +33,7 @@ jobs: run: sudo apt-get update && sudo apt-get -y install libsnmp-dev if: github.repository == 'prometheus/snmp_exporter' - name: Lint - uses: golangci/golangci-lint-action@9d1e0624a798bb64f6c3cea93db47765312263dc # v5.1.0 + uses: golangci/golangci-lint-action@a4f60bb28d35aeee14e6880718e0c85ff1882e64 # v6.0.1 with: + args: --verbose version: v1.59.0 From 25b0991c3d8485f5c8f8af0023d6ce46921745b2 Mon Sep 17 00:00:00 2001 From: SuperQ Date: Mon, 27 May 2024 14:43:39 +0200 Subject: [PATCH 39/47] Enable additional Go metrics Enable some additioal Go runtime metrics in order to observe additional performance data. Enables a number of new metrics: ``` HELP go_gc_cycles_automatic_gc_cycles_total Count of completed GC cycles generated by the Go runtime. HELP go_gc_cycles_forced_gc_cycles_total Count of completed GC cycles forced by the application. HELP go_gc_cycles_total_gc_cycles_total Count of all completed GC cycles. HELP go_gc_gogc_percent Heap size target percentage configured by the user, otherwise 100. This value is set by the GOGC environment variable, and the runtime/debug.SetGCPercent function. HELP go_gc_gomemlimit_bytes Go runtime memory limit configured by the user, otherwise math.MaxInt64. This value is set by the GOMEMLIMIT environment variable, and the runtime/debug.SetMemoryLimit function. HELP go_gc_heap_allocs_by_size_bytes Distribution of heap allocations by approximate size. Bucket counts increase monotonically. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_allocs_bytes_total Cumulative sum of memory allocated to the heap by the application. HELP go_gc_heap_allocs_objects_total Cumulative count of heap allocations triggered by the application. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_frees_by_size_bytes Distribution of freed heap allocations by approximate size. Bucket counts increase monotonically. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_frees_bytes_total Cumulative sum of heap memory freed by the garbage collector. HELP go_gc_heap_frees_objects_total Cumulative count of heap allocations whose storage was freed by the garbage collector. Note that this does not include tiny objects as defined by /gc/heap/tiny/allocs:objects, only tiny blocks. HELP go_gc_heap_goal_bytes Heap size target for the end of the GC cycle. HELP go_gc_heap_live_bytes Heap memory occupied by live objects that were marked by the previous GC. HELP go_gc_heap_objects_objects Number of objects, live or unswept, occupying heap memory. HELP go_gc_heap_tiny_allocs_objects_total Count of small allocations that are packed together into blocks. These allocations are counted separately from other allocations because each individual allocation is not tracked by the runtime, only their block. Each block is already accounted for in allocs-by-size and frees-by-size. HELP go_gc_limiter_last_enabled_gc_cycle GC cycle the last time the GC CPU limiter was enabled. This metric is useful for diagnosing the root cause of an out-of-memory error, because the limiter trades memory for CPU time when the GC's CPU time gets too high. This is most likely to occur with use of SetMemoryLimit. The first GC cycle is cycle 1, so a value of 0 indicates that it was never enabled. HELP go_gc_pauses_seconds Deprecated. Prefer the identical /sched/pauses/total/gc:seconds. HELP go_gc_scan_globals_bytes The total amount of global variable space that is scannable. HELP go_gc_scan_heap_bytes The total amount of heap space that is scannable. HELP go_gc_scan_stack_bytes The number of bytes of stack that were scanned last GC cycle. HELP go_gc_scan_total_bytes The total amount space that is scannable. Sum of all metrics in /gc/scan. HELP go_gc_stack_starting_size_bytes The stack size of new goroutines. HELP go_sched_gomaxprocs_threads The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously. HELP go_sched_goroutines_goroutines Count of live goroutines. HELP go_sched_latencies_seconds Distribution of the time goroutines have spent in the scheduler in a runnable state before actually running. Bucket counts increase monotonically. HELP go_sched_pauses_stopping_gc_seconds Distribution of individual GC-related stop-the-world stopping latencies. This is the time it takes from deciding to stop the world until all Ps are stopped. This is a subset of the total GC-related stop-the-world time (/sched/pauses/total/gc:seconds). During this time, some threads may be executing. Bucket counts increase monotonically. HELP go_sched_pauses_stopping_other_seconds Distribution of individual non-GC-related stop-the-world stopping latencies. This is the time it takes from deciding to stop the world until all Ps are stopped. This is a subset of the total non-GC-related stop-the-world time (/sched/pauses/total/other:seconds). During this time, some threads may be executing. Bucket counts increase monotonically. HELP go_sched_pauses_total_gc_seconds Distribution of individual GC-related stop-the-world pause latencies. This is the time from deciding to stop the world until the world is started again. Some of this time is spent getting all threads to stop (this is measured directly in /sched/pauses/stopping/gc:seconds), during which some threads may still be running. Bucket counts increase monotonically. HELP go_sched_pauses_total_other_seconds Distribution of individual non-GC-related stop-the-world pause latencies. This is the time from deciding to stop the world until the world is started again. Some of this time is spent getting all threads to stop (measured directly in /sched/pauses/stopping/other:seconds). Bucket counts increase monotonically. ``` Signed-off-by: SuperQ --- cmd/prometheus/main.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index e250a95c82..f2988b2f2d 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -42,6 +42,7 @@ import ( "github.com/mwitkow/go-conntrack" "github.com/oklog/run" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/model" "github.com/prometheus/common/promlog" @@ -252,6 +253,18 @@ func main() { newFlagRetentionDuration model.Duration ) + // Unregister the default GoCollector, and reregister with our defaults. + if prometheus.Unregister(collectors.NewGoCollector()) { + prometheus.MustRegister( + collectors.NewGoCollector( + collectors.WithGoCollectorRuntimeMetrics( + collectors.MetricsGC, + collectors.MetricsScheduler, + ), + ), + ) + } + cfg := flagConfig{ notifier: notifier.Options{ Registerer: prometheus.DefaultRegisterer, From 6683895620ac5d6e20b58ccbff8b20604e55881a Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 29 May 2024 08:03:33 -0700 Subject: [PATCH 40/47] optimize regex matching for empty label values in posting match (#14075) Also update tests. Signed-off-by: Ben Ye --- tsdb/querier.go | 4 ++-- tsdb/querier_test.go | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tsdb/querier.go b/tsdb/querier.go index efd4daf26b..1071c4a716 100644 --- a/tsdb/querier.go +++ b/tsdb/querier.go @@ -357,8 +357,8 @@ func inversePostingsForMatcher(ctx context.Context, ix IndexReader, m *labels.Ma } res := vals[:0] - // If the inverse match is ="", we just want all the values. - if m.Type == labels.MatchEqual && m.Value == "" { + // If the match before inversion was !="" or !~"", we just want all the values. + if m.Value == "" && (m.Type == labels.MatchRegexp || m.Type == labels.MatchEqual) { res = vals } else { count := 1 diff --git a/tsdb/querier_test.go b/tsdb/querier_test.go index bb13531d7d..c7e60a0e16 100644 --- a/tsdb/querier_test.go +++ b/tsdb/querier_test.go @@ -2808,6 +2808,13 @@ func TestPostingsForMatchers(t *testing.T) { }, }, // Not regex. + { + matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "i", "")}, + exp: []labels.Labels{ + labels.FromStrings("n", "1", "i", "a"), + labels.FromStrings("n", "1", "i", "b"), + }, + }, { matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchNotRegexp, "n", "^1$")}, exp: []labels.Labels{ From 929fbf860ee8b1391fc94952fbd1ec0879824576 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 16:43:34 +0100 Subject: [PATCH 41/47] [Test] TSDB: let BenchmarkAddExemplar reuse slots Test with different amounts of capacity and exemplars, so that sometimes new exemplars are evicting older exemplars. Signed-off-by: Bryan Boreham --- tsdb/exemplar_test.go | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index b2be221700..fac7ef57bd 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -415,27 +415,29 @@ func BenchmarkAddExemplar(b *testing.B) { // before adding. exLabels := labels.FromStrings("trace_id", "89620921") - for _, n := range []int{10000, 100000, 1000000} { - b.Run(strconv.Itoa(n), func(b *testing.B) { - for j := 0; j < b.N; j++ { - b.StopTimer() - exs, err := NewCircularExemplarStorage(int64(n), eMetrics) - require.NoError(b, err) - es := exs.(*CircularExemplarStorage) - var l labels.Labels - b.StartTimer() + for _, capacity := range []int{1000, 10000, 100000} { + for _, n := range []int{10000, 100000, 1000000} { + b.Run(fmt.Sprintf("%d/%d", n, capacity), func(b *testing.B) { + for j := 0; j < b.N; j++ { + b.StopTimer() + exs, err := NewCircularExemplarStorage(int64(capacity), eMetrics) + require.NoError(b, err) + es := exs.(*CircularExemplarStorage) + var l labels.Labels + b.StartTimer() - for i := 0; i < n; i++ { - if i%100 == 0 { - l = labels.FromStrings("service", strconv.Itoa(i)) - } - err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i), Labels: exLabels}) - if err != nil { - require.NoError(b, err) + for i := 0; i < n; i++ { + if i%100 == 0 { + l = labels.FromStrings("service", strconv.Itoa(i)) + } + err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i), Labels: exLabels}) + if err != nil { + require.NoError(b, err) + } } } - } - }) + }) + } } } From f0c50b5a66f0ad962e6cbddfc66055b54fecf8e7 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 17:00:32 +0100 Subject: [PATCH 42/47] [Test] TSDB: BenchmarkResizeExemplar multiple per series One exemplar per series is not a typical workload. Make it the same as `BenchmarkAddExemplar`. Signed-off-by: Bryan Boreham --- tsdb/exemplar_test.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tsdb/exemplar_test.go b/tsdb/exemplar_test.go index fac7ef57bd..7723ec3894 100644 --- a/tsdb/exemplar_test.go +++ b/tsdb/exemplar_test.go @@ -482,8 +482,11 @@ func BenchmarkResizeExemplars(b *testing.B) { require.NoError(b, err) es := exs.(*CircularExemplarStorage) + var l labels.Labels for i := 0; i < int(float64(tc.startSize)*float64(1.5)); i++ { - l := labels.FromStrings("service", strconv.Itoa(i)) + if i%100 == 0 { + l = labels.FromStrings("service", strconv.Itoa(i)) + } err = es.AddExemplar(l, exemplar.Exemplar{Value: float64(i), Ts: int64(i)}) if err != nil { From 3eb55818778951f953434700184eabf004f817b2 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 16:25:00 +0100 Subject: [PATCH 43/47] [ENHANCEMENT] TSDB: Reduce map lookups on exemplar index In many cases we already have a pointer to the entry. Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 213fff5c25..1c149fa0a1 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -371,11 +371,12 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp return err } - _, ok := ce.index[string(seriesLabels)] + idx, ok := ce.index[string(seriesLabels)] if !ok { - ce.index[string(seriesLabels)] = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} + idx = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} + ce.index[string(seriesLabels)] = idx } else { - ce.exemplars[ce.index[string(seriesLabels)].newest].next = ce.nextIndex + ce.exemplars[idx.newest].next = ce.nextIndex } if prev := ce.exemplars[ce.nextIndex]; prev == nil { @@ -383,13 +384,13 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp } else { // There exists an exemplar already on this ce.nextIndex entry, // drop it, to make place for others. - var buf [1024]byte - prevLabels := prev.ref.seriesLabels.Bytes(buf[:]) if prev.next == noExemplar { // Last item for this series, remove index entry. + var buf [1024]byte + prevLabels := prev.ref.seriesLabels.Bytes(buf[:]) delete(ce.index, string(prevLabels)) } else { - ce.index[string(prevLabels)].oldest = prev.next + prev.ref.oldest = prev.next } } @@ -397,8 +398,8 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp // since this is the first exemplar stored for this series. ce.exemplars[ce.nextIndex].next = noExemplar ce.exemplars[ce.nextIndex].exemplar = e - ce.exemplars[ce.nextIndex].ref = ce.index[string(seriesLabels)] - ce.index[string(seriesLabels)].newest = ce.nextIndex + ce.exemplars[ce.nextIndex].ref = idx + idx.newest = ce.nextIndex ce.nextIndex = (ce.nextIndex + 1) % len(ce.exemplars) From c0bb156eca2b6216242d0efdc80627bb0096ea00 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 16:37:52 +0100 Subject: [PATCH 44/47] [ENHANCEMENT] TSDB: Eliminate pointer when storing exemplars Saves memory and effort. Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 1c149fa0a1..a8156669ca 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -37,7 +37,7 @@ const ( type CircularExemplarStorage struct { lock sync.RWMutex - exemplars []*circularBufferEntry + exemplars []circularBufferEntry nextIndex int metrics *ExemplarMetrics @@ -121,7 +121,7 @@ func NewCircularExemplarStorage(length int64, m *ExemplarMetrics) (ExemplarStora length = 0 } c := &CircularExemplarStorage{ - exemplars: make([]*circularBufferEntry, length), + exemplars: make([]circularBufferEntry, length), index: make(map[string]*indexEntry, length/estimatedExemplarsPerSeries), metrics: m, } @@ -292,7 +292,7 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { oldBuffer := ce.exemplars oldNextIndex := int64(ce.nextIndex) - ce.exemplars = make([]*circularBufferEntry, l) + ce.exemplars = make([]circularBufferEntry, l) ce.index = make(map[string]*indexEntry, l/estimatedExemplarsPerSeries) ce.nextIndex = 0 @@ -313,8 +313,8 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { for i := int64(0); i < count; i++ { idx := (startIndex + i) % int64(len(oldBuffer)) - if entry := oldBuffer[idx]; entry != nil { - ce.migrate(entry) + if oldBuffer[idx].ref != nil { + ce.migrate(&oldBuffer[idx]) migrated++ } } @@ -344,7 +344,7 @@ func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry) { idx.newest = ce.nextIndex entry.next = noExemplar - ce.exemplars[ce.nextIndex] = entry + ce.exemplars[ce.nextIndex] = *entry ce.nextIndex = (ce.nextIndex + 1) % len(ce.exemplars) } @@ -379,9 +379,7 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp ce.exemplars[idx.newest].next = ce.nextIndex } - if prev := ce.exemplars[ce.nextIndex]; prev == nil { - ce.exemplars[ce.nextIndex] = &circularBufferEntry{} - } else { + if prev := &ce.exemplars[ce.nextIndex]; prev.ref != nil { // There exists an exemplar already on this ce.nextIndex entry, // drop it, to make place for others. if prev.next == noExemplar { @@ -417,15 +415,15 @@ func (ce *CircularExemplarStorage) computeMetrics() { return } - if next := ce.exemplars[ce.nextIndex]; next != nil { + if ce.exemplars[ce.nextIndex].ref != nil { ce.metrics.exemplarsInStorage.Set(float64(len(ce.exemplars))) - ce.metrics.lastExemplarsTs.Set(float64(next.exemplar.Ts) / 1000) + ce.metrics.lastExemplarsTs.Set(float64(ce.exemplars[ce.nextIndex].exemplar.Ts) / 1000) return } // We did not yet fill the buffer. ce.metrics.exemplarsInStorage.Set(float64(ce.nextIndex)) - if ce.exemplars[0] != nil { + if ce.exemplars[0].ref != nil { ce.metrics.lastExemplarsTs.Set(float64(ce.exemplars[0].exemplar.Ts) / 1000) } } @@ -439,7 +437,7 @@ func (ce *CircularExemplarStorage) IterateExemplars(f func(seriesLabels labels.L idx := ce.nextIndex l := len(ce.exemplars) for i := 0; i < l; i, idx = i+1, (idx+1)%l { - if ce.exemplars[idx] == nil { + if ce.exemplars[idx].ref == nil { continue } err := f(ce.exemplars[idx].ref.seriesLabels, ce.exemplars[idx].exemplar) From 7d984874471389e8995bcedfeb9f684544c50f30 Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 17:22:56 +0100 Subject: [PATCH 45/47] [ENHANCEMENT] TSDB: let Resize re-use buffer This saves having to zero the buffer every time. Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index a8156669ca..898bb24904 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -311,10 +311,11 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { // This way we don't migrate exemplars that would just be overwritten when migrating later exemplars. startIndex := (oldNextIndex - count + int64(len(oldBuffer))) % int64(len(oldBuffer)) + var buf [1024]byte for i := int64(0); i < count; i++ { idx := (startIndex + i) % int64(len(oldBuffer)) if oldBuffer[idx].ref != nil { - ce.migrate(&oldBuffer[idx]) + ce.migrate(&oldBuffer[idx], buf[:]) migrated++ } } @@ -328,9 +329,8 @@ func (ce *CircularExemplarStorage) Resize(l int64) int { // migrate is like AddExemplar but reuses existing structs. Expected to be called in batch and requires // external lock and does not compute metrics. -func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry) { - var buf [1024]byte - seriesLabels := entry.ref.seriesLabels.Bytes(buf[:]) +func (ce *CircularExemplarStorage) migrate(entry *circularBufferEntry, buf []byte) { + seriesLabels := entry.ref.seriesLabels.Bytes(buf[:0]) idx, ok := ce.index[string(seriesLabels)] if !ok { From 3ee52abb5342728f84cd722fcd2516bed9d066be Mon Sep 17 00:00:00 2001 From: Bryan Boreham Date: Sat, 11 May 2024 17:32:17 +0100 Subject: [PATCH 46/47] [ENHANCEMENT] TSDB: Save map lookup on validation Goes faster. Signed-off-by: Bryan Boreham --- tsdb/exemplar.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tsdb/exemplar.go b/tsdb/exemplar.go index 898bb24904..7545ab9a60 100644 --- a/tsdb/exemplar.go +++ b/tsdb/exemplar.go @@ -214,12 +214,12 @@ func (ce *CircularExemplarStorage) ValidateExemplar(l labels.Labels, e exemplar. // Optimize by moving the lock to be per series (& benchmark it). ce.lock.RLock() defer ce.lock.RUnlock() - return ce.validateExemplar(seriesLabels, e, false) + return ce.validateExemplar(ce.index[string(seriesLabels)], e, false) } // Not thread safe. The appended parameters tells us whether this is an external validation, or internal // as a result of an AddExemplar call, in which case we should update any relevant metrics. -func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemplar, appended bool) error { +func (ce *CircularExemplarStorage) validateExemplar(idx *indexEntry, e exemplar.Exemplar, appended bool) error { if len(ce.exemplars) == 0 { return storage.ErrExemplarsDisabled } @@ -239,8 +239,7 @@ func (ce *CircularExemplarStorage) validateExemplar(key []byte, e exemplar.Exemp return err } - idx, ok := ce.index[string(key)] - if !ok { + if idx == nil { return nil } @@ -362,7 +361,8 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp ce.lock.Lock() defer ce.lock.Unlock() - err := ce.validateExemplar(seriesLabels, e, true) + idx, ok := ce.index[string(seriesLabels)] + err := ce.validateExemplar(idx, e, true) if err != nil { if errors.Is(err, storage.ErrDuplicateExemplar) { // Duplicate exemplar, noop. @@ -371,7 +371,6 @@ func (ce *CircularExemplarStorage) AddExemplar(l labels.Labels, e exemplar.Exemp return err } - idx, ok := ce.index[string(seriesLabels)] if !ok { idx = &indexEntry{oldest: ce.nextIndex, seriesLabels: l} ce.index[string(seriesLabels)] = idx From 37b408c6cd5284dd250549819d76b52afceb6d73 Mon Sep 17 00:00:00 2001 From: gotjosh Date: Thu, 30 May 2024 11:49:50 +0100 Subject: [PATCH 47/47] Feature: Allow configuration of a rule evaluation delay (#14061) * [PATCH] Allow having evaluation delay for rule groups Signed-off-by: Ganesh Vernekar * [PATCH] Fix lint Signed-off-by: Ganesh Vernekar * [PATCH] Move the option to ManagerOptions Signed-off-by: Ganesh Vernekar * [PATCH] Include evaluation_delay in the group config Signed-off-by: Ganesh Vernekar * Fix comments Signed-off-by: gotjosh * Add a server configuration option. Signed-off-by: gotjosh * Appease the linter #1 Signed-off-by: gotjosh * Add the new server flag documentation Signed-off-by: gotjosh * Improve documentation of the new flag and configuration Signed-off-by: gotjosh * Use named parameters for clarity on the `Rule` interface Signed-off-by: gotjosh * Add `initial` to the flag help Signed-off-by: gotjosh * Change the CHANGELOG area from `ruler` to `rules` Signed-off-by: gotjosh * Rename evaluation_delay to `rule_query_offset`/`query_offset` and make it a global configuration option. Signed-off-by: gotjosh E Your branch is up to date with 'origin/gotjosh/evaluation-delay'. * more docs Signed-off-by: gotjosh * Improve wording on CHANGELOG Signed-off-by: gotjosh * Add `RuleQueryOffset` to the default config in tests in case it changes Signed-off-by: gotjosh * Update docs/configuration/recording_rules.md Co-authored-by: Julius Volz Signed-off-by: gotjosh * Rename `RuleQueryOffset` to `QueryOffset` when in the group context. Signed-off-by: gotjosh * Improve docstring and documentation on the `rule_query_offset` Signed-off-by: gotjosh --------- Signed-off-by: Ganesh Vernekar Signed-off-by: gotjosh Co-authored-by: Ganesh Vernekar Co-authored-by: Julius Volz --- CHANGELOG.md | 1 + cmd/prometheus/main.go | 3 + config/config.go | 4 + docs/configuration/configuration.md | 4 + docs/configuration/recording_rules.md | 6 + model/rulefmt/rulefmt.go | 9 +- rules/alerting.go | 9 +- rules/alerting_test.go | 32 +- rules/group.go | 24 +- rules/manager.go | 2 + rules/manager_test.go | 738 ++++++++++++++------------ rules/origin_test.go | 2 +- rules/recording.go | 5 +- rules/recording_test.go | 10 +- rules/rule.go | 2 +- 15 files changed, 471 insertions(+), 380 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 079dd7595a..941f09da91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [CHANGE] Rules: Execute 1 query instead of N (where N is the number of alerts within alert rule) when restoring alerts. #13980 +* [FEATURE] Rules: Add new option `query_offset` for each rule group via rule group configuration file and `rule_query_offset` as part of the global configuration to have more resilience for remote write delays. #14061 * [ENHANCEMENT] Rules: Add `rule_group_last_restore_duration_seconds` to measure the time it takes to restore a rule group. #13974 * [ENHANCEMENT] OTLP: Improve remote write format translation performance by using label set hashes for metric identifiers instead of string based ones. #14006 #13991 * [ENHANCEMENT] TSDB: Optimize querying with regexp matchers. #13620 diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index f2988b2f2d..0532bc3809 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -785,6 +785,9 @@ func main() { ResendDelay: time.Duration(cfg.resendDelay), MaxConcurrentEvals: cfg.maxConcurrentEvals, ConcurrentEvalsEnabled: cfg.enableConcurrentRuleEval, + DefaultRuleQueryOffset: func() time.Duration { + return time.Duration(cfgFile.GlobalConfig.RuleQueryOffset) + }, }) } diff --git a/config/config.go b/config/config.go index 1cfd588643..463dbc3571 100644 --- a/config/config.go +++ b/config/config.go @@ -145,6 +145,7 @@ var ( ScrapeInterval: model.Duration(1 * time.Minute), ScrapeTimeout: model.Duration(10 * time.Second), EvaluationInterval: model.Duration(1 * time.Minute), + RuleQueryOffset: model.Duration(0 * time.Minute), // When native histogram feature flag is enabled, ScrapeProtocols default // changes to DefaultNativeHistogramScrapeProtocols. ScrapeProtocols: DefaultScrapeProtocols, @@ -397,6 +398,8 @@ type GlobalConfig struct { ScrapeProtocols []ScrapeProtocol `yaml:"scrape_protocols,omitempty"` // How frequently to evaluate rules by default. EvaluationInterval model.Duration `yaml:"evaluation_interval,omitempty"` + // Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. + RuleQueryOffset model.Duration `yaml:"rule_query_offset"` // File to which PromQL queries are logged. QueryLogFile string `yaml:"query_log_file,omitempty"` // The labels to add to any timeseries that this Prometheus instance scrapes. @@ -556,6 +559,7 @@ func (c *GlobalConfig) isZero() bool { c.ScrapeInterval == 0 && c.ScrapeTimeout == 0 && c.EvaluationInterval == 0 && + c.RuleQueryOffset == 0 && c.QueryLogFile == "" && c.ScrapeProtocols == nil } diff --git a/docs/configuration/configuration.md b/docs/configuration/configuration.md index dedd7a0f7c..a8fc9c626e 100644 --- a/docs/configuration/configuration.md +++ b/docs/configuration/configuration.md @@ -70,6 +70,10 @@ global: # How frequently to evaluate rules. [ evaluation_interval: | default = 1m ] + + # Offset the rule evaluation timestamp of this particular group by the specified duration into the past to ensure the underlying metrics have been received. + # Metric availability delays are more likely to occur when Prometheus is running as a remote write target, but can also occur when there's anomalies with scraping. + [ rule_query_offset: | default = 0s ] # The labels to add to any time series or alerts when communicating with # external systems (federation, remote storage, Alertmanager). diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md index 48ab951f94..9aa226bbc0 100644 --- a/docs/configuration/recording_rules.md +++ b/docs/configuration/recording_rules.md @@ -86,6 +86,9 @@ name: # rule can produce. 0 is no limit. [ limit: | default = 0 ] +# Offset the rule evaluation timestamp of this particular group by the specified duration into the past. +[ query_offset: | default = global.rule_query_offset ] + rules: [ - ... ] ``` @@ -148,6 +151,9 @@ the rule, active, pending, or inactive, are cleared as well. The event will be recorded as an error in the evaluation, and as such no stale markers are written. +# Rule query offset +This is useful to ensure the underlying metrics have been received and stored in Prometheus. Metric availability delays are more likely to occur when Prometheus is running as a remote write target due to the nature of distributed systems, but can also occur when there's anomalies with scraping and/or short evaluation intervals. + # Failed rule evaluations due to slow evaluation If a rule group hasn't finished evaluating before its next evaluation is supposed to start (as defined by the `evaluation_interval`), the next evaluation will be skipped. Subsequent evaluations of the rule group will continue to be skipped until the initial evaluation either completes or times out. When this happens, there will be a gap in the metric produced by the recording rule. The `rule_group_iterations_missed_total` metric will be incremented for each missed iteration of the rule group. diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go index 4ed1619d64..bfb85ce740 100644 --- a/model/rulefmt/rulefmt.go +++ b/model/rulefmt/rulefmt.go @@ -136,10 +136,11 @@ func (g *RuleGroups) Validate(node ruleGroups) (errs []error) { // RuleGroup is a list of sequentially evaluated recording and alerting rules. type RuleGroup struct { - Name string `yaml:"name"` - Interval model.Duration `yaml:"interval,omitempty"` - Limit int `yaml:"limit,omitempty"` - Rules []RuleNode `yaml:"rules"` + Name string `yaml:"name"` + Interval model.Duration `yaml:"interval,omitempty"` + QueryOffset *model.Duration `yaml:"query_offset,omitempty"` + Limit int `yaml:"limit,omitempty"` + Rules []RuleNode `yaml:"rules"` } // Rule describes an alerting or recording rule. diff --git a/rules/alerting.go b/rules/alerting.go index 2d2b19efe7..2dc0917dce 100644 --- a/rules/alerting.go +++ b/rules/alerting.go @@ -338,10 +338,9 @@ const resolvedRetention = 15 * time.Minute // Eval evaluates the rule expression and then creates pending alerts and fires // or removes previously pending alerts accordingly. -func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) { +func (r *AlertingRule) Eval(ctx context.Context, queryOffset time.Duration, ts time.Time, query QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) { ctx = NewOriginContext(ctx, NewRuleDetail(r)) - - res, err := query(ctx, r.vector.String(), ts) + res, err := query(ctx, r.vector.String(), ts.Add(-queryOffset)) if err != nil { return nil, err } @@ -484,8 +483,8 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, } if r.restored.Load() { - vec = append(vec, r.sample(a, ts)) - vec = append(vec, r.forStateSample(a, ts, float64(a.ActiveAt.Unix()))) + vec = append(vec, r.sample(a, ts.Add(-queryOffset))) + vec = append(vec, r.forStateSample(a, ts.Add(-queryOffset), float64(a.ActiveAt.Unix()))) } } diff --git a/rules/alerting_test.go b/rules/alerting_test.go index a9315b47ee..5ebd049f66 100644 --- a/rules/alerting_test.go +++ b/rules/alerting_test.go @@ -123,7 +123,7 @@ func TestAlertingRuleTemplateWithHistogram(t *testing.T) { ) evalTime := time.Now() - res, err := rule.Eval(context.TODO(), evalTime, q, nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, q, nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -230,7 +230,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -247,7 +247,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -315,7 +315,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := ruleWithoutExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -329,7 +329,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) { } res, err = ruleWithExternalLabels.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -408,7 +408,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := ruleWithoutExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -422,7 +422,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) { } res, err = ruleWithExternalURL.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -477,7 +477,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) { var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. res, err := rule.Eval( - context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0, + context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0, ) require.NoError(t, err) for _, smpl := range res { @@ -544,7 +544,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }}; close(getDoneCh) }() _, err = ruleWithQueryInTemplate.Eval( - context.TODO(), evalTime, slowQueryFunc, nil, 0, + context.TODO(), 0, evalTime, slowQueryFunc, nil, 0, ) require.NoError(t, err) } @@ -596,7 +596,7 @@ func TestAlertingRuleDuplicate(t *testing.T) { "", true, log.NewNopLogger(), ) - _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil, 0) + _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) require.EqualError(t, err, "vector contains metrics with the same labelset after applying alert labels") } @@ -644,7 +644,7 @@ func TestAlertingRuleLimit(t *testing.T) { evalTime := time.Unix(0, 0) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": @@ -871,7 +871,7 @@ func TestKeepFiringFor(t *testing.T) { t.Logf("case %d", i) evalTime := baseTime.Add(time.Duration(i) * time.Minute) result[0].T = timestamp.FromTime(evalTime) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -888,7 +888,7 @@ func TestKeepFiringFor(t *testing.T) { testutil.RequireEqual(t, result, filteredRes) } evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -925,7 +925,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { baseTime := time.Unix(0, 0) result.T = timestamp.FromTime(baseTime) - res, err := rule.Eval(context.TODO(), baseTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, baseTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Len(t, res, 2) @@ -940,7 +940,7 @@ func TestPendingAndKeepFiringFor(t *testing.T) { } evalTime := baseTime.Add(time.Minute) - res, err = rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err = rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) require.Empty(t, res) } @@ -974,7 +974,7 @@ func TestAlertingEvalWithOrigin(t *testing.T) { true, log.NewNopLogger(), ) - _, err = rule.Eval(ctx, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { + _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { detail = FromOriginContext(ctx) return nil, nil }, nil, 0) diff --git a/rules/group.go b/rules/group.go index 1f4757de3c..9ae89789d0 100644 --- a/rules/group.go +++ b/rules/group.go @@ -47,6 +47,7 @@ type Group struct { name string file string interval time.Duration + queryOffset *time.Duration limit int rules []Rule seriesInPreviousEval []map[string]labels.Labels // One per Rule. @@ -90,6 +91,7 @@ type GroupOptions struct { Rules []Rule ShouldRestore bool Opts *ManagerOptions + QueryOffset *time.Duration done chan struct{} EvalIterationFunc GroupEvalIterationFunc } @@ -126,6 +128,7 @@ func NewGroup(o GroupOptions) *Group { name: o.Name, file: o.File, interval: o.Interval, + queryOffset: o.QueryOffset, limit: o.Limit, rules: o.Rules, shouldRestore: o.ShouldRestore, @@ -443,6 +446,8 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { wg sync.WaitGroup ) + ruleQueryOffset := g.QueryOffset() + for i, rule := range g.rules { select { case <-g.done: @@ -473,7 +478,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.metrics.EvalTotal.WithLabelValues(GroupKey(g.File(), g.Name())).Inc() - vector, err := rule.Eval(ctx, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) + vector, err := rule.Eval(ctx, ruleQueryOffset, ts, g.opts.QueryFunc, g.opts.ExternalURL, g.Limit()) if err != nil { rule.SetHealth(HealthBad) rule.SetLastError(err) @@ -562,7 +567,7 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { for metric, lset := range g.seriesInPreviousEval[i] { if _, ok := seriesReturned[metric]; !ok { // Series no longer exposed, mark it stale. - _, err = app.Append(0, lset, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN)) + _, err = app.Append(0, lset, timestamp.FromTime(ts.Add(-ruleQueryOffset)), math.Float64frombits(value.StaleNaN)) unwrappedErr := errors.Unwrap(err) if unwrappedErr == nil { unwrappedErr = err @@ -601,14 +606,27 @@ func (g *Group) Eval(ctx context.Context, ts time.Time) { g.cleanupStaleSeries(ctx, ts) } +func (g *Group) QueryOffset() time.Duration { + if g.queryOffset != nil { + return *g.queryOffset + } + + if g.opts.DefaultRuleQueryOffset != nil { + return g.opts.DefaultRuleQueryOffset() + } + + return time.Duration(0) +} + func (g *Group) cleanupStaleSeries(ctx context.Context, ts time.Time) { if len(g.staleSeries) == 0 { return } app := g.opts.Appendable.Appender(ctx) + queryOffset := g.QueryOffset() for _, s := range g.staleSeries { // Rule that produced series no longer configured, mark it stale. - _, err := app.Append(0, s, timestamp.FromTime(ts), math.Float64frombits(value.StaleNaN)) + _, err := app.Append(0, s, timestamp.FromTime(ts.Add(-queryOffset)), math.Float64frombits(value.StaleNaN)) unwrappedErr := errors.Unwrap(err) if unwrappedErr == nil { unwrappedErr = err diff --git a/rules/manager.go b/rules/manager.go index 165dca144e..063189e0ab 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -116,6 +116,7 @@ type ManagerOptions struct { ForGracePeriod time.Duration ResendDelay time.Duration GroupLoader GroupLoader + DefaultRuleQueryOffset func() time.Duration MaxConcurrentEvals int64 ConcurrentEvalsEnabled bool RuleConcurrencyController RuleConcurrencyController @@ -336,6 +337,7 @@ func (m *Manager) LoadGroups( Rules: rules, ShouldRestore: shouldRestore, Opts: m.opts, + QueryOffset: (*time.Duration)(rg.QueryOffset), done: m.done, EvalIterationFunc: groupEvalIterationFunc, }) diff --git a/rules/manager_test.go b/rules/manager_test.go index 2f7343ebb8..11d1282bd3 100644 --- a/rules/manager_test.go +++ b/rules/manager_test.go @@ -16,8 +16,10 @@ package rules import ( "context" "fmt" + "io/fs" "math" "os" + "path" "sort" "strconv" "sync" @@ -162,7 +164,7 @@ func TestAlertingRule(t *testing.T) { evalTime := baseTime.Add(test.time) - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + res, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples. @@ -192,152 +194,156 @@ func TestAlertingRule(t *testing.T) { } func TestForStateAddSamples(t *testing.T) { - storage := promqltest.LoadedStorage(t, ` + for _, queryOffset := range []time.Duration{0, time.Minute} { + t.Run(fmt.Sprintf("queryOffset %s", queryOffset.String()), func(t *testing.T) { + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140 `) - t.Cleanup(func() { storage.Close() }) + t.Cleanup(func() { storage.Close() }) - expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) - require.NoError(t, err) + expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) + require.NoError(t, err) - rule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - time.Minute, - 0, - labels.FromStrings("severity", "{{\"c\"}}ritical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, - ) - result := promql.Vector{ - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "0", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "1", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "0", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - promql.Sample{ - Metric: labels.FromStrings( - "__name__", "ALERTS_FOR_STATE", - "alertname", "HTTPRequestRateLow", - "group", "canary", - "instance", "1", - "job", "app-server", - "severity", "critical", - ), - F: 1, - }, - } - - baseTime := time.Unix(0, 0) - - tests := []struct { - time time.Duration - result promql.Vector - persistThisTime bool // If true, it means this 'time' is persisted for 'for'. - }{ - { - time: 0, - result: append(promql.Vector{}, result[:2]...), - persistThisTime: true, - }, - { - time: 5 * time.Minute, - result: append(promql.Vector{}, result[2:]...), - }, - { - time: 10 * time.Minute, - result: append(promql.Vector{}, result[2:3]...), - }, - { - time: 15 * time.Minute, - result: nil, - }, - { - time: 20 * time.Minute, - result: nil, - }, - { - time: 25 * time.Minute, - result: append(promql.Vector{}, result[:1]...), - persistThisTime: true, - }, - { - time: 30 * time.Minute, - result: append(promql.Vector{}, result[2:3]...), - }, - } - - var forState float64 - for i, test := range tests { - t.Logf("case %d", i) - evalTime := baseTime.Add(test.time) - - if test.persistThisTime { - forState = float64(evalTime.Unix()) - } - if test.result == nil { - forState = float64(value.StaleNaN) - } - - res, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, 0) - require.NoError(t, err) - - var filteredRes promql.Vector // After removing 'ALERTS' samples. - for _, smpl := range res { - smplName := smpl.Metric.Get("__name__") - if smplName == "ALERTS_FOR_STATE" { - filteredRes = append(filteredRes, smpl) - } else { - // If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. - require.Equal(t, "ALERTS", smplName) + rule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + time.Minute, + 0, + labels.FromStrings("severity", "{{\"c\"}}ritical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, + ) + result := promql.Vector{ + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "0", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "1", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "0", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, + promql.Sample{ + Metric: labels.FromStrings( + "__name__", "ALERTS_FOR_STATE", + "alertname", "HTTPRequestRateLow", + "group", "canary", + "instance", "1", + "job", "app-server", + "severity", "critical", + ), + F: 1, + }, } - } - for i := range test.result { - test.result[i].T = timestamp.FromTime(evalTime) - // Updating the expected 'for' state. - if test.result[i].F >= 0 { - test.result[i].F = forState - } - } - require.Equal(t, len(test.result), len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) - sort.Slice(filteredRes, func(i, j int) bool { - return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 + baseTime := time.Unix(0, 0) + + tests := []struct { + time time.Duration + result promql.Vector + persistThisTime bool // If true, it means this 'time' is persisted for 'for'. + }{ + { + time: 0, + result: append(promql.Vector{}, result[:2]...), + persistThisTime: true, + }, + { + time: 5 * time.Minute, + result: append(promql.Vector{}, result[2:]...), + }, + { + time: 10 * time.Minute, + result: append(promql.Vector{}, result[2:3]...), + }, + { + time: 15 * time.Minute, + result: nil, + }, + { + time: 20 * time.Minute, + result: nil, + }, + { + time: 25 * time.Minute, + result: append(promql.Vector{}, result[:1]...), + persistThisTime: true, + }, + { + time: 30 * time.Minute, + result: append(promql.Vector{}, result[2:3]...), + }, + } + + var forState float64 + for i, test := range tests { + t.Logf("case %d", i) + evalTime := baseTime.Add(test.time).Add(queryOffset) + + if test.persistThisTime { + forState = float64(evalTime.Unix()) + } + if test.result == nil { + forState = float64(value.StaleNaN) + } + + res, err := rule.Eval(context.TODO(), queryOffset, evalTime, EngineQueryFunc(testEngine, storage), nil, 0) + require.NoError(t, err) + + var filteredRes promql.Vector // After removing 'ALERTS' samples. + for _, smpl := range res { + smplName := smpl.Metric.Get("__name__") + if smplName == "ALERTS_FOR_STATE" { + filteredRes = append(filteredRes, smpl) + } else { + // If not 'ALERTS_FOR_STATE', it has to be 'ALERTS'. + require.Equal(t, "ALERTS", smplName) + } + } + for i := range test.result { + test.result[i].T = timestamp.FromTime(evalTime.Add(-queryOffset)) + // Updating the expected 'for' state. + if test.result[i].F >= 0 { + test.result[i].F = forState + } + } + require.Equal(t, len(test.result), len(filteredRes), "%d. Number of samples in expected and actual output don't match (%d vs. %d)", i, len(test.result), len(res)) + + sort.Slice(filteredRes, func(i, j int) bool { + return labels.Compare(filteredRes[i].Metric, filteredRes[j].Metric) < 0 + }) + prom_testutil.RequireEqual(t, test.result, filteredRes) + + for _, aa := range rule.ActiveAlerts() { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + } }) - prom_testutil.RequireEqual(t, test.result, filteredRes) - - for _, aa := range rule.ActiveAlerts() { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) - } } } @@ -349,243 +355,251 @@ func sortAlerts(items []*Alert) { } func TestForStateRestore(t *testing.T) { - storage := promqltest.LoadedStorage(t, ` + for _, queryOffset := range []time.Duration{0, time.Minute} { + t.Run(fmt.Sprintf("queryOffset %s", queryOffset.String()), func(t *testing.T) { + storage := promqltest.LoadedStorage(t, ` load 5m http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 50 0 0 25 0 0 40 0 120 http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 125 90 60 0 0 25 0 0 40 0 130 `) - t.Cleanup(func() { storage.Close() }) + t.Cleanup(func() { storage.Close() }) - expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) - require.NoError(t, err) + expr, err := parser.ParseExpr(`http_requests{group="canary", job="app-server"} < 100`) + require.NoError(t, err) - opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(testEngine, storage), - Appendable: storage, - Queryable: storage, - Context: context.Background(), - Logger: log.NewNopLogger(), - NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, - OutageTolerance: 30 * time.Minute, - ForGracePeriod: 10 * time.Minute, - } - - alertForDuration := 25 * time.Minute - // Initial run before prometheus goes down. - rule := NewAlertingRule( - "HTTPRequestRateLow", - expr, - alertForDuration, - 0, - labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, - ) - - group := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{rule}, - ShouldRestore: true, - Opts: opts, - }) - groups := make(map[string]*Group) - groups["default;"] = group - - initialRuns := []time.Duration{0, 5 * time.Minute} - - baseTime := time.Unix(0, 0) - for _, duration := range initialRuns { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - - // Prometheus goes down here. We create new rules and groups. - type testInput struct { - name string - restoreDuration time.Duration - expectedAlerts []*Alert - - num int - noRestore bool - gracePeriod bool - downDuration time.Duration - before func() - } - - tests := []testInput{ - { - name: "normal restore (alerts were not firing)", - restoreDuration: 15 * time.Minute, - expectedAlerts: rule.ActiveAlerts(), - downDuration: 10 * time.Minute, - }, - { - name: "outage tolerance", - restoreDuration: 40 * time.Minute, - noRestore: true, - num: 2, - }, - { - name: "no active alerts", - restoreDuration: 50 * time.Minute, - expectedAlerts: []*Alert{}, - }, - { - name: "test the grace period", - restoreDuration: 25 * time.Minute, - expectedAlerts: []*Alert{}, - gracePeriod: true, - before: func() { - for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { - evalTime := baseTime.Add(duration) - group.Eval(context.TODO(), evalTime) - } - }, - num: 2, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if tt.before != nil { - tt.before() + opts := &ManagerOptions{ + QueryFunc: EngineQueryFunc(testEngine, storage), + Appendable: storage, + Queryable: storage, + Context: context.Background(), + Logger: log.NewNopLogger(), + NotifyFunc: func(ctx context.Context, expr string, alerts ...*Alert) {}, + OutageTolerance: 30 * time.Minute, + ForGracePeriod: 10 * time.Minute, } - newRule := NewAlertingRule( + alertForDuration := 25 * time.Minute + // Initial run before prometheus goes down. + rule := NewAlertingRule( "HTTPRequestRateLow", expr, alertForDuration, 0, labels.FromStrings("severity", "critical"), - labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil, ) - newGroup := NewGroup(GroupOptions{ + + group := NewGroup(GroupOptions{ Name: "default", Interval: time.Second, - Rules: []Rule{newRule}, + Rules: []Rule{rule}, ShouldRestore: true, Opts: opts, }) + groups := make(map[string]*Group) + groups["default;"] = group - newGroups := make(map[string]*Group) - newGroups["default;"] = newGroup + initialRuns := []time.Duration{0, 5 * time.Minute} - restoreTime := baseTime.Add(tt.restoreDuration) - // First eval before restoration. - newGroup.Eval(context.TODO(), restoreTime) - // Restore happens here. - newGroup.RestoreForState(restoreTime) - - got := newRule.ActiveAlerts() - for _, aa := range got { - require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + baseTime := time.Unix(0, 0) + for _, duration := range initialRuns { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) } - sort.Slice(got, func(i, j int) bool { - return labels.Compare(got[i].Labels, got[j].Labels) < 0 - }) - // In all cases, we expect the restoration process to have completed. - require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + // Prometheus goes down here. We create new rules and groups. + type testInput struct { + name string + restoreDuration time.Duration + expectedAlerts []*Alert - // Checking if we have restored it correctly. - switch { - case tt.noRestore: - require.Len(t, got, tt.num) - for _, e := range got { - require.Equal(t, e.ActiveAt, restoreTime) - } - case tt.gracePeriod: + num int + noRestore bool + gracePeriod bool + downDuration time.Duration + before func() + } - require.Len(t, got, tt.num) - for _, e := range got { - require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) - } - default: - exp := tt.expectedAlerts - require.Equal(t, len(exp), len(got)) - sortAlerts(exp) - sortAlerts(got) - for i, e := range exp { - require.Equal(t, e.Labels, got[i].Labels) + tests := []testInput{ + { + name: "normal restore (alerts were not firing)", + restoreDuration: 15 * time.Minute, + expectedAlerts: rule.ActiveAlerts(), + downDuration: 10 * time.Minute, + }, + { + name: "outage tolerance", + restoreDuration: 40 * time.Minute, + noRestore: true, + num: 2, + }, + { + name: "no active alerts", + restoreDuration: 50 * time.Minute, + expectedAlerts: []*Alert{}, + }, + { + name: "test the grace period", + restoreDuration: 25 * time.Minute, + expectedAlerts: []*Alert{}, + gracePeriod: true, + before: func() { + for _, duration := range []time.Duration{10 * time.Minute, 15 * time.Minute, 20 * time.Minute} { + evalTime := baseTime.Add(duration) + group.Eval(context.TODO(), evalTime) + } + }, + num: 2, + }, + } - // Difference in time should be within 1e6 ns, i.e. 1ms - // (due to conversion between ns & ms, float64 & int64). - activeAtDiff := float64(e.ActiveAt.Unix() + int64(tt.downDuration/time.Second) - got[i].ActiveAt.Unix()) - require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.before != nil { + tt.before() + } + + newRule := NewAlertingRule( + "HTTPRequestRateLow", + expr, + alertForDuration, + 0, + labels.FromStrings("severity", "critical"), + labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil, + ) + newGroup := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{newRule}, + ShouldRestore: true, + Opts: opts, + QueryOffset: &queryOffset, + }) + + newGroups := make(map[string]*Group) + newGroups["default;"] = newGroup + + restoreTime := baseTime.Add(tt.restoreDuration).Add(queryOffset) + // First eval before restoration. + newGroup.Eval(context.TODO(), restoreTime) + // Restore happens here. + newGroup.RestoreForState(restoreTime) + + got := newRule.ActiveAlerts() + for _, aa := range got { + require.Zero(t, aa.Labels.Get(model.MetricNameLabel), "%s label set on active alert: %s", model.MetricNameLabel, aa.Labels) + } + sort.Slice(got, func(i, j int) bool { + return labels.Compare(got[i].Labels, got[j].Labels) < 0 + }) + + // In all cases, we expect the restoration process to have completed. + require.Truef(t, newRule.Restored(), "expected the rule restoration process to have completed") + + // Checking if we have restored it correctly. + switch { + case tt.noRestore: + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, e.ActiveAt, restoreTime) + } + case tt.gracePeriod: + + require.Len(t, got, tt.num) + for _, e := range got { + require.Equal(t, opts.ForGracePeriod, e.ActiveAt.Add(alertForDuration).Sub(restoreTime)) + } + default: + exp := tt.expectedAlerts + require.Equal(t, len(exp), len(got)) + sortAlerts(exp) + sortAlerts(got) + for i, e := range exp { + require.Equal(t, e.Labels, got[i].Labels) + + // Difference in time should be within 1e6 ns, i.e. 1ms + // (due to conversion between ns & ms, float64 & int64). + activeAtDiff := queryOffset.Seconds() + float64(e.ActiveAt.Unix()+int64(tt.downDuration/time.Second)-got[i].ActiveAt.Unix()) + require.Equal(t, 0.0, math.Abs(activeAtDiff), "'for' state restored time is wrong") + } + } + }) } }) } } func TestStaleness(t *testing.T) { - st := teststorage.New(t) - defer st.Close() - engineOpts := promql.EngineOpts{ - Logger: nil, - Reg: nil, - MaxSamples: 10, - Timeout: 10 * time.Second, + for _, queryOffset := range []time.Duration{0, time.Minute} { + st := teststorage.New(t) + defer st.Close() + engineOpts := promql.EngineOpts{ + Logger: nil, + Reg: nil, + MaxSamples: 10, + Timeout: 10 * time.Second, + } + engine := promql.NewEngine(engineOpts) + opts := &ManagerOptions{ + QueryFunc: EngineQueryFunc(engine, st), + Appendable: st, + Queryable: st, + Context: context.Background(), + Logger: log.NewNopLogger(), + } + + expr, err := parser.ParseExpr("a + 1") + require.NoError(t, err) + rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) + group := NewGroup(GroupOptions{ + Name: "default", + Interval: time.Second, + Rules: []Rule{rule}, + ShouldRestore: true, + Opts: opts, + QueryOffset: &queryOffset, + }) + + // A time series that has two samples and then goes stale. + app := st.Appender(context.Background()) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) + app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN)) + + err = app.Commit() + require.NoError(t, err) + + ctx := context.Background() + + // Execute 3 times, 1 second apart. + group.Eval(ctx, time.Unix(0, 0).Add(queryOffset)) + group.Eval(ctx, time.Unix(1, 0).Add(queryOffset)) + group.Eval(ctx, time.Unix(2, 0).Add(queryOffset)) + + querier, err := st.Querier(0, 2000) + require.NoError(t, err) + defer querier.Close() + + matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one") + require.NoError(t, err) + + set := querier.Select(ctx, false, nil, matcher) + samples, err := readSeriesSet(set) + require.NoError(t, err) + + metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String() + metricSample, ok := samples[metric] + + require.True(t, ok, "Series %s not returned.", metric) + require.True(t, value.IsStaleNaN(metricSample[2].F), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].F)) + metricSample[2].F = 42 // require.Equal cannot handle NaN. + + want := map[string][]promql.FPoint{ + metric: {{T: 0, F: 2}, {T: 1000, F: 3}, {T: 2000, F: 42}}, + } + + require.Equal(t, want, samples) } - engine := promql.NewEngine(engineOpts) - opts := &ManagerOptions{ - QueryFunc: EngineQueryFunc(engine, st), - Appendable: st, - Queryable: st, - Context: context.Background(), - Logger: log.NewNopLogger(), - } - - expr, err := parser.ParseExpr("a + 1") - require.NoError(t, err) - rule := NewRecordingRule("a_plus_one", expr, labels.Labels{}) - group := NewGroup(GroupOptions{ - Name: "default", - Interval: time.Second, - Rules: []Rule{rule}, - ShouldRestore: true, - Opts: opts, - }) - - // A time series that has two samples and then goes stale. - app := st.Appender(context.Background()) - app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 0, 1) - app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 1000, 2) - app.Append(0, labels.FromStrings(model.MetricNameLabel, "a"), 2000, math.Float64frombits(value.StaleNaN)) - - err = app.Commit() - require.NoError(t, err) - - ctx := context.Background() - - // Execute 3 times, 1 second apart. - group.Eval(ctx, time.Unix(0, 0)) - group.Eval(ctx, time.Unix(1, 0)) - group.Eval(ctx, time.Unix(2, 0)) - - querier, err := st.Querier(0, 2000) - require.NoError(t, err) - defer querier.Close() - - matcher, err := labels.NewMatcher(labels.MatchEqual, model.MetricNameLabel, "a_plus_one") - require.NoError(t, err) - - set := querier.Select(ctx, false, nil, matcher) - samples, err := readSeriesSet(set) - require.NoError(t, err) - - metric := labels.FromStrings(model.MetricNameLabel, "a_plus_one").String() - metricSample, ok := samples[metric] - - require.True(t, ok, "Series %s not returned.", metric) - require.True(t, value.IsStaleNaN(metricSample[2].F), "Appended second sample not as expected. Wanted: stale NaN Got: %x", math.Float64bits(metricSample[2].F)) - metricSample[2].F = 42 // require.Equal cannot handle NaN. - - want := map[string][]promql.FPoint{ - metric: {{T: 0, F: 2}, {T: 1000, F: 3}, {T: 2000, F: 42}}, - } - - require.Equal(t, want, samples) } // Convert a SeriesSet into a form usable with require.Equal. @@ -609,6 +623,46 @@ func readSeriesSet(ss storage.SeriesSet) (map[string][]promql.FPoint, error) { return result, ss.Err() } +func TestGroup_QueryOffset(t *testing.T) { + config := ` +groups: + - name: group1 + query_offset: 2m + - name: group2 + query_offset: 0s + - name: group3 +` + + dir := t.TempDir() + fname := path.Join(dir, "rules.yaml") + err := os.WriteFile(fname, []byte(config), fs.ModePerm) + require.NoError(t, err) + + m := NewManager(&ManagerOptions{ + Logger: log.NewNopLogger(), + DefaultRuleQueryOffset: func() time.Duration { + return time.Minute + }, + }) + m.start() + err = m.Update(time.Second, []string{fname}, labels.EmptyLabels(), "", nil) + require.NoError(t, err) + + rgs := m.RuleGroups() + sort.Slice(rgs, func(i, j int) bool { + return rgs[i].Name() < rgs[j].Name() + }) + + // From config. + require.Equal(t, 2*time.Minute, rgs[0].QueryOffset()) + // Setting 0 in config is detected. + require.Equal(t, time.Duration(0), rgs[1].QueryOffset()) + // Default when nothing is set. + require.Equal(t, time.Minute, rgs[2].QueryOffset()) + + m.Stop() +} + func TestCopyState(t *testing.T) { oldGroup := &Group{ rules: []Rule{ diff --git a/rules/origin_test.go b/rules/origin_test.go index ca466301dd..75c83f9a4e 100644 --- a/rules/origin_test.go +++ b/rules/origin_test.go @@ -31,7 +31,7 @@ type unknownRule struct{} func (u unknownRule) Name() string { return "" } func (u unknownRule) Labels() labels.Labels { return labels.EmptyLabels() } -func (u unknownRule) Eval(context.Context, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) { +func (u unknownRule) Eval(context.Context, time.Duration, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) { return nil, nil } func (u unknownRule) String() string { return "" } diff --git a/rules/recording.go b/rules/recording.go index e2b0a31a03..17a75fdd1a 100644 --- a/rules/recording.go +++ b/rules/recording.go @@ -77,10 +77,9 @@ func (rule *RecordingRule) Labels() labels.Labels { } // Eval evaluates the rule and then overrides the metric names and labels accordingly. -func (rule *RecordingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc, _ *url.URL, limit int) (promql.Vector, error) { +func (rule *RecordingRule) Eval(ctx context.Context, queryOffset time.Duration, ts time.Time, query QueryFunc, _ *url.URL, limit int) (promql.Vector, error) { ctx = NewOriginContext(ctx, NewRuleDetail(rule)) - - vector, err := query(ctx, rule.vector.String(), ts) + vector, err := query(ctx, rule.vector.String(), ts.Add(-queryOffset)) if err != nil { return nil, err } diff --git a/rules/recording_test.go b/rules/recording_test.go index 49f37b1ac9..fdddd4e02e 100644 --- a/rules/recording_test.go +++ b/rules/recording_test.go @@ -126,7 +126,7 @@ func TestRuleEval(t *testing.T) { for _, scenario := range ruleEvalTestScenarios { t.Run(scenario.name, func(t *testing.T) { rule := NewRecordingRule("test_rule", scenario.expr, scenario.ruleLabels) - result, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) + result, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) require.NoError(t, err) testutil.RequireEqual(t, scenario.expected, result) }) @@ -144,7 +144,7 @@ func BenchmarkRuleEval(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - _, err := rule.Eval(context.TODO(), ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) + _, err := rule.Eval(context.TODO(), 0, ruleEvaluationTime, EngineQueryFunc(testEngine, storage), nil, 0) if err != nil { require.NoError(b, err) } @@ -173,7 +173,7 @@ func TestRuleEvalDuplicate(t *testing.T) { expr, _ := parser.ParseExpr(`vector(0) or label_replace(vector(0),"test","x","","")`) rule := NewRecordingRule("foo", expr, labels.FromStrings("test", "test")) - _, err := rule.Eval(ctx, now, EngineQueryFunc(engine, storage), nil, 0) + _, err := rule.Eval(ctx, 0, now, EngineQueryFunc(engine, storage), nil, 0) require.Error(t, err) require.EqualError(t, err, "vector contains metrics with the same labelset after applying rule labels") } @@ -215,7 +215,7 @@ func TestRecordingRuleLimit(t *testing.T) { evalTime := time.Unix(0, 0) for _, test := range tests { - switch _, err := rule.Eval(context.TODO(), evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { + switch _, err := rule.Eval(context.TODO(), 0, evalTime, EngineQueryFunc(testEngine, storage), nil, test.limit); { case err != nil: require.EqualError(t, err, test.err) case test.err != "": @@ -243,7 +243,7 @@ func TestRecordingEvalWithOrigin(t *testing.T) { require.NoError(t, err) rule := NewRecordingRule(name, expr, lbs) - _, err = rule.Eval(ctx, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { + _, err = rule.Eval(ctx, 0, now, func(ctx context.Context, qs string, _ time.Time) (promql.Vector, error) { detail = FromOriginContext(ctx) return nil, nil }, nil, 0) diff --git a/rules/rule.go b/rules/rule.go index 59af3e0bba..687c03d000 100644 --- a/rules/rule.go +++ b/rules/rule.go @@ -40,7 +40,7 @@ type Rule interface { // Labels of the rule. Labels() labels.Labels // Eval evaluates the rule, including any associated recording or alerting actions. - Eval(context.Context, time.Time, QueryFunc, *url.URL, int) (promql.Vector, error) + Eval(ctx context.Context, queryOffset time.Duration, evaluationTime time.Time, queryFunc QueryFunc, externalURL *url.URL, limit int) (promql.Vector, error) // String returns a human-readable string representation of the rule. String() string // Query returns the rule query expression.